/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.77 1997/09/01 02:28:32 bde Exp $
 */
/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/vmmeter.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

#ifndef NPENDINGIO
#define NPENDINGIO	10
#endif

static int nswiodone;
int swap_pager_full;
extern int vm_swap_size;
static int no_swap_space = 1;
struct rlisthdr swaplist;

#define MAX_PAGEOUT_CLUSTER 16

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

static struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	vm_object_t spc_object;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];

/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY	(-1)

/* list of completed page cleans */
static struct swpclean swap_pager_done;

/* list of pending page cleans */
static struct swpclean swap_pager_inuse;

/* list of free pager clean structs */
static struct swpclean swap_pager_free;
int swap_pager_free_count;

/* list of "named" anon region objects */
static struct pagerlst swap_pager_object_list;

/* list of "unnamed" anon region objects */
struct pagerlst swap_pager_un_object_list;

#define SWAP_FREE_NEEDED	0x1	/* need a swap block */
#define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
static int swap_pager_needflags;

static struct pagerlst *swp_qs[] = {
	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
};
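/*
 * An spc cycles through the three lists above: it starts on
 * swap_pager_free, sits on swap_pager_inuse while an async pageout is
 * in flight, is moved to swap_pager_done by swap_pager_iodone(), and
 * is finally returned to swap_pager_free by swap_pager_sync().
 */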
/*
 * pagerops for OBJT_SWAP - "swap pager".
 */
static vm_object_t
		swap_pager_alloc __P((void *handle, vm_size_t size,
		    vm_prot_t prot, vm_ooffset_t offset));
static void	swap_pager_dealloc __P((vm_object_t object));
static boolean_t
		swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
		    int *before, int *after));
static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static void	swap_pager_init __P((void));
static void	swap_pager_sync __P((void));

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpages,
	swap_pager_putpages,
	swap_pager_haspage,
	swap_pager_sync
};

static int npendingio = NPENDINGIO;
static int dmmin;
int dmmax;

static int	swap_pager_block_index __P((vm_pindex_t pindex));
static int	swap_pager_block_offset __P((vm_pindex_t pindex));
static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
		    vm_pindex_t pindex, int *valid));
static void	swap_pager_finish __P((swp_clean_t spc));
static void	swap_pager_freepage __P((vm_page_t m));
static void	swap_pager_free_swap __P((vm_object_t object));
static void	swap_pager_freeswapspace __P((vm_object_t object,
		    unsigned int from, unsigned int to));
static int	swap_pager_getswapspace __P((vm_object_t object,
		    unsigned int amount, daddr_t *rtval));
static void	swap_pager_iodone __P((struct buf *));
static void	swap_pager_iodone1 __P((struct buf *bp));
static void	swap_pager_reclaim __P((void));
static void	swap_pager_ridpages __P((vm_page_t *m, int count,
		    int reqpage));
static void	swap_pager_setvalid __P((vm_object_t object,
		    vm_offset_t offset, int valid));
static void	swapsizecheck __P((void));

#define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))
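/*
 * Track whether swap is nearly exhausted.  The thresholds are
 * deliberately asymmetric (set swap_pager_full below 128 pages worth
 * of swap, clear it only above 192) so the flag does not flap when
 * vm_swap_size hovers near the limit.
 */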
static inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of swap space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

static void
swap_pager_init()
{
	TAILQ_INIT(&swap_pager_object_list);
	TAILQ_INIT(&swap_pager_un_object_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);
	swap_pager_free_count = 0;

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = PAGE_SIZE / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}

void
swap_pager_swap_init()
{
	swp_clean_t spc;
	struct buf *bp;
	int i;

	/*
	 * kva's are allocated here so that we don't need to keep doing
	 * kmem_alloc pageables at runtime
	 */
	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
		spc->spc_kva = kmem_alloc_pageable(pager_map,
		    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
		if (!spc->spc_kva) {
			break;
		}
		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
		if (!spc->spc_bp) {
			/* free the same amount we allocated above */
			kmem_free_wakeup(pager_map, spc->spc_kva,
			    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			break;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
	}
}
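/*
 * Size the swap-block array for an object: one sw_blk_t covers
 * SWB_NPAGES pages, so the count rounds up.  For example, if
 * SWB_NPAGES is 8, a 17-page object gets 3 sw_blk_t entries; every
 * per-page slot starts out SWB_EMPTY with its valid bit clear.
 */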
int
swap_pager_swp_alloc(object, wait)
	vm_object_t object;
	int wait;
{
	sw_blk_t swb;
	int nblocks;
	int i, j;

	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
	if (swb == NULL)
		return 1;

	for (i = 0; i < nblocks; i++) {
		swb[i].swb_valid = 0;
		swb[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swb[i].swb_block[j] = SWB_EMPTY;
	}

	object->un_pager.swp.swp_nblocks = nblocks;
	object->un_pager.swp.swp_allocsize = 0;
	object->un_pager.swp.swp_blocks = swb;
	object->un_pager.swp.swp_poip = 0;

	if (object->handle != NULL) {
		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
	} else {
		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
	}

	return 0;
}

/*
 * Allocate an object and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
static vm_object_t
swap_pager_alloc(void *handle, vm_size_t size, vm_prot_t prot,
    vm_ooffset_t offset)
{
	vm_object_t object;

	/*
	 * If this is a "named" anonymous region, look it up and use the
	 * object if it exists, otherwise allocate a new one.
	 */
	if (handle) {
		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
		if (object != NULL) {
			vm_object_reference(object);
		} else {
			/*
			 * XXX - there is a race condition here.  Two processes
			 * can request the same named object simultaneously,
			 * and if one blocks for memory, the result is a
			 * disaster.  Probably quite rare, but it is yet
			 * another reason to just rip support of "named
			 * anonymous regions" out altogether.
			 */
			object = vm_object_allocate(OBJT_SWAP,
			    OFF_TO_IDX(offset + PAGE_MASK) + size);
			object->handle = handle;
			(void) swap_pager_swp_alloc(object, M_WAITOK);
		}
	} else {
		object = vm_object_allocate(OBJT_SWAP,
		    OFF_TO_IDX(offset + PAGE_MASK) + size);
		(void) swap_pager_swp_alloc(object, M_WAITOK);
	}

	return (object);
}

/*
 * Returns the disk block associated with a pager and offset;
 * additionally, as a side effect, returns a flag indicating
 * whether the block has been written (is valid).
 */
inline static daddr_t *
swap_pager_diskaddr(object, pindex, valid)
	vm_object_t object;
	vm_pindex_t pindex;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = pindex / SWB_NPAGES;
	if ((ix >= object->un_pager.swp.swp_nblocks) ||
	    (pindex >= object->size)) {
		return (NULL);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(object, offset, valid)
	vm_object_t object;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks)
		return;

	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = offset % SWB_NPAGES;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}
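/*
 * Note on swb_valid semantics: a set bit means the swap block actually
 * holds a copy of the page.  An allocated block whose valid bit is
 * clear has space reserved but nothing written to it yet; such blocks
 * are never read (see swap_pager_haspage) and are what
 * swap_pager_reclaim() harvests.
 */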
/*
 * This routine allocates swap space with a fragmentation
 * minimization policy.
 */
static int
swap_pager_getswapspace(object, amount, rtval)
	vm_object_t object;
	unsigned int amount;
	daddr_t *rtval;
{
	unsigned location;

	vm_swap_size -= amount;
	if (!rlist_alloc(&swaplist, amount, &location)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		object->un_pager.swp.swp_allocsize += amount;
		*rtval = location;
		return 1;
	}
}

/*
 * This routine frees swap space with a fragmentation
 * minimization policy.
 */
static void
swap_pager_freeswapspace(object, from, to)
	vm_object_t object;
	unsigned int from;
	unsigned int to;
{
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to - from) + 1;
	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
	swapsizecheck();
}

/*
 * This routine frees swap blocks from a specified pager.
 */
void
swap_pager_freespace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splbio();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

/*
 * Same as freespace, but don't free the blocks; just force a DMZ
 * (demand zero) next time by clearing the valid bits.
 */
void
swap_pager_dmzspace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splbio();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
		}
	}
	splx(s);
}
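/*
 * swap_pager_free_swap releases every remaining block of an object,
 * coalescing adjacent blocks into runs so that rlist_free is invoked
 * as few times as possible.  E.g. with 4K pages on a 512-byte-block
 * device (btodb(PAGE_SIZE) == 8), pages at disk blocks 100, 108 and
 * 116 are freed as the single run [100, 123].
 */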
static void
swap_pager_free_swap(object)
	vm_object_t object;
{
	register int i, j;
	register sw_blk_t swb;
	int first_block = 0, block_count = 0;
	int s;

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, swb = object->un_pager.swp.swp_blocks;
	    i < object->un_pager.swp.swp_nblocks; i++, swb++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (swb->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included into the current run
				 */
				} else if (swb->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * terminate the previous run, and start a new one
				 */
				} else {
					swap_pager_freeswapspace(object, first_block,
					    (unsigned) first_block + block_count - 1);
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	if (block_count) {
		swap_pager_freeswapspace(object, first_block,
		    (unsigned) first_block + block_count - 1);
	}
	splx(s);
}


/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 128

static void
swap_pager_reclaim()
{
	vm_object_t object;
	int i, j, k;
	int s;
	int reclaimcount;
	static struct {
		int address;
		vm_object_t object;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * Allow only one process at a time inside swap_pager_reclaim:
	 * the reclaims[] array above is static, so a second entrant
	 * would trample the first one's state.
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep(&in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		object = TAILQ_FIRST(swp_qs[k]);
		while (object && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			if ((object->flags & OBJ_DEAD) == 0 &&
			    (object->paging_in_progress == 0)) {
				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];

					if (swb->swb_locked)
						continue;
					for (j = 0; j < SWB_NPAGES; j++) {
						if (swb->swb_block[j] != SWB_EMPTY &&
						    (swb->swb_valid & (1 << j)) == 0) {
							reclaims[reclaimcount].address = swb->swb_block[j];
							reclaims[reclaimcount++].object = object;
							swb->swb_block[j] = SWB_EMPTY;
							if (reclaimcount >= MAXRECLAIM)
								goto rfinished;
						}
					}
				}
			}
			object = TAILQ_NEXT(object, pager_object_list);
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].object,
		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup(&in_reclaim);
}


/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager.
 */
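/*
 * For each destination index i in [0, dstobject->size), the source
 * page consulted is (i + offset + srcoffset) and the destination page
 * is (i + dstoffset).  A valid source block is moved, not copied on
 * disk: its disk address is simply handed to the destination.
 */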
void
swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
	vm_object_t srcobject;
	vm_pindex_t srcoffset;
	vm_object_t dstobject;
	vm_pindex_t dstoffset;
	vm_pindex_t offset;
{
	vm_pindex_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	origsize = srcobject->un_pager.swp.swp_allocsize;

	/*
	 * remove the source object from the swap_pager internal queue
	 */
	if (srcobject->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
	}

	s = splbio();
	while (srcobject->un_pager.swp.swp_poip) {
		tsleep(srcobject, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	swap_pager_sync();

	s = splbio();
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstobject->size; i += 1) {
		int srcvalid, dstvalid;
		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
		    &srcvalid);
		daddr_t *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
				    &dstvalid);
				/*
				 * If the dest has an allocated but invalid
				 * block, free it so the source block can be
				 * moved into its place.  (If the dest block
				 * is already valid, the source block is
				 * simply deallocated below, without copying.)
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstobject, *dstaddrp,
					    *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstobject, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcobject, *srcaddrp,
				    *srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(srcobject);

	if (srcobject->un_pager.swp.swp_allocsize) {
		printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
		    srcobject->un_pager.swp.swp_allocsize, origsize);
	}

	free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
	srcobject->un_pager.swp.swp_blocks = NULL;

	return;
}

static void
swap_pager_dealloc(object)
	vm_object_t object;
{
	int s;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	if (object->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
	}

	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */
	s = splbio();
	while (object->un_pager.swp.swp_poip) {
		tsleep(object, PVM, "swpout", 0);
	}
	splx(s);

	swap_pager_sync();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(object);

	if (object->un_pager.swp.swp_allocsize) {
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
		    object->un_pager.swp.swp_allocsize);
	}
	/*
	 * Free swap management resources
	 */
	free(object->un_pager.swp.swp_blocks, M_VMPGDATA);
	object->un_pager.swp.swp_blocks = NULL;
}

static inline int
swap_pager_block_index(pindex)
	vm_pindex_t pindex;
{
	return (pindex / SWB_NPAGES);
}

static inline int
swap_pager_block_offset(pindex)
	vm_pindex_t pindex;
{
	return (pindex % SWB_NPAGES);
}
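/*
 * Example of the pindex mapping implemented by the two helpers above:
 * if SWB_NPAGES is 8, page index 19 lives in swp_blocks[2], slot 3,
 * and its written state is bit 3 of that block's swb_valid mask.
 */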
/*
 * swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
swap_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	register sw_blk_t swb;
	int ix;

	if (before != NULL)
		*before = 0;
	if (after != NULL)
		*after = 0;
	ix = pindex / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;

	if (swb->swb_block[ix] != SWB_EMPTY) {

		if (swb->swb_valid & (1 << ix)) {
			int tix;

			if (before) {
				for (tix = ix - 1; tix >= 0; --tix) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] +
					    (ix - tix) * (PAGE_SIZE / DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*before)++;
				}
			}

			if (after) {
				for (tix = ix + 1; tix < SWB_NPAGES; tix++) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] -
					    (tix - ix) * (PAGE_SIZE / DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*after)++;
				}
			}

			return TRUE;
		}
	}
	return (FALSE);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  This is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

/*
 * swap_pager_iodone1 is the completion routine for reads and
 * synchronous writes (async writes complete via swap_pager_iodone).
 */
static void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup(bp);
}

static int
swap_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	int rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_pindex_t paging_offset;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = OFF_TO_IDX(object->paging_offset);
	sequential = (m[reqpage]->pindex == (object->last_read + 1));

	for (i = 0; i < count; i++) {
		vm_pindex_t fidx = m[i]->pindex + paging_offset;
		int ix = swap_pager_block_index(fidx);

		if (ix >= object->un_pager.swp.swp_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &object->un_pager.swp.swp_blocks[ix];
		off[i] = swap_pager_block_offset(fidx);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that the required input page is existent and valid */
	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	reqdskregion = reqaddr[reqpage] / dmmax;
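	/*
	 * dmmax divides swap into fixed-size regions; the cluster checks
	 * below refuse to extend a transfer across a region boundary, so
	 * a single strategy call never spans two regions.
	 */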
	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	++swb[reqpage]->swb_locked;

	/*
	 * At this point: "m" is a pointer to the array of vm_page_t's for
	 * paging I/O, "count" is the number of vm_page_t entries represented
	 * by "m", "object" is the vm_object_t for I/O, and "reqpage" is the
	 * index into "m" for the page actually faulted.
	 */
	spc = NULL;
	if ((count == 1) && ((spc = TAILQ_FIRST(&swap_pager_free)) != NULL)) {
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		swap_pager_free_count--;
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_vnbufs.le_next = NOLIST;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		if (tsleep(bp, PVM, "swread", hz * 20)) {
			printf("swap_pager: indefinite wait buffer: device: %#x, blkno: %d, size: %d\n",
			    bp->b_dev, bp->b_blkno, bp->b_bcount);
		}
	}

	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}
	/*
	 * relpbuf does this, but we maintain our own buffer list also...
	 */
	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);
	swb[reqpage]->swb_locked--;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	if (spc) {
		m[reqpage]->object->last_read = m[reqpage]->pindex;
		if (bp->b_flags & B_WANTED)
			wakeup(bp);
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
		if (rv == VM_PAGER_OK) {
#if notneeded
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
#endif
			m[reqpage]->valid = VM_PAGE_BITS_ALL;
			m[reqpage]->dirty = 0;
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
#if notneeded
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
#endif
				m[i]->dirty = 0;
				m[i]->flags &= ~PG_ZERO;
				if (i != reqpage) {
					/*
					 * Whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere (it already is in the
					 * object).  After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for
					 * this page we now tell them that it
					 * is ok to use
					 */
					m[i]->valid = VM_PAGE_BITS_ALL;
					PAGE_WAKEUP(m[i]);
				}
			}

			m[reqpage]->object->last_read = m[count - 1]->pindex;
			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever multiple pages are brought in.  We
			 * must set the dirty bits so that the page contents
			 * will be preserved.
			 */
			if (SWAPLOW) {
				for (i = 0; i < count; i++) {
					m[i]->dirty = VM_PAGE_BITS_ALL;
				}
				swap_pager_freespace(object, m[0]->pindex + paging_offset, count);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	int rv;
	vm_offset_t kva, off, fidx;
	swp_clean_t spc;
	vm_pindex_t paging_pindex;
	int reqaddr[count];
	int failed;

	if (vm_swap_size)
		no_swap_space = 0;
	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}
	spc = NULL;

	object = m[0]->object;
	paging_pindex = OFF_TO_IDX(object->paging_offset);

	failed = 0;
	for (j = 0; j < count; j++) {
		fidx = m[j]->pindex + paging_pindex;
		ix = swap_pager_block_index(fidx);
		swb[j] = 0;
		if (ix >= object->un_pager.swp.swp_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &object->un_pager.swp.swp_blocks[ix];
		swb[j]->swb_locked++;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(fidx);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			daddr_t blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * This code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks for
			 * small objects).
			 */
			if ((off == 0) && ((fidx + ntoget) > object->size)) {
				ntoget = object->size - fidx;
			}
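			/*
			 * Allocation protocol: try for the whole
			 * ntoget-page cluster first; if that fails (or the
			 * pager is already flagged full), fall back to a
			 * single page; if even that fails, run
			 * swap_pager_reclaim() once and retry before giving
			 * up with VM_PAGER_AGAIN.
			 */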
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
			    &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed ||
		    (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}
	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY) {
			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
			    m[i]->pindex, i);
		}
	}

	/*
	 * For synchronous writes, we clean up all completed async pageouts.
	 */
	if (sync == TRUE) {
		swap_pager_sync();
	}
	kva = 0;

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free_count <= 3) {
		s = splbio();
		if (curproc == pageproc) {
	retryfree:
			/*
			 * pageout daemon needs a swap control block
			 */
			swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT | SWAP_FREE_NEEDED;
			/*
			 * If it does not get one within a short time, then
			 * there is a potential deadlock, so we go on trying
			 * to free pages.  It is important to block here as
			 * opposed to returning, thereby allowing the pageout
			 * daemon to continue.  It is likely that the pageout
			 * daemon will start suboptimally reclaiming vnode
			 * backed pages if we don't block.  Since the I/O
			 * subsystem is probably already fully utilized, we
			 * might as well wait.
			 */
			if (tsleep(&swap_pager_free, PVM, "swpfre", hz / 5)) {
				swap_pager_sync();
				if (swap_pager_free_count <= 3) {
					splx(s);
					return VM_PAGER_AGAIN;
				}
			} else {
				/*
				 * we make sure that pageouts aren't taking up
				 * all of the free swap control blocks.
				 */
				swap_pager_sync();
				if (swap_pager_free_count <= 3) {
					goto retryfree;
				}
			}
		} else {
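			/*
			 * Ordinary processes just kick the pageout daemon
			 * and sleep until an spc is freed.
			 */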
			pagedaemon_wakeup();
			while (swap_pager_free_count <= 3) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep(&swap_pager_free, PVM, "swpfre", 0);
				pagedaemon_wakeup();
			}
		}
		splx(s);
	}
	spc = TAILQ_FIRST(&swap_pager_free);
	if (spc == NULL)
		panic("swap_pager_putpages: free queue is empty, %d expected\n", swap_pager_free_count);
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
	swap_pager_free_count--;

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		fidx = m[i]->pindex + paging_pindex;
		off = swap_pager_block_offset(fidx);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		swb[i]->swb_locked--;
	}

	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;
	bp->b_vnbufs.le_next = NOLIST;

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;
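	/*
	 * Completion is split by mode: async writes complete through
	 * swap_pager_iodone(), which queues the spc on the done list for
	 * swap_pager_sync() to finish later; sync writes complete through
	 * swap_pager_iodone1(), which just marks the buffer B_DONE and
	 * wakes the sleep below.
	 */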
	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
	 */
	s = splbio();
	if (sync == FALSE) {
		spc->spc_flags = 0;
		spc->spc_object = object;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		object->un_pager.swp.swp_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		object->un_pager.swp.swp_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if (sync == FALSE) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_sync();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	object->un_pager.swp.swp_poip--;
	if (object->un_pager.swp.swp_poip == 0)
		wakeup(object);

	if (bp->b_vp)
		pbrelvp(bp);
	if (bp->b_flags & B_WANTED)
		wakeup(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization: if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->queue != PQ_ACTIVE) &&
				    ((m[i]->flags & (PG_WANTED | PG_REFERENCED)) ||
				    pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	swap_pager_free_count++;
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		wakeup(&swap_pager_free);
	}
	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
		pagedaemon_wakeup();
	swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
	return (rv);
}
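/*
 * swap_pager_sync drains the done list: for each completed async
 * pageout it unmaps the KVA, finishes the pages via swap_pager_finish,
 * and returns the spc to the free list, waking any waiters.
 */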
static void
swap_pager_sync()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (TAILQ_FIRST(&swap_pager_done) == NULL)
		return;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done at splbio()
		 * to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = TAILQ_FIRST(&swap_pager_done)) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in the scan
		 * but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
		splx(s);
	}

	return;
}

static void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	object->paging_in_progress -= spc->spc_count;
	if ((object->paging_in_progress == 0) &&
	    (object->flags & OBJ_PIPWNT)) {
		object->flags &= ~OBJ_PIPWNT;
		wakeup(object);
	}

	/*
	 * If no error, mark as clean and inform the pmap system.  If error,
	 * mark as dirty so we will try again.  (XXX could get stuck doing
	 * this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->dirty = 0;
			if ((spc->spc_m[i]->queue != PQ_ACTIVE) &&
			    ((spc->spc_m[i]->flags & PG_WANTED) ||
			    pmap_ts_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
				vm_page_activate(spc->spc_m[i]);
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone - completion routine for async pageouts
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
	}

	if (bp->b_vp)
		pbrelvp(bp);

	/*
	if (bp->b_flags & B_WANTED)
	*/
	wakeup(bp);

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
		wakeup(spc->spc_object);
	}
	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    TAILQ_FIRST(&swap_pager_inuse) == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup(&swap_pager_free);
	}

	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
		pagedaemon_wakeup();
	}

	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) ||
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
		pagedaemon_wakeup();
	}
	splx(s);
}