/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *  from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.105 1998/12/29 22:53:51 dt Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/vmmeter.h>
#include <sys/rlist.h>

#ifndef MAX_PAGEOUT_CLUSTER
#define MAX_PAGEOUT_CLUSTER 16
#endif

#ifndef NPENDINGIO
#define NPENDINGIO	16
#endif

#define SWB_NPAGES MAX_PAGEOUT_CLUSTER

#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

static int nswiodone;
int swap_pager_full;
extern int vm_swap_size;
static int no_swap_space = 1;
static int max_pageout_cluster;
struct rlisthdr swaplist;

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

static struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	vm_object_t spc_object;
	vm_offset_t spc_kva;
	int spc_first;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];


/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY (-1)

/* list of completed page cleans */
static struct swpclean swap_pager_done;

/* list of pending page cleans */
static struct swpclean swap_pager_inuse;

/* list of free pager clean structs */
static struct swpclean swap_pager_free;
static int swap_pager_free_count;
static int swap_pager_free_pending;

/* list of "named" anon region objects */
static struct pagerlst swap_pager_object_list;

/* list of "unnamed" anon region objects */
struct pagerlst swap_pager_un_object_list;

#define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
#define	SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
static int swap_pager_needflags;

static struct pagerlst *swp_qs[] = {
	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
};

/*
 * pagerops for OBJT_SWAP - "swap pager".
 */
static vm_object_t
	swap_pager_alloc __P((void *handle, vm_ooffset_t size,
			      vm_prot_t prot, vm_ooffset_t offset));
static void	swap_pager_dealloc __P((vm_object_t object));
static boolean_t
	swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
				int *before, int *after));
static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static void	swap_pager_init __P((void));
static void	spc_free __P((swp_clean_t));

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpages,
	swap_pager_putpages,
	swap_pager_haspage,
	swap_pager_sync
};

static int npendingio;
static int dmmin;
int dmmax;

static int swap_pager_block_index __P((vm_pindex_t pindex));
static int swap_pager_block_offset __P((vm_pindex_t pindex));
static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
					 vm_pindex_t pindex, int *valid));
static void swap_pager_finish __P((swp_clean_t spc));
static void swap_pager_free_swap __P((vm_object_t object));
static void swap_pager_freeswapspace __P((vm_object_t object,
					  unsigned int from,
					  unsigned int to));
static int swap_pager_getswapspace __P((vm_object_t object,
					unsigned int amount,
					daddr_t *rtval));
static void swap_pager_iodone __P((struct buf *));
static void swap_pager_iodone1 __P((struct buf *bp));
static void swap_pager_reclaim __P((void));
static void swap_pager_ridpages __P((vm_page_t *m, int count,
				     int reqpage));
static void swap_pager_setvalid __P((vm_object_t object,
				     vm_offset_t offset, int valid));
static __inline void swapsizecheck __P((void));

#define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))

static __inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of swap space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

static void
swap_pager_init()
{
	int maxsafepending;

	TAILQ_INIT(&swap_pager_object_list);
	TAILQ_INIT(&swap_pager_un_object_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);
	swap_pager_free_count = 0;

	/*
	 * Calculate the swap allocation constants.
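	 * dmmin is one page expressed in DEV_BSIZE blocks; dmmax bounds a
	 * swap "region": the clustering loops in swap_pager_getpages() and
	 * swap_pager_putpages() never issue a single I/O that crosses a
	 * dmmax boundary.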
	 */
	dmmin = PAGE_SIZE / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;

	maxsafepending = cnt.v_free_min - cnt.v_free_reserved;
	npendingio = NPENDINGIO;
	max_pageout_cluster = MAX_PAGEOUT_CLUSTER;

	if ((2 * NPENDINGIO * MAX_PAGEOUT_CLUSTER) > maxsafepending) {
		max_pageout_cluster = MAX_PAGEOUT_CLUSTER / 2;
		npendingio = maxsafepending / (2 * max_pageout_cluster);
		if (npendingio < 2)
			npendingio = 2;
	}
}

void
swap_pager_swap_init()
{
	swp_clean_t spc;
	struct buf *bp;
	int i;

	/*
	 * KVAs are allocated here so that we don't need to keep doing
	 * kmem_alloc pageables at runtime.
	 */
	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
		spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * max_pageout_cluster);
		if (!spc->spc_kva) {
			break;
		}
		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
		if (!spc->spc_bp) {
			kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
			break;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
	}
}

int
swap_pager_swp_alloc(object, wait)
	vm_object_t object;
	int wait;
{
	sw_blk_t swb;
	int nblocks;
	int i, j;

	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
	if (swb == NULL)
		return 1;

	for (i = 0; i < nblocks; i++) {
		swb[i].swb_valid = 0;
		swb[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swb[i].swb_block[j] = SWB_EMPTY;
	}

	object->un_pager.swp.swp_nblocks = nblocks;
	object->un_pager.swp.swp_allocsize = 0;
	object->un_pager.swp.swp_blocks = swb;
	object->un_pager.swp.swp_poip = 0;

	if (object->handle != NULL) {
		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
	} else {
		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
	}

	return 0;
}

/*
 * Allocate an object and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
static vm_object_t
swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t offset)
{
	vm_object_t object;

	/*
	 * If this is a "named" anonymous region, look it up and use the
	 * object if it exists, otherwise allocate a new one.
	 */
	if (handle) {
		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
		if (object != NULL) {
			vm_object_reference(object);
		} else {
			/*
			 * XXX - there is a race condition here.  Two processes
			 * can request the same named object simultaneously,
			 * and if one blocks for memory, the result is a disaster.
			 * Probably quite rare, but it is yet another reason to just
			 * rip support of "named anonymous regions" out altogether.
			 */
			object = vm_object_allocate(OBJT_SWAP,
			    OFF_TO_IDX(offset + PAGE_MASK + size));
			object->handle = handle;
			(void) swap_pager_swp_alloc(object, M_WAITOK);
		}
	} else {
		object = vm_object_allocate(OBJT_SWAP,
		    OFF_TO_IDX(offset + PAGE_MASK + size));
		(void) swap_pager_swp_alloc(object, M_WAITOK);
	}

	return (object);
}

/*
 * returns disk block associated with pager and offset
 * additionally, as a side effect returns a flag indicating
 * if the block has been written
 */

static __inline daddr_t *
swap_pager_diskaddr(object, pindex, valid)
	vm_object_t object;
	vm_pindex_t pindex;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = pindex / SWB_NPAGES;
	if ((ix >= object->un_pager.swp.swp_nblocks) ||
	    (pindex >= object->size)) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(object, offset, valid)
	vm_object_t object;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks)
		return;

	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = offset % SWB_NPAGES;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
static int
swap_pager_getswapspace(object, amount, rtval)
	vm_object_t object;
	unsigned int amount;
	daddr_t *rtval;
{
	unsigned location;

	vm_swap_size -= amount;

	if (!rlist_alloc(&swaplist, amount, &location)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		object->un_pager.swp.swp_allocsize += amount;
		*rtval = location;
		return 1;
	}
}

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
static void
swap_pager_freeswapspace(object, from, to)
	vm_object_t object;
	unsigned int from;
	unsigned int to;
{
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to - from) + 1;
	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
	swapsizecheck();
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
swap_pager_freespace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splvm();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

/*
 * same as freespace, but don't free, just force a DMZ next time
 */
void
swap_pager_dmzspace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splvm();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
		}
	}
	splx(s);
}

static void
swap_pager_free_swap(object)
	vm_object_t object;
{
	register int i, j;
	register sw_blk_t swb;
	int first_block = 0, block_count = 0;
	int s;

	/*
	 * Free left over swap blocks
	 */
	swb = object->un_pager.swp.swp_blocks;
	if (swb == NULL) {
		return;
	}

	s = splvm();
	for (i = 0; i < object->un_pager.swp.swp_nblocks; i++, swb++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (swb->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included into the current run
				 */
				} else if (swb->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * terminate the previous run, and start a new one
				 */
				} else {
					swap_pager_freeswapspace(object, first_block,
					    (unsigned) first_block + block_count - 1);
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	if (block_count) {
		swap_pager_freeswapspace(object, first_block,
		    (unsigned) first_block + block_count - 1);
	}
	splx(s);
}


/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to. It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 128

static void
swap_pager_reclaim()
{
	vm_object_t object;
	int i, j, k;
	int s;
	int reclaimcount;
	static struct {
		int address;
		vm_object_t object;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splvm();
	if (in_reclaim) {
		tsleep(&in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		object = TAILQ_FIRST(swp_qs[k]);
		while (object && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			if ((object->flags & OBJ_DEAD) == 0 &&
			    (object->paging_in_progress == 0)) {
				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];

					if (swb->swb_locked)
						continue;
					for (j = 0; j < SWB_NPAGES; j++) {
						if (swb->swb_block[j] != SWB_EMPTY &&
						    (swb->swb_valid & (1 << j)) == 0) {
							reclaims[reclaimcount].address = swb->swb_block[j];
							reclaims[reclaimcount++].object = object;
							swb->swb_block[j] = SWB_EMPTY;
							if (reclaimcount >= MAXRECLAIM)
								goto rfinished;
						}
					}
				}
			}
			object = TAILQ_NEXT(object, pager_object_list);
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].object,
		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup(&in_reclaim);
}


/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */

void
swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset,
    offset, destroysource)
	vm_object_t srcobject;
	vm_pindex_t srcoffset;
	vm_object_t dstobject;
	vm_pindex_t dstoffset;
	vm_pindex_t offset;
	int destroysource;
{
	vm_pindex_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	origsize = srcobject->un_pager.swp.swp_allocsize;

	/*
	 * remove the source object from the swap_pager internal queue
	 */
	if (destroysource) {
		if (srcobject->handle == NULL) {
			TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
		} else {
			TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
		}
	}

	s = splvm();
	while (srcobject->un_pager.swp.swp_poip) {
		tsleep(srcobject, PVM, "spgout", 0);
	}

	/*
	 * clean all of the pages that are currently active and finished
	 */
	if (swap_pager_free_pending)
		swap_pager_sync();

	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstobject->size; i += 1) {
		int srcvalid, dstvalid;
		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject,
		    i + offset + srcoffset, &srcvalid);
		daddr_t *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
				    &dstvalid);
				/*
				 * if the dest already has a valid block,
				 * deallocate the source block without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstobject, *dstaddrp,
					    *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstobject, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcobject, *srcaddrp,
				    *srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	if (destroysource) {
		swap_pager_free_swap(srcobject);

		if (srcobject->un_pager.swp.swp_allocsize) {
			printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
			    srcobject->un_pager.swp.swp_allocsize, origsize);
		}

		free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
		srcobject->un_pager.swp.swp_blocks = NULL;
	}
	return;
}

static void
swap_pager_dealloc(object)
	vm_object_t object;
{
	int s;
	sw_blk_t swb;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	if (object->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
	}

	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */

	s = splvm();
	while (object->un_pager.swp.swp_poip) {
		tsleep(object, PVM, "swpout", 0);
	}
	splx(s);

	if (swap_pager_free_pending)
		swap_pager_sync();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(object);

	if (object->un_pager.swp.swp_allocsize) {
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
		    object->un_pager.swp.swp_allocsize);
	}
	swb = object->un_pager.swp.swp_blocks;
	if (swb) {
		/*
		 * Free swap management resources
		 */
		free(swb, M_VMPGDATA);
		object->un_pager.swp.swp_blocks = NULL;
	}
}

static __inline int
swap_pager_block_index(pindex)
	vm_pindex_t pindex;
{
	return (pindex / SWB_NPAGES);
}

static __inline int
swap_pager_block_offset(pindex)
	vm_pindex_t pindex;
{
	return (pindex % SWB_NPAGES);
}

/*
 * swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
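 * If "before"/"after" are non-NULL they are filled in with how many of
 * the immediately preceding/following pages (within the same swap
 * block) are also valid and contiguous on the swap device, so the
 * caller can cluster the read.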
 */
static boolean_t
swap_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	register sw_blk_t swb;
	int ix;

	if (before != NULL)
		*before = 0;
	if (after != NULL)
		*after = 0;
	ix = pindex / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;

	if (swb->swb_block[ix] != SWB_EMPTY) {

		if (swb->swb_valid & (1 << ix)) {
			int tix;

			if (before) {
				for (tix = ix - 1; tix >= 0; --tix) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] +
					    (ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*before)++;
				}
			}

			if (after) {
				for (tix = ix + 1; tix < SWB_NPAGES; tix++) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] -
					    (tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*after)++;
				}
			}

			return TRUE;
		}
	}
	return (FALSE);
}

/*
 * Wakeup based upon spc state
 */
static void
spc_wakeup(void)
{
	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
		wakeup(&swap_pager_needflags);
	} else if ((swap_pager_needflags & SWAP_FREE_NEEDED) &&
	    swap_pager_free_count >= ((2 * npendingio) / 3)) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup(&swap_pager_free);
	}
}

/*
 * Free an spc structure
 */
static void
spc_free(spc)
	swp_clean_t spc;
{
	spc->spc_flags = 0;
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	swap_pager_free_count++;
	if (swap_pager_needflags) {
		spc_wakeup();
	}
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  This is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++) {
		if (i != reqpage) {
			vm_page_free(m[i]);
		}
	}
}

/*
 * swap_pager_iodone1 is the completion routine for both reads and async writes
 */
static void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup(bp);
}

static int
swap_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	vm_pindex_t paging_offset;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = OFF_TO_IDX(object->paging_offset);
	sequential = (m[reqpage]->pindex == (object->last_read + 1));

	for (i = 0; i < count; i++) {
		vm_pindex_t fidx = m[i]->pindex + paging_offset;
		int ix = swap_pager_block_index(fidx);

		if (ix >= object->un_pager.swp.swp_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				vm_page_free(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &object->un_pager.swp.swp_blocks[ix];
		off[i] = swap_pager_block_offset(fidx);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request exists */

	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			vm_page_free(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			vm_page_free(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	++swb[reqpage]->swb_locked;

	/*
	 * at this point: "m" is a pointer to the array of vm_page_t's for
	 * paging I/O, "count" is the number of vm_page_t entries represented
	 * by "m", "object" is the vm_object_t for the I/O, and "reqpage" is
	 * the index into "m" for the page actually faulted on.
	 */

	/*
	 * Get a swap buffer header to perform the IO
	 */
	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp->b_vp, bp);

	/*
	 * wait for the sync I/O to complete
	 */
	s = splvm();
	while ((bp->b_flags & B_DONE) == 0) {
		if (tsleep(bp, PVM, "swread", hz*20)) {
			printf(
			    "swap_pager: indefinite wait buffer: device: %#lx, blkno: %ld, size: %ld\n",
			    (u_long)bp->b_dev, (long)bp->b_blkno,
			    (long)bp->b_bcount);
		}
	}

	if (bp->b_flags & B_ERROR) {
		printf(
		    "swap_pager: I/O error - pagein failed; blkno %ld, size %ld, error %d\n",
		    (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	splx(s);
	swb[reqpage]->swb_locked--;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * release the physical I/O buffer
	 */
	relpbuf(bp);
	/*
	 * finish up input if everything is ok
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			m[i]->dirty = 0;
			vm_page_flag_clear(m[i], PG_ZERO);
			if (i != reqpage) {
				/*
				 * whether or not to leave the page
				 * activated is up in the air, but we
				 * should put the page on a page queue
				 * somewhere. (it already is in the
				 * object). After some empirical
				 * results, it is best to deactivate
				 * the readahead pages.
				 */
				vm_page_deactivate(m[i]);

				/*
				 * just in case someone was asking for
				 * this page we now tell them that it
				 * is ok to use
				 */
				m[i]->valid = VM_PAGE_BITS_ALL;
				vm_page_wakeup(m[i]);
			}
		}

		m[reqpage]->object->last_read = m[count-1]->pindex;
	} else {
		swap_pager_ridpages(m, count, reqpage);
	}
	return (rv);
}

int
swap_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix, firstidx, lastidx;
	boolean_t rv;
	vm_offset_t kva, off, fidx;
	swp_clean_t spc;
	vm_pindex_t paging_pindex;
	int reqaddr[count];
	int failed;

	if (vm_swap_size)
		no_swap_space = 0;

	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}

	if (curproc != pageproc)
		sync = TRUE;

	object = m[0]->object;
	paging_pindex = OFF_TO_IDX(object->paging_offset);

	failed = 0;
	for (j = 0; j < count; j++) {
		fidx = m[j]->pindex + paging_pindex;
		ix = swap_pager_block_index(fidx);
		swb[j] = 0;
		if (ix >= object->un_pager.swp.swp_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &object->un_pager.swp.swp_blocks[ix];
		swb[j]->swb_locked++;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(fidx);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			daddr_t blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splvm();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks for
			 * small objects)
			 */
			if ((off == 0) && ((fidx + ntoget) > object->size)) {
				ntoget = object->size - fidx;
			}
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
				&blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
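				 * (swap_pager_reclaim() walks all swap objects
				 * and releases blocks that were allocated but
				 * never marked valid, i.e. never written.)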
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed ||
		    (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	ix = 0;
	firstidx = -1;
	for (i = 0; i < count; i++) {
		if (rtvals[i] == VM_PAGER_OK) {
			ix++;
			if (firstidx == -1) {
				firstidx = i;
			}
		} else if (firstidx >= 0) {
			break;
		}
	}

	if (firstidx == -1) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
		return VM_PAGER_AGAIN;
	}

	lastidx = firstidx + ix;

	if (ix > max_pageout_cluster) {
		for (i = firstidx + max_pageout_cluster; i < lastidx; i++) {
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
		ix = max_pageout_cluster;
		lastidx = firstidx + ix;
	}

	for (i = 0; i < firstidx; i++) {
		if (swb[i])
			swb[i]->swb_locked--;
	}

	for (i = lastidx; i < count; i++) {
		if (swb[i])
			swb[i]->swb_locked--;
	}

#if defined(INVARIANTS)
	for (i = firstidx; i < lastidx; i++) {
		if (reqaddr[i] == SWB_EMPTY) {
			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
			    m[i]->pindex, i);
		}
	}
#endif

	/*
	 * Clean up all completed async pageouts.
	 */
	if (swap_pager_free_pending)
		swap_pager_sync();

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (curproc == pageproc) {
		if (swap_pager_free_count == 0) {
			s = splvm();
			while (swap_pager_free_count == 0) {
				swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
				/*
				 * if it does not get one within a short time, then
				 * there is a potential deadlock, so we go on trying
				 * to free pages.  It is important to block here as
				 * opposed to returning, thereby allowing the pageout
				 * daemon to continue.  It is likely that the pageout
				 * daemon will start suboptimally reclaiming
				 * vnode-backed pages if we don't block.  Since the
				 * I/O subsystem is probably already fully utilized,
				 * might as well wait.
				 */
				if (tsleep(&swap_pager_needflags, PVM-1, "swpfre", hz/2)) {
					if (swap_pager_free_pending)
						swap_pager_sync();
					if (swap_pager_free_count == 0) {
						for (i = firstidx; i < lastidx; i++) {
							rtvals[i] = VM_PAGER_AGAIN;
						}
						splx(s);
						return VM_PAGER_AGAIN;
					}
				} else {
					swap_pager_sync();
				}
			}
			splx(s);
		}

		spc = TAILQ_FIRST(&swap_pager_free);
		KASSERT(spc,
		    ("swap_pager_putpages: free queue is empty, %d expected\n",
		    swap_pager_free_count));
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		swap_pager_free_count--;

		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_xflags = 0;
		bp->b_data = (caddr_t) kva;
	} else {
		spc = NULL;
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
		bp->b_spc = NULL;
	}

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, &m[firstidx], ix);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = firstidx; i < lastidx; i++) {
		fidx = m[i]->pindex + paging_pindex;
		off = swap_pager_block_offset(fidx);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		swb[i]->swb_locked--;
	}

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = reqaddr[firstidx];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * ix;
	bp->b_bufsize = PAGE_SIZE * ix;

	s = splvm();
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
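	 * The spc path is only available when called from the pageout
	 * daemon; other callers go through the pbuf obtained from getpbuf()
	 * above and complete via swap_pager_iodone1().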
	 */
	object->un_pager.swp.swp_poip++;

	if (spc) {
		spc->spc_flags = 0;
		spc->spc_object = object;
		bp->b_npages = ix;
		for (i = firstidx; i < lastidx; i++) {
			spc->spc_m[i] = m[i];
			bp->b_pages[i - firstidx] = m[i];
			vm_page_protect(m[i], VM_PROT_READ);
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			m[i]->dirty = 0;
		}
		spc->spc_first = firstidx;
		spc->spc_count = ix;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
		bp->b_npages = ix;
		for (i = firstidx; i < lastidx; i++)
			bp->b_pages[i - firstidx] = m[i];
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += ix;

	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp->b_vp, bp);
	if (sync == FALSE) {
		if (swap_pager_free_pending) {
			swap_pager_sync();
		}
		for (i = firstidx; i < lastidx; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		splx(s);
		return VM_PAGER_PEND;
	}

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swwrt", 0);
	}

	if (bp->b_flags & B_ERROR) {
		printf(
		    "swap_pager: I/O error - pageout failed; blkno %ld, size %ld, error %d\n",
		    (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	object->un_pager.swp.swp_poip--;
	if (object->un_pager.swp.swp_poip == 0)
		wakeup(object);

	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, ix);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = firstidx; i < lastidx; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization: if a page has been read
				 * during the pageout process, we activate it.
				 */
				if (((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
				    pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = firstidx; i < lastidx; i++) {
			rtvals[i] = rv;
		}
	}

	if (spc != NULL) {
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		spc_free(spc);
	} else
		relpbuf(bp);
	if (swap_pager_free_pending)
		swap_pager_sync();

	return (rv);
}

void
swap_pager_sync()
{
	swp_clean_t spc;

	while ((spc = TAILQ_FIRST(&swap_pager_done)) != NULL) {
		swap_pager_finish(spc);
	}
	return;
}

static void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	int i, s, lastidx;
	vm_object_t object;
	vm_page_t *ma;

	ma = spc->spc_m;
	object = spc->spc_object;
	lastidx = spc->spc_first + spc->spc_count;

	s = splvm();
	TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
	splx(s);

	pmap_qremove(spc->spc_kva, spc->spc_count);

	/*
	 * If no error, mark as clean and inform the pmap system.  If error,
	 * mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {

		for (i = spc->spc_first; i < lastidx; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(ma[i]));
			ma[i]->dirty = VM_PAGE_BITS_ALL;
			vm_page_io_finish(ma[i]);
		}

		vm_object_pip_subtract(object, spc->spc_count);
		if ((object->paging_in_progress == 0) &&
		    (object->flags & OBJ_PIPWNT)) {
			vm_object_clear_flag(object, OBJ_PIPWNT);
			wakeup(object);
		}

	} else {
		for (i = spc->spc_first; i < lastidx; i++) {
			if ((ma[i]->queue != PQ_ACTIVE) &&
			    ((ma[i]->flags & PG_WANTED) ||
			    pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i])))) {
				vm_page_activate(ma[i]);
			}
		}
	}

	nswiodone -= spc->spc_count;
	swap_pager_free_pending--;
	spc_free(spc);

	return;
}

/*
 * swap_pager_iodone
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	int i, s, lastidx;
	register swp_clean_t spc;
	vm_object_t object;
	vm_page_t *ma;

	s = splvm();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);

	object = spc->spc_object;

#if defined(DIAGNOSTIC)
	if (object->paging_in_progress < spc->spc_count)
		printf("swap_pager_iodone: paging_in_progress(%d) < spc_count(%d)\n",
		    object->paging_in_progress, spc->spc_count);
#endif

	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
	} else {
		vm_object_pip_subtract(object, spc->spc_count);
		if ((object->paging_in_progress == 0) &&
		    (object->flags & OBJ_PIPWNT)) {
			vm_object_clear_flag(object, OBJ_PIPWNT);
			wakeup(object);
		}
		ma = spc->spc_m;
		lastidx = spc->spc_first + spc->spc_count;
		for (i = spc->spc_first; i < lastidx; i++) {
			/*
			 * we wakeup any processes that are waiting on these pages.
			 */
			vm_page_io_finish(ma[i]);
		}
	}

	if (bp->b_vp)
		pbrelvp(bp);

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	swap_pager_free_pending++;
	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
		wakeup(spc->spc_object);
	}

	if (swap_pager_needflags &&
	    ((swap_pager_free_count + swap_pager_free_pending) > (npendingio / 2))) {
		spc_wakeup();
	}

	if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) &&
	    vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}

	splx(s);
}