/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.102 1998/10/13 08:24:42 dg Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/vmmeter.h>
#include <sys/rlist.h>

#ifndef MAX_PAGEOUT_CLUSTER
#define MAX_PAGEOUT_CLUSTER 16
#endif

#ifndef NPENDINGIO
#define NPENDINGIO	16
#endif

#define SWB_NPAGES	MAX_PAGEOUT_CLUSTER

#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

static int nswiodone;
int swap_pager_full;
extern int vm_swap_size;
static int suggest_more_swap = 0;
static int no_swap_space = 1;
static int max_pageout_cluster;
struct rlisthdr swaplist;

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

static struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	vm_object_t spc_object;
	vm_offset_t spc_kva;
	int spc_first;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];

/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY	(-1)

/* list of completed page cleans */
static struct swpclean swap_pager_done;

/* list of pending page cleans */
static struct swpclean swap_pager_inuse;

/* list of free pager clean structs */
static struct swpclean swap_pager_free;
static int swap_pager_free_count;
static int swap_pager_free_pending;

/* list of "named" anon region objects */
static struct pagerlst swap_pager_object_list;

/* list of "unnamed" anon region objects */
struct pagerlst swap_pager_un_object_list;

#define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
#define	SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
static int swap_pager_needflags;

static struct pagerlst *swp_qs[] = {
	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
};
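
/*
 * Each swpagerclean ("spc") entry tracks one in-flight clustered pageout:
 * the KVA the pages are mapped at, the buf used for the I/O, the object
 * and the page run (spc_first/spc_count/spc_m).  Entries migrate from
 * swap_pager_free to swap_pager_inuse when an async write is started, to
 * swap_pager_done from the I/O completion routine, and back to
 * swap_pager_free once swap_pager_finish() has run.
 */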

/*
 * pagerops for OBJT_SWAP - "swap pager".
 */
static vm_object_t
		swap_pager_alloc __P((void *handle, vm_ooffset_t size,
		    vm_prot_t prot, vm_ooffset_t offset));
static void	swap_pager_dealloc __P((vm_object_t object));
static boolean_t
		swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
		    int *before, int *after));
static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static void	swap_pager_init __P((void));
static void	spc_free __P((swp_clean_t));

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpages,
	swap_pager_putpages,
	swap_pager_haspage,
	swap_pager_sync
};

static int npendingio;
static int dmmin;
int dmmax;

static int	swap_pager_block_index __P((vm_pindex_t pindex));
static int	swap_pager_block_offset __P((vm_pindex_t pindex));
static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
		    vm_pindex_t pindex, int *valid));
static void	swap_pager_finish __P((swp_clean_t spc));
static void	swap_pager_free_swap __P((vm_object_t object));
static void	swap_pager_freeswapspace __P((vm_object_t object,
		    unsigned int from, unsigned int to));
static int	swap_pager_getswapspace __P((vm_object_t object,
		    unsigned int amount, daddr_t *rtval));
static void	swap_pager_iodone __P((struct buf *));
static void	swap_pager_iodone1 __P((struct buf *bp));
static void	swap_pager_reclaim __P((void));
static void	swap_pager_ridpages __P((vm_page_t *m, int count,
		    int reqpage));
static void	swap_pager_setvalid __P((vm_object_t object,
		    vm_offset_t offset, int valid));
static __inline void	swapsizecheck __P((void));

#define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))

static __inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of swap space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

static void
swap_pager_init()
{
	int maxsafepending;

	TAILQ_INIT(&swap_pager_object_list);
	TAILQ_INIT(&swap_pager_un_object_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);
	swap_pager_free_count = 0;

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = PAGE_SIZE / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
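
	/*
	 * dmmax is the size (in DEV_BSIZE blocks) of a swap "region"; the
	 * clustering code in getpages/putpages refuses to build a cluster
	 * that crosses a region boundary (the reqaddr[i] / dmmax checks).
	 * The calculation below scales back the pageout cluster size and
	 * the number of pending async pageouts so that pages tied up in
	 * swcleanlist entries cannot exhaust the free-page reserve.
	 */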

	maxsafepending = cnt.v_free_min - cnt.v_free_reserved;
	npendingio = NPENDINGIO;
	max_pageout_cluster = MAX_PAGEOUT_CLUSTER;

	if ((2 * NPENDINGIO * MAX_PAGEOUT_CLUSTER) > maxsafepending) {
		max_pageout_cluster = MAX_PAGEOUT_CLUSTER / 2;
		npendingio = maxsafepending / (2 * max_pageout_cluster);
		if (npendingio < 2)
			npendingio = 2;
	}
}

void
swap_pager_swap_init()
{
	swp_clean_t spc;
	struct buf *bp;
	int i;

	/*
	 * kva's are allocated here so that we don't need to keep doing
	 * kmem_alloc pageables at runtime
	 */
	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
		spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * max_pageout_cluster);
		if (!spc->spc_kva) {
			break;
		}
		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
		if (!spc->spc_bp) {
			kmem_free_wakeup(pager_map, spc->spc_kva,
			    PAGE_SIZE * max_pageout_cluster);
			break;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
	}
}

int
swap_pager_swp_alloc(object, wait)
	vm_object_t object;
	int wait;
{
	sw_blk_t swb;
	int nblocks;
	int i, j;

	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
	if (swb == NULL)
		return 1;

	for (i = 0; i < nblocks; i++) {
		swb[i].swb_valid = 0;
		swb[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swb[i].swb_block[j] = SWB_EMPTY;
	}

	object->un_pager.swp.swp_nblocks = nblocks;
	object->un_pager.swp.swp_allocsize = 0;
	object->un_pager.swp.swp_blocks = swb;
	object->un_pager.swp.swp_poip = 0;

	if (object->handle != NULL) {
		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
	} else {
		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
	}

	return 0;
}
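
/*
 * Layout of the per-object block map built above: with SWB_NPAGES == 16,
 * page index 35 lives in swp_blocks[2], slot 3 (35 / 16, 35 % 16).  Each
 * slot's swb_block[] entry holds a device block address (SWB_EMPTY if no
 * swap space is allocated), and the corresponding bit in swb_valid is set
 * once that block actually contains written data.
 */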

/*
 * Allocate an object and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
static vm_object_t
swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t offset)
{
	vm_object_t object;

	/*
	 * If this is a "named" anonymous region, look it up and use the
	 * object if it exists, otherwise allocate a new one.
	 */
	if (handle) {
		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
		if (object != NULL) {
			vm_object_reference(object);
		} else {
			/*
			 * XXX - there is a race condition here.  Two processes
			 * can request the same named object simultaneously,
			 * and if one blocks for memory, the result is a disaster.
			 * Probably quite rare, but is yet another reason to just
			 * rip support of "named anonymous regions" out altogether.
			 */
			object = vm_object_allocate(OBJT_SWAP,
			    OFF_TO_IDX(offset + PAGE_MASK + size));
			object->handle = handle;
			(void) swap_pager_swp_alloc(object, M_WAITOK);
		}
	} else {
		object = vm_object_allocate(OBJT_SWAP,
		    OFF_TO_IDX(offset + PAGE_MASK + size));
		(void) swap_pager_swp_alloc(object, M_WAITOK);
	}

	return (object);
}

/*
 * returns the disk block associated with pager and offset; additionally,
 * as a side effect, returns a flag indicating whether the block has been
 * written
 */
static __inline daddr_t *
swap_pager_diskaddr(object, pindex, valid)
	vm_object_t object;
	vm_pindex_t pindex;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = pindex / SWB_NPAGES;
	if ((ix >= object->un_pager.swp.swp_nblocks) ||
	    (pindex >= object->size)) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(object, offset, valid)
	vm_object_t object;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks)
		return;

	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = offset % SWB_NPAGES;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
static int
swap_pager_getswapspace(object, amount, rtval)
	vm_object_t object;
	unsigned int amount;
	daddr_t *rtval;
{
	unsigned location;

	vm_swap_size -= amount;
	if (!suggest_more_swap && (vm_swap_size < btodb(cnt.v_page_count * PAGE_SIZE))) {
		printf("swap_pager: suggest more swap space: %d MB\n",
		    (2 * cnt.v_page_count * (PAGE_SIZE / 1024)) / 1000);
		suggest_more_swap = 1;
	}

	if (!rlist_alloc(&swaplist, amount, &location)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		object->un_pager.swp.swp_allocsize += amount;
		*rtval = location;
		return 1;
	}
}
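
/*
 * swap_pager_getswapspace() and swap_pager_freeswapspace() keep two
 * counters in step: the global vm_swap_size (in DEV_BSIZE blocks, debited
 * before the rlist allocation is attempted and credited back if it fails)
 * and the per-object swp_allocsize, which the dealloc and copy paths
 * later check to detect leaked blocks.
 */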

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
static void
swap_pager_freeswapspace(object, from, to)
	vm_object_t object;
	unsigned int from;
	unsigned int to;
{
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to - from) + 1;
	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
	swapsizecheck();
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
swap_pager_freespace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splvm();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

/*
 * same as freespace, but don't free, just force a DMZ next time
 */
void
swap_pager_dmzspace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splvm();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
		}
	}
	splx(s);
}

static void
swap_pager_free_swap(object)
	vm_object_t object;
{
	register int i, j;
	register sw_blk_t swb;
	int first_block = 0, block_count = 0;
	int s;

	/*
	 * Free left over swap blocks
	 */
	swb = object->un_pager.swp.swp_blocks;
	if (swb == NULL) {
		return;
	}

	s = splvm();
	for (i = 0; i < object->un_pager.swp.swp_nblocks; i++, swb++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (swb->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included into the current run
				 */
				} else if (swb->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * terminate the previous run, and start a new one
				 */
				} else {
					swap_pager_freeswapspace(object, first_block,
					    (unsigned) first_block + block_count - 1);
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	if (block_count) {
		swap_pager_freeswapspace(object, first_block,
		    (unsigned) first_block + block_count - 1);
	}
	splx(s);
}
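
/*
 * swap_pager_free_swap() above coalesces physically contiguous device
 * blocks into runs and releases each run with a single
 * swap_pager_freeswapspace() call rather than freeing page by page.
 */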

/*
 * swap_pager_reclaim frees up over-allocated space from all pagers;
 * this eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 128

static void
swap_pager_reclaim()
{
	vm_object_t object;
	int i, j, k;
	int s;
	int reclaimcount;
	static struct {
		int address;
		vm_object_t object;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splvm();
	if (in_reclaim) {
		tsleep(&in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		object = TAILQ_FIRST(swp_qs[k]);
		while (object && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			if ((object->flags & OBJ_DEAD) == 0 &&
			    (object->paging_in_progress == 0)) {
				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];

					if (swb->swb_locked)
						continue;
					for (j = 0; j < SWB_NPAGES; j++) {
						if (swb->swb_block[j] != SWB_EMPTY &&
						    (swb->swb_valid & (1 << j)) == 0) {
							reclaims[reclaimcount].address = swb->swb_block[j];
							reclaims[reclaimcount++].object = object;
							swb->swb_block[j] = SWB_EMPTY;
							if (reclaimcount >= MAXRECLAIM)
								goto rfinished;
						}
					}
				}
			}
			object = TAILQ_NEXT(object, pager_object_list);
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].object,
		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup(&in_reclaim);
}

/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */
void
swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset,
    offset, destroysource)
	vm_object_t srcobject;
	vm_pindex_t srcoffset;
	vm_object_t dstobject;
	vm_pindex_t dstoffset;
	vm_pindex_t offset;
	int destroysource;
{
	vm_pindex_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	origsize = srcobject->un_pager.swp.swp_allocsize;

	/*
	 * remove the source object from the swap_pager internal queue
	 */
	if (destroysource) {
		if (srcobject->handle == NULL) {
			TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
		} else {
			TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
		}
	}

	s = splvm();
	while (srcobject->un_pager.swp.swp_poip) {
		tsleep(srcobject, PVM, "spgout", 0);
	}

	/*
	 * clean all of the pages that are currently active and finished
	 */
	if (swap_pager_free_pending)
		swap_pager_sync();
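
	/*
	 * For each destination page index below, the source block at
	 * (i + offset + srcoffset) is moved to the destination object when
	 * it holds valid data and the destination slot is free; any source
	 * block that cannot be moved is simply released.
	 */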

	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstobject->size; i += 1) {
		int srcvalid, dstvalid;
		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject,
		    i + offset + srcoffset, &srcvalid);
		daddr_t *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
				    &dstvalid);
				/*
				 * if the dest already has a valid block,
				 * deallocate the source block without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstobject, *dstaddrp,
					    *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstobject, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcobject, *srcaddrp,
				    *srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	if (destroysource) {
		swap_pager_free_swap(srcobject);

		if (srcobject->un_pager.swp.swp_allocsize) {
			printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
			    srcobject->un_pager.swp.swp_allocsize, origsize);
		}

		free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
		srcobject->un_pager.swp.swp_blocks = NULL;
	}
	return;
}

static void
swap_pager_dealloc(object)
	vm_object_t object;
{
	int s;
	sw_blk_t swb;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	if (object->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
	}

	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */
	s = splvm();
	while (object->un_pager.swp.swp_poip) {
		tsleep(object, PVM, "swpout", 0);
	}
	splx(s);

	if (swap_pager_free_pending)
		swap_pager_sync();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(object);

	if (object->un_pager.swp.swp_allocsize) {
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
		    object->un_pager.swp.swp_allocsize);
	}
	swb = object->un_pager.swp.swp_blocks;
	if (swb) {
		/*
		 * Free swap management resources
		 */
		free(swb, M_VMPGDATA);
		object->un_pager.swp.swp_blocks = NULL;
	}
}

static __inline int
swap_pager_block_index(pindex)
	vm_pindex_t pindex;
{
	return (pindex / SWB_NPAGES);
}

static __inline int
swap_pager_block_offset(pindex)
	vm_pindex_t pindex;
{
	return (pindex % SWB_NPAGES);
}
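
/*
 * The readahead/readbehind counts computed below never extend past the
 * sw_blk_t that holds the requested page, and only count blocks that are
 * valid and physically contiguous on the swap device.
 */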

/*
 * swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
swap_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	register sw_blk_t swb;
	int ix;

	if (before != NULL)
		*before = 0;
	if (after != NULL)
		*after = 0;
	ix = pindex / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;

	if (swb->swb_block[ix] != SWB_EMPTY) {

		if (swb->swb_valid & (1 << ix)) {
			int tix;

			if (before) {
				for (tix = ix - 1; tix >= 0; --tix) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] +
					    (ix - tix) * (PAGE_SIZE / DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*before)++;
				}
			}

			if (after) {
				for (tix = ix + 1; tix < SWB_NPAGES; tix++) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] -
					    (tix - ix) * (PAGE_SIZE / DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*after)++;
				}
			}

			return TRUE;
		}
	}
	return (FALSE);
}

/*
 * Wakeup based upon spc state
 */
static void
spc_wakeup(void)
{
	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
		wakeup(&swap_pager_needflags);
	} else if ((swap_pager_needflags & SWAP_FREE_NEEDED) &&
	    swap_pager_free_count >= ((2 * npendingio) / 3)) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup(&swap_pager_free);
	}
}

/*
 * Free an spc structure
 */
static void
spc_free(spc)
	swp_clean_t spc;
{
	spc->spc_flags = 0;
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	swap_pager_free_count++;
	if (swap_pager_needflags) {
		spc_wakeup();
	}
}
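
/*
 * The wakeup policy above favors the pageout daemon: a waiter that set
 * SWAP_FREE_NEEDED_BY_PAGEOUT is woken as soon as any spc is freed, while
 * ordinary SWAP_FREE_NEEDED waiters are only woken once at least two
 * thirds of the spc pool is free again.
 */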

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  This is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++) {
		if (i != reqpage) {
			vm_page_free(m[i]);
		}
	}
}

/*
 * swap_pager_iodone1 is the completion routine for reads and sync writes
 */
static void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup(bp);
}

static int
swap_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	vm_pindex_t paging_offset;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = OFF_TO_IDX(object->paging_offset);
	sequential = (m[reqpage]->pindex == (object->last_read + 1));

	for (i = 0; i < count; i++) {
		vm_pindex_t fidx = m[i]->pindex + paging_offset;
		int ix = swap_pager_block_index(fidx);

		if (ix >= object->un_pager.swp.swp_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				vm_page_free(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &object->un_pager.swp.swp_blocks[ix];
		off[i] = swap_pager_block_offset(fidx);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request exists */
	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			vm_page_free(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			vm_page_free(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	++swb[reqpage]->swb_locked;

	/*
	 * at this point: "m" is a pointer to the array of vm_page_t for
	 * paging I/O, "count" is the number of vm_page_t entries represented
	 * by "m", "object" is the vm_object_t for I/O, and "reqpage" is the
	 * index into "m" for the page actually faulted.
	 */

	/*
	 * Get a swap buffer header to perform the IO
	 */
	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;
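
	/*
	 * The clustering scans above have trimmed the request so that every
	 * remaining page is backed by a valid swap block, the blocks are
	 * physically contiguous, and they all lie in the same dmmax region;
	 * the whole run can therefore be read with the single strategy call
	 * below.
	 */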

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp->b_vp, bp);

	/*
	 * wait for the sync I/O to complete
	 */
	s = splvm();
	while ((bp->b_flags & B_DONE) == 0) {
		if (tsleep(bp, PVM, "swread", hz * 20)) {
			printf(
"swap_pager: indefinite wait buffer: device: %#lx, blkno: %ld, size: %ld\n",
			    (u_long)bp->b_dev, (long)bp->b_blkno,
			    (long)bp->b_bcount);
		}
	}

	if (bp->b_flags & B_ERROR) {
		printf(
"swap_pager: I/O error - pagein failed; blkno %ld, size %ld, error %d\n",
		    (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	splx(s);
	swb[reqpage]->swb_locked--;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * release the physical I/O buffer
	 */
	relpbuf(bp);

	/*
	 * finish up input if everything is ok
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			m[i]->dirty = 0;
			vm_page_flag_clear(m[i], PG_ZERO);
			if (i != reqpage) {
				/*
				 * whether or not to leave the page
				 * activated is up in the air, but we
				 * should put the page on a page queue
				 * somewhere.  (it already is in the
				 * object).  After some empirical
				 * results, it is best to deactivate
				 * the readahead pages.
				 */
				vm_page_deactivate(m[i]);

				/*
				 * just in case someone was asking for
				 * this page we now tell them that it
				 * is ok to use
				 */
				m[i]->valid = VM_PAGE_BITS_ALL;
				vm_page_wakeup(m[i]);
			}
		}

		m[reqpage]->object->last_read = m[count - 1]->pindex;
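
		/*
		 * last_read records the last page index brought in by this
		 * cluster; the sequential-access test at the top of this
		 * routine uses it to suppress readbehind clustering when the
		 * object is being faulted in sequentially.
		 */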

		/*
		 * If we're out of swap space, then attempt to free
		 * some whenever multiple pages are brought in.  We
		 * must set the dirty bits so that the page contents
		 * will be preserved.
		 */
		if (SWAPLOW ||
		    (vm_swap_size < btodb((cnt.v_page_count - cnt.v_wire_count) * PAGE_SIZE))) {
			for (i = 0; i < count; i++) {
				m[i]->dirty = VM_PAGE_BITS_ALL;
			}
			swap_pager_freespace(object,
			    m[0]->pindex + paging_offset, count);
		}

	} else {
		swap_pager_ridpages(m, count, reqpage);
	}
	return (rv);
}

int
swap_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix, firstidx, lastidx;
	boolean_t rv;
	vm_offset_t kva, off, fidx;
	swp_clean_t spc;
	vm_pindex_t paging_pindex;
	int reqaddr[count];
	int failed;

	if (vm_swap_size)
		no_swap_space = 0;

	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}

	if (curproc != pageproc)
		sync = TRUE;

	object = m[0]->object;
	paging_pindex = OFF_TO_IDX(object->paging_offset);

	failed = 0;
	for (j = 0; j < count; j++) {
		fidx = m[j]->pindex + paging_pindex;
		ix = swap_pager_block_index(fidx);
		swb[j] = 0;
		if (ix >= object->un_pager.swp.swp_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &object->un_pager.swp.swp_blocks[ix];
		swb[j]->swb_locked++;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(fidx);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			daddr_t blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splvm();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks for
			 * small objects)
			 */
			if ((off == 0) && ((fidx + ntoget) > object->size)) {
				ntoget = object->size - fidx;
			}
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
				&blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed ||
		    (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	ix = 0;
	firstidx = -1;
	for (i = 0; i < count; i++) {
		if (rtvals[i] == VM_PAGER_OK) {
			ix++;
			if (firstidx == -1) {
				firstidx = i;
			}
		} else if (firstidx >= 0) {
			break;
		}
	}

	if (firstidx == -1) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
		return VM_PAGER_AGAIN;
	}

	lastidx = firstidx + ix;

	if (ix > max_pageout_cluster) {
		for (i = firstidx + max_pageout_cluster; i < lastidx; i++) {
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
		ix = max_pageout_cluster;
		lastidx = firstidx + ix;
	}

	for (i = 0; i < firstidx; i++) {
		if (swb[i])
			swb[i]->swb_locked--;
	}

	for (i = lastidx; i < count; i++) {
		if (swb[i])
			swb[i]->swb_locked--;
	}

#if defined(DIAGNOSTIC)
	for (i = firstidx; i < lastidx; i++) {
		if (reqaddr[i] == SWB_EMPTY) {
			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
			    m[i]->pindex, i);
		}
	}
#endif

	/*
	 * Clean up all completed async pageouts.
	 */
	if (swap_pager_free_pending)
		swap_pager_sync();

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (curproc == pageproc) {
		if (swap_pager_free_count == 0) {
			s = splvm();
			while (swap_pager_free_count == 0) {
				swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
				/*
				 * if it does not get one within a short time, then
				 * there is a potential deadlock, so we go on trying
				 * to free pages.  It is important to block here as
				 * opposed to returning, thereby allowing the pageout
				 * daemon to continue.  It is likely that the pageout
				 * daemon will start suboptimally reclaiming vnode
				 * backed pages if we don't block.  Since the I/O
				 * subsystem is probably already fully utilized, might
				 * as well wait.
				 */
				if (tsleep(&swap_pager_needflags, PVM - 1, "swpfre", hz / 2)) {
					if (swap_pager_free_pending)
						swap_pager_sync();
					if (swap_pager_free_count == 0) {
						for (i = firstidx; i < lastidx; i++) {
							rtvals[i] = VM_PAGER_AGAIN;
						}
						splx(s);
						return VM_PAGER_AGAIN;
					}
				} else {
					swap_pager_sync();
				}
			}
			splx(s);
		}

		spc = TAILQ_FIRST(&swap_pager_free);
#if defined(DIAGNOSTIC)
		if (spc == NULL)
			panic("swap_pager_putpages: free queue is empty, %d expected\n",
			    swap_pager_free_count);
#endif
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		swap_pager_free_count--;

		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_xflags = 0;
		bp->b_data = (caddr_t) kva;
	} else {
		spc = NULL;
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
		bp->b_spc = NULL;
	}

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, &m[firstidx], ix);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = firstidx; i < lastidx; i++) {
		fidx = m[i]->pindex + paging_pindex;
		off = swap_pager_block_offset(fidx);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		swb[i]->swb_locked--;
	}

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = reqaddr[firstidx];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * ix;
	bp->b_bufsize = PAGE_SIZE * ix;

	s = splvm();
	swapdev_vp->v_numoutput++;
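
	/*
	 * For async writes the work finishes in two stages:
	 * swap_pager_iodone() runs at biodone time and moves the spc to the
	 * done queue, and swap_pager_sync()/swap_pager_finish() later unmap
	 * the KVA, recycle the spc and fix up the page state from normal
	 * context.
	 */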

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
	 */
	object->un_pager.swp.swp_poip++;

	if (spc) {
		spc->spc_flags = 0;
		spc->spc_object = object;
		bp->b_npages = ix;
		for (i = firstidx; i < lastidx; i++) {
			spc->spc_m[i] = m[i];
			bp->b_pages[i - firstidx] = m[i];
			vm_page_protect(m[i], VM_PROT_READ);
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			m[i]->dirty = 0;
		}
		spc->spc_first = firstidx;
		spc->spc_count = ix;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
		bp->b_npages = ix;
		for (i = firstidx; i < lastidx; i++)
			bp->b_pages[i - firstidx] = m[i];
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += ix;

	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp->b_vp, bp);
	if (sync == FALSE) {
		if (swap_pager_free_pending) {
			swap_pager_sync();
		}
		for (i = firstidx; i < lastidx; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		splx(s);
		return VM_PAGER_PEND;
	}

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swwrt", 0);
	}

	if (bp->b_flags & B_ERROR) {
		printf(
"swap_pager: I/O error - pageout failed; blkno %ld, size %ld, error %d\n",
		    (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	object->un_pager.swp.swp_poip--;
	if (object->un_pager.swp.swp_poip == 0)
		wakeup(object);

	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, ix);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = firstidx; i < lastidx; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization, if a page has been read
				 * during the pageout process, we activate it.
				 */
				if (((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
				    pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = firstidx; i < lastidx; i++) {
			rtvals[i] = rv;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	if (spc != NULL)
		spc_free(spc);
	else
		relpbuf(bp);
	if (swap_pager_free_pending)
		swap_pager_sync();

	return (rv);
}

void
swap_pager_sync()
{
	swp_clean_t spc;

	while ((spc = TAILQ_FIRST(&swap_pager_done)) != NULL) {
		swap_pager_finish(spc);
	}
	return;
}

static void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	int i, s, lastidx;
	vm_object_t object;
	vm_page_t *ma;

	ma = spc->spc_m;
	object = spc->spc_object;
	lastidx = spc->spc_first + spc->spc_count;

	s = splvm();
	TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
	splx(s);

	pmap_qremove(spc->spc_kva, spc->spc_count);

	/*
	 * If no error, mark as clean and inform the pmap system.  If error,
	 * mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after a while)
	 */
	if (spc->spc_flags & SPC_ERROR) {

		for (i = spc->spc_first; i < lastidx; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(ma[i]));
			ma[i]->dirty = VM_PAGE_BITS_ALL;
			vm_page_io_finish(ma[i]);
		}

		vm_object_pip_subtract(object, spc->spc_count);
		if ((object->paging_in_progress == 0) &&
		    (object->flags & OBJ_PIPWNT)) {
			vm_object_clear_flag(object, OBJ_PIPWNT);
			wakeup(object);
		}

	} else {
		for (i = spc->spc_first; i < lastidx; i++) {
			if ((ma[i]->queue != PQ_ACTIVE) &&
			    ((ma[i]->flags & PG_WANTED) ||
			    pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i])))) {
				vm_page_activate(ma[i]);
			}
		}
	}

	nswiodone -= spc->spc_count;
	swap_pager_free_pending--;
	spc_free(spc);

	return;
}

/*
 * swap_pager_iodone
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	int i, s, lastidx;
	register swp_clean_t spc;
	vm_object_t object;
	vm_page_t *ma;

	s = splvm();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);

	object = spc->spc_object;

#if defined(DIAGNOSTIC)
	if (object->paging_in_progress < spc->spc_count)
		printf("swap_pager_iodone: paging_in_progress(%d) < spc_count(%d)\n",
		    object->paging_in_progress, spc->spc_count);
#endif

	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
	} else {
		vm_object_pip_subtract(object, spc->spc_count);
		if ((object->paging_in_progress == 0) &&
		    (object->flags & OBJ_PIPWNT)) {
			vm_object_clear_flag(object, OBJ_PIPWNT);
			wakeup(object);
		}
		ma = spc->spc_m;
		lastidx = spc->spc_first + spc->spc_count;
		for (i = spc->spc_first; i < lastidx; i++) {
			/*
			 * we wake up any processes that are waiting on these pages.
			 */
			vm_page_io_finish(ma[i]);
		}
	}

	if (bp->b_vp)
		pbrelvp(bp);

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	swap_pager_free_pending++;
	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
		wakeup(spc->spc_object);
	}

	if (swap_pager_needflags &&
	    ((swap_pager_free_count + swap_pager_free_pending) > (npendingio / 2))) {
		spc_wakeup();
	}

	if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) &&
	    vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}

	splx(s);
}