/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.70 1996/07/30 03:08:05 dyson Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
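 *
 * Overview: this pager backs anonymous (OBJT_SWAP) objects with blocks
 * allocated from the swap partition resource list.  Each object carries
 * an array of sw_blk_t structures, one per SWB_NPAGES pages of the
 * object; pageouts are clustered through a small pool of swpagerclean
 * control blocks that cycle through the free/inuse/done lists below.
 *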
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/vmmeter.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#ifndef NPENDINGIO
#define NPENDINGIO	10
#endif

static int nswiodone;
int swap_pager_full;
extern int vm_swap_size;
static int no_swap_space = 1;
struct rlisthdr swaplist;

#define MAX_PAGEOUT_CLUSTER 16

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

static struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	vm_object_t spc_object;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];


/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY	(-1)

/* list of completed page cleans */
static struct swpclean swap_pager_done;

/* list of pending page cleans */
static struct swpclean swap_pager_inuse;

/* list of free pager clean structs */
static struct swpclean swap_pager_free;
int swap_pager_free_count;

/* list of "named" anon region objects */
static struct pagerlst swap_pager_object_list;

/* list of "unnamed" anon region objects */
struct pagerlst swap_pager_un_object_list;

#define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
#define	SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
static int swap_pager_needflags;

static struct pagerlst *swp_qs[] = {
	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
};

/*
 * pagerops for OBJT_SWAP - "swap pager".
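 * The swappagerops table below wires this pager's entry points into
 * the generic pager switch: init, alloc, dealloc, getpages, putpages,
 * haspage and sync.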
 */
static vm_object_t
	swap_pager_alloc __P((void *handle, vm_size_t size,
			      vm_prot_t prot, vm_ooffset_t offset));
static void	swap_pager_dealloc __P((vm_object_t object));
static boolean_t
	swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
				int *before, int *after));
static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static void	swap_pager_init __P((void));
static void	swap_pager_sync __P((void));

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpages,
	swap_pager_putpages,
	swap_pager_haspage,
	swap_pager_sync
};

static int npendingio = NPENDINGIO;
static int dmmin;
int dmmax;

static __pure int
	swap_pager_block_index __P((vm_pindex_t pindex)) __pure2;
static __pure int
	swap_pager_block_offset __P((vm_pindex_t pindex)) __pure2;
static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
					 vm_pindex_t pindex, int *valid));
static void	swap_pager_finish __P((swp_clean_t spc));
static void	swap_pager_freepage __P((vm_page_t m));
static void	swap_pager_free_swap __P((vm_object_t object));
static void	swap_pager_freeswapspace __P((vm_object_t object,
					      unsigned int from,
					      unsigned int to));
static int	swap_pager_getswapspace __P((vm_object_t object,
					     unsigned int amount,
					     daddr_t *rtval));
static void	swap_pager_iodone __P((struct buf *));
static void	swap_pager_iodone1 __P((struct buf *bp));
static void	swap_pager_reclaim __P((void));
static void	swap_pager_ridpages __P((vm_page_t *m, int count,
					 int reqpage));
static void	swap_pager_setvalid __P((vm_object_t object,
					 vm_offset_t offset, int valid));
static void	swapsizecheck __P((void));

#define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))

static inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of swap space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

static void
swap_pager_init()
{
	TAILQ_INIT(&swap_pager_object_list);
	TAILQ_INIT(&swap_pager_un_object_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);
	swap_pager_free_count = 0;

	/*
	 * Calculate the swap allocation constants.
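	 * dmmin is the smallest transfer unit (one page, expressed in
	 * DEV_BSIZE disk blocks); dmmax is twice the size of an
	 * SWB_NPAGES-page block cluster and acts as a region size: the
	 * clustering code below never builds an I/O that crosses a
	 * dmmax-aligned boundary.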
	 */
	dmmin = PAGE_SIZE / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}

void
swap_pager_swap_init()
{
	swp_clean_t spc;
	struct buf *bp;
	int i;

	/*
	 * KVAs are allocated here so that we don't need to keep doing
	 * kmem_alloc pageables at runtime.
	 */
	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
		spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
		if (!spc->spc_kva) {
			break;
		}
		/* M_NOWAIT: a NULL return is handled below */
		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT);
		if (!spc->spc_bp) {
			kmem_free_wakeup(pager_map, spc->spc_kva,
			    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			break;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
	}
}

int
swap_pager_swp_alloc(object, wait)
	vm_object_t object;
	int wait;
{
	sw_blk_t swb;
	int nblocks;
	int i, j;

	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
	if (swb == NULL)
		return 1;

	for (i = 0; i < nblocks; i++) {
		swb[i].swb_valid = 0;
		swb[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swb[i].swb_block[j] = SWB_EMPTY;
	}

	object->un_pager.swp.swp_nblocks = nblocks;
	object->un_pager.swp.swp_allocsize = 0;
	object->un_pager.swp.swp_blocks = swb;
	object->un_pager.swp.swp_poip = 0;

	if (object->handle != NULL) {
		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
	} else {
		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
	}

	return 0;
}

/*
 * Allocate an object and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
static vm_object_t
swap_pager_alloc(handle, size, prot, offset)
	void *handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_ooffset_t offset;
{
	vm_object_t object;

	/*
	 * If this is a "named" anonymous region, look it up and use the
	 * object if it exists, otherwise allocate a new one.
	 */
	if (handle) {
		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
		if (object != NULL) {
			vm_object_reference(object);
		} else {
			/*
			 * XXX - there is a race condition here.  Two processes
			 * can request the same named object simultaneously,
			 * and if one blocks for memory, the result is a disaster.
			 * Probably quite rare, but it is yet another reason to just
			 * rip support of "named anonymous regions" out altogether.
			 */
			object = vm_object_allocate(OBJT_SWAP,
				OFF_TO_IDX(offset + PAGE_MASK) + size);
			object->handle = handle;
			(void) swap_pager_swp_alloc(object, M_WAITOK);
		}
	} else {
		object = vm_object_allocate(OBJT_SWAP,
			OFF_TO_IDX(offset + PAGE_MASK) + size);
		(void) swap_pager_swp_alloc(object, M_WAITOK);
	}

	return (object);
}

/*
 * returns the disk block associated with a pager and offset;
 * additionally, as a side effect, returns a flag indicating
 * whether the block has been written (is valid)
 */
inline static daddr_t *
swap_pager_diskaddr(object, pindex, valid)
	vm_object_t object;
	vm_pindex_t pindex;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = pindex / SWB_NPAGES;
	if ((ix >= object->un_pager.swp.swp_nblocks) ||
	    (pindex >= object->size)) {
		return (NULL);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(object, offset, valid)
	vm_object_t object;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks)
		return;

	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = offset % SWB_NPAGES;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
static int
swap_pager_getswapspace(object, amount, rtval)
	vm_object_t object;
	unsigned int amount;
	daddr_t *rtval;
{
	unsigned location;

	vm_swap_size -= amount;
	if (!rlist_alloc(&swaplist, amount, &location)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		object->un_pager.swp.swp_allocsize += amount;
		*rtval = location;
		return 1;
	}
}

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
static void
swap_pager_freeswapspace(object, from, to)
	vm_object_t object;
	unsigned int from;
	unsigned int to;
{
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to - from) + 1;
	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
	swapsizecheck();
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
swap_pager_freespace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splbio();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

/*
 * same as freespace, but don't free, just force a DMZ next time
 */
void
swap_pager_dmzspace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splbio();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
		}
	}
	splx(s);
}

static void
swap_pager_free_swap(object)
	vm_object_t object;
{
	register int i, j;
	register sw_blk_t swb;
	int first_block = 0, block_count = 0;
	int s;

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, swb = object->un_pager.swp.swp_blocks;
	    i < object->un_pager.swp.swp_nblocks; i++, swb++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (swb->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included in the current run
				 */
				} else if (swb->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * otherwise, terminate the previous run and start a new one
				 */
				} else {
					swap_pager_freeswapspace(object, first_block,
					    (unsigned) first_block + block_count - 1);
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	if (block_count) {
		swap_pager_freeswapspace(object, first_block,
		    (unsigned) first_block + block_count - 1);
	}
	splx(s);
}


/*
 * swap_pager_reclaim frees up over-allocated space from all pagers;
 * this eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
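 *
 * It runs in two phases: candidate block addresses are first gathered
 * into a private reclaim list (at splbio, without ever blocking), and
 * only afterwards handed back to the swap rlist, so the pager queues
 * are never visible in a half-updated state.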
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 128

static void
swap_pager_reclaim()
{
	vm_object_t object;
	int i, j, k;
	int s;
	int reclaimcount;
	static struct {
		int address;
		vm_object_t object;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep(&in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		object = TAILQ_FIRST(swp_qs[k]);
		while (object && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			if ((object->flags & OBJ_DEAD) == 0 &&
			    (object->paging_in_progress == 0)) {
				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];

					if (swb->swb_locked)
						continue;
					for (j = 0; j < SWB_NPAGES; j++) {
						if (swb->swb_block[j] != SWB_EMPTY &&
						    (swb->swb_valid & (1 << j)) == 0) {
							reclaims[reclaimcount].address = swb->swb_block[j];
							reclaims[reclaimcount++].object = object;
							swb->swb_block[j] = SWB_EMPTY;
							if (reclaimcount >= MAXRECLAIM)
								goto rfinished;
						}
					}
				}
			}
			object = TAILQ_NEXT(object, pager_object_list);
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].object,
		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup(&in_reclaim);
}


/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */
void
swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
	vm_object_t srcobject;
	vm_pindex_t srcoffset;
	vm_object_t dstobject;
	vm_pindex_t dstoffset;
	vm_pindex_t offset;
{
	vm_pindex_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	origsize = srcobject->un_pager.swp.swp_allocsize;

	/*
	 * remove the source object from the swap_pager internal queue
	 */
	if (srcobject->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
	}

	s = splbio();
	while (srcobject->un_pager.swp.swp_poip) {
		tsleep(srcobject, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	swap_pager_sync();

	s = splbio();
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstobject->size; i += 1) {
		int srcvalid, dstvalid;
		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
		    &srcvalid);
		daddr_t *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
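			 * (the block is moved, not copied: the source
			 * slot is emptied once the destination takes
			 * ownership, so no disk I/O is required)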
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
				    &dstvalid);
				/*
				 * if the dest already has a valid block,
				 * deallocate the source block without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstobject, *dstaddrp,
					    *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstobject, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcobject, *srcaddrp,
				    *srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(srcobject);

	if (srcobject->un_pager.swp.swp_allocsize) {
		printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
		    srcobject->un_pager.swp.swp_allocsize, origsize);
	}

	free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
	srcobject->un_pager.swp.swp_blocks = NULL;

	return;
}

static void
swap_pager_dealloc(object)
	vm_object_t object;
{
	int s;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	if (object->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
	}

	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */
	s = splbio();
	while (object->un_pager.swp.swp_poip) {
		tsleep(object, PVM, "swpout", 0);
	}
	splx(s);

	swap_pager_sync();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(object);

	if (object->un_pager.swp.swp_allocsize) {
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
		    object->un_pager.swp.swp_allocsize);
	}
	/*
	 * Free swap management resources
	 */
	free(object->un_pager.swp.swp_blocks, M_VMPGDATA);
	object->un_pager.swp.swp_blocks = NULL;
}

static inline __pure int
swap_pager_block_index(pindex)
	vm_pindex_t pindex;
{
	return (pindex / SWB_NPAGES);
}

static inline __pure int
swap_pager_block_offset(pindex)
	vm_pindex_t pindex;
{
	return (pindex % SWB_NPAGES);
}

/*
 * swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
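 * When before/after are non-NULL they are filled with the number of
 * valid, disk-contiguous pages immediately preceding and following
 * pindex within the same sw_blk_t; the fault path uses these counts
 * to size clustered reads.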
 */
static boolean_t
swap_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	register sw_blk_t swb;
	int ix;

	if (before != NULL)
		*before = 0;
	if (after != NULL)
		*after = 0;
	ix = pindex / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;

	if (swb->swb_block[ix] != SWB_EMPTY) {

		if (swb->swb_valid & (1 << ix)) {
			int tix;

			if (before) {
				for (tix = ix - 1; tix >= 0; --tix) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] +
					    (ix - tix) * (PAGE_SIZE / DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*before)++;
				}
			}

			if (after) {
				for (tix = ix + 1; tix < SWB_NPAGES; tix++) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] -
					    (tix - ix) * (PAGE_SIZE / DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*after)++;
				}
			}

			return TRUE;
		}
	}
	return (FALSE);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  this is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

/*
 * swap_pager_iodone1 is the completion routine for both reads and
 * synchronous writes (async writes complete through swap_pager_iodone)
 */
static void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup(bp);
}

static int
swap_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_pindex_t paging_offset;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = OFF_TO_IDX(object->paging_offset);
	sequential = (m[reqpage]->pindex == (object->last_read + 1));

	for (i = 0; i < count; i++) {
		vm_pindex_t fidx = m[i]->pindex + paging_offset;
		int ix = swap_pager_block_index(fidx);

		if (ix >= object->un_pager.swp.swp_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &object->un_pager.swp.swp_blocks[ix];
		off[i] = swap_pager_block_offset(fidx);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request exists */
	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	++swb[reqpage]->swb_locked;

	/*
	 * at this point: "m" is a pointer to the array of vm_page_t's for
	 * paging I/O, "count" is the number of vm_page_t entries represented
	 * by "m", "object" is the vm_object_t for I/O, and "reqpage" is the
	 * index into "m" for the page actually faulted on
	 */
	spc = NULL;
	if ((count == 1) && ((spc = TAILQ_FIRST(&swap_pager_free)) != NULL)) {
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		swap_pager_free_count--;
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_vnbufs.le_next = NOLIST;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		if (tsleep(bp, PVM, "swread", hz * 20)) {
			printf("swap_pager: indefinite wait buffer: device: %d, blkno: %d, size: %d\n",
			    bp->b_dev, bp->b_blkno, bp->b_bcount);
		}
	}

	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	/*
	 * relpbuf does this, but we maintain our own buffer list also...
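	 * (only the getpbuf() path is released via relpbuf(); when a
	 * private spc buffer was used above, the vnode association must
	 * be dropped by hand, hence the explicit pbrelvp() here)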
	 */
	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);
	swb[reqpage]->swb_locked--;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	if (spc) {
		m[reqpage]->object->last_read = m[reqpage]->pindex;
		if (bp->b_flags & B_WANTED)
			wakeup(bp);
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
		if (rv == VM_PAGER_OK) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
			m[reqpage]->valid = VM_PAGE_BITS_ALL;
			m[reqpage]->dirty = 0;
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				m[i]->flags &= ~PG_ZERO;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere. (it already is in the
					 * object).  After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for
					 * this page we now tell them that it
					 * is ok to use
					 */
					m[i]->valid = VM_PAGE_BITS_ALL;
					PAGE_WAKEUP(m[i]);
				}
			}

			m[reqpage]->object->last_read = m[count - 1]->pindex;

			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever multiple pages are brought in.  We
			 * must set the dirty bits so that the page contents
			 * will be preserved.
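			 * (SWAPLOW means fewer than 512 pages worth of
			 * swap blocks remain; giving the just-read blocks
			 * back trades a possible future re-pageout for
			 * immediate swap headroom)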
			 */
			if (SWAPLOW) {
				for (i = 0; i < count; i++) {
					m[i]->dirty = VM_PAGE_BITS_ALL;
				}
				swap_pager_freespace(object, m[0]->pindex + paging_offset, count);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, fidx;
	swp_clean_t spc;
	vm_pindex_t paging_pindex;
	int reqaddr[count];
	int failed;

	if (vm_swap_size)
		no_swap_space = 0;
	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}
	spc = NULL;

	object = m[0]->object;
	paging_pindex = OFF_TO_IDX(object->paging_offset);

	failed = 0;
	for (j = 0; j < count; j++) {
		fidx = m[j]->pindex + paging_pindex;
		ix = swap_pager_block_index(fidx);
		swb[j] = 0;
		if (ix >= object->un_pager.swp.swp_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &object->un_pager.swp.swp_blocks[ix];
		swb[j]->swb_locked++;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(fidx);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			daddr_t blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks for
			 * small objects)
			 */
			if ((off == 0) && ((fidx + ntoget) > object->size)) {
				ntoget = object->size - fidx;
			}
retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
			    &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
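				 * (one reclaim pass only: a second failure
				 * defers the page with VM_PAGER_AGAIN and
				 * marks the pager full)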
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed ||
		    (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}
	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY) {
			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
			    m[i]->pindex, i);
		}
	}

	/*
	 * For synchronous writes, we clean up all completed async pageouts.
	 */
	if (sync == TRUE) {
		swap_pager_sync();
	}
	kva = 0;

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free_count <= 3) {
		s = splbio();
		if (curproc == pageproc) {
retryfree:
			/*
			 * pageout daemon needs a swap control block
			 */
			swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT | SWAP_FREE_NEEDED;
			/*
			 * if it does not get one within a short time, then
			 * there is a potential deadlock, so we go on trying
			 * to free pages.  It is important to block here as opposed
			 * to returning, thereby allowing the pageout daemon to continue.
			 * It is likely that pageout daemon will start suboptimally
			 * reclaiming vnode backed pages if we don't block.  Since the
			 * I/O subsystem is probably already fully utilized, might as
			 * well wait.
			 */
			if (tsleep(&swap_pager_free, PVM, "swpfre", hz / 5)) {
				swap_pager_sync();
				if (swap_pager_free_count <= 3) {
					splx(s);
					return VM_PAGER_AGAIN;
				}
			} else {
				/*
				 * we make sure that pageouts aren't taking up all of
				 * the free swap control blocks.
				 */
				swap_pager_sync();
				if (swap_pager_free_count <= 3) {
					goto retryfree;
				}
			}
		} else {
			pagedaemon_wakeup();
			while (swap_pager_free_count <= 3) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep(&swap_pager_free, PVM, "swpfre", 0);
				pagedaemon_wakeup();
			}
		}
		splx(s);
	}
	spc = TAILQ_FIRST(&swap_pager_free);
	if (spc == NULL)
		panic("swap_pager_putpages: free queue is empty, %d expected\n", swap_pager_free_count);
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
	swap_pager_free_count--;

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		fidx = m[i]->pindex + paging_pindex;
		off = swap_pager_block_offset(fidx);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		swb[i]->swb_locked--;
	}

	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;
	bp->b_vnbufs.le_next = NOLIST;

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
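	 * (the entry moves from the inuse to the done list in
	 * swap_pager_iodone() and is recycled onto the free list by
	 * swap_pager_sync(); synchronous writes bypass the queues and
	 * complete through swap_pager_iodone1() instead)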
	 */
	s = splbio();
	if (sync == FALSE) {
		spc->spc_flags = 0;
		spc->spc_object = object;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		object->un_pager.swp.swp_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		object->un_pager.swp.swp_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if (sync == FALSE) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_sync();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	object->un_pager.swp.swp_poip--;
	if (object->un_pager.swp.swp_poip == 0)
		wakeup(object);

	if (bp->b_vp)
		pbrelvp(bp);
	if (bp->b_flags & B_WANTED)
		wakeup(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization: if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->queue != PQ_ACTIVE) &&
				    ((m[i]->flags & (PG_WANTED | PG_REFERENCED)) ||
				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	swap_pager_free_count++;
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		wakeup(&swap_pager_free);
	}
	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
		pagedaemon_wakeup();
	swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
	return (rv);
}

static void
swap_pager_sync()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (TAILQ_FIRST(&swap_pager_done) == NULL)
		return;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done at splbio()
		 * to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = TAILQ_FIRST(&swap_pager_done)) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in the scan
		 * but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
		splx(s);
	}

	return;
}

static void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	object->paging_in_progress -= spc->spc_count;
	if ((object->paging_in_progress == 0) &&
	    (object->flags & OBJ_PIPWNT)) {
		object->flags &= ~OBJ_PIPWNT;
		wakeup(object);
	}

	/*
	 * If no error, mark as clean and inform the pmap system.  If error,
	 * mark as dirty so we will try again.  (XXX could get stuck doing
	 * this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->dirty = 0;
			if ((spc->spc_m[i]->queue != PQ_ACTIVE) &&
			    ((spc->spc_m[i]->flags & PG_WANTED) ||
			    pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
				vm_page_activate(spc->spc_m[i]);
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone - completion routine for async pageouts
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
	}

	if (bp->b_vp)
		pbrelvp(bp);

	/*
	if (bp->b_flags & B_WANTED)
	*/
	wakeup(bp);

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
		wakeup(spc->spc_object);
	}
	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    TAILQ_FIRST(&swap_pager_inuse) == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup(&swap_pager_free);
	}

	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
		pagedaemon_wakeup();
	}

	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) ||
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
		pagedaemon_wakeup();
	}
	splx(s);
}