1 /* 2 * Copyright (c) 1990 University of Utah. 3 * Copyright (c) 1991, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the Systems Programming Group of the University of Utah Computer 8 * Science Department. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ 39 * 40 * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 41 */ 42 43 /* 44 * Quick hack to page to dedicated partition(s). 45 * TODO: 46 * Add multiprocessor locks 47 * Deal with async writes in a better fashion 48 */ 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/proc.h> 53 #include <sys/buf.h> 54 #include <sys/map.h> 55 #include <sys/vnode.h> 56 #include <sys/malloc.h> 57 58 #include <miscfs/specfs/specdev.h> 59 60 #include <vm/vm.h> 61 #include <vm/vm_page.h> 62 #include <vm/vm_pageout.h> 63 #include <vm/swap_pager.h> 64 65 #define NSWSIZES 16 /* size of swtab */ 66 #define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ 67 #ifndef NPENDINGIO 68 #define NPENDINGIO 64 /* max # of pending cleans */ 69 #endif 70 71 #ifdef DEBUG 72 int swpagerdebug = 0x100; 73 #define SDB_FOLLOW 0x001 74 #define SDB_INIT 0x002 75 #define SDB_ALLOC 0x004 76 #define SDB_IO 0x008 77 #define SDB_WRITE 0x010 78 #define SDB_FAIL 0x020 79 #define SDB_ALLOCBLK 0x040 80 #define SDB_FULL 0x080 81 #define SDB_ANOM 0x100 82 #define SDB_ANOMPANIC 0x200 83 #define SDB_CLUSTER 0x400 84 #define SDB_PARANOIA 0x800 85 #endif 86 87 TAILQ_HEAD(swpclean, swpagerclean); 88 89 struct swpagerclean { 90 TAILQ_ENTRY(swpagerclean) spc_list; 91 int spc_flags; 92 struct buf *spc_bp; 93 sw_pager_t spc_swp; 94 vm_offset_t spc_kva; 95 vm_page_t spc_m; 96 int spc_npages; 97 } swcleanlist[NPENDINGIO]; 98 typedef struct swpagerclean *swp_clean_t; 99 100 /* spc_flags values */ 101 #define SPC_FREE 0x00 102 #define SPC_BUSY 0x01 103 #define SPC_DONE 0x02 104 #define SPC_ERROR 0x04 105 106 struct swtab { 107 vm_size_t st_osize; /* size of object (bytes) */ 108 int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */ 109 #ifdef DEBUG 110 u_long st_inuse; /* number in this range in use */ 111 u_long st_usecnt; /* total used of this size */ 112 #endif 113 } swtab[NSWSIZES+1]; 114 115 #ifdef DEBUG 116 int swap_pager_poip; /* pageouts in progress */ 117 int swap_pager_piip; /* pageins in progress */ 118 #endif 119 120 int swap_pager_maxcluster; /* maximum cluster size */ 121 int swap_pager_npendingio; /* number of pager clean structs */ 122 123 struct swpclean swap_pager_inuse; /* list of pending page cleans */ 124 struct swpclean swap_pager_free; /* list of free pager clean structs */ 125 struct pagerlst swap_pager_list; /* list of "named" anon regions */ 126 127 static void swap_pager_init __P((void)); 128 static vm_pager_t swap_pager_alloc 129 __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); 130 static void swap_pager_clean __P((int)); 131 #ifdef DEBUG 132 static void swap_pager_clean_check __P((vm_page_t *, int, int)); 133 #endif 134 static void swap_pager_cluster 135 __P((vm_pager_t, vm_offset_t, 136 vm_offset_t *, vm_offset_t *)); 137 static void swap_pager_dealloc __P((vm_pager_t)); 138 static int swap_pager_getpage 139 __P((vm_pager_t, vm_page_t *, int, boolean_t)); 140 static boolean_t swap_pager_haspage __P((vm_pager_t, vm_offset_t)); 141 static int swap_pager_io __P((sw_pager_t, vm_page_t *, int, int)); 142 static void swap_pager_iodone __P((struct buf *)); 143 static int swap_pager_putpage 144 __P((vm_pager_t, vm_page_t *, int, boolean_t)); 145 146 struct pagerops swappagerops = { 147 swap_pager_init, 148 swap_pager_alloc, 149 swap_pager_dealloc, 150 swap_pager_getpage, 151 swap_pager_putpage, 152 swap_pager_haspage, 153 swap_pager_cluster 154 }; 155 156 static void 157 swap_pager_init() 158 { 159 register swp_clean_t spc; 160 register int i, bsize; 161 extern int dmmin, dmmax; 162 int maxbsize; 163 164 #ifdef DEBUG 165 if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) 166 printf("swpg_init()\n"); 167 #endif 168 dfltpagerops = &swappagerops; 169 TAILQ_INIT(&swap_pager_list); 170 171 /* 172 * Allocate async IO structures. 173 * 174 * XXX it would be nice if we could do this dynamically based on 175 * the value of nswbuf (since we are ultimately limited by that) 176 * but neither nswbuf or malloc has been initialized yet. So the 177 * structs are statically allocated above. 178 */ 179 swap_pager_npendingio = NPENDINGIO; 180 181 /* 182 * Initialize clean lists 183 */ 184 TAILQ_INIT(&swap_pager_inuse); 185 TAILQ_INIT(&swap_pager_free); 186 for (i = 0, spc = swcleanlist; i < swap_pager_npendingio; i++, spc++) { 187 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 188 spc->spc_flags = SPC_FREE; 189 } 190 191 /* 192 * Calculate the swap allocation constants. 193 */ 194 if (dmmin == 0) { 195 dmmin = DMMIN; 196 if (dmmin < CLBYTES/DEV_BSIZE) 197 dmmin = CLBYTES/DEV_BSIZE; 198 } 199 if (dmmax == 0) 200 dmmax = DMMAX; 201 202 /* 203 * Fill in our table of object size vs. allocation size 204 */ 205 bsize = btodb(PAGE_SIZE); 206 if (bsize < dmmin) 207 bsize = dmmin; 208 maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); 209 if (maxbsize > dmmax) 210 maxbsize = dmmax; 211 for (i = 0; i < NSWSIZES; i++) { 212 swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); 213 swtab[i].st_bsize = bsize; 214 if (bsize <= btodb(MAXPHYS)) 215 swap_pager_maxcluster = dbtob(bsize); 216 #ifdef DEBUG 217 if (swpagerdebug & SDB_INIT) 218 printf("swpg_init: ix %d, size %x, bsize %x\n", 219 i, swtab[i].st_osize, swtab[i].st_bsize); 220 #endif 221 if (bsize >= maxbsize) 222 break; 223 bsize *= 2; 224 } 225 swtab[i].st_osize = 0; 226 swtab[i].st_bsize = bsize; 227 } 228 229 /* 230 * Allocate a pager structure and associated resources. 231 * Note that if we are called from the pageout daemon (handle == NULL) 232 * we should not wait for memory as it could resulting in deadlock. 233 */ 234 static vm_pager_t 235 swap_pager_alloc(handle, size, prot, foff) 236 caddr_t handle; 237 register vm_size_t size; 238 vm_prot_t prot; 239 vm_offset_t foff; 240 { 241 register vm_pager_t pager; 242 register sw_pager_t swp; 243 struct swtab *swt; 244 int waitok; 245 246 #ifdef DEBUG 247 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) 248 printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot); 249 #endif 250 /* 251 * If this is a "named" anonymous region, look it up and 252 * return the appropriate pager if it exists. 253 */ 254 if (handle) { 255 pager = vm_pager_lookup(&swap_pager_list, handle); 256 if (pager != NULL) { 257 /* 258 * Use vm_object_lookup to gain a reference 259 * to the object and also to remove from the 260 * object cache. 261 */ 262 if (vm_object_lookup(pager) == NULL) 263 panic("swap_pager_alloc: bad object"); 264 return(pager); 265 } 266 } 267 /* 268 * Pager doesn't exist, allocate swap management resources 269 * and initialize. 270 */ 271 waitok = handle ? M_WAITOK : M_NOWAIT; 272 pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); 273 if (pager == NULL) 274 return(NULL); 275 swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); 276 if (swp == NULL) { 277 #ifdef DEBUG 278 if (swpagerdebug & SDB_FAIL) 279 printf("swpg_alloc: swpager malloc failed\n"); 280 #endif 281 free((caddr_t)pager, M_VMPAGER); 282 return(NULL); 283 } 284 size = round_page(size); 285 for (swt = swtab; swt->st_osize; swt++) 286 if (size <= swt->st_osize) 287 break; 288 #ifdef DEBUG 289 swt->st_inuse++; 290 swt->st_usecnt++; 291 #endif 292 swp->sw_osize = size; 293 swp->sw_bsize = swt->st_bsize; 294 swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; 295 swp->sw_blocks = (sw_blk_t) 296 malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), 297 M_VMPGDATA, M_NOWAIT); 298 if (swp->sw_blocks == NULL) { 299 free((caddr_t)swp, M_VMPGDATA); 300 free((caddr_t)pager, M_VMPAGER); 301 #ifdef DEBUG 302 if (swpagerdebug & SDB_FAIL) 303 printf("swpg_alloc: sw_blocks malloc failed\n"); 304 swt->st_inuse--; 305 swt->st_usecnt--; 306 #endif 307 return(FALSE); 308 } 309 bzero((caddr_t)swp->sw_blocks, 310 swp->sw_nblocks * sizeof(*swp->sw_blocks)); 311 swp->sw_poip = 0; 312 if (handle) { 313 vm_object_t object; 314 315 swp->sw_flags = SW_NAMED; 316 TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list); 317 /* 318 * Consistant with other pagers: return with object 319 * referenced. Can't do this with handle == NULL 320 * since it might be the pageout daemon calling. 321 */ 322 object = vm_object_allocate(size); 323 vm_object_enter(object, pager); 324 vm_object_setpager(object, pager, 0, FALSE); 325 } else { 326 swp->sw_flags = 0; 327 pager->pg_list.tqe_next = NULL; 328 pager->pg_list.tqe_prev = NULL; 329 } 330 pager->pg_handle = handle; 331 pager->pg_ops = &swappagerops; 332 pager->pg_type = PG_SWAP; 333 pager->pg_flags = PG_CLUSTERPUT; 334 pager->pg_data = swp; 335 336 #ifdef DEBUG 337 if (swpagerdebug & SDB_ALLOC) 338 printf("swpg_alloc: pg_data %x, %x of %x at %x\n", 339 swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); 340 #endif 341 return(pager); 342 } 343 344 static void 345 swap_pager_dealloc(pager) 346 vm_pager_t pager; 347 { 348 register int i; 349 register sw_blk_t bp; 350 register sw_pager_t swp; 351 struct swtab *swt; 352 int s; 353 354 #ifdef DEBUG 355 /* save panic time state */ 356 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 357 return; 358 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) 359 printf("swpg_dealloc(%x)\n", pager); 360 #endif 361 /* 362 * Remove from list right away so lookups will fail if we 363 * block for pageout completion. 364 */ 365 swp = (sw_pager_t) pager->pg_data; 366 if (swp->sw_flags & SW_NAMED) { 367 TAILQ_REMOVE(&swap_pager_list, pager, pg_list); 368 swp->sw_flags &= ~SW_NAMED; 369 } 370 #ifdef DEBUG 371 for (swt = swtab; swt->st_osize; swt++) 372 if (swp->sw_osize <= swt->st_osize) 373 break; 374 swt->st_inuse--; 375 #endif 376 377 /* 378 * Wait for all pageouts to finish and remove 379 * all entries from cleaning list. 380 */ 381 s = splbio(); 382 while (swp->sw_poip) { 383 swp->sw_flags |= SW_WANTED; 384 (void) tsleep(swp, PVM, "swpgdealloc", 0); 385 } 386 splx(s); 387 swap_pager_clean(B_WRITE); 388 389 /* 390 * Free left over swap blocks 391 */ 392 for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) 393 if (bp->swb_block) { 394 #ifdef DEBUG 395 if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL)) 396 printf("swpg_dealloc: blk %x\n", 397 bp->swb_block); 398 #endif 399 rmfree(swapmap, swp->sw_bsize, bp->swb_block); 400 } 401 /* 402 * Free swap management resources 403 */ 404 free((caddr_t)swp->sw_blocks, M_VMPGDATA); 405 free((caddr_t)swp, M_VMPGDATA); 406 free((caddr_t)pager, M_VMPAGER); 407 } 408 409 static int 410 swap_pager_getpage(pager, mlist, npages, sync) 411 vm_pager_t pager; 412 vm_page_t *mlist; 413 int npages; 414 boolean_t sync; 415 { 416 #ifdef DEBUG 417 if (swpagerdebug & SDB_FOLLOW) 418 printf("swpg_getpage(%x, %x, %x, %x)\n", 419 pager, mlist, npages, sync); 420 #endif 421 return(swap_pager_io((sw_pager_t)pager->pg_data, 422 mlist, npages, B_READ)); 423 } 424 425 static int 426 swap_pager_putpage(pager, mlist, npages, sync) 427 vm_pager_t pager; 428 vm_page_t *mlist; 429 int npages; 430 boolean_t sync; 431 { 432 int flags; 433 434 #ifdef DEBUG 435 if (swpagerdebug & SDB_FOLLOW) 436 printf("swpg_putpage(%x, %x, %x, %x)\n", 437 pager, mlist, npages, sync); 438 #endif 439 if (pager == NULL) { 440 swap_pager_clean(B_WRITE); 441 return (VM_PAGER_OK); /* ??? */ 442 } 443 flags = B_WRITE; 444 if (!sync) 445 flags |= B_ASYNC; 446 return(swap_pager_io((sw_pager_t)pager->pg_data, 447 mlist, npages, flags)); 448 } 449 450 static boolean_t 451 swap_pager_haspage(pager, offset) 452 vm_pager_t pager; 453 vm_offset_t offset; 454 { 455 register sw_pager_t swp; 456 register sw_blk_t swb; 457 int ix; 458 459 #ifdef DEBUG 460 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) 461 printf("swpg_haspage(%x, %x) ", pager, offset); 462 #endif 463 swp = (sw_pager_t) pager->pg_data; 464 ix = offset / dbtob(swp->sw_bsize); 465 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 466 #ifdef DEBUG 467 if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK)) 468 printf("swpg_haspage: %x bad offset %x, ix %x\n", 469 swp->sw_blocks, offset, ix); 470 #endif 471 return(FALSE); 472 } 473 swb = &swp->sw_blocks[ix]; 474 if (swb->swb_block) 475 ix = atop(offset % dbtob(swp->sw_bsize)); 476 #ifdef DEBUG 477 if (swpagerdebug & SDB_ALLOCBLK) 478 printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix); 479 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) 480 printf("-> %c\n", 481 "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); 482 #endif 483 if (swb->swb_block && (swb->swb_mask & (1 << ix))) 484 return(TRUE); 485 return(FALSE); 486 } 487 488 static void 489 swap_pager_cluster(pager, offset, loffset, hoffset) 490 vm_pager_t pager; 491 vm_offset_t offset; 492 vm_offset_t *loffset; 493 vm_offset_t *hoffset; 494 { 495 sw_pager_t swp; 496 register int bsize; 497 vm_offset_t loff, hoff; 498 499 #ifdef DEBUG 500 if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER)) 501 printf("swpg_cluster(%x, %x) ", pager, offset); 502 #endif 503 swp = (sw_pager_t) pager->pg_data; 504 bsize = dbtob(swp->sw_bsize); 505 if (bsize > swap_pager_maxcluster) 506 bsize = swap_pager_maxcluster; 507 508 loff = offset - (offset % bsize); 509 if (loff >= swp->sw_osize) 510 panic("swap_pager_cluster: bad offset"); 511 512 hoff = loff + bsize; 513 if (hoff > swp->sw_osize) 514 hoff = swp->sw_osize; 515 516 *loffset = loff; 517 *hoffset = hoff; 518 #ifdef DEBUG 519 if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER)) 520 printf("returns [%x-%x]\n", loff, hoff); 521 #endif 522 } 523 524 /* 525 * Scaled down version of swap(). 526 * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed. 527 * BOGUS: lower level IO routines expect a KVA so we have to map our 528 * provided physical page into the KVA to keep them happy. 529 */ 530 static int 531 swap_pager_io(swp, mlist, npages, flags) 532 register sw_pager_t swp; 533 vm_page_t *mlist; 534 int npages; 535 int flags; 536 { 537 register struct buf *bp; 538 register sw_blk_t swb; 539 register int s; 540 int ix, mask; 541 boolean_t rv; 542 vm_offset_t kva, off; 543 swp_clean_t spc; 544 vm_page_t m; 545 546 #ifdef DEBUG 547 /* save panic time state */ 548 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 549 return (VM_PAGER_FAIL); /* XXX: correct return? */ 550 if (swpagerdebug & (SDB_FOLLOW|SDB_IO)) 551 printf("swpg_io(%x, %x, %x, %x)\n", swp, mlist, npages, flags); 552 if (flags & B_READ) { 553 if (flags & B_ASYNC) 554 panic("swap_pager_io: cannot do ASYNC reads"); 555 if (npages != 1) 556 panic("swap_pager_io: cannot do clustered reads"); 557 } 558 #endif 559 560 /* 561 * First determine if the page exists in the pager if this is 562 * a sync read. This quickly handles cases where we are 563 * following shadow chains looking for the top level object 564 * with the page. 565 */ 566 m = *mlist; 567 off = m->offset + m->object->paging_offset; 568 ix = off / dbtob(swp->sw_bsize); 569 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 570 #ifdef DEBUG 571 if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) { 572 printf("swap_pager_io: no swap block on write\n"); 573 return(VM_PAGER_BAD); 574 } 575 #endif 576 return(VM_PAGER_FAIL); 577 } 578 swb = &swp->sw_blocks[ix]; 579 off = off % dbtob(swp->sw_bsize); 580 if ((flags & B_READ) && 581 (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0)) 582 return(VM_PAGER_FAIL); 583 584 /* 585 * For reads (pageins) and synchronous writes, we clean up 586 * all completed async pageouts. 587 */ 588 if ((flags & B_ASYNC) == 0) { 589 s = splbio(); 590 swap_pager_clean(flags&B_READ); 591 #ifdef DEBUG 592 if (swpagerdebug & SDB_PARANOIA) 593 swap_pager_clean_check(mlist, npages, flags&B_READ); 594 #endif 595 splx(s); 596 } 597 /* 598 * For async writes (pageouts), we cleanup completed pageouts so 599 * that all available resources are freed. Also tells us if this 600 * page is already being cleaned. If it is, or no resources 601 * are available, we try again later. 602 */ 603 else { 604 swap_pager_clean(B_WRITE); 605 #ifdef DEBUG 606 if (swpagerdebug & SDB_PARANOIA) 607 swap_pager_clean_check(mlist, npages, B_WRITE); 608 #endif 609 if (swap_pager_free.tqh_first == NULL) { 610 #ifdef DEBUG 611 if (swpagerdebug & SDB_FAIL) 612 printf("%s: no available io headers\n", 613 "swap_pager_io"); 614 #endif 615 return(VM_PAGER_AGAIN); 616 } 617 } 618 619 /* 620 * Allocate a swap block if necessary. 621 */ 622 if (swb->swb_block == 0) { 623 swb->swb_block = rmalloc(swapmap, swp->sw_bsize); 624 if (swb->swb_block == 0) { 625 #ifdef DEBUG 626 if (swpagerdebug & SDB_FAIL) 627 printf("swpg_io: rmalloc of %x failed\n", 628 swp->sw_bsize); 629 #endif 630 /* 631 * XXX this is technically a resource shortage that 632 * should return AGAIN, but the situation isn't likely 633 * to be remedied just by delaying a little while and 634 * trying again (the pageout daemon's current response 635 * to AGAIN) so we just return FAIL. 636 */ 637 return(VM_PAGER_FAIL); 638 } 639 #ifdef DEBUG 640 if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK)) 641 printf("swpg_io: %x alloc blk %x at ix %x\n", 642 swp->sw_blocks, swb->swb_block, ix); 643 #endif 644 } 645 646 /* 647 * Allocate a kernel virtual address and initialize so that PTE 648 * is available for lower level IO drivers. 649 */ 650 kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC)); 651 if (kva == NULL) { 652 #ifdef DEBUG 653 if (swpagerdebug & SDB_FAIL) 654 printf("%s: no KVA space to map pages\n", 655 "swap_pager_io"); 656 #endif 657 return(VM_PAGER_AGAIN); 658 } 659 660 /* 661 * Get a swap buffer header and initialize it. 662 */ 663 s = splbio(); 664 while (bswlist.b_actf == NULL) { 665 #ifdef DEBUG 666 if (swpagerdebug & SDB_ANOM) 667 printf("swap_pager_io: wait on swbuf for %x (%d)\n", 668 m, flags); 669 #endif 670 bswlist.b_flags |= B_WANTED; 671 tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0); 672 } 673 bp = bswlist.b_actf; 674 bswlist.b_actf = bp->b_actf; 675 splx(s); 676 bp->b_flags = B_BUSY | (flags & B_READ); 677 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 678 bp->b_data = (caddr_t)kva; 679 bp->b_blkno = swb->swb_block + btodb(off); 680 VHOLD(swapdev_vp); 681 bp->b_vp = swapdev_vp; 682 if (swapdev_vp->v_type == VBLK) 683 bp->b_dev = swapdev_vp->v_rdev; 684 bp->b_bcount = npages * PAGE_SIZE; 685 686 /* 687 * For writes we set up additional buffer fields, record a pageout 688 * in progress and mark that these swap blocks are now allocated. 689 */ 690 if ((bp->b_flags & B_READ) == 0) { 691 bp->b_dirtyoff = 0; 692 bp->b_dirtyend = npages * PAGE_SIZE; 693 swapdev_vp->v_numoutput++; 694 s = splbio(); 695 swp->sw_poip++; 696 splx(s); 697 mask = (~(~0 << npages)) << atop(off); 698 #ifdef DEBUG 699 swap_pager_poip++; 700 if (swpagerdebug & SDB_WRITE) 701 printf("swpg_io: write: bp=%x swp=%x poip=%d\n", 702 bp, swp, swp->sw_poip); 703 if ((swpagerdebug & SDB_ALLOCBLK) && 704 (swb->swb_mask & mask) != mask) 705 printf("swpg_io: %x write %d pages at %x+%x\n", 706 swp->sw_blocks, npages, swb->swb_block, 707 atop(off)); 708 if (swpagerdebug & SDB_CLUSTER) 709 printf("swpg_io: off=%x, npg=%x, mask=%x, bmask=%x\n", 710 off, npages, mask, swb->swb_mask); 711 #endif 712 swb->swb_mask |= mask; 713 } 714 /* 715 * If this is an async write we set up still more buffer fields 716 * and place a "cleaning" entry on the inuse queue. 717 */ 718 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { 719 #ifdef DEBUG 720 if (swap_pager_free.tqh_first == NULL) 721 panic("swpg_io: lost spc"); 722 #endif 723 spc = swap_pager_free.tqh_first; 724 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 725 #ifdef DEBUG 726 if (spc->spc_flags != SPC_FREE) 727 panic("swpg_io: bad free spc"); 728 #endif 729 spc->spc_flags = SPC_BUSY; 730 spc->spc_bp = bp; 731 spc->spc_swp = swp; 732 spc->spc_kva = kva; 733 /* 734 * Record the first page. This allows swap_pager_clean 735 * to efficiently handle the common case of a single page. 736 * For clusters, it allows us to locate the object easily 737 * and we then reconstruct the rest of the mlist from spc_kva. 738 */ 739 spc->spc_m = m; 740 spc->spc_npages = npages; 741 bp->b_flags |= B_CALL; 742 bp->b_iodone = swap_pager_iodone; 743 s = splbio(); 744 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); 745 splx(s); 746 } 747 748 /* 749 * Finally, start the IO operation. 750 * If it is async we are all done, otherwise we must wait for 751 * completion and cleanup afterwards. 752 */ 753 #ifdef DEBUG 754 if (swpagerdebug & SDB_IO) 755 printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n", 756 bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m)); 757 #endif 758 VOP_STRATEGY(bp); 759 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { 760 #ifdef DEBUG 761 if (swpagerdebug & SDB_IO) 762 printf("swpg_io: IO started: bp %x\n", bp); 763 #endif 764 return(VM_PAGER_PEND); 765 } 766 s = splbio(); 767 #ifdef DEBUG 768 if (flags & B_READ) 769 swap_pager_piip++; 770 else 771 swap_pager_poip++; 772 #endif 773 while ((bp->b_flags & B_DONE) == 0) 774 (void) tsleep(bp, PVM, "swpgio", 0); 775 if ((flags & B_READ) == 0) 776 --swp->sw_poip; 777 #ifdef DEBUG 778 if (flags & B_READ) 779 --swap_pager_piip; 780 else 781 --swap_pager_poip; 782 #endif 783 rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK; 784 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 785 bp->b_actf = bswlist.b_actf; 786 bswlist.b_actf = bp; 787 if (bp->b_vp) 788 brelvp(bp); 789 if (bswlist.b_flags & B_WANTED) { 790 bswlist.b_flags &= ~B_WANTED; 791 wakeup(&bswlist); 792 } 793 if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { 794 m->flags |= PG_CLEAN; 795 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 796 } 797 splx(s); 798 #ifdef DEBUG 799 if (swpagerdebug & SDB_IO) 800 printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv); 801 if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR) 802 printf("swpg_io: IO error\n"); 803 #endif 804 vm_pager_unmap_pages(kva, npages); 805 return(rv); 806 } 807 808 static void 809 swap_pager_clean(rw) 810 int rw; 811 { 812 register swp_clean_t spc; 813 register int s, i; 814 vm_object_t object; 815 vm_page_t m; 816 817 #ifdef DEBUG 818 /* save panic time state */ 819 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 820 return; 821 if (swpagerdebug & SDB_FOLLOW) 822 printf("swpg_clean(%x)\n", rw); 823 #endif 824 825 for (;;) { 826 /* 827 * Look up and removal from inuse list must be done 828 * at splbio() to avoid conflicts with swap_pager_iodone. 829 */ 830 s = splbio(); 831 for (spc = swap_pager_inuse.tqh_first; 832 spc != NULL; 833 spc = spc->spc_list.tqe_next) { 834 /* 835 * If the operation is done, remove it from the 836 * list and process it. 837 * 838 * XXX if we can't get the object lock we also 839 * leave it on the list and try again later. 840 * Is there something better we could do? 841 */ 842 if ((spc->spc_flags & SPC_DONE) && 843 vm_object_lock_try(spc->spc_m->object)) { 844 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); 845 break; 846 } 847 } 848 splx(s); 849 850 /* 851 * No operations done, thats all we can do for now. 852 */ 853 if (spc == NULL) 854 break; 855 856 /* 857 * Found a completed operation so finish it off. 858 * Note: no longer at splbio since entry is off the list. 859 */ 860 m = spc->spc_m; 861 object = m->object; 862 863 /* 864 * Process each page in the cluster. 865 * The first page is explicitly kept in the cleaning 866 * entry, others must be reconstructed from the KVA. 867 */ 868 for (i = 0; i < spc->spc_npages; i++) { 869 if (i) 870 m = vm_pager_atop(spc->spc_kva + ptoa(i)); 871 /* 872 * If no error mark as clean and inform the pmap 873 * system. If there was an error, mark as dirty 874 * so we will try again. 875 * 876 * XXX could get stuck doing this, should give up 877 * after awhile. 878 */ 879 if (spc->spc_flags & SPC_ERROR) { 880 printf("%s: clean of page %x failed\n", 881 "swap_pager_clean", 882 VM_PAGE_TO_PHYS(m)); 883 m->flags |= PG_LAUNDRY; 884 } else { 885 m->flags |= PG_CLEAN; 886 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 887 } 888 m->flags &= ~PG_BUSY; 889 PAGE_WAKEUP(m); 890 } 891 892 /* 893 * Done with the object, decrement the paging count 894 * and unlock it. 895 */ 896 if (--object->paging_in_progress == 0) 897 wakeup(object); 898 vm_object_unlock(object); 899 900 /* 901 * Free up KVM used and put the entry back on the list. 902 */ 903 vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages); 904 spc->spc_flags = SPC_FREE; 905 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 906 #ifdef DEBUG 907 if (swpagerdebug & SDB_WRITE) 908 printf("swpg_clean: free spc %x\n", spc); 909 #endif 910 } 911 } 912 913 #ifdef DEBUG 914 static void 915 swap_pager_clean_check(mlist, npages, rw) 916 vm_page_t *mlist; 917 int npages; 918 int rw; 919 { 920 register swp_clean_t spc; 921 boolean_t bad; 922 int i, j, s; 923 vm_page_t m; 924 925 if (panicstr) 926 return; 927 928 bad = FALSE; 929 s = splbio(); 930 for (spc = swap_pager_inuse.tqh_first; 931 spc != NULL; 932 spc = spc->spc_list.tqe_next) { 933 for (j = 0; j < spc->spc_npages; j++) { 934 m = vm_pager_atop(spc->spc_kva + ptoa(j)); 935 for (i = 0; i < npages; i++) 936 if (m == mlist[i]) { 937 if (swpagerdebug & SDB_ANOM) 938 printf( 939 "swpg_clean_check: %s: page %x on list, flags %x\n", 940 rw == B_WRITE ? "write" : "read", mlist[i], spc->spc_flags); 941 bad = TRUE; 942 } 943 } 944 } 945 splx(s); 946 if (bad) 947 panic("swpg_clean_check"); 948 } 949 #endif 950 951 static void 952 swap_pager_iodone(bp) 953 register struct buf *bp; 954 { 955 register swp_clean_t spc; 956 daddr_t blk; 957 int s; 958 959 #ifdef DEBUG 960 /* save panic time state */ 961 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 962 return; 963 if (swpagerdebug & SDB_FOLLOW) 964 printf("swpg_iodone(%x)\n", bp); 965 #endif 966 s = splbio(); 967 for (spc = swap_pager_inuse.tqh_first; 968 spc != NULL; 969 spc = spc->spc_list.tqe_next) 970 if (spc->spc_bp == bp) 971 break; 972 #ifdef DEBUG 973 if (spc == NULL) 974 panic("swap_pager_iodone: bp not found"); 975 #endif 976 977 spc->spc_flags &= ~SPC_BUSY; 978 spc->spc_flags |= SPC_DONE; 979 if (bp->b_flags & B_ERROR) 980 spc->spc_flags |= SPC_ERROR; 981 spc->spc_bp = NULL; 982 blk = bp->b_blkno; 983 984 #ifdef DEBUG 985 --swap_pager_poip; 986 if (swpagerdebug & SDB_WRITE) 987 printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n", 988 bp, spc->spc_swp, spc->spc_swp->sw_flags, 989 spc, spc->spc_swp->sw_poip); 990 #endif 991 992 spc->spc_swp->sw_poip--; 993 if (spc->spc_swp->sw_flags & SW_WANTED) { 994 spc->spc_swp->sw_flags &= ~SW_WANTED; 995 wakeup(spc->spc_swp); 996 } 997 998 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 999 bp->b_actf = bswlist.b_actf; 1000 bswlist.b_actf = bp; 1001 if (bp->b_vp) 1002 brelvp(bp); 1003 if (bswlist.b_flags & B_WANTED) { 1004 bswlist.b_flags &= ~B_WANTED; 1005 wakeup(&bswlist); 1006 } 1007 wakeup(&vm_pages_needed); 1008 splx(s); 1009 } 1010