/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_fault.c	8.4 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_fault.c,v 1.3 1994/08/02 07:55:18 davidg Exp $
 */

/*
 *	Page fault handling module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>


#define VM_FAULT_READ_AHEAD 4
#define VM_FAULT_READ_AHEAD_MIN 1
#define VM_FAULT_READ_BEHIND 3
#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)
extern int swap_pager_full;
extern int vm_pageout_proc_limit;

/*
 *	vm_fault:
 *
 *	Handle a page fault occurring at the given address,
 *	requiring the given permissions, in the map specified.
 *	If successful, the page is inserted into the
 *	associated physical map.
 *
 *	NOTE: the given address should be truncated to the
 *	proper page address.
 *
 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *	a standard error specifying why the fault is fatal is returned.
 *
 *
 *	The map in question must be referenced, and remains so.
 *	Caller may hold no locks.
 */
int
vm_fault(map, vaddr, fault_type, change_wiring)
	vm_map_t map;
	vm_offset_t vaddr;
	vm_prot_t fault_type;
	boolean_t change_wiring;
{
	vm_object_t first_object;
	vm_offset_t first_offset;
	vm_map_entry_t entry;
	register vm_object_t object;
	register vm_offset_t offset;
	vm_page_t m;
	vm_page_t first_m;
	vm_prot_t prot;
	int result;
	boolean_t wired;
	boolean_t su;
	boolean_t lookup_still_valid;
	boolean_t page_exists;
	vm_page_t old_m;
	vm_object_t next_object;
	vm_page_t marray[VM_FAULT_READ];
	int reqpage;
	int spl;
	int hardfault = 0;

	cnt.v_faults++;		/* needs lock XXX */
	/*
	 * Recovery actions
	 */
#define FREE_PAGE(m) { \
	PAGE_WAKEUP(m); \
	vm_page_lock_queues(); \
	vm_page_free(m); \
	vm_page_unlock_queues(); \
}

#define RELEASE_PAGE(m) { \
	PAGE_WAKEUP(m); \
	vm_page_lock_queues(); \
	vm_page_activate(m); \
	vm_page_unlock_queues(); \
}

#define UNLOCK_MAP { \
	if (lookup_still_valid) { \
		vm_map_lookup_done(map, entry); \
		lookup_still_valid = FALSE; \
	} \
}

#define UNLOCK_THINGS { \
	object->paging_in_progress--; \
	if (object->paging_in_progress == 0) \
		wakeup((caddr_t)object); \
	vm_object_unlock(object); \
	if (object != first_object) { \
		vm_object_lock(first_object); \
		FREE_PAGE(first_m); \
		first_object->paging_in_progress--; \
		if (first_object->paging_in_progress == 0) \
			wakeup((caddr_t)first_object); \
		vm_object_unlock(first_object); \
	} \
	UNLOCK_MAP; \
}

#define UNLOCK_AND_DEALLOCATE { \
	UNLOCK_THINGS; \
	vm_object_deallocate(first_object); \
}


RetryFault: ;

	/*
	 * Find the backing store object and offset into
	 * it to begin the search.
	 */

	if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry,
	    &first_object, &first_offset,
	    &prot, &wired, &su)) != KERN_SUCCESS) {
		return(result);
	}
	lookup_still_valid = TRUE;

	if (wired)
		fault_type = prot;

	first_m = NULL;

	/*
	 * Make a reference to this object to
	 * prevent its disposal while we are messing with
	 * it.  Once we have the reference, the map is free
	 * to be diddled.  Since objects reference their
	 * shadows (and copies), they will stay around as well.
	 */

	vm_object_lock(first_object);

	first_object->ref_count++;
	first_object->paging_in_progress++;

	/*
	 * INVARIANTS (through entire routine):
	 *
	 * 1)	At all times, we must either have the object
	 *	lock or a busy page in some object to prevent
	 *	some other thread from trying to bring in
	 *	the same page.
	 *
	 *	Note that we cannot hold any locks during the
	 *	pager access or when waiting for memory, so
	 *	we use a busy page then.
	 *
	 *	Note also that we aren't as concerned about
	 *	more than one thread attempting to pager_data_unlock
	 *	the same page at once, so we don't hold the page
	 *	as busy then, but do record the highest unlock
	 *	value so far.  [Unlock requests may also be delivered
	 *	out of order.]
	 *
	 * 2)	Once we have a busy page, we must remove it from
	 *	the pageout queues, so that the pageout daemon
	 *	will not grab it away.
	 *
	 * 3)	To prevent another thread from racing us down the
	 *	shadow chain and entering a new page in the top
	 *	object before we do, we must keep a busy page in
	 *	the top object while following the shadow chain.
	 *
	 * 4)	We must increment paging_in_progress on any object
	 *	for which we have a busy page, to prevent
	 *	vm_object_collapse from removing the busy page
	 *	without our noticing.
	 */

	/*
	 * Search for the page at object/offset.
	 */

	object = first_object;
	offset = first_offset;

	/*
	 * See whether this page is resident
	 */

	while (TRUE) {
		m = vm_page_lookup(object, offset);
		if (m != NULL) {
			/*
			 * If the page is being brought in,
			 * wait for it and then retry.
			 */
			if (m->flags & PG_BUSY) {
				int s;
				UNLOCK_THINGS;
				s = splhigh();
				if (m->flags & PG_BUSY) {
					m->flags |= PG_WANTED;
					tsleep((caddr_t)m, PSWP, "vmpfw", 0);
				}
				splx(s);
				vm_object_deallocate(first_object);
				goto RetryFault;
			}

			/*
			 * Remove the page from the pageout daemon's
			 * reach while we play with it.
			 */

			vm_page_lock_queues();
			spl = splhigh();
			if (m->flags & PG_INACTIVE) {
				TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
				m->flags &= ~PG_INACTIVE;
				cnt.v_inactive_count--;
				cnt.v_reactivated++;
			}

			if (m->flags & PG_ACTIVE) {
				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
				m->flags &= ~PG_ACTIVE;
				cnt.v_active_count--;
			}
			splx(spl);
			vm_page_unlock_queues();

			/*
			 * Mark page busy for other threads.
			 */
			m->flags |= PG_BUSY;
			break;
		}

		if (((object->pager != NULL) &&
		    (!change_wiring || wired))
		    || (object == first_object)) {

#if 0
			if (curproc && (vaddr < VM_MAXUSER_ADDRESS) &&
			    (curproc->p_rlimit[RLIMIT_RSS].rlim_max <
			    curproc->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG)) {
				UNLOCK_AND_DEALLOCATE;
				vm_fault_free_pages(curproc);
				goto RetryFault;
			}
#endif

			if (swap_pager_full && !object->shadow && (!object->pager ||
			    (object->pager && object->pager->pg_type == PG_SWAP &&
			    !vm_pager_has_page(object->pager, offset+object->paging_offset)))) {
				if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) /* XXX */ {
					printf("Process %d killed by vm_fault -- out of swap\n", curproc->p_pid);
					psignal(curproc, SIGKILL);
					curproc->p_estcpu = 0;
					curproc->p_nice = PRIO_MIN;
					setpriority(curproc);
				}
			}

			/*
			 * Allocate a new page for this object/offset
			 * pair.
			 */

			m = vm_page_alloc(object, offset);

			if (m == NULL) {
				UNLOCK_AND_DEALLOCATE;
				VM_WAIT;
				goto RetryFault;
			}
		}

		if (object->pager != NULL && (!change_wiring || wired)) {
			int rv;
			int faultcount;
			int reqpage;

			/*
			 * Now that we have a busy page, we can
			 * release the object lock.
			 */
			vm_object_unlock(object);

			/*
			 * Now we find out if any other pages should be paged
			 * in at this time.  This routine checks whether the
			 * pages surrounding this fault reside in the same
			 * object as the page for this fault.  If they do,
			 * they are also faulted into that object.  The array
			 * "marray" returned contains an array of vm_page_t
			 * structs, one of which is the vm_page_t passed to
			 * the routine.  The reqpage return value is the index
			 * into marray of the vm_page_t passed to the routine.
			 */
			cnt.v_pageins++;
			faultcount = vm_fault_additional_pages(first_object, first_offset,
			    m, VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD, marray, &reqpage);

			/*
			 * Call the pager to retrieve the data, if any,
			 * after releasing the lock on the map.
			 */
			UNLOCK_MAP;

			rv = faultcount ?
			    vm_pager_get_pages(object->pager,
			    marray, faultcount, reqpage, TRUE) : VM_PAGER_FAIL;
			if (rv == VM_PAGER_OK) {
				/*
				 * Found the page.
				 * Leave it busy while we play with it.
				 */
				vm_object_lock(object);

				/*
				 * Relookup in case pager changed page.
				 * Pager is responsible for disposition
				 * of old page if moved.
				 */
				m = vm_page_lookup(object, offset);

				cnt.v_pgpgin++;
				m->flags &= ~PG_FAKE;
				pmap_clear_modify(VM_PAGE_TO_PHYS(m));
				hardfault++;
				break;
			}

			/*
			 * Remove the bogus page (which does not
			 * exist at this object/offset); before
			 * doing so, we must get back our object
			 * lock to preserve our invariant.
			 *
			 * Also wake up any other thread that may want
			 * to bring in this page.
			 *
			 * If this is the top-level object, we must
			 * leave the busy page to prevent another
			 * thread from rushing past us, and inserting
			 * the page in that object at the same time
			 * that we are.
			 */

			vm_object_lock(object);
			/*
			 * Data outside the range of the pager; an error
			 */
			if ((rv == VM_PAGER_ERROR) || (rv == VM_PAGER_BAD)) {
				FREE_PAGE(m);
				UNLOCK_AND_DEALLOCATE;
				return(KERN_PROTECTION_FAILURE); /* XXX */
			}
			if (object != first_object) {
				FREE_PAGE(m);
				/*
				 * XXX - we cannot just fall out at this
				 * point, m has been freed and is invalid!
				 */
			}
		}

		/*
		 * We get here if the object has no pager (or unwiring)
		 * or the pager doesn't have the page.
		 */
		if (object == first_object)
			first_m = m;

		/*
		 * Move on to the next object.  Lock the next
		 * object before unlocking the current one.
		 */

		offset += object->shadow_offset;
		next_object = object->shadow;
		if (next_object == NULL) {
			/*
			 * If there's no object left, fill the page
			 * in the top object with zeros.
			 */
			if (object != first_object) {
				object->paging_in_progress--;
				if (object->paging_in_progress == 0)
					wakeup((caddr_t) object);
				vm_object_unlock(object);

				object = first_object;
				offset = first_offset;
				m = first_m;
				vm_object_lock(object);
			}
			first_m = NULL;

			vm_page_zero_fill(m);
			cnt.v_zfod++;
			m->flags &= ~PG_FAKE;
			break;
		}
		else {
			vm_object_lock(next_object);
			if (object != first_object) {
				object->paging_in_progress--;
				if (object->paging_in_progress == 0)
					wakeup((caddr_t) object);
			}
			vm_object_unlock(object);
			object = next_object;
			object->paging_in_progress++;
		}
	}

	if (((m->flags & (PG_ACTIVE|PG_INACTIVE)) != 0) ||
	    (m->flags & PG_BUSY) == 0)
		panic("vm_fault: absent or active or inactive or not busy after main loop");

	/*
	 * PAGE HAS BEEN FOUND.
	 * [Loop invariant still holds -- the object lock
	 * is held.]
	 */

	old_m = m;	/* save page that would be copied */

	/*
	 * If the page is being written, but isn't
	 * already owned by the top-level object,
	 * we have to copy it into a new page owned
	 * by the top-level object.
	 */

	if (object != first_object) {
		/*
		 * We only really need to copy if we
		 * want to write it.
		 */

		if (fault_type & VM_PROT_WRITE) {

			/*
			 * If we try to collapse first_object at this
			 * point, we may deadlock when we try to get
			 * the lock on an intermediate object (since we
			 * have the bottom object locked).  We can't
			 * unlock the bottom object, because the page
			 * we found may move (by collapse) if we do.
			 *
			 * Instead, we first copy the page.  Then, when
			 * we have no more use for the bottom object,
			 * we unlock it and try to collapse.
			 *
			 * Note that we copy the page even if we didn't
			 * need to... that's the breaks.
			 */

			/*
			 * We already have an empty page in
			 * first_object - use it.
			 */

			vm_page_copy(m, first_m);
			first_m->flags &= ~PG_FAKE;

			/*
			 * If another map is truly sharing this
			 * page with us, we have to flush all
			 * uses of the original page, since we
			 * can't distinguish those which want the
			 * original from those which need the
			 * new copy.
			 *
			 * XXX If we know that only one map has
			 * access to this page, then we could
			 * avoid the pmap_page_protect() call.
			 */

			vm_page_lock_queues();

			vm_page_activate(m);
			pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
			if ((m->flags & PG_CLEAN) == 0)
				m->flags |= PG_LAUNDRY;
			vm_page_unlock_queues();

			/*
			 * We no longer need the old page or object.
			 */
			PAGE_WAKEUP(m);
			object->paging_in_progress--;
			if (object->paging_in_progress == 0)
				wakeup((caddr_t) object);
			vm_object_unlock(object);

			/*
			 * Only use the new page below...
			 */

			cnt.v_cow_faults++;
			m = first_m;
			object = first_object;
			offset = first_offset;

			/*
			 * Now that we've gotten the copy out of the
			 * way, let's try to collapse the top object.
			 */
			vm_object_lock(object);
			/*
			 * But we have to play ugly games with
			 * paging_in_progress to do that...
			 */
			object->paging_in_progress--;
			if (object->paging_in_progress == 0)
				wakeup((caddr_t) object);
			vm_object_collapse(object);
			object->paging_in_progress++;
		}
		else {
			prot &= ~VM_PROT_WRITE;
			m->flags |= PG_COPYONWRITE;
		}
	}

	if (m->flags & (PG_ACTIVE|PG_INACTIVE))
		panic("vm_fault: active or inactive before copy object handling");

	/*
	 * If the page is being written, but hasn't been
	 * copied to the copy-object, we have to copy it there.
	 */
RetryCopy:
	if (first_object->copy != NULL) {
		vm_object_t copy_object = first_object->copy;
		vm_offset_t copy_offset;
		vm_page_t copy_m;

		/*
		 * We only need to copy if we want to write it.
		 */
		if ((fault_type & VM_PROT_WRITE) == 0) {
			prot &= ~VM_PROT_WRITE;
			m->flags |= PG_COPYONWRITE;
		}
		else {
			/*
			 * Try to get the lock on the copy_object.
			 */
			if (!vm_object_lock_try(copy_object)) {
				vm_object_unlock(object);
				/* should spin a bit here... */
				vm_object_lock(object);
				goto RetryCopy;
			}

			/*
			 * Make another reference to the copy-object,
			 * to keep it from disappearing during the
			 * copy.
			 */
			copy_object->ref_count++;

			/*
			 * Does the page exist in the copy?
			 */
			copy_offset = first_offset
			    - copy_object->shadow_offset;
			copy_m = vm_page_lookup(copy_object, copy_offset);
			if (page_exists = (copy_m != NULL)) {
				if (copy_m->flags & PG_BUSY) {
					/*
					 * If the page is being brought
					 * in, wait for it and then retry.
					 */
					PAGE_ASSERT_WAIT(copy_m, !change_wiring);
					RELEASE_PAGE(m);
					copy_object->ref_count--;
					vm_object_unlock(copy_object);
					UNLOCK_THINGS;
					thread_block("fltcpy");
					vm_object_deallocate(first_object);
					goto RetryFault;
				}
			}

			/*
			 * If the page is not in memory (in the object)
			 * and the object has a pager, we have to check
			 * if the pager has the data in secondary
			 * storage.
			 */
			if (!page_exists) {

				/*
				 * If we don't allocate a (blank) page
				 * here... another thread could try
				 * to page it in, allocate a page, and
				 * then block on the busy page in its
				 * shadow (first_object).  Then we'd
				 * trip over the busy page after we
				 * found that the copy_object's pager
				 * doesn't have the page...
				 */
				copy_m = vm_page_alloc(copy_object, copy_offset);
				if (copy_m == NULL) {
					/*
					 * Wait for a page, then retry.
					 */
					RELEASE_PAGE(m);
					copy_object->ref_count--;
					vm_object_unlock(copy_object);
					UNLOCK_AND_DEALLOCATE;
					VM_WAIT;
					goto RetryFault;
				}

				if (copy_object->pager != NULL) {
					vm_object_unlock(object);
					vm_object_unlock(copy_object);
					UNLOCK_MAP;

					page_exists = vm_pager_has_page(
					    copy_object->pager,
					    (copy_offset + copy_object->paging_offset));

					vm_object_lock(copy_object);

					/*
					 * Since the map is unlocked, someone
					 * else could have copied this object
					 * and put a different copy_object
					 * between the two.  Or, the last
					 * reference to the copy-object (other
					 * than the one we have) may have
					 * disappeared - if that has happened,
					 * we don't need to make the copy.
					 */
					if (copy_object->shadow != object ||
					    copy_object->ref_count == 1) {
						/*
						 * Gaah... start over!
						 */
						FREE_PAGE(copy_m);
						vm_object_unlock(copy_object);
						vm_object_deallocate(copy_object);
						/* may block */
						vm_object_lock(object);
						goto RetryCopy;
					}
					vm_object_lock(object);

					if (page_exists) {
						/*
						 * We didn't need the page
						 */
						FREE_PAGE(copy_m);
					}
				}
			}
			if (!page_exists) {
				/*
				 * Must copy page into copy-object.
				 */
				vm_page_copy(m, copy_m);
				copy_m->flags &= ~PG_FAKE;

				/*
				 * Things to remember:
				 * 1. The copied page must be marked 'dirty'
				 *    so it will be paged out to the copy
				 *    object.
				 * 2. If the old page was in use by any users
				 *    of the copy-object, it must be removed
				 *    from all pmaps.  (We can't know which
				 *    pmaps use it.)
				 */
				vm_page_lock_queues();

				vm_page_activate(old_m);

				pmap_page_protect(VM_PAGE_TO_PHYS(old_m),
				    VM_PROT_NONE);
				if ((old_m->flags & PG_CLEAN) == 0)
					old_m->flags |= PG_LAUNDRY;
				copy_m->flags &= ~PG_CLEAN;
				vm_page_activate(copy_m);
				vm_page_unlock_queues();

				PAGE_WAKEUP(copy_m);
			}
			/*
			 * The reference count on copy_object must be
			 * at least 2: one for our extra reference,
			 * and at least one from the outside world
			 * (we checked that when we last locked
			 * copy_object).
			 */
			copy_object->ref_count--;
			vm_object_unlock(copy_object);
			m->flags &= ~PG_COPYONWRITE;
		}
	}

	if (m->flags & (PG_ACTIVE | PG_INACTIVE))
		panic("vm_fault: active or inactive before retrying lookup");

	/*
	 * We must verify that the maps have not changed
	 * since our last lookup.
	 */

	if (!lookup_still_valid) {
		vm_object_t retry_object;
		vm_offset_t retry_offset;
		vm_prot_t retry_prot;

		/*
		 * Since map entries may be pageable, make sure we can
		 * take a page fault on them.
		 */
		vm_object_unlock(object);

		/*
		 * To avoid trying to write_lock the map while another
		 * thread has it read_locked (in vm_map_pageable), we
		 * do not try for write permission.  If the page is
		 * still writable, we will get write permission.  If it
		 * is not, or has been marked needs_copy, we enter the
		 * mapping without write permission, and will merely
		 * take another fault.
		 */
		result = vm_map_lookup(&map, vaddr,
		    fault_type & ~VM_PROT_WRITE, &entry,
		    &retry_object, &retry_offset, &retry_prot,
		    &wired, &su);

		vm_object_lock(object);

		/*
		 * If we don't need the page any longer, put it on the
		 * active list (the easiest thing to do here).  If no
		 * one needs it, pageout will grab it eventually.
		 */

		if (result != KERN_SUCCESS) {
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
			return(result);
		}

		lookup_still_valid = TRUE;

		if ((retry_object != first_object) ||
		    (retry_offset != first_offset)) {
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
			goto RetryFault;
		}

		/*
		 * Check whether the protection has changed or the object
		 * has been copied while we left the map unlocked.
		 * Changing from read to write permission is OK - we leave
		 * the page write-protected, and catch the write fault.
		 * Changing from write to read permission means that we
		 * can't mark the page write-enabled after all.
		 */
		prot &= retry_prot;
		if (m->flags & PG_COPYONWRITE)
			prot &= ~VM_PROT_WRITE;
	}

	/*
	 * (the various bits we're fiddling with here are locked by
	 * the object's lock)
	 */

	/* XXX This distorts the meaning of the copy_on_write bit */

	if (prot & VM_PROT_WRITE)
		m->flags &= ~PG_COPYONWRITE;

	/*
	 * It's critically important that a wired-down page be faulted
	 * only once in each map for which it is wired.
	 */

	if (m->flags & (PG_ACTIVE | PG_INACTIVE))
		panic("vm_fault: active or inactive before pmap_enter");

	vm_object_unlock(object);

	/*
	 * Put this page into the physical map.
	 * We had to do the unlock above because pmap_enter
	 * may cause other faults.  We don't put the
	 * page back on the active queue until later so
	 * that the page-out daemon won't find us (yet).
	 */

	pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);

	/*
	 * If the page is not wired down, then put it where the
	 * pageout daemon can find it.
	 */
	vm_object_lock(object);
	vm_page_lock_queues();
	if (change_wiring) {
		if (wired)
			vm_page_wire(m);
		else
			vm_page_unwire(m);
	}
	else {
		vm_page_activate(m);
	}

	if (curproc && curproc->p_stats) {
		if (hardfault) {
			curproc->p_stats->p_ru.ru_majflt++;
		} else {
			curproc->p_stats->p_ru.ru_minflt++;
		}
	}

	vm_page_unlock_queues();

	/*
	 * Unlock everything, and return
	 */

	PAGE_WAKEUP(m);
	UNLOCK_AND_DEALLOCATE;

	return(KERN_SUCCESS);

}

/*
 *	vm_fault_wire:
 *
 *	Wire down a range of virtual addresses in a map.
 */
int
vm_fault_wire(map, start, end)
	vm_map_t map;
	vm_offset_t start, end;
{
	register vm_offset_t va;
	register pmap_t pmap;
	int rv;

	pmap = vm_map_pmap(map);

	/*
	 * Inform the physical mapping system that the
	 * range of addresses may not fault, so that
	 * page tables and such can be locked down as well.
	 */

	pmap_pageable(pmap, start, end, FALSE);

	/*
	 * We simulate a fault to get the page and enter it
	 * in the physical map.
	 */

	for (va = start; va < end; va += PAGE_SIZE) {
		rv = vm_fault(map, va, VM_PROT_NONE, TRUE);
		if (rv) {
			if (va != start)
				vm_fault_unwire(map, start, va);
			return(rv);
		}
	}
	return(KERN_SUCCESS);
}


/*
 *	vm_fault_unwire:
 *
 *	Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(map, start, end)
	vm_map_t map;
	vm_offset_t start, end;
{
	register vm_offset_t va, pa;
	register pmap_t pmap;

	pmap = vm_map_pmap(map);

	/*
	 * Since the pages are wired down, we must be able to
	 * get their mappings from the physical map system.
	 */

	vm_page_lock_queues();

	for (va = start; va < end; va += PAGE_SIZE) {
		pa = pmap_extract(pmap, va);
		if (pa == (vm_offset_t) 0) {
			panic("unwire: page not in pmap");
		}
		pmap_change_wiring(pmap, va, FALSE);
		vm_page_unwire(PHYS_TO_VM_PAGE(pa));
	}
	vm_page_unlock_queues();

	/*
	 * Inform the physical mapping system that the range
	 * of addresses may fault, so that page tables and
	 * such may be unwired themselves.
	 */

	pmap_pageable(pmap, start, end, TRUE);
}

/*
 *	Routine:
 *		vm_fault_copy_entry
 *	Function:
 *		Copy all of the pages from a wired-down map entry to another.
 *
 *	In/out conditions:
 *		The source and destination maps must be locked for write.
 *		The source map entry must be wired down (or be a sharing map
 *		entry corresponding to a main map entry that is wired down).
 */

void
vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
	vm_map_t dst_map;
	vm_map_t src_map;
	vm_map_entry_t dst_entry;
	vm_map_entry_t src_entry;
{
	vm_object_t dst_object;
	vm_object_t src_object;
	vm_offset_t dst_offset;
	vm_offset_t src_offset;
	vm_prot_t prot;
	vm_offset_t vaddr;
	vm_page_t dst_m;
	vm_page_t src_m;

#ifdef lint
	src_map++;
#endif /* lint */

	src_object = src_entry->object.vm_object;
	src_offset = src_entry->offset;

	/*
	 * Create the top-level object for the destination entry.
	 * (Doesn't actually shadow anything - we copy the pages
	 * directly.)
	 */
	dst_object = vm_object_allocate(
	    (vm_size_t) (dst_entry->end - dst_entry->start));

	dst_entry->object.vm_object = dst_object;
	dst_entry->offset = 0;

	prot = dst_entry->max_protection;

	/*
	 * Loop through all of the pages in the entry's range, copying
	 * each one from the source object (it should be there) to the
	 * destination object.
	 */
	for (vaddr = dst_entry->start, dst_offset = 0;
	    vaddr < dst_entry->end;
	    vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {

		/*
		 * Allocate a page in the destination object
		 */
		vm_object_lock(dst_object);
		do {
			dst_m = vm_page_alloc(dst_object, dst_offset);
			if (dst_m == NULL) {
				vm_object_unlock(dst_object);
				VM_WAIT;
				vm_object_lock(dst_object);
			}
		} while (dst_m == NULL);

		/*
		 * Find the page in the source object, and copy it in.
		 * (Because the source is wired down, the page will be
		 * in memory.)
		 */
		vm_object_lock(src_object);
		src_m = vm_page_lookup(src_object, dst_offset + src_offset);
		if (src_m == NULL)
			panic("vm_fault_copy_wired: page missing");

		vm_page_copy(src_m, dst_m);

		/*
		 * Enter it in the pmap...
		 */
		vm_object_unlock(src_object);
		vm_object_unlock(dst_object);

		pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m),
		    prot, FALSE);

		/*
		 * Mark it no longer busy, and put it on the active list.
		 */
		vm_object_lock(dst_object);
		vm_page_lock_queues();
		vm_page_activate(dst_m);
		vm_page_unlock_queues();
		PAGE_WAKEUP(dst_m);
		vm_object_unlock(dst_object);
	}
}


/*
 * Look a page up in the shadow chain.
 */

int
vm_fault_page_lookup(object, offset, rtobject, rtoffset, rtm)
	vm_object_t object;
	vm_offset_t offset;
	vm_object_t *rtobject;
	vm_offset_t *rtoffset;
	vm_page_t *rtm;
{
	vm_page_t m;
	vm_object_t first_object = object;

	*rtm = 0;
	*rtobject = 0;
	*rtoffset = 0;

	while (!(m = vm_page_lookup(object, offset))) {
		if (object->pager) {
			if (vm_pager_has_page(object->pager, object->paging_offset+offset)) {
				*rtobject = object;
				*rtoffset = offset;
				return 1;
			}
		}

		if (!object->shadow)
			return 0;
		else {
			offset += object->shadow_offset;
			object = object->shadow;
		}
	}
	*rtobject = object;
	*rtoffset = offset;
	*rtm = m;
	return 1;
}

/*
 * This routine checks around the requested page for other pages that
 * might be able to be faulted in.
 *
 * Inputs:
 *	first_object, first_offset, m, rbehind, rahead
 *
 * Outputs:
 *	marray (array of vm_page_t), reqpage (index of requested page)
 *
 * Return value:
 *	number of pages in marray
 */
int
vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marray, reqpage)
	vm_object_t first_object;
	vm_offset_t first_offset;
	vm_page_t m;
	int rbehind;
	int raheada;
	vm_page_t *marray;
	int *reqpage;
{
	int i;
	vm_page_t tmpm;
	vm_object_t object;
	vm_offset_t offset, startoffset, endoffset, toffset, size;
	vm_object_t rtobject;
	vm_page_t rtm;
	vm_offset_t rtoffset;
	vm_offset_t offsetdiff;
	int rahead;
	int treqpage;

	object = m->object;
	offset = m->offset;

	offsetdiff = offset - first_offset;

	/*
	 * if the requested page is not available, then give up now
	 */

	if (!vm_pager_has_page(object->pager, object->paging_offset+offset))
		return 0;

	/*
	 * if there is no getmulti routine for this pager, then just allow
	 * one page to be read.
	 */
	/*
	if (!object->pager->pg_ops->pgo_getpages) {
		*reqpage = 0;
		marray[0] = m;
		return 1;
	}
	*/

	/*
	 * try to do any readahead that we might have free pages for.
	 */
	rahead = raheada;
	if (rahead > (cnt.v_free_count - cnt.v_free_reserved)) {
		rahead = cnt.v_free_count - cnt.v_free_reserved;
		rbehind = 0;
	}

	if (cnt.v_free_count < cnt.v_free_min) {
		if (rahead > VM_FAULT_READ_AHEAD_MIN)
			rahead = VM_FAULT_READ_AHEAD_MIN;
		rbehind = 0;
	}

	/*
	 * if we don't have any free pages, then just read one page.
	 */
	if (rahead <= 0) {
		*reqpage = 0;
		marray[0] = m;
		return 1;
	}

	/*
	 * Scan backward for the read-behind pages; stop at any page that
	 * is already in memory or is not backed by the same object.
	 */
	toffset = offset - NBPG;
	if (rbehind*NBPG > offset)
		rbehind = offset / NBPG;
	startoffset = offset - rbehind*NBPG;
	while (toffset >= startoffset) {
		if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) ||
		    rtm != 0 || rtobject != object) {
			startoffset = toffset + NBPG;
			break;
		}
		if (toffset == 0)
			break;
		toffset -= NBPG;
	}

	/*
	 * Scan forward for the read-ahead pages; stop at any page that
	 * is already in memory or is not backed by the same object.
	 */
	toffset = offset + NBPG;
	endoffset = offset + (rahead+1)*NBPG;
	while (toffset < object->size && toffset < endoffset) {
		if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) ||
		    rtm != 0 || rtobject != object) {
			break;
		}
		toffset += NBPG;
	}
	endoffset = toffset;

	/* calculate the number of pages */
	size = (endoffset - startoffset) / NBPG;

	/* calculate the page offset of the required page */
	treqpage = (offset - startoffset) / NBPG;

	/* see if we have space (again) */
	if (cnt.v_free_count >= cnt.v_free_reserved + size) {
		bzero(marray, (rahead + rbehind + 1) * sizeof(vm_page_t));
		/*
		 * get our pages and don't block for them
		 */
		for (i = 0; i < size; i++) {
			if (i != treqpage)
				rtm = vm_page_alloc(object, startoffset + i * NBPG);
			else
				rtm = m;
			marray[i] = rtm;
		}

		for (i = 0; i < size; i++) {
			if (marray[i] == 0)
				break;
		}

		/*
		 * if we could not get our block of pages, then
		 * free the readahead/readbehind pages.
		 */
		if (i < size) {
			for (i = 0; i < size; i++) {
				if (i != treqpage && marray[i])
					FREE_PAGE(marray[i]);
			}
			*reqpage = 0;
			marray[0] = m;
			return 1;
		}

		*reqpage = treqpage;
		return size;
	}
	*reqpage = 0;
	marray[0] = m;
	return 1;
}