/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_fault.c	8.4 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_fault.c,v 1.14 1995/01/09 16:05:39 davidg Exp $
 */

/*
 * Page fault handling module.
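 *
 * The central routine is vm_fault(), which resolves a fault at a given
 * virtual address by walking the shadow chain from the top-level object
 * toward its backing objects: it finds or pages in the needed page,
 * zero-fills when no backing store exists, performs copy-on-write into
 * the top-level object and copy-object as needed, and finally enters the
 * page into the physical map.  vm_fault_wire() and vm_fault_unwire() use
 * it to wire and unwire address ranges, and vm_fault_additional_pages()
 * clusters read-ahead and read-behind pages around a fault.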
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

int vm_fault_additional_pages __P((vm_object_t, vm_offset_t, vm_page_t, int, int, vm_page_t *, int *));

#define VM_FAULT_READ_AHEAD 4
#define VM_FAULT_READ_AHEAD_MIN 1
#define VM_FAULT_READ_BEHIND 3
#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)
extern int swap_pager_full;
extern int vm_pageout_proc_limit;

/*
 *	vm_fault:
 *
 *	Handle a page fault occurring at the given address,
 *	requiring the given permissions, in the map specified.
 *	If successful, the page is inserted into the
 *	associated physical map.
 *
 *	NOTE: the given address should be truncated to the
 *	proper page address.
 *
 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *	a standard error specifying why the fault is fatal is returned.
 *
 *
 *	The map in question must be referenced, and remains so.
 *	Caller may hold no locks.
 */
int
vm_fault(map, vaddr, fault_type, change_wiring)
	vm_map_t map;
	vm_offset_t vaddr;
	vm_prot_t fault_type;
	boolean_t change_wiring;
{
	vm_object_t first_object;
	vm_offset_t first_offset;
	vm_map_entry_t entry;
	register vm_object_t object;
	register vm_offset_t offset;
	vm_page_t m;
	vm_page_t first_m;
	vm_prot_t prot;
	int result;
	boolean_t wired;
	boolean_t su;
	boolean_t lookup_still_valid;
	boolean_t page_exists;
	vm_page_t old_m;
	vm_object_t next_object;
	vm_page_t marray[VM_FAULT_READ];
	int spl;
	int hardfault = 0;

	cnt.v_vm_faults++;	/* needs lock XXX */
	/*
	 * Recovery actions
	 */
#define FREE_PAGE(m) { \
	PAGE_WAKEUP(m); \
	vm_page_lock_queues(); \
	vm_page_free(m); \
	vm_page_unlock_queues(); \
}

#define RELEASE_PAGE(m) { \
	PAGE_WAKEUP(m); \
	vm_page_lock_queues(); \
	vm_page_activate(m); \
	vm_page_unlock_queues(); \
}

#define UNLOCK_MAP { \
	if (lookup_still_valid) { \
		vm_map_lookup_done(map, entry); \
		lookup_still_valid = FALSE; \
	} \
}

#define UNLOCK_THINGS { \
	object->paging_in_progress--; \
	if (object->paging_in_progress == 0) \
		wakeup((caddr_t)object); \
	vm_object_unlock(object); \
	if (object != first_object) { \
		vm_object_lock(first_object); \
		FREE_PAGE(first_m); \
		first_object->paging_in_progress--; \
		if (first_object->paging_in_progress == 0) \
			wakeup((caddr_t)first_object); \
		vm_object_unlock(first_object); \
	} \
	UNLOCK_MAP; \
}

#define UNLOCK_AND_DEALLOCATE { \
	UNLOCK_THINGS; \
	vm_object_deallocate(first_object); \
}


RetryFault:;

	/*
	 * Find the backing store object and offset into it to begin the
	 * search.
	 */

	if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry, &first_object,
	    &first_offset, &prot, &wired, &su)) != KERN_SUCCESS) {
		return (result);
	}
	lookup_still_valid = TRUE;

	if (wired)
		fault_type = prot;

	first_m = NULL;

	/*
	 * Make a reference to this object to prevent its disposal while we
	 * are messing with it.  Once we have the reference, the map is free
	 * to be diddled.
	 * Since objects reference their shadows (and copies),
	 * they will stay around as well.
	 */

	vm_object_lock(first_object);

	first_object->ref_count++;
	first_object->paging_in_progress++;

	/*
	 * INVARIANTS (through entire routine):
	 *
	 * 1) At all times, we must either have the object lock or a busy
	 * page in some object to prevent some other thread from trying to
	 * bring in the same page.
	 *
	 * Note that we cannot hold any locks during the pager access or when
	 * waiting for memory, so we use a busy page then.
	 *
	 * Note also that we aren't as concerned about more than one thread
	 * attempting to pager_data_unlock the same page at once, so we don't
	 * hold the page as busy then, but do record the highest unlock value
	 * so far. [Unlock requests may also be delivered out of order.]
	 *
	 * 2) Once we have a busy page, we must remove it from the pageout
	 * queues, so that the pageout daemon will not grab it away.
	 *
	 * 3) To prevent another thread from racing us down the shadow chain
	 * and entering a new page in the top object before we do, we must
	 * keep a busy page in the top object while following the shadow
	 * chain.
	 *
	 * 4) We must increment paging_in_progress on any object for which
	 * we have a busy page, to prevent vm_object_collapse from removing
	 * the busy page without our noticing.
	 */

	/*
	 * Search for the page at object/offset.
	 */

	object = first_object;
	offset = first_offset;

	/*
	 * See whether this page is resident
	 */

	while (TRUE) {
		m = vm_page_lookup(object, offset);
		if (m != NULL) {
			/*
			 * If the page is being brought in, wait for it and
			 * then retry.
			 */
			if ((m->flags & PG_BUSY) || m->busy) {
				int s;

				UNLOCK_THINGS;
				s = splhigh();
				if ((m->flags & PG_BUSY) || m->busy) {
					m->flags |= PG_WANTED | PG_REFERENCED;
					cnt.v_intrans++;
					tsleep((caddr_t) m, PSWP, "vmpfw", 0);
				}
				splx(s);
				vm_object_deallocate(first_object);
				goto RetryFault;
			}
			if ((m->flags & PG_CACHE) &&
			    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) {
				UNLOCK_AND_DEALLOCATE;
				VM_WAIT;
				goto RetryFault;
			}
			/*
			 * Remove the page from the pageout daemon's reach
			 * while we play with it.
			 */

			vm_page_lock_queues();
			vm_page_unqueue(m);
			vm_page_unlock_queues();

			/*
			 * Mark page busy for other threads.
			 */
			m->flags |= PG_BUSY;
			if (m->object != kernel_object && m->object != kmem_object &&
			    m->valid &&
			    ((m->valid & vm_page_bits(0, PAGE_SIZE))
				!= vm_page_bits(0, PAGE_SIZE))) {
				goto readrest;
			}
			break;
		}
		if (((object->pager != NULL) && (!change_wiring || wired))
		    || (object == first_object)) {

			if (swap_pager_full && !object->shadow && (!object->pager ||
			    (object->pager && object->pager->pg_type == PG_SWAP &&
			    !vm_pager_has_page(object->pager, offset + object->paging_offset)))) {
				if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) {	/* XXX */
					printf("Process %lu killed by vm_fault -- out of swap\n", (u_long) curproc->p_pid);
					psignal(curproc, SIGKILL);
					curproc->p_estcpu = 0;
					curproc->p_nice = PRIO_MIN;
					resetpriority(curproc);
				}
			}
			/*
			 * Allocate a new page for this object/offset pair.
			 */

			m = vm_page_alloc(object, offset, 0);

			if (m == NULL) {
				UNLOCK_AND_DEALLOCATE;
				VM_WAIT;
				goto RetryFault;
			}
		}
readrest:
		if (object->pager != NULL && (!change_wiring || wired)) {
			int rv;
			int faultcount;
			int reqpage;

			/*
			 * Now that we have a busy page, we can release the
			 * object lock.
			 */
			vm_object_unlock(object);
			/*
			 * Now we find out if any other pages should be paged
			 * in at this time.  vm_fault_additional_pages() checks
			 * whether the pages surrounding this fault reside in
			 * the same object as the page for this fault; if they
			 * do, they are faulted into that object as well.  The
			 * returned array "marray" contains vm_page_t structs,
			 * one of which is the vm_page_t passed to the routine;
			 * the reqpage return value is the index into marray of
			 * the vm_page_t passed to the routine.
			 */
			faultcount = vm_fault_additional_pages(
			    first_object, first_offset,
			    m, VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD,
			    marray, &reqpage);

			/*
			 * Call the pager to retrieve the data, if any, after
			 * releasing the lock on the map.
			 */
			UNLOCK_MAP;

			rv = faultcount ?
			    vm_pager_get_pages(object->pager,
				marray, faultcount, reqpage, TRUE) : VM_PAGER_FAIL;
			if (rv == VM_PAGER_OK) {
				/*
				 * Found the page. Leave it busy while we play
				 * with it.
				 */
				vm_object_lock(object);

				/*
				 * Relookup in case pager changed page. Pager
				 * is responsible for disposition of old page
				 * if moved.
				 */
				m = vm_page_lookup(object, offset);
				if (!m) {
					printf("vm_fault: error fetching offset: %lx (fc: %d, rq: %d)\n",
					    offset, faultcount, reqpage);
				}
				m->valid = VM_PAGE_BITS_ALL;
				pmap_clear_modify(VM_PAGE_TO_PHYS(m));
				hardfault++;
				break;
			}
			/*
			 * Remove the bogus page (which does not exist at this
			 * object/offset); before doing so, we must get back
			 * our object lock to preserve our invariant.
			 *
			 * Also wake up any other thread that may want to bring
			 * in this page.
			 *
			 * If this is the top-level object, we must leave the
			 * busy page to prevent another thread from rushing
			 * past us, and inserting the page in that object at
			 * the same time that we are.
			 */

			if (rv == VM_PAGER_ERROR)
				printf("vm_fault: pager input (probably hardware) error, PID %d failure\n",
				    curproc->p_pid);
			vm_object_lock(object);
			/*
			 * Data outside the range of the pager or an I/O error
			 */
			/*
			 * XXX - the check for kernel_map is a kludge to work
			 * around having the machine panic on a kernel space
			 * fault w/ I/O error.
			 */
			if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) {
				FREE_PAGE(m);
				UNLOCK_AND_DEALLOCATE;
				return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE);
			}
			if (object != first_object) {
				FREE_PAGE(m);
				/*
				 * XXX - we cannot just fall out at this
				 * point, m has been freed and is invalid!
				 */
			}
		}
		/*
		 * We get here if the object has no pager (or unwiring) or the
		 * pager doesn't have the page.
		 */
		if (object == first_object)
			first_m = m;

		/*
		 * Move on to the next object. Lock the next object before
		 * unlocking the current one.
		 */

		offset += object->shadow_offset;
		next_object = object->shadow;
		if (next_object == NULL) {
			/*
			 * If there's no object left, fill the page in the top
			 * object with zeros.
			 */
			if (object != first_object) {
				object->paging_in_progress--;
				if (object->paging_in_progress == 0)
					wakeup((caddr_t) object);
				vm_object_unlock(object);

				object = first_object;
				offset = first_offset;
				m = first_m;
				vm_object_lock(object);
			}
			first_m = NULL;

			vm_page_zero_fill(m);
			m->valid = VM_PAGE_BITS_ALL;
			cnt.v_zfod++;
			break;
		} else {
			vm_object_lock(next_object);
			if (object != first_object) {
				object->paging_in_progress--;
				if (object->paging_in_progress == 0)
					wakeup((caddr_t) object);
			}
			vm_object_unlock(object);
			object = next_object;
			object->paging_in_progress++;
		}
	}

	if ((m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) != 0 ||
	    (m->flags & PG_BUSY) == 0)
		panic("vm_fault: absent or active or inactive or not busy after main loop");

	/*
	 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
	 * is held.]
	 */

	old_m = m;	/* save page that would be copied */

	/*
	 * If the page is being written, but isn't already owned by the
	 * top-level object, we have to copy it into a new page owned by the
	 * top-level object.
	 */

	if (object != first_object) {
		/*
		 * We only really need to copy if we want to write it.
		 */

		if (fault_type & VM_PROT_WRITE) {

			/*
			 * If we try to collapse first_object at this point,
			 * we may deadlock when we try to get the lock on an
			 * intermediate object (since we have the bottom
			 * object locked). We can't unlock the bottom object,
			 * because the page we found may move (by collapse) if
			 * we do.
			 *
			 * Instead, we first copy the page. Then, when we have
			 * no more use for the bottom object, we unlock it and
			 * try to collapse.
			 *
			 * Note that we copy the page even if we didn't need
			 * to... that's the breaks.
			 */

			/*
			 * We already have an empty page in first_object - use
			 * it.
			 */

			vm_page_copy(m, first_m);
			first_m->valid = VM_PAGE_BITS_ALL;

			/*
			 * If another map is truly sharing this page with us,
			 * we have to flush all uses of the original page,
			 * since we can't distinguish those which want the
			 * original from those which need the new copy.
			 *
			 * XXX If we know that only one map has access to this
			 * page, then we could avoid the pmap_page_protect()
			 * call.
			 */

			vm_page_lock_queues();

			vm_page_activate(m);
			pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
			vm_page_unlock_queues();

			/*
			 * We no longer need the old page or object.
			 */
			PAGE_WAKEUP(m);
			object->paging_in_progress--;
			if (object->paging_in_progress == 0)
				wakeup((caddr_t) object);
			vm_object_unlock(object);

			/*
			 * Only use the new page below...
			 */

			cnt.v_cow_faults++;
			m = first_m;
			object = first_object;
			offset = first_offset;

			/*
			 * Now that we've gotten the copy out of the way,
			 * let's try to collapse the top object.
			 */
			vm_object_lock(object);
			/*
			 * But we have to play ugly games with
			 * paging_in_progress to do that...
			 */
			object->paging_in_progress--;
			if (object->paging_in_progress == 0)
				wakeup((caddr_t) object);
			vm_object_collapse(object);
			object->paging_in_progress++;
		} else {
			prot &= ~VM_PROT_WRITE;
			m->flags |= PG_COPYONWRITE;
		}
	}
	if (m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE))
		panic("vm_fault: active or inactive before copy object handling");

	/*
	 * If the page is being written, but hasn't been copied to the
	 * copy-object, we have to copy it there.
	 */
RetryCopy:
	if (first_object->copy != NULL) {
		vm_object_t copy_object = first_object->copy;
		vm_offset_t copy_offset;
		vm_page_t copy_m;

		/*
		 * We only need to copy if we want to write it.
		 */
		if ((fault_type & VM_PROT_WRITE) == 0) {
			prot &= ~VM_PROT_WRITE;
			m->flags |= PG_COPYONWRITE;
		} else {
			/*
			 * Try to get the lock on the copy_object.
			 */
			if (!vm_object_lock_try(copy_object)) {
				vm_object_unlock(object);
				/* should spin a bit here... */
				vm_object_lock(object);
				goto RetryCopy;
			}
			/*
			 * Make another reference to the copy-object, to keep
			 * it from disappearing during the copy.
			 */
			copy_object->ref_count++;

			/*
			 * Does the page exist in the copy?
			 */
			copy_offset = first_offset
			    - copy_object->shadow_offset;
			copy_m = vm_page_lookup(copy_object, copy_offset);
			page_exists = (copy_m != NULL);
			if (page_exists) {
				if ((copy_m->flags & PG_BUSY) || copy_m->busy) {
					/*
					 * If the page is being brought in,
					 * wait for it and then retry.
					 */
					RELEASE_PAGE(m);
					copy_object->ref_count--;
					vm_object_unlock(copy_object);
					UNLOCK_THINGS;
					spl = splhigh();
					if ((copy_m->flags & PG_BUSY) || copy_m->busy) {
						copy_m->flags |= PG_WANTED | PG_REFERENCED;
						tsleep((caddr_t) copy_m, PSWP, "vmpfwc", 0);
					}
					splx(spl);
					vm_object_deallocate(first_object);
					goto RetryFault;
				}
			}
			/*
			 * If the page is not in memory (in the object) and
			 * the object has a pager, we have to check if the
			 * pager has the data in secondary storage.
			 */
			if (!page_exists) {

				/*
				 * If we don't allocate a (blank) page here...
				 * another thread could try to page it in,
				 * allocate a page, and then block on the busy
				 * page in its shadow (first_object). Then
				 * we'd trip over the busy page after we found
				 * that the copy_object's pager doesn't have
				 * the page...
				 */
				copy_m = vm_page_alloc(copy_object, copy_offset, 0);
				if (copy_m == NULL) {
					/*
					 * Wait for a page, then retry.
					 */
					RELEASE_PAGE(m);
					copy_object->ref_count--;
					vm_object_unlock(copy_object);
					UNLOCK_AND_DEALLOCATE;
					VM_WAIT;
					goto RetryFault;
				}
				if (copy_object->pager != NULL) {
					vm_object_unlock(object);
					vm_object_unlock(copy_object);
					UNLOCK_MAP;

					page_exists = vm_pager_has_page(
					    copy_object->pager,
					    (copy_offset + copy_object->paging_offset));

					vm_object_lock(copy_object);

					/*
					 * Since the map is unlocked, someone
					 * else could have copied this object
					 * and put a different copy_object
					 * between the two. Or, the last
					 * reference to the copy-object (other
					 * than the one we have) may have
					 * disappeared - if that has happened,
					 * we don't need to make the copy.
					 */
					if (copy_object->shadow != object ||
					    copy_object->ref_count == 1) {
						/*
						 * Gaah... start over!
						 */
						FREE_PAGE(copy_m);
						vm_object_unlock(copy_object);
						vm_object_deallocate(copy_object);
						/* may block */
						vm_object_lock(object);
						goto RetryCopy;
					}
					vm_object_lock(object);

					if (page_exists) {
						/*
						 * We didn't need the page
						 */
						FREE_PAGE(copy_m);
					}
				}
			}
			if (!page_exists) {
				/*
				 * Must copy page into copy-object.
				 */
				vm_page_copy(m, copy_m);
				copy_m->valid = VM_PAGE_BITS_ALL;

				/*
				 * Things to remember: 1. The copied page must
				 * be marked 'dirty' so it will be paged out
				 * to the copy object. 2. If the old page was
				 * in use by any users of the copy-object, it
				 * must be removed from all pmaps. (We can't
				 * know which pmaps use it.)
				 */
				vm_page_lock_queues();

				vm_page_activate(old_m);

				pmap_page_protect(VM_PAGE_TO_PHYS(old_m),
				    VM_PROT_NONE);
				copy_m->dirty = VM_PAGE_BITS_ALL;
				vm_page_activate(copy_m);
				vm_page_unlock_queues();

				PAGE_WAKEUP(copy_m);
			}
			/*
			 * The reference count on copy_object must be at least
			 * 2: one for our extra reference, and at least one
			 * from the outside world (we checked that when we
			 * last locked copy_object).
			 */
			copy_object->ref_count--;
			vm_object_unlock(copy_object);
			m->flags &= ~PG_COPYONWRITE;
		}
	}
	if (m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE))
		panic("vm_fault: active or inactive before retrying lookup");

	/*
	 * We must verify that the maps have not changed since our last
	 * lookup.
	 */

	if (!lookup_still_valid) {
		vm_object_t retry_object;
		vm_offset_t retry_offset;
		vm_prot_t retry_prot;

		/*
		 * Since map entries may be pageable, make sure we can take a
		 * page fault on them.
		 */
		vm_object_unlock(object);

		/*
		 * To avoid trying to write_lock the map while another thread
		 * has it read_locked (in vm_map_pageable), we do not try for
		 * write permission. If the page is still writable, we will
		 * get write permission. If it is not, or has been marked
		 * needs_copy, we enter the mapping without write permission,
		 * and will merely take another fault.
		 */
		result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE,
		    &entry, &retry_object, &retry_offset, &retry_prot, &wired, &su);

		vm_object_lock(object);

		/*
		 * If we don't need the page any longer, put it on the active
		 * list (the easiest thing to do here). If no one needs it,
		 * pageout will grab it eventually.
		 */

		if (result != KERN_SUCCESS) {
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
			return (result);
		}
		lookup_still_valid = TRUE;

		if ((retry_object != first_object) ||
		    (retry_offset != first_offset)) {
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
			goto RetryFault;
		}
		/*
		 * Check whether the protection has changed or the object has
		 * been copied while we left the map unlocked. Changing from
		 * read to write permission is OK - we leave the page
		 * write-protected, and catch the write fault. Changing from
		 * write to read permission means that we can't mark the page
		 * write-enabled after all.
		 */
		prot &= retry_prot;
		if (m->flags & PG_COPYONWRITE)
			prot &= ~VM_PROT_WRITE;
	}
	/*
	 * (the various bits we're fiddling with here are locked by the
	 * object's lock)
	 */

	/* XXX This distorts the meaning of the copy_on_write bit */

	if (prot & VM_PROT_WRITE)
		m->flags &= ~PG_COPYONWRITE;

	/*
	 * It's critically important that a wired-down page be faulted only
	 * once in each map for which it is wired.
	 */

	if (m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE))
		panic("vm_fault: active or inactive before pmap_enter");

	vm_object_unlock(object);

	/*
	 * Put this page into the physical map. We had to do the unlock above
	 * because pmap_enter may cause other faults. We don't put the page
	 * back on the active queue until later so that the page-out daemon
	 * won't find us (yet).
	 */

	pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);

	/*
	 * If the page is not wired down, then put it where the pageout daemon
	 * can find it.
	 */
	vm_object_lock(object);
	vm_page_lock_queues();
	if (change_wiring) {
		if (wired)
			vm_page_wire(m);
		else
			vm_page_unwire(m);
	} else {
		vm_page_activate(m);
	}

	if (curproc && curproc->p_stats) {
		if (hardfault) {
			curproc->p_stats->p_ru.ru_majflt++;
		} else {
			curproc->p_stats->p_ru.ru_minflt++;
		}
	}
	vm_page_unlock_queues();

	/*
	 * Unlock everything, and return
	 */

	PAGE_WAKEUP(m);
	UNLOCK_AND_DEALLOCATE;

	return (KERN_SUCCESS);

}

/*
 *	vm_fault_wire:
 *
 *	Wire down a range of virtual addresses in a map.
 */
int
vm_fault_wire(map, start, end)
	vm_map_t map;
	vm_offset_t start, end;
{

	register vm_offset_t va;
	register pmap_t pmap;
	int rv;

	pmap = vm_map_pmap(map);

	/*
	 * Inform the physical mapping system that the range of addresses may
	 * not fault, so that page tables and such can be locked down as well.
	 */

	pmap_pageable(pmap, start, end, FALSE);

	/*
	 * We simulate a fault to get the page and enter it in the physical
	 * map.
	 */

	for (va = start; va < end; va += PAGE_SIZE) {
		rv = vm_fault(map, va, VM_PROT_NONE, TRUE);
		if (rv) {
			if (va != start)
				vm_fault_unwire(map, start, va);
			return (rv);
		}
	}
	return (KERN_SUCCESS);
}


/*
 *	vm_fault_unwire:
 *
 *	Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(map, start, end)
	vm_map_t map;
	vm_offset_t start, end;
{

	register vm_offset_t va, pa;
	register pmap_t pmap;

	pmap = vm_map_pmap(map);

	/*
	 * Since the pages are wired down, we must be able to get their
	 * mappings from the physical map system.
	 */

	vm_page_lock_queues();

	for (va = start; va < end; va += PAGE_SIZE) {
		pa = pmap_extract(pmap, va);
		if (pa == (vm_offset_t) 0) {
			panic("unwire: page not in pmap");
		}
		pmap_change_wiring(pmap, va, FALSE);
		vm_page_unwire(PHYS_TO_VM_PAGE(pa));
	}
	vm_page_unlock_queues();

	/*
	 * Inform the physical mapping system that the range of addresses may
	 * fault, so that page tables and such may be unwired themselves.
	 */

	pmap_pageable(pmap, start, end, TRUE);

}

/*
 *	Routine:
 *		vm_fault_copy_entry
 *	Function:
 *		Copy all of the pages from a wired-down map entry to another.
 *
 *	In/out conditions:
 *		The source and destination maps must be locked for write.
 *		The source map entry must be wired down (or be a sharing map
 *		entry corresponding to a main map entry that is wired down).
 */

void
vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
	vm_map_t dst_map;
	vm_map_t src_map;
	vm_map_entry_t dst_entry;
	vm_map_entry_t src_entry;
{
	vm_object_t dst_object;
	vm_object_t src_object;
	vm_offset_t dst_offset;
	vm_offset_t src_offset;
	vm_prot_t prot;
	vm_offset_t vaddr;
	vm_page_t dst_m;
	vm_page_t src_m;

#ifdef lint
	src_map++;
#endif	/* lint */

	src_object = src_entry->object.vm_object;
	src_offset = src_entry->offset;

	/*
	 * Create the top-level object for the destination entry. (Doesn't
	 * actually shadow anything - we copy the pages directly.)
	 */
	dst_object = vm_object_allocate(
	    (vm_size_t) (dst_entry->end - dst_entry->start));

	dst_entry->object.vm_object = dst_object;
	dst_entry->offset = 0;

	prot = dst_entry->max_protection;

	/*
	 * Loop through all of the pages in the entry's range, copying each
	 * one from the source object (it should be there) to the destination
	 * object.
	 */
	for (vaddr = dst_entry->start, dst_offset = 0;
	    vaddr < dst_entry->end;
	    vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {

		/*
		 * Allocate a page in the destination object
		 */
		vm_object_lock(dst_object);
		do {
			dst_m = vm_page_alloc(dst_object, dst_offset, 0);
			if (dst_m == NULL) {
				vm_object_unlock(dst_object);
				VM_WAIT;
				vm_object_lock(dst_object);
			}
		} while (dst_m == NULL);

		/*
		 * Find the page in the source object, and copy it in.
		 * (Because the source is wired down, the page will be in
		 * memory.)
		 */
		vm_object_lock(src_object);
		src_m = vm_page_lookup(src_object, dst_offset + src_offset);
		if (src_m == NULL)
			panic("vm_fault_copy_wired: page missing");

		vm_page_copy(src_m, dst_m);

		/*
		 * Enter it in the pmap...
		 */
		vm_object_unlock(src_object);
		vm_object_unlock(dst_object);

		pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m),
		    prot, FALSE);

		/*
		 * Mark it no longer busy, and put it on the active list.
		 */
		vm_object_lock(dst_object);
		vm_page_lock_queues();
		vm_page_activate(dst_m);
		vm_page_unlock_queues();
		PAGE_WAKEUP(dst_m);
		vm_object_unlock(dst_object);
	}
}


/*
 * looks a page up in the shadow chain
 */

int
vm_fault_page_lookup(object, offset, rtobject, rtoffset, rtm)
	vm_object_t object;
	vm_offset_t offset;
	vm_object_t *rtobject;
	vm_offset_t *rtoffset;
	vm_page_t *rtm;
{
	vm_page_t m;

	*rtm = 0;
	*rtobject = 0;
	*rtoffset = 0;

	while (!(m = vm_page_lookup(object, offset))) {
		if (object->pager) {
			if (vm_pager_has_page(object->pager, object->paging_offset + offset)) {
				*rtobject = object;
				*rtoffset = offset;
				return 1;
			}
		}
		if (!object->shadow)
			return 0;
		else {
			offset += object->shadow_offset;
			object = object->shadow;
		}
	}
	*rtobject = object;
	*rtoffset = offset;
	*rtm = m;
	return 1;
}

/*
 * This routine checks around the requested page for other pages that
 * might be able to be faulted in.
 *
 * Inputs:
 *	first_object, first_offset, m, rbehind, rahead
 *
 * Outputs:
 *	marray (array of vm_page_t), reqpage (index of requested page)
 *
 * Return value:
 *	number of pages in marray
 */
int
vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marray, reqpage)
	vm_object_t first_object;
	vm_offset_t first_offset;
	vm_page_t m;
	int rbehind;
	int raheada;
	vm_page_t *marray;
	int *reqpage;
{
	int i;
	vm_object_t object;
	vm_offset_t offset, startoffset, endoffset, toffset, size;
	vm_object_t rtobject;
	vm_page_t rtm;
	vm_offset_t rtoffset;
	vm_offset_t offsetdiff;
	int rahead;
	int treqpage;

	object = m->object;
	offset = m->offset;

	offsetdiff = offset - first_offset;

	/*
	 * if the requested page is not available, then give up now
	 */

	if (!vm_pager_has_page(object->pager, object->paging_offset + offset))
		return 0;

	/*
	 * try to do any readahead that we might have free pages for.
	 */
	rahead = raheada;
	if ((rahead + rbehind) > ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) {
		rahead = ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved) / 2;
		rbehind = rahead;
		if (!rahead)
			wakeup((caddr_t) &vm_pages_needed);
	}
	/*
	 * if we don't have any free pages, then just read one page.
	 */
	if (rahead <= 0) {
		*reqpage = 0;
		marray[0] = m;
		return 1;
	}
	/*
	 * scan backward for the read-behind pages -- stop at a page that is
	 * resident or whose backing store is not in the same object
	 */
	toffset = offset - NBPG;
	if (toffset < offset) {
		if (rbehind * NBPG > offset)
			rbehind = offset / NBPG;
		startoffset = offset - rbehind * NBPG;
		while (toffset >= startoffset) {
			if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) ||
			    rtm != 0 || rtobject != object) {
				startoffset = toffset + NBPG;
				break;
			}
			if (toffset == 0)
				break;
			toffset -= NBPG;
		}
	} else {
		startoffset = offset;
	}

	/*
	 * scan forward for the read-ahead pages -- stop at a page that is
	 * resident or whose backing store is not in the same object
	 */
	toffset = offset + NBPG;
	endoffset = offset + (rahead + 1) * NBPG;
	while (toffset < object->size && toffset < endoffset) {
		if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) ||
		    rtm != 0 || rtobject != object) {
			break;
		}
		toffset += NBPG;
	}
	endoffset = toffset;

	/* calculate number of pages in the cluster */
	size = (endoffset - startoffset) / NBPG;

	/* calculate the index of the required page within the cluster */
	treqpage = (offset - startoffset) / NBPG;

	/* see if we have space (again) */
	if ((cnt.v_free_count + cnt.v_cache_count) > (cnt.v_free_reserved + size)) {
		bzero(marray, (rahead + rbehind + 1) * sizeof(vm_page_t));
		/*
		 * get our pages and don't block for them
		 */
		for (i = 0; i < size; i++) {
			if (i != treqpage)
				rtm = vm_page_alloc(object, startoffset + i * NBPG, 0);
			else
				rtm = m;
			marray[i] = rtm;
		}

		for (i = 0; i < size; i++) {
			if (marray[i] == 0)
				break;
		}

		/*
		 * if we could not get our block of pages, then free the
		 * readahead/readbehind pages.
		 */
		if (i < size) {
			for (i = 0; i < size; i++) {
				if (i != treqpage && marray[i])
					FREE_PAGE(marray[i]);
			}
			*reqpage = 0;
			marray[0] = m;
			return 1;
		}
		*reqpage = treqpage;
		return size;
	}
	*reqpage = 0;
	marray[0] = m;
	return 1;
}