1 /* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * 10 * This code is derived from software contributed to Berkeley by 11 * The Mach Operating System project at Carnegie-Mellon University. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 42 * 43 * 44 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 45 * All rights reserved. 46 * 47 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 48 * 49 * Permission to use, copy, modify and distribute this software and 50 * its documentation is hereby granted, provided that both the copyright 51 * notice and this permission notice appear in all copies of the 52 * software, derivative works or modified versions, and any portions 53 * thereof, and that both notices appear in supporting documentation. 54 * 55 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 56 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 57 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 58 * 59 * Carnegie Mellon requests users of this software to return to 60 * 61 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 62 * School of Computer Science 63 * Carnegie Mellon University 64 * Pittsburgh PA 15213-3890 65 * 66 * any improvements or extensions that they make and grant Carnegie the 67 * rights to redistribute these changes. 68 * 69 * $Id: vm_fault.c,v 1.60 1996/12/29 02:33:12 dyson Exp $ 70 */ 71 72 /* 73 * Page fault handling module. 74 */ 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/proc.h> 79 #include <sys/vnode.h> 80 #include <sys/resource.h> 81 #include <sys/signalvar.h> 82 #include <sys/resourcevar.h> 83 #include <sys/vmmeter.h> 84 #include <sys/buf.h> 85 86 #include <vm/vm.h> 87 #include <vm/vm_param.h> 88 #include <vm/vm_prot.h> 89 #include <vm/lock.h> 90 #include <vm/pmap.h> 91 #include <vm/vm_map.h> 92 #include <vm/vm_object.h> 93 #include <vm/vm_page.h> 94 #include <vm/vm_pageout.h> 95 #include <vm/vm_kern.h> 96 #include <vm/vm_pager.h> 97 #include <vm/vnode_pager.h> 98 #include <vm/swap_pager.h> 99 #include <vm/vm_extern.h> 100 101 int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *)); 102 103 #define VM_FAULT_READ_AHEAD 4 104 #define VM_FAULT_READ_BEHIND 3 105 #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) 106 107 /* 108 * vm_fault: 109 * 110 * Handle a page fault occuring at the given address, 111 * requiring the given permissions, in the map specified. 112 * If successful, the page is inserted into the 113 * associated physical map. 114 * 115 * NOTE: the given address should be truncated to the 116 * proper page address. 117 * 118 * KERN_SUCCESS is returned if the page fault is handled; otherwise, 119 * a standard error specifying why the fault is fatal is returned. 120 * 121 * 122 * The map in question must be referenced, and remains so. 123 * Caller may hold no locks. 124 */ 125 int 126 vm_fault(map, vaddr, fault_type, change_wiring) 127 vm_map_t map; 128 vm_offset_t vaddr; 129 vm_prot_t fault_type; 130 boolean_t change_wiring; 131 { 132 vm_object_t first_object; 133 vm_pindex_t first_pindex; 134 vm_map_entry_t entry; 135 register vm_object_t object; 136 register vm_pindex_t pindex; 137 vm_page_t m; 138 vm_page_t first_m; 139 vm_prot_t prot; 140 int result; 141 boolean_t wired; 142 boolean_t su; 143 boolean_t lookup_still_valid; 144 vm_page_t old_m; 145 vm_object_t next_object; 146 vm_page_t marray[VM_FAULT_READ]; 147 int hardfault = 0; 148 struct vnode *vp = NULL; 149 150 cnt.v_vm_faults++; /* needs lock XXX */ 151 /* 152 * Recovery actions 153 */ 154 #define FREE_PAGE(m) { \ 155 PAGE_WAKEUP(m); \ 156 vm_page_free(m); \ 157 } 158 159 #define RELEASE_PAGE(m) { \ 160 PAGE_WAKEUP(m); \ 161 if (m->queue != PQ_ACTIVE) vm_page_activate(m); \ 162 } 163 164 #define UNLOCK_MAP { \ 165 if (lookup_still_valid) { \ 166 vm_map_lookup_done(map, entry); \ 167 lookup_still_valid = FALSE; \ 168 } \ 169 } 170 171 #define UNLOCK_THINGS { \ 172 vm_object_pip_wakeup(object); \ 173 if (object != first_object) { \ 174 FREE_PAGE(first_m); \ 175 vm_object_pip_wakeup(first_object); \ 176 } \ 177 UNLOCK_MAP; \ 178 if (vp != NULL) VOP_UNLOCK(vp); \ 179 } 180 181 #define UNLOCK_AND_DEALLOCATE { \ 182 UNLOCK_THINGS; \ 183 vm_object_deallocate(first_object); \ 184 } 185 186 187 RetryFault:; 188 189 /* 190 * Find the backing store object and offset into it to begin the 191 * search. 192 */ 193 194 if ((result = vm_map_lookup(&map, vaddr, 195 fault_type, &entry, &first_object, 196 &first_pindex, &prot, &wired, &su)) != KERN_SUCCESS) { 197 return (result); 198 } 199 200 if (entry->nofault) { 201 panic("vm_fault: fault on nofault entry, addr: %lx", 202 vaddr); 203 } 204 205 /* 206 * If we are user-wiring a r/w segment, and it is COW, then 207 * we need to do the COW operation. Note that we don't COW 208 * currently RO sections now, because it is NOT desirable 209 * to COW .text. We simply keep .text from ever being COW'ed 210 * and take the heat that one cannot debug wired .text sections. 211 */ 212 if ((change_wiring == VM_FAULT_USER_WIRE) && entry->needs_copy) { 213 if(entry->protection & VM_PROT_WRITE) { 214 int tresult; 215 vm_map_lookup_done(map, entry); 216 217 tresult = vm_map_lookup(&map, vaddr, VM_PROT_READ|VM_PROT_WRITE, 218 &entry, &first_object, &first_pindex, &prot, &wired, &su); 219 if (tresult != KERN_SUCCESS) 220 return tresult; 221 } else { 222 /* 223 * If we don't COW now, on a user wire, the user will never 224 * be able to write to the mapping. If we don't make this 225 * restriction, the bookkeeping would be nearly impossible. 226 */ 227 entry->max_protection &= ~VM_PROT_WRITE; 228 } 229 } 230 231 vp = vnode_pager_lock(first_object); 232 233 lookup_still_valid = TRUE; 234 235 if (wired) 236 fault_type = prot; 237 238 first_m = NULL; 239 240 /* 241 * Make a reference to this object to prevent its disposal while we 242 * are messing with it. Once we have the reference, the map is free 243 * to be diddled. Since objects reference their shadows (and copies), 244 * they will stay around as well. 245 */ 246 247 first_object->ref_count++; 248 first_object->paging_in_progress++; 249 250 /* 251 * INVARIANTS (through entire routine): 252 * 253 * 1) At all times, we must either have the object lock or a busy 254 * page in some object to prevent some other process from trying to 255 * bring in the same page. 256 * 257 * Note that we cannot hold any locks during the pager access or when 258 * waiting for memory, so we use a busy page then. 259 * 260 * Note also that we aren't as concerned about more than one thead 261 * attempting to pager_data_unlock the same page at once, so we don't 262 * hold the page as busy then, but do record the highest unlock value 263 * so far. [Unlock requests may also be delivered out of order.] 264 * 265 * 2) Once we have a busy page, we must remove it from the pageout 266 * queues, so that the pageout daemon will not grab it away. 267 * 268 * 3) To prevent another process from racing us down the shadow chain 269 * and entering a new page in the top object before we do, we must 270 * keep a busy page in the top object while following the shadow 271 * chain. 272 * 273 * 4) We must increment paging_in_progress on any object for which 274 * we have a busy page, to prevent vm_object_collapse from removing 275 * the busy page without our noticing. 276 */ 277 278 /* 279 * Search for the page at object/offset. 280 */ 281 282 object = first_object; 283 pindex = first_pindex; 284 285 /* 286 * See whether this page is resident 287 */ 288 289 while (TRUE) { 290 m = vm_page_lookup(object, pindex); 291 if (m != NULL) { 292 int queue; 293 /* 294 * If the page is being brought in, wait for it and 295 * then retry. 296 */ 297 if ((m->flags & PG_BUSY) || m->busy) { 298 int s; 299 300 UNLOCK_THINGS; 301 s = splvm(); 302 if (((m->flags & PG_BUSY) || m->busy)) { 303 m->flags |= PG_WANTED | PG_REFERENCED; 304 cnt.v_intrans++; 305 tsleep(m, PSWP, "vmpfw", 0); 306 } 307 splx(s); 308 vm_object_deallocate(first_object); 309 goto RetryFault; 310 } 311 312 queue = m->queue; 313 vm_page_unqueue_nowakeup(m); 314 315 /* 316 * Mark page busy for other processes, and the pagedaemon. 317 */ 318 if (((queue - m->pc) == PQ_CACHE) && 319 (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { 320 vm_page_activate(m); 321 UNLOCK_AND_DEALLOCATE; 322 VM_WAIT; 323 goto RetryFault; 324 } 325 326 m->flags |= PG_BUSY; 327 328 if (m->valid && 329 ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && 330 m->object != kernel_object && m->object != kmem_object) { 331 goto readrest; 332 } 333 break; 334 } 335 if (((object->type != OBJT_DEFAULT) && (!change_wiring || wired)) 336 || (object == first_object)) { 337 338 if (pindex >= object->size) { 339 UNLOCK_AND_DEALLOCATE; 340 return (KERN_PROTECTION_FAILURE); 341 } 342 343 /* 344 * Allocate a new page for this object/offset pair. 345 */ 346 m = vm_page_alloc(object, pindex, 347 (vp || object->backing_object)?VM_ALLOC_NORMAL:VM_ALLOC_ZERO); 348 349 if (m == NULL) { 350 UNLOCK_AND_DEALLOCATE; 351 VM_WAIT; 352 goto RetryFault; 353 } 354 } 355 readrest: 356 if (object->type != OBJT_DEFAULT && (!change_wiring || wired)) { 357 int rv; 358 int faultcount; 359 int reqpage; 360 int ahead, behind; 361 362 ahead = VM_FAULT_READ_AHEAD; 363 behind = VM_FAULT_READ_BEHIND; 364 if (first_object->behavior == OBJ_RANDOM) { 365 ahead = 0; 366 behind = 0; 367 } 368 369 if ((first_object->type != OBJT_DEVICE) && 370 (first_object->behavior == OBJ_SEQUENTIAL)) { 371 vm_pindex_t firstpindex, tmppindex; 372 if (first_pindex < 373 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1)) 374 firstpindex = 0; 375 else 376 firstpindex = first_pindex - 377 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1); 378 379 for(tmppindex = first_pindex - 1; 380 tmppindex >= first_pindex; 381 --tmppindex) { 382 vm_page_t mt; 383 mt = vm_page_lookup( first_object, tmppindex); 384 if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL)) 385 break; 386 if (mt->busy || 387 (mt->flags & (PG_BUSY|PG_FICTITIOUS)) || 388 mt->hold_count || 389 mt->wire_count) 390 continue; 391 if (mt->dirty == 0) 392 vm_page_test_dirty(mt); 393 if (mt->dirty) { 394 vm_page_protect(mt, VM_PROT_NONE); 395 vm_page_deactivate(mt); 396 } else { 397 vm_page_cache(mt); 398 } 399 } 400 401 ahead += behind; 402 behind = 0; 403 } 404 405 /* 406 * now we find out if any other pages should be paged 407 * in at this time this routine checks to see if the 408 * pages surrounding this fault reside in the same 409 * object as the page for this fault. If they do, 410 * then they are faulted in also into the object. The 411 * array "marray" returned contains an array of 412 * vm_page_t structs where one of them is the 413 * vm_page_t passed to the routine. The reqpage 414 * return value is the index into the marray for the 415 * vm_page_t passed to the routine. 416 */ 417 faultcount = vm_fault_additional_pages( 418 m, behind, ahead, marray, &reqpage); 419 420 /* 421 * Call the pager to retrieve the data, if any, after 422 * releasing the lock on the map. 423 */ 424 UNLOCK_MAP; 425 426 rv = faultcount ? 427 vm_pager_get_pages(object, marray, faultcount, 428 reqpage) : VM_PAGER_FAIL; 429 430 if (rv == VM_PAGER_OK) { 431 /* 432 * Found the page. Leave it busy while we play 433 * with it. 434 */ 435 436 /* 437 * Relookup in case pager changed page. Pager 438 * is responsible for disposition of old page 439 * if moved. 440 */ 441 m = vm_page_lookup(object, pindex); 442 if( !m) { 443 UNLOCK_AND_DEALLOCATE; 444 goto RetryFault; 445 } 446 447 hardfault++; 448 break; 449 } 450 /* 451 * Remove the bogus page (which does not exist at this 452 * object/offset); before doing so, we must get back 453 * our object lock to preserve our invariant. 454 * 455 * Also wake up any other process that may want to bring 456 * in this page. 457 * 458 * If this is the top-level object, we must leave the 459 * busy page to prevent another process from rushing 460 * past us, and inserting the page in that object at 461 * the same time that we are. 462 */ 463 464 if (rv == VM_PAGER_ERROR) 465 printf("vm_fault: pager input (probably hardware) error, PID %d failure\n", 466 curproc->p_pid); 467 /* 468 * Data outside the range of the pager or an I/O error 469 */ 470 /* 471 * XXX - the check for kernel_map is a kludge to work 472 * around having the machine panic on a kernel space 473 * fault w/ I/O error. 474 */ 475 if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { 476 FREE_PAGE(m); 477 UNLOCK_AND_DEALLOCATE; 478 return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); 479 } 480 if (object != first_object) { 481 FREE_PAGE(m); 482 /* 483 * XXX - we cannot just fall out at this 484 * point, m has been freed and is invalid! 485 */ 486 } 487 } 488 /* 489 * We get here if the object has default pager (or unwiring) or the 490 * pager doesn't have the page. 491 */ 492 if (object == first_object) 493 first_m = m; 494 495 /* 496 * Move on to the next object. Lock the next object before 497 * unlocking the current one. 498 */ 499 500 pindex += OFF_TO_IDX(object->backing_object_offset); 501 next_object = object->backing_object; 502 if (next_object == NULL) { 503 /* 504 * If there's no object left, fill the page in the top 505 * object with zeros. 506 */ 507 if (object != first_object) { 508 vm_object_pip_wakeup(object); 509 510 object = first_object; 511 pindex = first_pindex; 512 m = first_m; 513 } 514 first_m = NULL; 515 516 if ((m->flags & PG_ZERO) == 0) 517 vm_page_zero_fill(m); 518 cnt.v_zfod++; 519 break; 520 } else { 521 if (object != first_object) { 522 vm_object_pip_wakeup(object); 523 } 524 object = next_object; 525 object->paging_in_progress++; 526 } 527 } 528 529 if ((m->flags & PG_BUSY) == 0) 530 panic("vm_fault: not busy after main loop"); 531 532 /* 533 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock 534 * is held.] 535 */ 536 537 old_m = m; /* save page that would be copied */ 538 539 /* 540 * If the page is being written, but isn't already owned by the 541 * top-level object, we have to copy it into a new page owned by the 542 * top-level object. 543 */ 544 545 if (object != first_object) { 546 /* 547 * We only really need to copy if we want to write it. 548 */ 549 550 if (fault_type & VM_PROT_WRITE) { 551 552 /* 553 * This allows pages to be virtually copied from a backing_object 554 * into the first_object, where the backing object has no other 555 * refs to it, and cannot gain any more refs. Instead of a 556 * bcopy, we just move the page from the backing object to the 557 * first object. Note that we must mark the page dirty in the 558 * first object so that it will go out to swap when needed. 559 */ 560 if (lookup_still_valid && 561 /* 562 * Only one shadow object 563 */ 564 (object->shadow_count == 1) && 565 /* 566 * No COW refs, except us 567 */ 568 (object->ref_count == 1) && 569 /* 570 * Noone else can look this object up 571 */ 572 (object->handle == NULL) && 573 /* 574 * No other ways to look the object up 575 */ 576 ((object->type == OBJT_DEFAULT) || 577 (object->type == OBJT_SWAP)) && 578 /* 579 * We don't chase down the shadow chain 580 */ 581 (object == first_object->backing_object)) { 582 583 /* 584 * get rid of the unnecessary page 585 */ 586 vm_page_protect(first_m, VM_PROT_NONE); 587 PAGE_WAKEUP(first_m); 588 vm_page_free(first_m); 589 /* 590 * grab the page and put it into the process'es object 591 */ 592 vm_page_rename(m, first_object, first_pindex); 593 first_m = m; 594 m->dirty = VM_PAGE_BITS_ALL; 595 m = NULL; 596 } else { 597 /* 598 * Oh, well, lets copy it. 599 */ 600 vm_page_copy(m, first_m); 601 } 602 603 /* 604 * This code handles the case where there are two references to the 605 * backing object, and one reference is getting a copy of the 606 * page. If the other reference is the only other object that 607 * points to the backing object, then perform a virtual copy 608 * from the backing object to the other object after the 609 * page is copied to the current first_object. If the other 610 * object already has the page, we destroy it in the backing object 611 * performing an optimized collapse-type operation. We don't 612 * bother removing the page from the backing object's swap space. 613 */ 614 if (lookup_still_valid && 615 /* 616 * make sure that we have two shadow objs 617 */ 618 (object->shadow_count == 2) && 619 /* 620 * And no COW refs -- note that there are sometimes 621 * temp refs to objs, but ignore that case -- we just 622 * punt. 623 */ 624 (object->ref_count == 2) && 625 /* 626 * Noone else can look us up 627 */ 628 (object->handle == NULL) && 629 /* 630 * Not something that can be referenced elsewhere 631 */ 632 ((object->type == OBJT_DEFAULT) || 633 (object->type == OBJT_SWAP)) && 634 /* 635 * We don't bother chasing down object chain 636 */ 637 (object == first_object->backing_object)) { 638 639 vm_object_t other_object; 640 vm_pindex_t other_pindex, other_pindex_offset; 641 vm_page_t tm; 642 643 other_object = TAILQ_FIRST(&object->shadow_head); 644 if (other_object == first_object) 645 other_object = TAILQ_NEXT(other_object, shadow_list); 646 if (!other_object) 647 panic("vm_fault: other object missing"); 648 if (other_object && 649 (other_object->type == OBJT_DEFAULT) && 650 (other_object->paging_in_progress == 0)) { 651 other_pindex_offset = 652 OFF_TO_IDX(other_object->backing_object_offset); 653 if (pindex >= other_pindex_offset) { 654 other_pindex = pindex - other_pindex_offset; 655 /* 656 * If the other object has the page, just free it. 657 */ 658 if ((tm = vm_page_lookup(other_object, other_pindex))) { 659 if ((tm->flags & PG_BUSY) == 0 && 660 tm->busy == 0 && 661 tm->valid == VM_PAGE_BITS_ALL) { 662 /* 663 * get rid of the unnecessary page 664 */ 665 vm_page_protect(m, VM_PROT_NONE); 666 PAGE_WAKEUP(m); 667 vm_page_free(m); 668 m = NULL; 669 tm->dirty = VM_PAGE_BITS_ALL; 670 first_m->dirty = VM_PAGE_BITS_ALL; 671 } 672 } else { 673 /* 674 * If the other object doesn't have the page, 675 * then we move it there. 676 */ 677 vm_page_rename(m, other_object, other_pindex); 678 m->dirty = VM_PAGE_BITS_ALL; 679 m->valid = VM_PAGE_BITS_ALL; 680 } 681 } 682 } 683 } 684 685 if (m) { 686 if (m->queue != PQ_ACTIVE) 687 vm_page_activate(m); 688 /* 689 * We no longer need the old page or object. 690 */ 691 PAGE_WAKEUP(m); 692 } 693 694 vm_object_pip_wakeup(object); 695 /* 696 * Only use the new page below... 697 */ 698 699 cnt.v_cow_faults++; 700 m = first_m; 701 object = first_object; 702 pindex = first_pindex; 703 704 #if defined(OLD_COLLAPSE_CODE) 705 /* 706 * Now that we've gotten the copy out of the way, 707 * let's try to collapse the top object. 708 * 709 * But we have to play ugly games with 710 * paging_in_progress to do that... 711 */ 712 vm_object_pip_wakeup(object); 713 vm_object_collapse(object); 714 object->paging_in_progress++; 715 #endif 716 } else { 717 prot &= ~VM_PROT_WRITE; 718 } 719 } 720 721 /* 722 * We must verify that the maps have not changed since our last 723 * lookup. 724 */ 725 726 if (!lookup_still_valid) { 727 vm_object_t retry_object; 728 vm_pindex_t retry_pindex; 729 vm_prot_t retry_prot; 730 731 /* 732 * Since map entries may be pageable, make sure we can take a 733 * page fault on them. 734 */ 735 736 /* 737 * To avoid trying to write_lock the map while another process 738 * has it read_locked (in vm_map_pageable), we do not try for 739 * write permission. If the page is still writable, we will 740 * get write permission. If it is not, or has been marked 741 * needs_copy, we enter the mapping without write permission, 742 * and will merely take another fault. 743 */ 744 result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE, 745 &entry, &retry_object, &retry_pindex, &retry_prot, &wired, &su); 746 747 /* 748 * If we don't need the page any longer, put it on the active 749 * list (the easiest thing to do here). If no one needs it, 750 * pageout will grab it eventually. 751 */ 752 753 if (result != KERN_SUCCESS) { 754 RELEASE_PAGE(m); 755 UNLOCK_AND_DEALLOCATE; 756 return (result); 757 } 758 lookup_still_valid = TRUE; 759 760 if ((retry_object != first_object) || 761 (retry_pindex != first_pindex)) { 762 RELEASE_PAGE(m); 763 UNLOCK_AND_DEALLOCATE; 764 goto RetryFault; 765 } 766 /* 767 * Check whether the protection has changed or the object has 768 * been copied while we left the map unlocked. Changing from 769 * read to write permission is OK - we leave the page 770 * write-protected, and catch the write fault. Changing from 771 * write to read permission means that we can't mark the page 772 * write-enabled after all. 773 */ 774 prot &= retry_prot; 775 } 776 777 /* 778 * Put this page into the physical map. We had to do the unlock above 779 * because pmap_enter may cause other faults. We don't put the page 780 * back on the active queue until later so that the page-out daemon 781 * won't find us (yet). 782 */ 783 784 if (prot & VM_PROT_WRITE) { 785 m->flags |= PG_WRITEABLE; 786 m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY; 787 /* 788 * If the fault is a write, we know that this page is being 789 * written NOW. This will save on the pmap_is_modified() calls 790 * later. 791 */ 792 if (fault_type & VM_PROT_WRITE) { 793 m->dirty = VM_PAGE_BITS_ALL; 794 } 795 } 796 797 UNLOCK_THINGS; 798 m->valid = VM_PAGE_BITS_ALL; 799 m->flags &= ~PG_ZERO; 800 801 pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); 802 if ((change_wiring == 0) && (wired == 0)) 803 pmap_prefault(map->pmap, vaddr, entry, first_object); 804 805 m->flags |= PG_MAPPED|PG_REFERENCED; 806 807 /* 808 * If the page is not wired down, then put it where the pageout daemon 809 * can find it. 810 */ 811 if (change_wiring) { 812 if (wired) 813 vm_page_wire(m); 814 else 815 vm_page_unwire(m); 816 } else { 817 if (m->queue != PQ_ACTIVE) 818 vm_page_activate(m); 819 } 820 821 if (curproc && (curproc->p_flag & P_INMEM) && curproc->p_stats) { 822 if (hardfault) { 823 curproc->p_stats->p_ru.ru_majflt++; 824 } else { 825 curproc->p_stats->p_ru.ru_minflt++; 826 } 827 } 828 829 /* 830 * Unlock everything, and return 831 */ 832 833 PAGE_WAKEUP(m); 834 vm_object_deallocate(first_object); 835 836 return (KERN_SUCCESS); 837 838 } 839 840 /* 841 * vm_fault_wire: 842 * 843 * Wire down a range of virtual addresses in a map. 844 */ 845 int 846 vm_fault_wire(map, start, end) 847 vm_map_t map; 848 vm_offset_t start, end; 849 { 850 851 register vm_offset_t va; 852 register pmap_t pmap; 853 int rv; 854 855 pmap = vm_map_pmap(map); 856 857 /* 858 * Inform the physical mapping system that the range of addresses may 859 * not fault, so that page tables and such can be locked down as well. 860 */ 861 862 pmap_pageable(pmap, start, end, FALSE); 863 864 /* 865 * We simulate a fault to get the page and enter it in the physical 866 * map. 867 */ 868 869 for (va = start; va < end; va += PAGE_SIZE) { 870 rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE, 871 VM_FAULT_CHANGE_WIRING); 872 if (rv) { 873 if (va != start) 874 vm_fault_unwire(map, start, va); 875 return (rv); 876 } 877 } 878 return (KERN_SUCCESS); 879 } 880 881 /* 882 * vm_fault_user_wire: 883 * 884 * Wire down a range of virtual addresses in a map. This 885 * is for user mode though, so we only ask for read access 886 * on currently read only sections. 887 */ 888 int 889 vm_fault_user_wire(map, start, end) 890 vm_map_t map; 891 vm_offset_t start, end; 892 { 893 894 register vm_offset_t va; 895 register pmap_t pmap; 896 int rv; 897 898 pmap = vm_map_pmap(map); 899 900 /* 901 * Inform the physical mapping system that the range of addresses may 902 * not fault, so that page tables and such can be locked down as well. 903 */ 904 pmap_pageable(pmap, start, end, FALSE); 905 906 /* 907 * We simulate a fault to get the page and enter it in the physical 908 * map. 909 */ 910 for (va = start; va < end; va += PAGE_SIZE) { 911 rv = vm_fault(map, va, VM_PROT_READ, VM_FAULT_USER_WIRE); 912 if (rv) { 913 if (va != start) 914 vm_fault_unwire(map, start, va); 915 return (rv); 916 } 917 } 918 return (KERN_SUCCESS); 919 } 920 921 922 /* 923 * vm_fault_unwire: 924 * 925 * Unwire a range of virtual addresses in a map. 926 */ 927 void 928 vm_fault_unwire(map, start, end) 929 vm_map_t map; 930 vm_offset_t start, end; 931 { 932 933 register vm_offset_t va, pa; 934 register pmap_t pmap; 935 936 pmap = vm_map_pmap(map); 937 938 /* 939 * Since the pages are wired down, we must be able to get their 940 * mappings from the physical map system. 941 */ 942 943 for (va = start; va < end; va += PAGE_SIZE) { 944 pa = pmap_extract(pmap, va); 945 if (pa != (vm_offset_t) 0) { 946 pmap_change_wiring(pmap, va, FALSE); 947 vm_page_unwire(PHYS_TO_VM_PAGE(pa)); 948 } 949 } 950 951 /* 952 * Inform the physical mapping system that the range of addresses may 953 * fault, so that page tables and such may be unwired themselves. 954 */ 955 956 pmap_pageable(pmap, start, end, TRUE); 957 958 } 959 960 /* 961 * Routine: 962 * vm_fault_copy_entry 963 * Function: 964 * Copy all of the pages from a wired-down map entry to another. 965 * 966 * In/out conditions: 967 * The source and destination maps must be locked for write. 968 * The source map entry must be wired down (or be a sharing map 969 * entry corresponding to a main map entry that is wired down). 970 */ 971 972 void 973 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) 974 vm_map_t dst_map; 975 vm_map_t src_map; 976 vm_map_entry_t dst_entry; 977 vm_map_entry_t src_entry; 978 { 979 vm_object_t dst_object; 980 vm_object_t src_object; 981 vm_ooffset_t dst_offset; 982 vm_ooffset_t src_offset; 983 vm_prot_t prot; 984 vm_offset_t vaddr; 985 vm_page_t dst_m; 986 vm_page_t src_m; 987 988 #ifdef lint 989 src_map++; 990 #endif /* lint */ 991 992 src_object = src_entry->object.vm_object; 993 src_offset = src_entry->offset; 994 995 /* 996 * Create the top-level object for the destination entry. (Doesn't 997 * actually shadow anything - we copy the pages directly.) 998 */ 999 dst_object = vm_object_allocate(OBJT_DEFAULT, 1000 (vm_size_t) OFF_TO_IDX(dst_entry->end - dst_entry->start)); 1001 1002 dst_entry->object.vm_object = dst_object; 1003 dst_entry->offset = 0; 1004 1005 prot = dst_entry->max_protection; 1006 1007 /* 1008 * Loop through all of the pages in the entry's range, copying each 1009 * one from the source object (it should be there) to the destination 1010 * object. 1011 */ 1012 for (vaddr = dst_entry->start, dst_offset = 0; 1013 vaddr < dst_entry->end; 1014 vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { 1015 1016 /* 1017 * Allocate a page in the destination object 1018 */ 1019 do { 1020 dst_m = vm_page_alloc(dst_object, 1021 OFF_TO_IDX(dst_offset), VM_ALLOC_NORMAL); 1022 if (dst_m == NULL) { 1023 VM_WAIT; 1024 } 1025 } while (dst_m == NULL); 1026 1027 /* 1028 * Find the page in the source object, and copy it in. 1029 * (Because the source is wired down, the page will be in 1030 * memory.) 1031 */ 1032 src_m = vm_page_lookup(src_object, 1033 OFF_TO_IDX(dst_offset + src_offset)); 1034 if (src_m == NULL) 1035 panic("vm_fault_copy_wired: page missing"); 1036 1037 vm_page_copy(src_m, dst_m); 1038 1039 /* 1040 * Enter it in the pmap... 1041 */ 1042 1043 dst_m->flags &= ~PG_ZERO; 1044 pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), 1045 prot, FALSE); 1046 dst_m->flags |= PG_WRITEABLE|PG_MAPPED; 1047 1048 /* 1049 * Mark it no longer busy, and put it on the active list. 1050 */ 1051 vm_page_activate(dst_m); 1052 PAGE_WAKEUP(dst_m); 1053 } 1054 } 1055 1056 1057 /* 1058 * This routine checks around the requested page for other pages that 1059 * might be able to be faulted in. This routine brackets the viable 1060 * pages for the pages to be paged in. 1061 * 1062 * Inputs: 1063 * m, rbehind, rahead 1064 * 1065 * Outputs: 1066 * marray (array of vm_page_t), reqpage (index of requested page) 1067 * 1068 * Return value: 1069 * number of pages in marray 1070 */ 1071 int 1072 vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) 1073 vm_page_t m; 1074 int rbehind; 1075 int rahead; 1076 vm_page_t *marray; 1077 int *reqpage; 1078 { 1079 int i; 1080 vm_object_t object; 1081 vm_pindex_t pindex, startpindex, endpindex, tpindex; 1082 vm_offset_t size; 1083 vm_page_t rtm; 1084 int treqpage; 1085 int cbehind, cahead; 1086 1087 object = m->object; 1088 pindex = m->pindex; 1089 1090 /* 1091 * we don't fault-ahead for device pager 1092 */ 1093 if (object->type == OBJT_DEVICE) { 1094 *reqpage = 0; 1095 marray[0] = m; 1096 return 1; 1097 } 1098 1099 /* 1100 * if the requested page is not available, then give up now 1101 */ 1102 1103 if (!vm_pager_has_page(object, 1104 OFF_TO_IDX(object->paging_offset) + pindex, &cbehind, &cahead)) 1105 return 0; 1106 1107 if ((cbehind == 0) && (cahead == 0)) { 1108 *reqpage = 0; 1109 marray[0] = m; 1110 return 1; 1111 } 1112 1113 if (rahead > cahead) { 1114 rahead = cahead; 1115 } 1116 1117 if (rbehind > cbehind) { 1118 rbehind = cbehind; 1119 } 1120 1121 /* 1122 * try to do any readahead that we might have free pages for. 1123 */ 1124 if ((rahead + rbehind) > 1125 ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) { 1126 pagedaemon_wakeup(); 1127 *reqpage = 0; 1128 marray[0] = m; 1129 return 1; 1130 } 1131 1132 /* 1133 * scan backward for the read behind pages -- in memory or on disk not 1134 * in same object 1135 */ 1136 tpindex = pindex - 1; 1137 if (tpindex < pindex) { 1138 if (rbehind > pindex) 1139 rbehind = pindex; 1140 startpindex = pindex - rbehind; 1141 while (tpindex >= startpindex) { 1142 if (vm_page_lookup( object, tpindex)) { 1143 startpindex = tpindex + 1; 1144 break; 1145 } 1146 if (tpindex == 0) 1147 break; 1148 tpindex -= 1; 1149 } 1150 } else { 1151 startpindex = pindex; 1152 } 1153 1154 /* 1155 * scan forward for the read ahead pages -- in memory or on disk not 1156 * in same object 1157 */ 1158 tpindex = pindex + 1; 1159 endpindex = pindex + (rahead + 1); 1160 if (endpindex > object->size) 1161 endpindex = object->size; 1162 while (tpindex < endpindex) { 1163 if ( vm_page_lookup(object, tpindex)) { 1164 break; 1165 } 1166 tpindex += 1; 1167 } 1168 endpindex = tpindex; 1169 1170 /* calculate number of bytes of pages */ 1171 size = endpindex - startpindex; 1172 1173 /* calculate the page offset of the required page */ 1174 treqpage = pindex - startpindex; 1175 1176 /* see if we have space (again) */ 1177 if ((cnt.v_free_count + cnt.v_cache_count) > 1178 (cnt.v_free_reserved + size)) { 1179 /* 1180 * get our pages and don't block for them 1181 */ 1182 for (i = 0; i < size; i++) { 1183 if (i != treqpage) { 1184 rtm = vm_page_alloc(object, 1185 startpindex + i, 1186 VM_ALLOC_NORMAL); 1187 if (rtm == NULL) { 1188 if (i < treqpage) { 1189 int j; 1190 for (j = 0; j < i; j++) { 1191 FREE_PAGE(marray[j]); 1192 } 1193 *reqpage = 0; 1194 marray[0] = m; 1195 return 1; 1196 } else { 1197 size = i; 1198 *reqpage = treqpage; 1199 return size; 1200 } 1201 } 1202 marray[i] = rtm; 1203 } else { 1204 marray[i] = m; 1205 } 1206 } 1207 1208 *reqpage = treqpage; 1209 return size; 1210 } 1211 *reqpage = 0; 1212 marray[0] = m; 1213 return 1; 1214 } 1215