1 /* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * 10 * This code is derived from software contributed to Berkeley by 11 * The Mach Operating System project at Carnegie-Mellon University. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 42 * 43 * 44 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 45 * All rights reserved. 46 * 47 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 48 * 49 * Permission to use, copy, modify and distribute this software and 50 * its documentation is hereby granted, provided that both the copyright 51 * notice and this permission notice appear in all copies of the 52 * software, derivative works or modified versions, and any portions 53 * thereof, and that both notices appear in supporting documentation. 54 * 55 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 56 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 57 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 58 * 59 * Carnegie Mellon requests users of this software to return to 60 * 61 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 62 * School of Computer Science 63 * Carnegie Mellon University 64 * Pittsburgh PA 15213-3890 65 * 66 * any improvements or extensions that they make and grant Carnegie the 67 * rights to redistribute these changes. 68 * 69 * $Id: vm_fault.c,v 1.70 1997/08/25 22:15:19 bde Exp $ 70 */ 71 72 /* 73 * Page fault handling module. 74 */ 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/proc.h> 79 #include <sys/vnode.h> 80 #include <sys/resourcevar.h> 81 #include <sys/vmmeter.h> 82 83 #include <vm/vm.h> 84 #include <vm/vm_param.h> 85 #include <vm/vm_prot.h> 86 #include <sys/lock.h> 87 #include <vm/pmap.h> 88 #include <vm/vm_map.h> 89 #include <vm/vm_object.h> 90 #include <vm/vm_page.h> 91 #include <vm/vm_pageout.h> 92 #include <vm/vm_kern.h> 93 #include <vm/vm_pager.h> 94 #include <vm/vnode_pager.h> 95 #include <vm/vm_extern.h> 96 97 int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *)); 98 99 #define VM_FAULT_READ_AHEAD 4 100 #define VM_FAULT_READ_BEHIND 3 101 #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) 102 103 /* 104 * vm_fault: 105 * 106 * Handle a page fault occuring at the given address, 107 * requiring the given permissions, in the map specified. 108 * If successful, the page is inserted into the 109 * associated physical map. 110 * 111 * NOTE: the given address should be truncated to the 112 * proper page address. 113 * 114 * KERN_SUCCESS is returned if the page fault is handled; otherwise, 115 * a standard error specifying why the fault is fatal is returned. 116 * 117 * 118 * The map in question must be referenced, and remains so. 119 * Caller may hold no locks. 120 */ 121 int 122 vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags) 123 { 124 vm_object_t first_object; 125 vm_pindex_t first_pindex; 126 vm_map_entry_t entry; 127 register vm_object_t object; 128 register vm_pindex_t pindex; 129 vm_page_t m; 130 vm_page_t first_m; 131 vm_prot_t prot; 132 int result; 133 boolean_t wired; 134 boolean_t su; 135 boolean_t lookup_still_valid; 136 vm_page_t old_m; 137 vm_object_t next_object; 138 vm_page_t marray[VM_FAULT_READ]; 139 int hardfault = 0; 140 struct vnode *vp = NULL; 141 struct proc *p = curproc; /* XXX */ 142 143 cnt.v_vm_faults++; /* needs lock XXX */ 144 /* 145 * Recovery actions 146 */ 147 #define FREE_PAGE(m) { \ 148 PAGE_WAKEUP(m); \ 149 vm_page_free(m); \ 150 } 151 152 #define RELEASE_PAGE(m) { \ 153 PAGE_WAKEUP(m); \ 154 if (m->queue != PQ_ACTIVE) vm_page_activate(m); \ 155 } 156 157 #define UNLOCK_MAP { \ 158 if (lookup_still_valid) { \ 159 vm_map_lookup_done(map, entry); \ 160 lookup_still_valid = FALSE; \ 161 } \ 162 } 163 164 #define UNLOCK_THINGS { \ 165 vm_object_pip_wakeup(object); \ 166 if (object != first_object) { \ 167 FREE_PAGE(first_m); \ 168 vm_object_pip_wakeup(first_object); \ 169 } \ 170 UNLOCK_MAP; \ 171 if (vp != NULL) VOP_UNLOCK(vp, 0, p); \ 172 } 173 174 #define UNLOCK_AND_DEALLOCATE { \ 175 UNLOCK_THINGS; \ 176 vm_object_deallocate(first_object); \ 177 } 178 179 180 RetryFault:; 181 182 /* 183 * Find the backing store object and offset into it to begin the 184 * search. 185 */ 186 187 if ((result = vm_map_lookup(&map, vaddr, 188 fault_type, &entry, &first_object, 189 &first_pindex, &prot, &wired, &su)) != KERN_SUCCESS) { 190 return (result); 191 } 192 193 if (entry->eflags & MAP_ENTRY_NOFAULT) { 194 panic("vm_fault: fault on nofault entry, addr: %lx", 195 vaddr); 196 } 197 198 /* 199 * If we are user-wiring a r/w segment, and it is COW, then 200 * we need to do the COW operation. Note that we don't COW 201 * currently RO sections now, because it is NOT desirable 202 * to COW .text. We simply keep .text from ever being COW'ed 203 * and take the heat that one cannot debug wired .text sections. 204 */ 205 if (((fault_flags & VM_FAULT_WIRE_MASK) == VM_FAULT_USER_WIRE) && (entry->eflags & MAP_ENTRY_NEEDS_COPY)) { 206 if(entry->protection & VM_PROT_WRITE) { 207 int tresult; 208 vm_map_lookup_done(map, entry); 209 210 tresult = vm_map_lookup(&map, vaddr, VM_PROT_READ|VM_PROT_WRITE, 211 &entry, &first_object, &first_pindex, &prot, &wired, &su); 212 if (tresult != KERN_SUCCESS) 213 return tresult; 214 } else { 215 /* 216 * If we don't COW now, on a user wire, the user will never 217 * be able to write to the mapping. If we don't make this 218 * restriction, the bookkeeping would be nearly impossible. 219 */ 220 entry->max_protection &= ~VM_PROT_WRITE; 221 } 222 } 223 224 vp = vnode_pager_lock(first_object); 225 226 lookup_still_valid = TRUE; 227 228 if (wired) 229 fault_type = prot; 230 231 first_m = NULL; 232 233 /* 234 * Make a reference to this object to prevent its disposal while we 235 * are messing with it. Once we have the reference, the map is free 236 * to be diddled. Since objects reference their shadows (and copies), 237 * they will stay around as well. 238 */ 239 240 first_object->ref_count++; 241 first_object->paging_in_progress++; 242 243 /* 244 * INVARIANTS (through entire routine): 245 * 246 * 1) At all times, we must either have the object lock or a busy 247 * page in some object to prevent some other process from trying to 248 * bring in the same page. 249 * 250 * Note that we cannot hold any locks during the pager access or when 251 * waiting for memory, so we use a busy page then. 252 * 253 * Note also that we aren't as concerned about more than one thead 254 * attempting to pager_data_unlock the same page at once, so we don't 255 * hold the page as busy then, but do record the highest unlock value 256 * so far. [Unlock requests may also be delivered out of order.] 257 * 258 * 2) Once we have a busy page, we must remove it from the pageout 259 * queues, so that the pageout daemon will not grab it away. 260 * 261 * 3) To prevent another process from racing us down the shadow chain 262 * and entering a new page in the top object before we do, we must 263 * keep a busy page in the top object while following the shadow 264 * chain. 265 * 266 * 4) We must increment paging_in_progress on any object for which 267 * we have a busy page, to prevent vm_object_collapse from removing 268 * the busy page without our noticing. 269 */ 270 271 /* 272 * Search for the page at object/offset. 273 */ 274 275 object = first_object; 276 pindex = first_pindex; 277 278 /* 279 * See whether this page is resident 280 */ 281 282 while (TRUE) { 283 m = vm_page_lookup(object, pindex); 284 if (m != NULL) { 285 int queue; 286 /* 287 * If the page is being brought in, wait for it and 288 * then retry. 289 */ 290 if ((m->flags & PG_BUSY) || m->busy) { 291 int s; 292 293 UNLOCK_THINGS; 294 s = splvm(); 295 if (((m->flags & PG_BUSY) || m->busy)) { 296 m->flags |= PG_WANTED | PG_REFERENCED; 297 cnt.v_intrans++; 298 tsleep(m, PSWP, "vmpfw", 0); 299 } 300 splx(s); 301 vm_object_deallocate(first_object); 302 goto RetryFault; 303 } 304 305 queue = m->queue; 306 vm_page_unqueue_nowakeup(m); 307 308 /* 309 * Mark page busy for other processes, and the pagedaemon. 310 */ 311 if (((queue - m->pc) == PQ_CACHE) && 312 (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { 313 vm_page_activate(m); 314 UNLOCK_AND_DEALLOCATE; 315 VM_WAIT; 316 goto RetryFault; 317 } 318 319 m->flags |= PG_BUSY; 320 321 if (/*m->valid && */ 322 ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && 323 m->object != kernel_object && m->object != kmem_object) { 324 goto readrest; 325 } 326 break; 327 } 328 if (((object->type != OBJT_DEFAULT) && (((fault_flags & VM_FAULT_WIRE_MASK) == 0) || wired)) 329 || (object == first_object)) { 330 331 if (pindex >= object->size) { 332 UNLOCK_AND_DEALLOCATE; 333 return (KERN_PROTECTION_FAILURE); 334 } 335 336 /* 337 * Allocate a new page for this object/offset pair. 338 */ 339 m = vm_page_alloc(object, pindex, 340 (vp || object->backing_object)?VM_ALLOC_NORMAL:VM_ALLOC_ZERO); 341 342 if (m == NULL) { 343 UNLOCK_AND_DEALLOCATE; 344 VM_WAIT; 345 goto RetryFault; 346 } 347 } 348 readrest: 349 if (object->type != OBJT_DEFAULT && (((fault_flags & VM_FAULT_WIRE_MASK) == 0) || wired)) { 350 int rv; 351 int faultcount; 352 int reqpage; 353 int ahead, behind; 354 355 ahead = VM_FAULT_READ_AHEAD; 356 behind = VM_FAULT_READ_BEHIND; 357 if (first_object->behavior == OBJ_RANDOM) { 358 ahead = 0; 359 behind = 0; 360 } 361 362 if ((first_object->type != OBJT_DEVICE) && 363 (first_object->behavior == OBJ_SEQUENTIAL)) { 364 vm_pindex_t firstpindex, tmppindex; 365 if (first_pindex < 366 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1)) 367 firstpindex = 0; 368 else 369 firstpindex = first_pindex - 370 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1); 371 372 for(tmppindex = first_pindex - 1; 373 tmppindex >= firstpindex; 374 --tmppindex) { 375 vm_page_t mt; 376 mt = vm_page_lookup( first_object, tmppindex); 377 if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL)) 378 break; 379 if (mt->busy || 380 (mt->flags & (PG_BUSY|PG_FICTITIOUS)) || 381 mt->hold_count || 382 mt->wire_count) 383 continue; 384 if (mt->dirty == 0) 385 vm_page_test_dirty(mt); 386 if (mt->dirty) { 387 vm_page_protect(mt, VM_PROT_NONE); 388 vm_page_deactivate(mt); 389 } else { 390 vm_page_cache(mt); 391 } 392 } 393 394 ahead += behind; 395 behind = 0; 396 } 397 398 /* 399 * now we find out if any other pages should be paged 400 * in at this time this routine checks to see if the 401 * pages surrounding this fault reside in the same 402 * object as the page for this fault. If they do, 403 * then they are faulted in also into the object. The 404 * array "marray" returned contains an array of 405 * vm_page_t structs where one of them is the 406 * vm_page_t passed to the routine. The reqpage 407 * return value is the index into the marray for the 408 * vm_page_t passed to the routine. 409 */ 410 faultcount = vm_fault_additional_pages( 411 m, behind, ahead, marray, &reqpage); 412 413 /* 414 * Call the pager to retrieve the data, if any, after 415 * releasing the lock on the map. 416 */ 417 UNLOCK_MAP; 418 419 rv = faultcount ? 420 vm_pager_get_pages(object, marray, faultcount, 421 reqpage) : VM_PAGER_FAIL; 422 423 if (rv == VM_PAGER_OK) { 424 /* 425 * Found the page. Leave it busy while we play 426 * with it. 427 */ 428 429 /* 430 * Relookup in case pager changed page. Pager 431 * is responsible for disposition of old page 432 * if moved. 433 */ 434 m = vm_page_lookup(object, pindex); 435 if( !m) { 436 UNLOCK_AND_DEALLOCATE; 437 goto RetryFault; 438 } 439 440 hardfault++; 441 break; 442 } 443 /* 444 * Remove the bogus page (which does not exist at this 445 * object/offset); before doing so, we must get back 446 * our object lock to preserve our invariant. 447 * 448 * Also wake up any other process that may want to bring 449 * in this page. 450 * 451 * If this is the top-level object, we must leave the 452 * busy page to prevent another process from rushing 453 * past us, and inserting the page in that object at 454 * the same time that we are. 455 */ 456 457 if (rv == VM_PAGER_ERROR) 458 printf("vm_fault: pager input (probably hardware) error, PID %d failure\n", 459 curproc->p_pid); 460 /* 461 * Data outside the range of the pager or an I/O error 462 */ 463 /* 464 * XXX - the check for kernel_map is a kludge to work 465 * around having the machine panic on a kernel space 466 * fault w/ I/O error. 467 */ 468 if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { 469 FREE_PAGE(m); 470 UNLOCK_AND_DEALLOCATE; 471 return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); 472 } 473 if (object != first_object) { 474 FREE_PAGE(m); 475 /* 476 * XXX - we cannot just fall out at this 477 * point, m has been freed and is invalid! 478 */ 479 } 480 } 481 /* 482 * We get here if the object has default pager (or unwiring) or the 483 * pager doesn't have the page. 484 */ 485 if (object == first_object) 486 first_m = m; 487 488 /* 489 * Move on to the next object. Lock the next object before 490 * unlocking the current one. 491 */ 492 493 pindex += OFF_TO_IDX(object->backing_object_offset); 494 next_object = object->backing_object; 495 if (next_object == NULL) { 496 /* 497 * If there's no object left, fill the page in the top 498 * object with zeros. 499 */ 500 if (object != first_object) { 501 vm_object_pip_wakeup(object); 502 503 object = first_object; 504 pindex = first_pindex; 505 m = first_m; 506 } 507 first_m = NULL; 508 509 if ((m->flags & PG_ZERO) == 0) 510 vm_page_zero_fill(m); 511 cnt.v_zfod++; 512 break; 513 } else { 514 if (object != first_object) { 515 vm_object_pip_wakeup(object); 516 } 517 object = next_object; 518 object->paging_in_progress++; 519 } 520 } 521 522 if ((m->flags & PG_BUSY) == 0) 523 panic("vm_fault: not busy after main loop"); 524 525 /* 526 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock 527 * is held.] 528 */ 529 530 old_m = m; /* save page that would be copied */ 531 532 /* 533 * If the page is being written, but isn't already owned by the 534 * top-level object, we have to copy it into a new page owned by the 535 * top-level object. 536 */ 537 538 if (object != first_object) { 539 /* 540 * We only really need to copy if we want to write it. 541 */ 542 543 if (fault_type & VM_PROT_WRITE) { 544 545 /* 546 * This allows pages to be virtually copied from a backing_object 547 * into the first_object, where the backing object has no other 548 * refs to it, and cannot gain any more refs. Instead of a 549 * bcopy, we just move the page from the backing object to the 550 * first object. Note that we must mark the page dirty in the 551 * first object so that it will go out to swap when needed. 552 */ 553 if (lookup_still_valid && 554 /* 555 * Only one shadow object 556 */ 557 (object->shadow_count == 1) && 558 /* 559 * No COW refs, except us 560 */ 561 (object->ref_count == 1) && 562 /* 563 * Noone else can look this object up 564 */ 565 (object->handle == NULL) && 566 /* 567 * No other ways to look the object up 568 */ 569 ((object->type == OBJT_DEFAULT) || 570 (object->type == OBJT_SWAP)) && 571 /* 572 * We don't chase down the shadow chain 573 */ 574 (object == first_object->backing_object)) { 575 576 /* 577 * get rid of the unnecessary page 578 */ 579 vm_page_protect(first_m, VM_PROT_NONE); 580 PAGE_WAKEUP(first_m); 581 vm_page_free(first_m); 582 /* 583 * grab the page and put it into the process'es object 584 */ 585 vm_page_rename(m, first_object, first_pindex); 586 first_m = m; 587 m->dirty = VM_PAGE_BITS_ALL; 588 m = NULL; 589 } else { 590 /* 591 * Oh, well, lets copy it. 592 */ 593 vm_page_copy(m, first_m); 594 } 595 596 /* 597 * This code handles the case where there are two references to the 598 * backing object, and one reference is getting a copy of the 599 * page. If the other reference is the only other object that 600 * points to the backing object, then perform a virtual copy 601 * from the backing object to the other object after the 602 * page is copied to the current first_object. If the other 603 * object already has the page, we destroy it in the backing object 604 * performing an optimized collapse-type operation. We don't 605 * bother removing the page from the backing object's swap space. 606 */ 607 if (lookup_still_valid && 608 /* 609 * make sure that we have two shadow objs 610 */ 611 (object->shadow_count == 2) && 612 /* 613 * And no COW refs -- note that there are sometimes 614 * temp refs to objs, but ignore that case -- we just 615 * punt. 616 */ 617 (object->ref_count == 2) && 618 /* 619 * Noone else can look us up 620 */ 621 (object->handle == NULL) && 622 /* 623 * Not something that can be referenced elsewhere 624 */ 625 ((object->type == OBJT_DEFAULT) || 626 (object->type == OBJT_SWAP)) && 627 /* 628 * We don't bother chasing down object chain 629 */ 630 (object == first_object->backing_object)) { 631 632 vm_object_t other_object; 633 vm_pindex_t other_pindex, other_pindex_offset; 634 vm_page_t tm; 635 636 other_object = TAILQ_FIRST(&object->shadow_head); 637 if (other_object == first_object) 638 other_object = TAILQ_NEXT(other_object, shadow_list); 639 if (!other_object) 640 panic("vm_fault: other object missing"); 641 if (other_object && 642 (other_object->type == OBJT_DEFAULT) && 643 (other_object->paging_in_progress == 0)) { 644 other_pindex_offset = 645 OFF_TO_IDX(other_object->backing_object_offset); 646 if (pindex >= other_pindex_offset) { 647 other_pindex = pindex - other_pindex_offset; 648 /* 649 * If the other object has the page, just free it. 650 */ 651 if ((tm = vm_page_lookup(other_object, other_pindex))) { 652 if ((tm->flags & PG_BUSY) == 0 && 653 tm->busy == 0 && 654 tm->valid == VM_PAGE_BITS_ALL) { 655 /* 656 * get rid of the unnecessary page 657 */ 658 vm_page_protect(m, VM_PROT_NONE); 659 PAGE_WAKEUP(m); 660 vm_page_free(m); 661 m = NULL; 662 tm->dirty = VM_PAGE_BITS_ALL; 663 first_m->dirty = VM_PAGE_BITS_ALL; 664 } 665 } else { 666 /* 667 * If the other object doesn't have the page, 668 * then we move it there. 669 */ 670 vm_page_rename(m, other_object, other_pindex); 671 m->dirty = VM_PAGE_BITS_ALL; 672 m->valid = VM_PAGE_BITS_ALL; 673 } 674 } 675 } 676 } 677 678 if (m) { 679 if (m->queue != PQ_ACTIVE) 680 vm_page_activate(m); 681 /* 682 * We no longer need the old page or object. 683 */ 684 PAGE_WAKEUP(m); 685 } 686 687 vm_object_pip_wakeup(object); 688 /* 689 * Only use the new page below... 690 */ 691 692 cnt.v_cow_faults++; 693 m = first_m; 694 object = first_object; 695 pindex = first_pindex; 696 697 /* 698 * Now that we've gotten the copy out of the way, 699 * let's try to collapse the top object. 700 * 701 * But we have to play ugly games with 702 * paging_in_progress to do that... 703 */ 704 vm_object_pip_wakeup(object); 705 vm_object_collapse(object); 706 object->paging_in_progress++; 707 } else { 708 prot &= ~VM_PROT_WRITE; 709 } 710 } 711 712 /* 713 * We must verify that the maps have not changed since our last 714 * lookup. 715 */ 716 717 if (!lookup_still_valid) { 718 vm_object_t retry_object; 719 vm_pindex_t retry_pindex; 720 vm_prot_t retry_prot; 721 722 /* 723 * Since map entries may be pageable, make sure we can take a 724 * page fault on them. 725 */ 726 727 /* 728 * To avoid trying to write_lock the map while another process 729 * has it read_locked (in vm_map_pageable), we do not try for 730 * write permission. If the page is still writable, we will 731 * get write permission. If it is not, or has been marked 732 * needs_copy, we enter the mapping without write permission, 733 * and will merely take another fault. 734 */ 735 result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE, 736 &entry, &retry_object, &retry_pindex, &retry_prot, &wired, &su); 737 738 /* 739 * If we don't need the page any longer, put it on the active 740 * list (the easiest thing to do here). If no one needs it, 741 * pageout will grab it eventually. 742 */ 743 744 if (result != KERN_SUCCESS) { 745 RELEASE_PAGE(m); 746 UNLOCK_AND_DEALLOCATE; 747 return (result); 748 } 749 lookup_still_valid = TRUE; 750 751 if ((retry_object != first_object) || 752 (retry_pindex != first_pindex)) { 753 RELEASE_PAGE(m); 754 UNLOCK_AND_DEALLOCATE; 755 goto RetryFault; 756 } 757 /* 758 * Check whether the protection has changed or the object has 759 * been copied while we left the map unlocked. Changing from 760 * read to write permission is OK - we leave the page 761 * write-protected, and catch the write fault. Changing from 762 * write to read permission means that we can't mark the page 763 * write-enabled after all. 764 */ 765 prot &= retry_prot; 766 } 767 768 /* 769 * Put this page into the physical map. We had to do the unlock above 770 * because pmap_enter may cause other faults. We don't put the page 771 * back on the active queue until later so that the page-out daemon 772 * won't find us (yet). 773 */ 774 775 if (prot & VM_PROT_WRITE) { 776 m->flags |= PG_WRITEABLE; 777 m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY; 778 /* 779 * If the fault is a write, we know that this page is being 780 * written NOW. This will save on the pmap_is_modified() calls 781 * later. 782 */ 783 if (fault_flags & VM_FAULT_DIRTY) { 784 m->dirty = VM_PAGE_BITS_ALL; 785 } 786 } 787 788 UNLOCK_THINGS; 789 m->valid = VM_PAGE_BITS_ALL; 790 m->flags &= ~PG_ZERO; 791 792 pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); 793 if (((fault_flags & VM_FAULT_WIRE_MASK) == 0) && (wired == 0)) 794 pmap_prefault(map->pmap, vaddr, entry, first_object); 795 796 m->flags |= PG_MAPPED|PG_REFERENCED; 797 if (fault_flags & VM_FAULT_HOLD) 798 vm_page_hold(m); 799 800 /* 801 * If the page is not wired down, then put it where the pageout daemon 802 * can find it. 803 */ 804 if (fault_flags & VM_FAULT_WIRE_MASK) { 805 if (wired) 806 vm_page_wire(m); 807 else 808 vm_page_unwire(m); 809 } else { 810 if (m->queue != PQ_ACTIVE) 811 vm_page_activate(m); 812 } 813 814 if (curproc && (curproc->p_flag & P_INMEM) && curproc->p_stats) { 815 if (hardfault) { 816 curproc->p_stats->p_ru.ru_majflt++; 817 } else { 818 curproc->p_stats->p_ru.ru_minflt++; 819 } 820 } 821 822 /* 823 * Unlock everything, and return 824 */ 825 826 PAGE_WAKEUP(m); 827 vm_object_deallocate(first_object); 828 829 return (KERN_SUCCESS); 830 831 } 832 833 /* 834 * vm_fault_wire: 835 * 836 * Wire down a range of virtual addresses in a map. 837 */ 838 int 839 vm_fault_wire(map, start, end) 840 vm_map_t map; 841 vm_offset_t start, end; 842 { 843 844 register vm_offset_t va; 845 register pmap_t pmap; 846 int rv; 847 848 pmap = vm_map_pmap(map); 849 850 /* 851 * Inform the physical mapping system that the range of addresses may 852 * not fault, so that page tables and such can be locked down as well. 853 */ 854 855 pmap_pageable(pmap, start, end, FALSE); 856 857 /* 858 * We simulate a fault to get the page and enter it in the physical 859 * map. 860 */ 861 862 for (va = start; va < end; va += PAGE_SIZE) { 863 rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE, 864 VM_FAULT_CHANGE_WIRING); 865 if (rv) { 866 if (va != start) 867 vm_fault_unwire(map, start, va); 868 return (rv); 869 } 870 } 871 return (KERN_SUCCESS); 872 } 873 874 /* 875 * vm_fault_user_wire: 876 * 877 * Wire down a range of virtual addresses in a map. This 878 * is for user mode though, so we only ask for read access 879 * on currently read only sections. 880 */ 881 int 882 vm_fault_user_wire(map, start, end) 883 vm_map_t map; 884 vm_offset_t start, end; 885 { 886 887 register vm_offset_t va; 888 register pmap_t pmap; 889 int rv; 890 891 pmap = vm_map_pmap(map); 892 893 /* 894 * Inform the physical mapping system that the range of addresses may 895 * not fault, so that page tables and such can be locked down as well. 896 */ 897 pmap_pageable(pmap, start, end, FALSE); 898 899 /* 900 * We simulate a fault to get the page and enter it in the physical 901 * map. 902 */ 903 for (va = start; va < end; va += PAGE_SIZE) { 904 rv = vm_fault(map, va, VM_PROT_READ, VM_FAULT_USER_WIRE); 905 if (rv) { 906 if (va != start) 907 vm_fault_unwire(map, start, va); 908 return (rv); 909 } 910 } 911 return (KERN_SUCCESS); 912 } 913 914 915 /* 916 * vm_fault_unwire: 917 * 918 * Unwire a range of virtual addresses in a map. 919 */ 920 void 921 vm_fault_unwire(map, start, end) 922 vm_map_t map; 923 vm_offset_t start, end; 924 { 925 926 register vm_offset_t va, pa; 927 register pmap_t pmap; 928 929 pmap = vm_map_pmap(map); 930 931 /* 932 * Since the pages are wired down, we must be able to get their 933 * mappings from the physical map system. 934 */ 935 936 for (va = start; va < end; va += PAGE_SIZE) { 937 pa = pmap_extract(pmap, va); 938 if (pa != (vm_offset_t) 0) { 939 pmap_change_wiring(pmap, va, FALSE); 940 vm_page_unwire(PHYS_TO_VM_PAGE(pa)); 941 } 942 } 943 944 /* 945 * Inform the physical mapping system that the range of addresses may 946 * fault, so that page tables and such may be unwired themselves. 947 */ 948 949 pmap_pageable(pmap, start, end, TRUE); 950 951 } 952 953 /* 954 * Routine: 955 * vm_fault_copy_entry 956 * Function: 957 * Copy all of the pages from a wired-down map entry to another. 958 * 959 * In/out conditions: 960 * The source and destination maps must be locked for write. 961 * The source map entry must be wired down (or be a sharing map 962 * entry corresponding to a main map entry that is wired down). 963 */ 964 965 void 966 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) 967 vm_map_t dst_map; 968 vm_map_t src_map; 969 vm_map_entry_t dst_entry; 970 vm_map_entry_t src_entry; 971 { 972 vm_object_t dst_object; 973 vm_object_t src_object; 974 vm_ooffset_t dst_offset; 975 vm_ooffset_t src_offset; 976 vm_prot_t prot; 977 vm_offset_t vaddr; 978 vm_page_t dst_m; 979 vm_page_t src_m; 980 981 #ifdef lint 982 src_map++; 983 #endif /* lint */ 984 985 src_object = src_entry->object.vm_object; 986 src_offset = src_entry->offset; 987 988 /* 989 * Create the top-level object for the destination entry. (Doesn't 990 * actually shadow anything - we copy the pages directly.) 991 */ 992 dst_object = vm_object_allocate(OBJT_DEFAULT, 993 (vm_size_t) OFF_TO_IDX(dst_entry->end - dst_entry->start)); 994 995 dst_entry->object.vm_object = dst_object; 996 dst_entry->offset = 0; 997 998 prot = dst_entry->max_protection; 999 1000 /* 1001 * Loop through all of the pages in the entry's range, copying each 1002 * one from the source object (it should be there) to the destination 1003 * object. 1004 */ 1005 for (vaddr = dst_entry->start, dst_offset = 0; 1006 vaddr < dst_entry->end; 1007 vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { 1008 1009 /* 1010 * Allocate a page in the destination object 1011 */ 1012 do { 1013 dst_m = vm_page_alloc(dst_object, 1014 OFF_TO_IDX(dst_offset), VM_ALLOC_NORMAL); 1015 if (dst_m == NULL) { 1016 VM_WAIT; 1017 } 1018 } while (dst_m == NULL); 1019 1020 /* 1021 * Find the page in the source object, and copy it in. 1022 * (Because the source is wired down, the page will be in 1023 * memory.) 1024 */ 1025 src_m = vm_page_lookup(src_object, 1026 OFF_TO_IDX(dst_offset + src_offset)); 1027 if (src_m == NULL) 1028 panic("vm_fault_copy_wired: page missing"); 1029 1030 vm_page_copy(src_m, dst_m); 1031 1032 /* 1033 * Enter it in the pmap... 1034 */ 1035 1036 dst_m->flags &= ~PG_ZERO; 1037 pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), 1038 prot, FALSE); 1039 dst_m->flags |= PG_WRITEABLE|PG_MAPPED; 1040 1041 /* 1042 * Mark it no longer busy, and put it on the active list. 1043 */ 1044 vm_page_activate(dst_m); 1045 PAGE_WAKEUP(dst_m); 1046 } 1047 } 1048 1049 1050 /* 1051 * This routine checks around the requested page for other pages that 1052 * might be able to be faulted in. This routine brackets the viable 1053 * pages for the pages to be paged in. 1054 * 1055 * Inputs: 1056 * m, rbehind, rahead 1057 * 1058 * Outputs: 1059 * marray (array of vm_page_t), reqpage (index of requested page) 1060 * 1061 * Return value: 1062 * number of pages in marray 1063 */ 1064 int 1065 vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) 1066 vm_page_t m; 1067 int rbehind; 1068 int rahead; 1069 vm_page_t *marray; 1070 int *reqpage; 1071 { 1072 int i; 1073 vm_object_t object; 1074 vm_pindex_t pindex, startpindex, endpindex, tpindex; 1075 vm_offset_t size; 1076 vm_page_t rtm; 1077 int treqpage; 1078 int cbehind, cahead; 1079 1080 object = m->object; 1081 pindex = m->pindex; 1082 1083 /* 1084 * we don't fault-ahead for device pager 1085 */ 1086 if (object->type == OBJT_DEVICE) { 1087 *reqpage = 0; 1088 marray[0] = m; 1089 return 1; 1090 } 1091 1092 /* 1093 * if the requested page is not available, then give up now 1094 */ 1095 1096 if (!vm_pager_has_page(object, 1097 OFF_TO_IDX(object->paging_offset) + pindex, &cbehind, &cahead)) 1098 return 0; 1099 1100 if ((cbehind == 0) && (cahead == 0)) { 1101 *reqpage = 0; 1102 marray[0] = m; 1103 return 1; 1104 } 1105 1106 if (rahead > cahead) { 1107 rahead = cahead; 1108 } 1109 1110 if (rbehind > cbehind) { 1111 rbehind = cbehind; 1112 } 1113 1114 /* 1115 * try to do any readahead that we might have free pages for. 1116 */ 1117 if ((rahead + rbehind) > 1118 ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) { 1119 pagedaemon_wakeup(); 1120 *reqpage = 0; 1121 marray[0] = m; 1122 return 1; 1123 } 1124 1125 /* 1126 * scan backward for the read behind pages -- in memory or on disk not 1127 * in same object 1128 */ 1129 tpindex = pindex - 1; 1130 if (tpindex < pindex) { 1131 if (rbehind > pindex) 1132 rbehind = pindex; 1133 startpindex = pindex - rbehind; 1134 while (tpindex >= startpindex) { 1135 if (vm_page_lookup( object, tpindex)) { 1136 startpindex = tpindex + 1; 1137 break; 1138 } 1139 if (tpindex == 0) 1140 break; 1141 tpindex -= 1; 1142 } 1143 } else { 1144 startpindex = pindex; 1145 } 1146 1147 /* 1148 * scan forward for the read ahead pages -- in memory or on disk not 1149 * in same object 1150 */ 1151 tpindex = pindex + 1; 1152 endpindex = pindex + (rahead + 1); 1153 if (endpindex > object->size) 1154 endpindex = object->size; 1155 while (tpindex < endpindex) { 1156 if ( vm_page_lookup(object, tpindex)) { 1157 break; 1158 } 1159 tpindex += 1; 1160 } 1161 endpindex = tpindex; 1162 1163 /* calculate number of bytes of pages */ 1164 size = endpindex - startpindex; 1165 1166 /* calculate the page offset of the required page */ 1167 treqpage = pindex - startpindex; 1168 1169 /* see if we have space (again) */ 1170 if ((cnt.v_free_count + cnt.v_cache_count) > 1171 (cnt.v_free_reserved + size)) { 1172 /* 1173 * get our pages and don't block for them 1174 */ 1175 for (i = 0; i < size; i++) { 1176 if (i != treqpage) { 1177 rtm = vm_page_alloc(object, 1178 startpindex + i, 1179 VM_ALLOC_NORMAL); 1180 if (rtm == NULL) { 1181 if (i < treqpage) { 1182 int j; 1183 for (j = 0; j < i; j++) { 1184 FREE_PAGE(marray[j]); 1185 } 1186 *reqpage = 0; 1187 marray[0] = m; 1188 return 1; 1189 } else { 1190 size = i; 1191 *reqpage = treqpage; 1192 return size; 1193 } 1194 } 1195 marray[i] = rtm; 1196 } else { 1197 marray[i] = m; 1198 } 1199 } 1200 1201 *reqpage = treqpage; 1202 return size; 1203 } 1204 *reqpage = 0; 1205 marray[0] = m; 1206 return 1; 1207 } 1208