/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_fault.c	8.4 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_fault.c,v 1.68 1997/04/06 16:16:11 peter Exp $
 */

/*
 * Page fault handling module.
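 *
 * The entry points defined below are vm_fault(), which resolves a single
 * fault; vm_fault_wire(), vm_fault_user_wire() and vm_fault_unwire(), which
 * wire and unwire address ranges by simulating faults; and
 * vm_fault_copy_entry(), which copies the pages of a wired-down map entry.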
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/resource.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
#include <sys/buf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *));

#define VM_FAULT_READ_AHEAD 4
#define VM_FAULT_READ_BEHIND 3
#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)

/*
 *	vm_fault:
 *
 *	Handle a page fault occurring at the given address,
 *	requiring the given permissions, in the map specified.
 *	If successful, the page is inserted into the
 *	associated physical map.
 *
 *	NOTE: the given address should be truncated to the
 *	proper page address.
 *
 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *	a standard error specifying why the fault is fatal is returned.
 *
 *
 *	The map in question must be referenced, and remains so.
 *	Caller may hold no locks.
 */
int
vm_fault(map, vaddr, fault_type, fault_flags)
	vm_map_t map;
	vm_offset_t vaddr;
	vm_prot_t fault_type;
	int fault_flags;
{
	vm_object_t first_object;
	vm_pindex_t first_pindex;
	vm_map_entry_t entry;
	register vm_object_t object;
	register vm_pindex_t pindex;
	vm_page_t m;
	vm_page_t first_m;
	vm_prot_t prot;
	int result;
	boolean_t wired;
	boolean_t su;
	boolean_t lookup_still_valid;
	vm_page_t old_m;
	vm_object_t next_object;
	vm_page_t marray[VM_FAULT_READ];
	int hardfault = 0;
	struct vnode *vp = NULL;
	struct proc *p = curproc;	/* XXX */

	cnt.v_vm_faults++;	/* needs lock XXX */
	/*
	 * Recovery actions
	 */
#define FREE_PAGE(m) {					\
	PAGE_WAKEUP(m);					\
	vm_page_free(m);				\
}

#define RELEASE_PAGE(m) {				\
	PAGE_WAKEUP(m);					\
	if (m->queue != PQ_ACTIVE) vm_page_activate(m);	\
}

#define UNLOCK_MAP {					\
	if (lookup_still_valid) {			\
		vm_map_lookup_done(map, entry);		\
		lookup_still_valid = FALSE;		\
	}						\
}

#define UNLOCK_THINGS {					\
	vm_object_pip_wakeup(object);			\
	if (object != first_object) {			\
		FREE_PAGE(first_m);			\
		vm_object_pip_wakeup(first_object);	\
	}						\
	UNLOCK_MAP;					\
	if (vp != NULL) VOP_UNLOCK(vp, 0, p);		\
}

#define UNLOCK_AND_DEALLOCATE {				\
	UNLOCK_THINGS;					\
	vm_object_deallocate(first_object);		\
}


RetryFault:;

	/*
	 * Find the backing store object and offset into it to begin the
	 * search.
	 */

	if ((result = vm_map_lookup(&map, vaddr,
	    fault_type, &entry, &first_object,
	    &first_pindex, &prot, &wired, &su)) != KERN_SUCCESS) {
		return (result);
	}

	if (entry->eflags & MAP_ENTRY_NOFAULT) {
		panic("vm_fault: fault on nofault entry, addr: %lx",
		    vaddr);
	}

	/*
	 * If we are user-wiring a r/w segment, and it is COW, then
	 * we need to do the COW operation.  Note that we don't COW
	 * currently RO sections, because it is NOT desirable to COW .text.
	 * We simply keep .text from ever being COW'ed
	 * and take the heat that one cannot debug wired .text sections.
	 */
	if (((fault_flags & VM_FAULT_WIRE_MASK) == VM_FAULT_USER_WIRE) && (entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
		if (entry->protection & VM_PROT_WRITE) {
			int tresult;
			vm_map_lookup_done(map, entry);

			tresult = vm_map_lookup(&map, vaddr, VM_PROT_READ|VM_PROT_WRITE,
			    &entry, &first_object, &first_pindex, &prot, &wired, &su);
			if (tresult != KERN_SUCCESS)
				return tresult;
		} else {
			/*
			 * If we don't COW now, on a user wire, the user will never
			 * be able to write to the mapping.  If we don't make this
			 * restriction, the bookkeeping would be nearly impossible.
			 */
			entry->max_protection &= ~VM_PROT_WRITE;
		}
	}

	vp = vnode_pager_lock(first_object);

	lookup_still_valid = TRUE;

	if (wired)
		fault_type = prot;

	first_m = NULL;

	/*
	 * Make a reference to this object to prevent its disposal while we
	 * are messing with it.  Once we have the reference, the map is free
	 * to be diddled.  Since objects reference their shadows (and copies),
	 * they will stay around as well.
	 */

	first_object->ref_count++;
	first_object->paging_in_progress++;

	/*
	 * INVARIANTS (through entire routine):
	 *
	 * 1)	At all times, we must either have the object lock or a busy
	 *	page in some object to prevent some other process from trying
	 *	to bring in the same page.
	 *
	 *	Note that we cannot hold any locks during the pager access or
	 *	when waiting for memory, so we use a busy page then.
	 *
	 *	Note also that we aren't as concerned about more than one
	 *	thread attempting to pager_data_unlock the same page at once,
	 *	so we don't hold the page as busy then, but do record the
	 *	highest unlock value so far.  [Unlock requests may also be
	 *	delivered out of order.]
	 *
	 * 2)	Once we have a busy page, we must remove it from the pageout
	 *	queues, so that the pageout daemon will not grab it away.
	 *
	 * 3)	To prevent another process from racing us down the shadow
	 *	chain and entering a new page in the top object before we do,
	 *	we must keep a busy page in the top object while following
	 *	the shadow chain.
	 *
	 * 4)	We must increment paging_in_progress on any object for which
	 *	we have a busy page, to prevent vm_object_collapse from
	 *	removing the busy page without our noticing.
	 */

	/*
	 * Search for the page at object/offset.
	 */

	object = first_object;
	pindex = first_pindex;

	/*
	 * See whether this page is resident
	 */

	while (TRUE) {
		m = vm_page_lookup(object, pindex);
		if (m != NULL) {
			int queue;
			/*
			 * If the page is being brought in, wait for it and
			 * then retry.
			 */
			if ((m->flags & PG_BUSY) || m->busy) {
				int s;

				UNLOCK_THINGS;
				s = splvm();
				if (((m->flags & PG_BUSY) || m->busy)) {
					m->flags |= PG_WANTED | PG_REFERENCED;
					cnt.v_intrans++;
					tsleep(m, PSWP, "vmpfw", 0);
				}
				splx(s);
				vm_object_deallocate(first_object);
				goto RetryFault;
			}

			queue = m->queue;
			vm_page_unqueue_nowakeup(m);

			/*
			 * Mark page busy for other processes, and the pagedaemon.
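			 *
			 * If the page was on the cache queue and free memory
			 * is critically low, reactivate it, wait for the
			 * pageout daemon to reclaim some memory, and retry
			 * the fault from the top rather than consuming one
			 * of the last free pages.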
			 */
			if (((queue - m->pc) == PQ_CACHE) &&
			    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
				vm_page_activate(m);
				UNLOCK_AND_DEALLOCATE;
				VM_WAIT;
				goto RetryFault;
			}

			m->flags |= PG_BUSY;

			if (/*m->valid && */
			    ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) &&
			    m->object != kernel_object && m->object != kmem_object) {
				goto readrest;
			}
			break;
		}
		if (((object->type != OBJT_DEFAULT) && (((fault_flags & VM_FAULT_WIRE_MASK) == 0) || wired))
		    || (object == first_object)) {

			if (pindex >= object->size) {
				UNLOCK_AND_DEALLOCATE;
				return (KERN_PROTECTION_FAILURE);
			}

			/*
			 * Allocate a new page for this object/offset pair.
			 */
			m = vm_page_alloc(object, pindex,
			    (vp || object->backing_object) ? VM_ALLOC_NORMAL : VM_ALLOC_ZERO);

			if (m == NULL) {
				UNLOCK_AND_DEALLOCATE;
				VM_WAIT;
				goto RetryFault;
			}
		}
readrest:
		if (object->type != OBJT_DEFAULT && (((fault_flags & VM_FAULT_WIRE_MASK) == 0) || wired)) {
			int rv;
			int faultcount;
			int reqpage;
			int ahead, behind;

			ahead = VM_FAULT_READ_AHEAD;
			behind = VM_FAULT_READ_BEHIND;
			if (first_object->behavior == OBJ_RANDOM) {
				ahead = 0;
				behind = 0;
			}

			if ((first_object->type != OBJT_DEVICE) &&
			    (first_object->behavior == OBJ_SEQUENTIAL)) {
				vm_pindex_t firstpindex, tmppindex;
				if (first_pindex <
				    2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1))
					firstpindex = 0;
				else
					firstpindex = first_pindex -
					    2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1);

				for (tmppindex = first_pindex - 1;
				    tmppindex >= firstpindex;
				    --tmppindex) {
					vm_page_t mt;
					mt = vm_page_lookup(first_object, tmppindex);
					if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL))
						break;
					if (mt->busy ||
					    (mt->flags & (PG_BUSY|PG_FICTITIOUS)) ||
					    mt->hold_count ||
					    mt->wire_count)
						continue;
					if (mt->dirty == 0)
						vm_page_test_dirty(mt);
					if (mt->dirty) {
						vm_page_protect(mt, VM_PROT_NONE);
						vm_page_deactivate(mt);
					} else {
						vm_page_cache(mt);
					}
				}

				ahead += behind;
				behind = 0;
			}

			/*
			 * Now we find out if any other pages should be paged
			 * in at this time.  This routine checks to see if the
			 * pages surrounding this fault reside in the same
			 * object as the page for this fault.  If they do,
			 * then they are faulted in as well.  The array
			 * "marray" returned contains an array of vm_page_t
			 * structs where one of them is the vm_page_t passed
			 * to the routine.  The reqpage return value is the
			 * index into the marray for the vm_page_t passed to
			 * the routine.
			 */
			faultcount = vm_fault_additional_pages(
			    m, behind, ahead, marray, &reqpage);

			/*
			 * Call the pager to retrieve the data, if any, after
			 * releasing the lock on the map.
			 */
			UNLOCK_MAP;

			rv = faultcount ?
			    vm_pager_get_pages(object, marray, faultcount,
				reqpage) : VM_PAGER_FAIL;

			if (rv == VM_PAGER_OK) {
				/*
				 * Found the page. Leave it busy while we play
				 * with it.
				 */

				/*
				 * Relookup in case pager changed page. Pager
				 * is responsible for disposition of old page
				 * if moved.
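				 *
				 * If the page is no longer at this
				 * object/offset after the pager ran, drop
				 * everything and retry the fault from
				 * scratch.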
				 */
				m = vm_page_lookup(object, pindex);
				if (!m) {
					UNLOCK_AND_DEALLOCATE;
					goto RetryFault;
				}

				hardfault++;
				break;
			}
			/*
			 * Remove the bogus page (which does not exist at this
			 * object/offset); before doing so, we must get back
			 * our object lock to preserve our invariant.
			 *
			 * Also wake up any other process that may want to bring
			 * in this page.
			 *
			 * If this is the top-level object, we must leave the
			 * busy page to prevent another process from rushing
			 * past us, and inserting the page in that object at
			 * the same time that we are.
			 */

			if (rv == VM_PAGER_ERROR)
				printf("vm_fault: pager input (probably hardware) error, PID %d failure\n",
				    curproc->p_pid);
			/*
			 * Data outside the range of the pager or an I/O error
			 */
			/*
			 * XXX - the check for kernel_map is a kludge to work
			 * around having the machine panic on a kernel space
			 * fault w/ I/O error.
			 */
			if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) {
				FREE_PAGE(m);
				UNLOCK_AND_DEALLOCATE;
				return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE);
			}
			if (object != first_object) {
				FREE_PAGE(m);
				/*
				 * XXX - we cannot just fall out at this
				 * point, m has been freed and is invalid!
				 */
			}
		}
		/*
		 * We get here if the object has a default pager (or unwiring)
		 * or the pager doesn't have the page.
		 */
		if (object == first_object)
			first_m = m;

		/*
		 * Move on to the next object. Lock the next object before
		 * unlocking the current one.
		 */

		pindex += OFF_TO_IDX(object->backing_object_offset);
		next_object = object->backing_object;
		if (next_object == NULL) {
			/*
			 * If there's no object left, fill the page in the top
			 * object with zeros.
			 */
			if (object != first_object) {
				vm_object_pip_wakeup(object);

				object = first_object;
				pindex = first_pindex;
				m = first_m;
			}
			first_m = NULL;

			if ((m->flags & PG_ZERO) == 0)
				vm_page_zero_fill(m);
			cnt.v_zfod++;
			break;
		} else {
			if (object != first_object) {
				vm_object_pip_wakeup(object);
			}
			object = next_object;
			object->paging_in_progress++;
		}
	}

	if ((m->flags & PG_BUSY) == 0)
		panic("vm_fault: not busy after main loop");

	/*
	 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
	 * is held.]
	 */

	old_m = m;	/* save page that would be copied */

	/*
	 * If the page is being written, but isn't already owned by the
	 * top-level object, we have to copy it into a new page owned by the
	 * top-level object.
	 */

	if (object != first_object) {
		/*
		 * We only really need to copy if we want to write it.
		 */

		if (fault_type & VM_PROT_WRITE) {

			/*
			 * This allows pages to be virtually copied from a backing_object
			 * into the first_object, where the backing object has no other
			 * refs to it, and cannot gain any more refs.  Instead of a
			 * bcopy, we just move the page from the backing object to the
			 * first object.  Note that we must mark the page dirty in the
			 * first object so that it will go out to swap when needed.
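			 *
			 * The checks below make sure the rename is safe: the
			 * backing object must be anonymous (default or swap),
			 * unnamed, directly behind first_object, and
			 * referenced and shadowed only by it.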
			 */
			if (lookup_still_valid &&
				/*
				 * Only one shadow object
				 */
				(object->shadow_count == 1) &&
				/*
				 * No COW refs, except us
				 */
				(object->ref_count == 1) &&
				/*
				 * No one else can look this object up
				 */
				(object->handle == NULL) &&
				/*
				 * No other ways to look the object up
				 */
				((object->type == OBJT_DEFAULT) ||
				 (object->type == OBJT_SWAP)) &&
				/*
				 * We don't chase down the shadow chain
				 */
				(object == first_object->backing_object)) {

				/*
				 * get rid of the unnecessary page
				 */
				vm_page_protect(first_m, VM_PROT_NONE);
				PAGE_WAKEUP(first_m);
				vm_page_free(first_m);
				/*
				 * grab the page and put it into the process's object
				 */
				vm_page_rename(m, first_object, first_pindex);
				first_m = m;
				m->dirty = VM_PAGE_BITS_ALL;
				m = NULL;
			} else {
				/*
				 * Oh, well, let's copy it.
				 */
				vm_page_copy(m, first_m);
			}

			/*
			 * This code handles the case where there are two references to the
			 * backing object, and one reference is getting a copy of the
			 * page.  If the other reference is the only other object that
			 * points to the backing object, then perform a virtual copy
			 * from the backing object to the other object after the
			 * page is copied to the current first_object.  If the other
			 * object already has the page, we destroy it in the backing object
			 * performing an optimized collapse-type operation.  We don't
			 * bother removing the page from the backing object's swap space.
			 */
			if (lookup_still_valid &&
				/*
				 * make sure that we have two shadow objs
				 */
				(object->shadow_count == 2) &&
				/*
				 * And no COW refs -- note that there are sometimes
				 * temp refs to objs, but ignore that case -- we just
				 * punt.
				 */
				(object->ref_count == 2) &&
				/*
				 * No one else can look us up
				 */
				(object->handle == NULL) &&
				/*
				 * Not something that can be referenced elsewhere
				 */
				((object->type == OBJT_DEFAULT) ||
				 (object->type == OBJT_SWAP)) &&
				/*
				 * We don't bother chasing down the object chain
				 */
				(object == first_object->backing_object)) {

				vm_object_t other_object;
				vm_pindex_t other_pindex, other_pindex_offset;
				vm_page_t tm;

				other_object = TAILQ_FIRST(&object->shadow_head);
				if (other_object == first_object)
					other_object = TAILQ_NEXT(other_object, shadow_list);
				if (!other_object)
					panic("vm_fault: other object missing");
				if (other_object &&
				    (other_object->type == OBJT_DEFAULT) &&
				    (other_object->paging_in_progress == 0)) {
					other_pindex_offset =
					    OFF_TO_IDX(other_object->backing_object_offset);
					if (pindex >= other_pindex_offset) {
						other_pindex = pindex - other_pindex_offset;
						/*
						 * If the other object has the page, just free it.
						 */
						if ((tm = vm_page_lookup(other_object, other_pindex))) {
							if ((tm->flags & PG_BUSY) == 0 &&
							    tm->busy == 0 &&
							    tm->valid == VM_PAGE_BITS_ALL) {
								/*
								 * get rid of the unnecessary page
								 */
								vm_page_protect(m, VM_PROT_NONE);
								PAGE_WAKEUP(m);
								vm_page_free(m);
								m = NULL;
								tm->dirty = VM_PAGE_BITS_ALL;
								first_m->dirty = VM_PAGE_BITS_ALL;
							}
						} else {
							/*
							 * If the other object doesn't have the page,
							 * then we move it there.
							 */
							vm_page_rename(m, other_object, other_pindex);
							m->dirty = VM_PAGE_BITS_ALL;
							m->valid = VM_PAGE_BITS_ALL;
						}
					}
				}
			}

			if (m) {
				if (m->queue != PQ_ACTIVE)
					vm_page_activate(m);
				/*
				 * We no longer need the old page or object.
				 */
				PAGE_WAKEUP(m);
			}

			vm_object_pip_wakeup(object);
			/*
			 * Only use the new page below...
			 */

			cnt.v_cow_faults++;
			m = first_m;
			object = first_object;
			pindex = first_pindex;

			/*
			 * Now that we've gotten the copy out of the way,
			 * let's try to collapse the top object.
			 *
			 * But we have to play ugly games with
			 * paging_in_progress to do that...
			 */
			vm_object_pip_wakeup(object);
			vm_object_collapse(object);
			object->paging_in_progress++;
		} else {
			prot &= ~VM_PROT_WRITE;
		}
	}

	/*
	 * We must verify that the maps have not changed since our last
	 * lookup.
	 */

	if (!lookup_still_valid) {
		vm_object_t retry_object;
		vm_pindex_t retry_pindex;
		vm_prot_t retry_prot;

		/*
		 * Since map entries may be pageable, make sure we can take a
		 * page fault on them.
		 */

		/*
		 * To avoid trying to write_lock the map while another process
		 * has it read_locked (in vm_map_pageable), we do not try for
		 * write permission.  If the page is still writable, we will
		 * get write permission.  If it is not, or has been marked
		 * needs_copy, we enter the mapping without write permission,
		 * and will merely take another fault.
		 */
		result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE,
		    &entry, &retry_object, &retry_pindex, &retry_prot, &wired, &su);

		/*
		 * If we don't need the page any longer, put it on the active
		 * list (the easiest thing to do here).  If no one needs it,
		 * pageout will grab it eventually.
		 */

		if (result != KERN_SUCCESS) {
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
			return (result);
		}
		lookup_still_valid = TRUE;

		if ((retry_object != first_object) ||
		    (retry_pindex != first_pindex)) {
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
			goto RetryFault;
		}
		/*
		 * Check whether the protection has changed or the object has
		 * been copied while we left the map unlocked.  Changing from
		 * read to write permission is OK - we leave the page
		 * write-protected, and catch the write fault.  Changing from
		 * write to read permission means that we can't mark the page
		 * write-enabled after all.
		 */
		prot &= retry_prot;
	}

	/*
	 * Put this page into the physical map. We had to do the unlock above
	 * because pmap_enter may cause other faults. We don't put the page
	 * back on the active queue until later so that the page-out daemon
	 * won't find us (yet).
	 */

	if (prot & VM_PROT_WRITE) {
		m->flags |= PG_WRITEABLE;
		m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY;
		/*
		 * If the fault is a write, we know that this page is being
		 * written NOW. This will save on the pmap_is_modified() calls
		 * later.
		 */
		if (fault_flags & VM_FAULT_DIRTY) {
			m->dirty = VM_PAGE_BITS_ALL;
		}
	}

	UNLOCK_THINGS;
	m->valid = VM_PAGE_BITS_ALL;
	m->flags &= ~PG_ZERO;

	pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);
	if (((fault_flags & VM_FAULT_WIRE_MASK) == 0) && (wired == 0))
		pmap_prefault(map->pmap, vaddr, entry, first_object);

	m->flags |= PG_MAPPED|PG_REFERENCED;
	if (fault_flags & VM_FAULT_HOLD)
		vm_page_hold(m);

	/*
	 * If the page is not wired down, then put it where the pageout daemon
	 * can find it.
	 */
	if (fault_flags & VM_FAULT_WIRE_MASK) {
		if (wired)
			vm_page_wire(m);
		else
			vm_page_unwire(m);
	} else {
		if (m->queue != PQ_ACTIVE)
			vm_page_activate(m);
	}

	if (curproc && (curproc->p_flag & P_INMEM) && curproc->p_stats) {
		if (hardfault) {
			curproc->p_stats->p_ru.ru_majflt++;
		} else {
			curproc->p_stats->p_ru.ru_minflt++;
		}
	}

	/*
	 * Unlock everything, and return
	 */

	PAGE_WAKEUP(m);
	vm_object_deallocate(first_object);

	return (KERN_SUCCESS);

}

/*
 *	vm_fault_wire:
 *
 *	Wire down a range of virtual addresses in a map.
 */
int
vm_fault_wire(map, start, end)
	vm_map_t map;
	vm_offset_t start, end;
{

	register vm_offset_t va;
	register pmap_t pmap;
	int rv;

	pmap = vm_map_pmap(map);

	/*
	 * Inform the physical mapping system that the range of addresses may
	 * not fault, so that page tables and such can be locked down as well.
	 */

	pmap_pageable(pmap, start, end, FALSE);

	/*
	 * We simulate a fault to get the page and enter it in the physical
	 * map.
	 */

	for (va = start; va < end; va += PAGE_SIZE) {
		rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE,
		    VM_FAULT_CHANGE_WIRING);
		if (rv) {
			if (va != start)
				vm_fault_unwire(map, start, va);
			return (rv);
		}
	}
	return (KERN_SUCCESS);
}

/*
 *	vm_fault_user_wire:
 *
 *	Wire down a range of virtual addresses in a map.  This
 *	is for user mode though, so we only ask for read access
 *	on currently read-only sections.
 */
int
vm_fault_user_wire(map, start, end)
	vm_map_t map;
	vm_offset_t start, end;
{

	register vm_offset_t va;
	register pmap_t pmap;
	int rv;

	pmap = vm_map_pmap(map);

	/*
	 * Inform the physical mapping system that the range of addresses may
	 * not fault, so that page tables and such can be locked down as well.
	 */
	pmap_pageable(pmap, start, end, FALSE);

	/*
	 * We simulate a fault to get the page and enter it in the physical
	 * map.
	 */
	for (va = start; va < end; va += PAGE_SIZE) {
		rv = vm_fault(map, va, VM_PROT_READ, VM_FAULT_USER_WIRE);
		if (rv) {
			if (va != start)
				vm_fault_unwire(map, start, va);
			return (rv);
		}
	}
	return (KERN_SUCCESS);
}


/*
 *	vm_fault_unwire:
 *
 *	Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(map, start, end)
	vm_map_t map;
	vm_offset_t start, end;
{

	register vm_offset_t va, pa;
	register pmap_t pmap;

	pmap = vm_map_pmap(map);

	/*
	 * Since the pages are wired down, we must be able to get their
	 * mappings from the physical map system.
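	 * Each mapping is looked up with pmap_extract(); an address with no
	 * physical mapping is simply skipped.  For the rest, the pmap wiring
	 * is cleared and the page's wire count is dropped.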
	 */

	for (va = start; va < end; va += PAGE_SIZE) {
		pa = pmap_extract(pmap, va);
		if (pa != (vm_offset_t) 0) {
			pmap_change_wiring(pmap, va, FALSE);
			vm_page_unwire(PHYS_TO_VM_PAGE(pa));
		}
	}

	/*
	 * Inform the physical mapping system that the range of addresses may
	 * fault, so that page tables and such may be unwired themselves.
	 */

	pmap_pageable(pmap, start, end, TRUE);

}

/*
 *	Routine:
 *		vm_fault_copy_entry
 *	Function:
 *		Copy all of the pages from a wired-down map entry to another.
 *
 *	In/out conditions:
 *		The source and destination maps must be locked for write.
 *		The source map entry must be wired down (or be a sharing map
 *		entry corresponding to a main map entry that is wired down).
 */

void
vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
	vm_map_t dst_map;
	vm_map_t src_map;
	vm_map_entry_t dst_entry;
	vm_map_entry_t src_entry;
{
	vm_object_t dst_object;
	vm_object_t src_object;
	vm_ooffset_t dst_offset;
	vm_ooffset_t src_offset;
	vm_prot_t prot;
	vm_offset_t vaddr;
	vm_page_t dst_m;
	vm_page_t src_m;

#ifdef lint
	src_map++;
#endif	/* lint */

	src_object = src_entry->object.vm_object;
	src_offset = src_entry->offset;

	/*
	 * Create the top-level object for the destination entry. (Doesn't
	 * actually shadow anything - we copy the pages directly.)
	 */
	dst_object = vm_object_allocate(OBJT_DEFAULT,
	    (vm_size_t) OFF_TO_IDX(dst_entry->end - dst_entry->start));

	dst_entry->object.vm_object = dst_object;
	dst_entry->offset = 0;

	prot = dst_entry->max_protection;

	/*
	 * Loop through all of the pages in the entry's range, copying each
	 * one from the source object (it should be there) to the destination
	 * object.
	 */
	for (vaddr = dst_entry->start, dst_offset = 0;
	    vaddr < dst_entry->end;
	    vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {

		/*
		 * Allocate a page in the destination object
		 */
		do {
			dst_m = vm_page_alloc(dst_object,
			    OFF_TO_IDX(dst_offset), VM_ALLOC_NORMAL);
			if (dst_m == NULL) {
				VM_WAIT;
			}
		} while (dst_m == NULL);

		/*
		 * Find the page in the source object, and copy it in.
		 * (Because the source is wired down, the page will be in
		 * memory.)
		 */
		src_m = vm_page_lookup(src_object,
		    OFF_TO_IDX(dst_offset + src_offset));
		if (src_m == NULL)
			panic("vm_fault_copy_wired: page missing");

		vm_page_copy(src_m, dst_m);

		/*
		 * Enter it in the pmap...
		 */

		dst_m->flags &= ~PG_ZERO;
		pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m),
		    prot, FALSE);
		dst_m->flags |= PG_WRITEABLE|PG_MAPPED;

		/*
		 * Mark it no longer busy, and put it on the active list.
		 */
		vm_page_activate(dst_m);
		PAGE_WAKEUP(dst_m);
	}
}


/*
 * This routine checks around the requested page for other pages that
 * might be able to be faulted in.  This routine brackets the viable
 * pages for the pages to be paged in.
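 *
 * For example, with rbehind = 3 and rahead = 4 (the VM_FAULT_READ_BEHIND
 * and VM_FAULT_READ_AHEAD defaults), up to 8 contiguous pages around the
 * faulting page may be returned.  The window is clipped to what the pager
 * can supply, stops at pages that are already resident, and collapses to
 * the single requested page when free memory is short.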
 *
 * Inputs:
 *	m, rbehind, rahead
 *
 * Outputs:
 *	marray (array of vm_page_t), reqpage (index of requested page)
 *
 * Return value:
 *	number of pages in marray
 */
int
vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
	vm_page_t m;
	int rbehind;
	int rahead;
	vm_page_t *marray;
	int *reqpage;
{
	int i;
	vm_object_t object;
	vm_pindex_t pindex, startpindex, endpindex, tpindex;
	vm_offset_t size;
	vm_page_t rtm;
	int treqpage;
	int cbehind, cahead;

	object = m->object;
	pindex = m->pindex;

	/*
	 * we don't fault-ahead for device pager
	 */
	if (object->type == OBJT_DEVICE) {
		*reqpage = 0;
		marray[0] = m;
		return 1;
	}

	/*
	 * if the requested page is not available, then give up now
	 */

	if (!vm_pager_has_page(object,
	    OFF_TO_IDX(object->paging_offset) + pindex, &cbehind, &cahead))
		return 0;

	if ((cbehind == 0) && (cahead == 0)) {
		*reqpage = 0;
		marray[0] = m;
		return 1;
	}

	if (rahead > cahead) {
		rahead = cahead;
	}

	if (rbehind > cbehind) {
		rbehind = cbehind;
	}

	/*
	 * try to do any readahead that we might have free pages for.
	 */
	if ((rahead + rbehind) >
	    ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) {
		pagedaemon_wakeup();
		*reqpage = 0;
		marray[0] = m;
		return 1;
	}

	/*
	 * scan backward for the read-behind pages -- stop at pages that are
	 * in memory or not in the same object
	 */
	tpindex = pindex - 1;
	if (tpindex < pindex) {
		if (rbehind > pindex)
			rbehind = pindex;
		startpindex = pindex - rbehind;
		while (tpindex >= startpindex) {
			if (vm_page_lookup(object, tpindex)) {
				startpindex = tpindex + 1;
				break;
			}
			if (tpindex == 0)
				break;
			tpindex -= 1;
		}
	} else {
		startpindex = pindex;
	}

	/*
	 * scan forward for the read-ahead pages -- stop at pages that are
	 * in memory or not in the same object
	 */
	tpindex = pindex + 1;
	endpindex = pindex + (rahead + 1);
	if (endpindex > object->size)
		endpindex = object->size;
	while (tpindex < endpindex) {
		if (vm_page_lookup(object, tpindex)) {
			break;
		}
		tpindex += 1;
	}
	endpindex = tpindex;

	/* calculate the number of pages in the cluster */
	size = endpindex - startpindex;

	/* calculate the index of the required page within the cluster */
	treqpage = pindex - startpindex;

	/* see if we have space (again) */
	if ((cnt.v_free_count + cnt.v_cache_count) >
	    (cnt.v_free_reserved + size)) {
		/*
		 * get our pages and don't block for them
		 */
		for (i = 0; i < size; i++) {
			if (i != treqpage) {
				rtm = vm_page_alloc(object,
				    startpindex + i,
				    VM_ALLOC_NORMAL);
				if (rtm == NULL) {
					if (i < treqpage) {
						int j;
						for (j = 0; j < i; j++) {
							FREE_PAGE(marray[j]);
						}
						*reqpage = 0;
						marray[0] = m;
						return 1;
					} else {
						size = i;
						*reqpage = treqpage;
						return size;
					}
				}
				marray[i] = rtm;
			} else {
				marray[i] = m;
			}
		}

		*reqpage = treqpage;
		return size;
	}
	*reqpage = 0;
	marray[0] = m;
	return 1;
}