1 /* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * 10 * This code is derived from software contributed to Berkeley by 11 * The Mach Operating System project at Carnegie-Mellon University. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 42 * 43 * 44 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 45 * All rights reserved. 46 * 47 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 48 * 49 * Permission to use, copy, modify and distribute this software and 50 * its documentation is hereby granted, provided that both the copyright 51 * notice and this permission notice appear in all copies of the 52 * software, derivative works or modified versions, and any portions 53 * thereof, and that both notices appear in supporting documentation. 54 * 55 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 56 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 57 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 58 * 59 * Carnegie Mellon requests users of this software to return to 60 * 61 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 62 * School of Computer Science 63 * Carnegie Mellon University 64 * Pittsburgh PA 15213-3890 65 * 66 * any improvements or extensions that they make and grant Carnegie the 67 * rights to redistribute these changes. 68 * 69 * $FreeBSD$ 70 */ 71 72 /* 73 * Page fault handling module. 74 */ 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/proc.h> 79 #include <sys/vnode.h> 80 #include <sys/resource.h> 81 #include <sys/signalvar.h> 82 #include <sys/resourcevar.h> 83 #include <sys/vmmeter.h> 84 #include <sys/buf.h> 85 86 #include <vm/vm.h> 87 #include <vm/vm_param.h> 88 #include <vm/vm_prot.h> 89 #include <vm/lock.h> 90 #include <vm/pmap.h> 91 #include <vm/vm_map.h> 92 #include <vm/vm_object.h> 93 #include <vm/vm_page.h> 94 #include <vm/vm_pageout.h> 95 #include <vm/vm_kern.h> 96 #include <vm/vm_pager.h> 97 #include <vm/vnode_pager.h> 98 #include <vm/swap_pager.h> 99 #include <vm/vm_extern.h> 100 101 int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *)); 102 103 #define VM_FAULT_READ_AHEAD 4 104 #define VM_FAULT_READ_BEHIND 3 105 #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) 106 107 /* 108 * vm_fault: 109 * 110 * Handle a page fault occuring at the given address, 111 * requiring the given permissions, in the map specified. 112 * If successful, the page is inserted into the 113 * associated physical map. 114 * 115 * NOTE: the given address should be truncated to the 116 * proper page address. 117 * 118 * KERN_SUCCESS is returned if the page fault is handled; otherwise, 119 * a standard error specifying why the fault is fatal is returned. 120 * 121 * 122 * The map in question must be referenced, and remains so. 123 * Caller may hold no locks. 124 */ 125 int 126 vm_fault(map, vaddr, fault_type, change_wiring) 127 vm_map_t map; 128 vm_offset_t vaddr; 129 vm_prot_t fault_type; 130 boolean_t change_wiring; 131 { 132 vm_object_t first_object; 133 vm_pindex_t first_pindex; 134 vm_map_entry_t entry; 135 register vm_object_t object; 136 register vm_pindex_t pindex; 137 vm_page_t m; 138 vm_page_t first_m; 139 vm_prot_t prot; 140 int result; 141 boolean_t wired; 142 boolean_t su; 143 boolean_t lookup_still_valid; 144 vm_page_t old_m; 145 vm_object_t next_object; 146 vm_page_t marray[VM_FAULT_READ]; 147 int hardfault = 0; 148 struct vnode *vp = NULL; 149 150 cnt.v_vm_faults++; /* needs lock XXX */ 151 /* 152 * Recovery actions 153 */ 154 #define FREE_PAGE(m) { \ 155 PAGE_WAKEUP(m); \ 156 vm_page_free(m); \ 157 } 158 159 #define RELEASE_PAGE(m) { \ 160 PAGE_WAKEUP(m); \ 161 if (m->queue != PQ_ACTIVE) vm_page_activate(m); \ 162 } 163 164 #define UNLOCK_MAP { \ 165 if (lookup_still_valid) { \ 166 vm_map_lookup_done(map, entry); \ 167 lookup_still_valid = FALSE; \ 168 } \ 169 } 170 171 #define UNLOCK_THINGS { \ 172 vm_object_pip_wakeup(object); \ 173 if (object != first_object) { \ 174 FREE_PAGE(first_m); \ 175 vm_object_pip_wakeup(first_object); \ 176 } \ 177 UNLOCK_MAP; \ 178 if (vp != NULL) VOP_UNLOCK(vp); \ 179 } 180 181 #define UNLOCK_AND_DEALLOCATE { \ 182 UNLOCK_THINGS; \ 183 vm_object_deallocate(first_object); \ 184 } 185 186 187 RetryFault:; 188 189 /* 190 * Find the backing store object and offset into it to begin the 191 * search. 192 */ 193 194 if ((result = vm_map_lookup(&map, vaddr, 195 fault_type, &entry, &first_object, 196 &first_pindex, &prot, &wired, &su)) != KERN_SUCCESS) { 197 return (result); 198 } 199 200 if (entry->nofault) { 201 panic("vm_fault: fault on nofault entry, addr: %lx", 202 vaddr); 203 } 204 205 /* 206 * If we are user-wiring a r/w segment, and it is COW, then 207 * we need to do the COW operation. Note that we don't COW 208 * currently RO sections now, because it is NOT desirable 209 * to COW .text. We simply keep .text from ever being COW'ed 210 * and take the heat that one cannot debug wired .text sections. 211 */ 212 if ((change_wiring == VM_FAULT_USER_WIRE) && entry->needs_copy) { 213 if(entry->protection & VM_PROT_WRITE) { 214 int tresult; 215 vm_map_lookup_done(map, entry); 216 217 tresult = vm_map_lookup(&map, vaddr, VM_PROT_READ|VM_PROT_WRITE, 218 &entry, &first_object, &first_pindex, &prot, &wired, &su); 219 if (tresult != KERN_SUCCESS) 220 return tresult; 221 } else { 222 /* 223 * If we don't COW now, on a user wire, the user will never 224 * be able to write to the mapping. If we don't make this 225 * restriction, the bookkeeping would be nearly impossible. 226 */ 227 entry->max_protection &= ~VM_PROT_WRITE; 228 } 229 } 230 231 vp = vnode_pager_lock(first_object); 232 233 lookup_still_valid = TRUE; 234 235 if (wired) 236 fault_type = prot; 237 238 first_m = NULL; 239 240 /* 241 * Make a reference to this object to prevent its disposal while we 242 * are messing with it. Once we have the reference, the map is free 243 * to be diddled. Since objects reference their shadows (and copies), 244 * they will stay around as well. 245 */ 246 247 first_object->ref_count++; 248 first_object->paging_in_progress++; 249 250 /* 251 * INVARIANTS (through entire routine): 252 * 253 * 1) At all times, we must either have the object lock or a busy 254 * page in some object to prevent some other process from trying to 255 * bring in the same page. 256 * 257 * Note that we cannot hold any locks during the pager access or when 258 * waiting for memory, so we use a busy page then. 259 * 260 * Note also that we aren't as concerned about more than one thead 261 * attempting to pager_data_unlock the same page at once, so we don't 262 * hold the page as busy then, but do record the highest unlock value 263 * so far. [Unlock requests may also be delivered out of order.] 264 * 265 * 2) Once we have a busy page, we must remove it from the pageout 266 * queues, so that the pageout daemon will not grab it away. 267 * 268 * 3) To prevent another process from racing us down the shadow chain 269 * and entering a new page in the top object before we do, we must 270 * keep a busy page in the top object while following the shadow 271 * chain. 272 * 273 * 4) We must increment paging_in_progress on any object for which 274 * we have a busy page, to prevent vm_object_collapse from removing 275 * the busy page without our noticing. 276 */ 277 278 /* 279 * Search for the page at object/offset. 280 */ 281 282 object = first_object; 283 pindex = first_pindex; 284 285 /* 286 * See whether this page is resident 287 */ 288 289 while (TRUE) { 290 m = vm_page_lookup(object, pindex); 291 if (m != NULL) { 292 int queue; 293 /* 294 * If the page is being brought in, wait for it and 295 * then retry. 296 */ 297 if ((m->flags & PG_BUSY) || m->busy) { 298 int s; 299 300 UNLOCK_THINGS; 301 s = splvm(); 302 if (((m->flags & PG_BUSY) || m->busy)) { 303 m->flags |= PG_WANTED | PG_REFERENCED; 304 cnt.v_intrans++; 305 tsleep(m, PSWP, "vmpfw", 0); 306 } 307 splx(s); 308 vm_object_deallocate(first_object); 309 goto RetryFault; 310 } 311 312 queue = m->queue; 313 vm_page_unqueue_nowakeup(m); 314 315 /* 316 * Mark page busy for other processes, and the pagedaemon. 317 */ 318 if (((queue - m->pc) == PQ_CACHE) && 319 (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { 320 vm_page_activate(m); 321 UNLOCK_AND_DEALLOCATE; 322 VM_WAIT; 323 goto RetryFault; 324 } 325 326 m->flags |= PG_BUSY; 327 328 if (m->valid && 329 ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && 330 m->object != kernel_object && m->object != kmem_object) { 331 goto readrest; 332 } 333 break; 334 } 335 if (((object->type != OBJT_DEFAULT) && (!change_wiring || wired)) 336 || (object == first_object)) { 337 338 if (pindex >= object->size) { 339 UNLOCK_AND_DEALLOCATE; 340 return (KERN_PROTECTION_FAILURE); 341 } 342 343 /* 344 * Allocate a new page for this object/offset pair. 345 */ 346 m = vm_page_alloc(object, pindex, 347 (vp || object->backing_object)?VM_ALLOC_NORMAL:VM_ALLOC_ZERO); 348 349 if (m == NULL) { 350 UNLOCK_AND_DEALLOCATE; 351 VM_WAIT; 352 goto RetryFault; 353 } 354 } 355 readrest: 356 if (object->type != OBJT_DEFAULT && (!change_wiring || wired)) { 357 int rv; 358 int faultcount; 359 int reqpage; 360 int ahead, behind; 361 362 ahead = VM_FAULT_READ_AHEAD; 363 behind = VM_FAULT_READ_BEHIND; 364 if (first_object->behavior == OBJ_RANDOM) { 365 ahead = 0; 366 behind = 0; 367 } 368 369 if ((first_object->type != OBJT_DEVICE) && 370 (first_object->behavior == OBJ_SEQUENTIAL)) { 371 vm_pindex_t firstpindex, tmppindex; 372 if (first_pindex < 373 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1)) 374 firstpindex = 0; 375 else 376 firstpindex = first_pindex - 377 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1); 378 379 for(tmppindex = first_pindex - 1; 380 tmppindex >= first_pindex; 381 --tmppindex) { 382 vm_page_t mt; 383 mt = vm_page_lookup( first_object, tmppindex); 384 if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL)) 385 break; 386 if (mt->busy || 387 (mt->flags & (PG_BUSY|PG_FICTITIOUS)) || 388 mt->hold_count || 389 mt->wire_count) 390 continue; 391 if (mt->dirty == 0) 392 vm_page_test_dirty(mt); 393 if (mt->dirty) { 394 vm_page_protect(mt, VM_PROT_NONE); 395 vm_page_deactivate(mt); 396 } else { 397 vm_page_cache(mt); 398 } 399 } 400 401 ahead += behind; 402 behind = 0; 403 } 404 405 /* 406 * now we find out if any other pages should be paged 407 * in at this time this routine checks to see if the 408 * pages surrounding this fault reside in the same 409 * object as the page for this fault. If they do, 410 * then they are faulted in also into the object. The 411 * array "marray" returned contains an array of 412 * vm_page_t structs where one of them is the 413 * vm_page_t passed to the routine. The reqpage 414 * return value is the index into the marray for the 415 * vm_page_t passed to the routine. 416 */ 417 faultcount = vm_fault_additional_pages( 418 m, behind, ahead, marray, &reqpage); 419 420 /* 421 * Call the pager to retrieve the data, if any, after 422 * releasing the lock on the map. 423 */ 424 UNLOCK_MAP; 425 426 rv = faultcount ? 427 vm_pager_get_pages(object, marray, faultcount, 428 reqpage) : VM_PAGER_FAIL; 429 430 if (rv == VM_PAGER_OK) { 431 /* 432 * Found the page. Leave it busy while we play 433 * with it. 434 */ 435 436 /* 437 * Relookup in case pager changed page. Pager 438 * is responsible for disposition of old page 439 * if moved. 440 */ 441 m = vm_page_lookup(object, pindex); 442 if( !m) { 443 UNLOCK_AND_DEALLOCATE; 444 goto RetryFault; 445 } 446 447 hardfault++; 448 break; 449 } 450 /* 451 * Remove the bogus page (which does not exist at this 452 * object/offset); before doing so, we must get back 453 * our object lock to preserve our invariant. 454 * 455 * Also wake up any other process that may want to bring 456 * in this page. 457 * 458 * If this is the top-level object, we must leave the 459 * busy page to prevent another process from rushing 460 * past us, and inserting the page in that object at 461 * the same time that we are. 462 */ 463 464 if (rv == VM_PAGER_ERROR) 465 printf("vm_fault: pager input (probably hardware) error, PID %d failure\n", 466 curproc->p_pid); 467 /* 468 * Data outside the range of the pager or an I/O error 469 */ 470 /* 471 * XXX - the check for kernel_map is a kludge to work 472 * around having the machine panic on a kernel space 473 * fault w/ I/O error. 474 */ 475 if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { 476 FREE_PAGE(m); 477 UNLOCK_AND_DEALLOCATE; 478 return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); 479 } 480 if (object != first_object) { 481 FREE_PAGE(m); 482 /* 483 * XXX - we cannot just fall out at this 484 * point, m has been freed and is invalid! 485 */ 486 } 487 } 488 /* 489 * We get here if the object has default pager (or unwiring) or the 490 * pager doesn't have the page. 491 */ 492 if (object == first_object) 493 first_m = m; 494 495 /* 496 * Move on to the next object. Lock the next object before 497 * unlocking the current one. 498 */ 499 500 pindex += OFF_TO_IDX(object->backing_object_offset); 501 next_object = object->backing_object; 502 if (next_object == NULL) { 503 /* 504 * If there's no object left, fill the page in the top 505 * object with zeros. 506 */ 507 if (object != first_object) { 508 vm_object_pip_wakeup(object); 509 510 object = first_object; 511 pindex = first_pindex; 512 m = first_m; 513 } 514 first_m = NULL; 515 516 if ((m->flags & PG_ZERO) == 0) 517 vm_page_zero_fill(m); 518 cnt.v_zfod++; 519 break; 520 } else { 521 if (object != first_object) { 522 vm_object_pip_wakeup(object); 523 } 524 object = next_object; 525 object->paging_in_progress++; 526 } 527 } 528 529 if ((m->flags & PG_BUSY) == 0) 530 panic("vm_fault: not busy after main loop"); 531 532 /* 533 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock 534 * is held.] 535 */ 536 537 old_m = m; /* save page that would be copied */ 538 539 /* 540 * If the page is being written, but isn't already owned by the 541 * top-level object, we have to copy it into a new page owned by the 542 * top-level object. 543 */ 544 545 if (object != first_object) { 546 /* 547 * We only really need to copy if we want to write it. 548 */ 549 550 if (fault_type & VM_PROT_WRITE) { 551 552 /* 553 * This allows pages to be virtually copied from a backing_object 554 * into the first_object, where the backing object has no other 555 * refs to it, and cannot gain any more refs. Instead of a 556 * bcopy, we just move the page from the backing object to the 557 * first object. Note that we must mark the page dirty in the 558 * first object so that it will go out to swap when needed. 559 */ 560 if (lookup_still_valid && 561 /* 562 * Only one shadow object 563 */ 564 (object->shadow_count == 1) && 565 /* 566 * No COW refs, except us 567 */ 568 (object->ref_count == 1) && 569 /* 570 * Noone else can look this object up 571 */ 572 (object->handle == NULL) && 573 /* 574 * No other ways to look the object up 575 */ 576 ((object->type == OBJT_DEFAULT) || 577 (object->type == OBJT_SWAP)) && 578 /* 579 * We don't chase down the shadow chain 580 */ 581 (object == first_object->backing_object)) { 582 583 /* 584 * get rid of the unnecessary page 585 */ 586 vm_page_protect(first_m, VM_PROT_NONE); 587 PAGE_WAKEUP(first_m); 588 vm_page_free(first_m); 589 /* 590 * grab the page and put it into the process'es object 591 */ 592 vm_page_rename(m, first_object, first_pindex); 593 first_m = m; 594 m->dirty = VM_PAGE_BITS_ALL; 595 m = NULL; 596 } else { 597 /* 598 * Oh, well, lets copy it. 599 */ 600 vm_page_copy(m, first_m); 601 } 602 603 /* 604 * This code handles the case where there are two references to the 605 * backing object, and one reference is getting a copy of the 606 * page. If the other reference is the only other object that 607 * points to the backing object, then perform a virtual copy 608 * from the backing object to the other object after the 609 * page is copied to the current first_object. If the other 610 * object already has the page, we destroy it in the backing object 611 * performing an optimized collapse-type operation. We don't 612 * bother removing the page from the backing object's swap space. 613 */ 614 if (lookup_still_valid && 615 /* 616 * make sure that we have two shadow objs 617 */ 618 (object->shadow_count == 2) && 619 /* 620 * And no COW refs -- note that there are sometimes 621 * temp refs to objs, but ignore that case -- we just 622 * punt. 623 */ 624 (object->ref_count == 2) && 625 /* 626 * Noone else can look us up 627 */ 628 (object->handle == NULL) && 629 /* 630 * Not something that can be referenced elsewhere 631 */ 632 ((object->type == OBJT_DEFAULT) || 633 (object->type == OBJT_SWAP)) && 634 /* 635 * We don't bother chasing down object chain 636 */ 637 (object == first_object->backing_object)) { 638 639 vm_object_t other_object; 640 vm_pindex_t other_pindex, other_pindex_offset; 641 vm_page_t tm; 642 643 other_object = TAILQ_FIRST(&object->shadow_head); 644 if (other_object == first_object) 645 other_object = TAILQ_NEXT(other_object, shadow_list); 646 if (!other_object) 647 panic("vm_fault: other object missing"); 648 if (other_object && 649 (other_object->type == OBJT_DEFAULT) && 650 (other_object->paging_in_progress == 0)) { 651 other_pindex_offset = 652 OFF_TO_IDX(other_object->backing_object_offset); 653 if (pindex >= other_pindex_offset) { 654 other_pindex = pindex - other_pindex_offset; 655 /* 656 * If the other object has the page, just free it. 657 */ 658 if ((tm = vm_page_lookup(other_object, other_pindex))) { 659 if ((tm->flags & PG_BUSY) == 0 && 660 tm->busy == 0 && 661 tm->valid == VM_PAGE_BITS_ALL) { 662 /* 663 * get rid of the unnecessary page 664 */ 665 vm_page_protect(m, VM_PROT_NONE); 666 PAGE_WAKEUP(m); 667 vm_page_free(m); 668 m = NULL; 669 tm->dirty = VM_PAGE_BITS_ALL; 670 first_m->dirty = VM_PAGE_BITS_ALL; 671 } 672 } else { 673 /* 674 * If the other object doesn't have the page, 675 * then we move it there. 676 */ 677 vm_page_rename(m, other_object, other_pindex); 678 m->dirty = VM_PAGE_BITS_ALL; 679 m->valid = VM_PAGE_BITS_ALL; 680 } 681 } 682 } 683 } 684 685 if (m) { 686 if (m->queue != PQ_ACTIVE) 687 vm_page_activate(m); 688 /* 689 * We no longer need the old page or object. 690 */ 691 PAGE_WAKEUP(m); 692 } 693 694 vm_object_pip_wakeup(object); 695 /* 696 * Only use the new page below... 697 */ 698 699 cnt.v_cow_faults++; 700 m = first_m; 701 object = first_object; 702 pindex = first_pindex; 703 704 /* 705 * Now that we've gotten the copy out of the way, 706 * let's try to collapse the top object. 707 * 708 * But we have to play ugly games with 709 * paging_in_progress to do that... 710 */ 711 vm_object_pip_wakeup(object); 712 vm_object_collapse(object); 713 object->paging_in_progress++; 714 } else { 715 prot &= ~VM_PROT_WRITE; 716 } 717 } 718 719 /* 720 * We must verify that the maps have not changed since our last 721 * lookup. 722 */ 723 724 if (!lookup_still_valid) { 725 vm_object_t retry_object; 726 vm_pindex_t retry_pindex; 727 vm_prot_t retry_prot; 728 729 /* 730 * Since map entries may be pageable, make sure we can take a 731 * page fault on them. 732 */ 733 734 /* 735 * To avoid trying to write_lock the map while another process 736 * has it read_locked (in vm_map_pageable), we do not try for 737 * write permission. If the page is still writable, we will 738 * get write permission. If it is not, or has been marked 739 * needs_copy, we enter the mapping without write permission, 740 * and will merely take another fault. 741 */ 742 result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE, 743 &entry, &retry_object, &retry_pindex, &retry_prot, &wired, &su); 744 745 /* 746 * If we don't need the page any longer, put it on the active 747 * list (the easiest thing to do here). If no one needs it, 748 * pageout will grab it eventually. 749 */ 750 751 if (result != KERN_SUCCESS) { 752 RELEASE_PAGE(m); 753 UNLOCK_AND_DEALLOCATE; 754 return (result); 755 } 756 lookup_still_valid = TRUE; 757 758 if ((retry_object != first_object) || 759 (retry_pindex != first_pindex)) { 760 RELEASE_PAGE(m); 761 UNLOCK_AND_DEALLOCATE; 762 goto RetryFault; 763 } 764 /* 765 * Check whether the protection has changed or the object has 766 * been copied while we left the map unlocked. Changing from 767 * read to write permission is OK - we leave the page 768 * write-protected, and catch the write fault. Changing from 769 * write to read permission means that we can't mark the page 770 * write-enabled after all. 771 */ 772 prot &= retry_prot; 773 } 774 775 /* 776 * Put this page into the physical map. We had to do the unlock above 777 * because pmap_enter may cause other faults. We don't put the page 778 * back on the active queue until later so that the page-out daemon 779 * won't find us (yet). 780 */ 781 782 if (prot & VM_PROT_WRITE) { 783 m->flags |= PG_WRITEABLE; 784 m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY; 785 /* 786 * If the fault is a write, we know that this page is being 787 * written NOW. This will save on the pmap_is_modified() calls 788 * later. 789 */ 790 if (fault_type & VM_PROT_WRITE) { 791 m->dirty = VM_PAGE_BITS_ALL; 792 } 793 } 794 795 UNLOCK_THINGS; 796 m->valid = VM_PAGE_BITS_ALL; 797 m->flags &= ~PG_ZERO; 798 799 pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); 800 if ((change_wiring == 0) && (wired == 0)) 801 pmap_prefault(map->pmap, vaddr, entry, first_object); 802 803 m->flags |= PG_MAPPED|PG_REFERENCED; 804 805 /* 806 * If the page is not wired down, then put it where the pageout daemon 807 * can find it. 808 */ 809 if (change_wiring) { 810 if (wired) 811 vm_page_wire(m); 812 else 813 vm_page_unwire(m); 814 } else { 815 if (m->queue != PQ_ACTIVE) 816 vm_page_activate(m); 817 } 818 819 if (curproc && (curproc->p_flag & P_INMEM) && curproc->p_stats) { 820 if (hardfault) { 821 curproc->p_stats->p_ru.ru_majflt++; 822 } else { 823 curproc->p_stats->p_ru.ru_minflt++; 824 } 825 } 826 827 /* 828 * Unlock everything, and return 829 */ 830 831 PAGE_WAKEUP(m); 832 vm_object_deallocate(first_object); 833 834 return (KERN_SUCCESS); 835 836 } 837 838 /* 839 * vm_fault_wire: 840 * 841 * Wire down a range of virtual addresses in a map. 842 */ 843 int 844 vm_fault_wire(map, start, end) 845 vm_map_t map; 846 vm_offset_t start, end; 847 { 848 849 register vm_offset_t va; 850 register pmap_t pmap; 851 int rv; 852 853 pmap = vm_map_pmap(map); 854 855 /* 856 * Inform the physical mapping system that the range of addresses may 857 * not fault, so that page tables and such can be locked down as well. 858 */ 859 860 pmap_pageable(pmap, start, end, FALSE); 861 862 /* 863 * We simulate a fault to get the page and enter it in the physical 864 * map. 865 */ 866 867 for (va = start; va < end; va += PAGE_SIZE) { 868 rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE, 869 VM_FAULT_CHANGE_WIRING); 870 if (rv) { 871 if (va != start) 872 vm_fault_unwire(map, start, va); 873 return (rv); 874 } 875 } 876 return (KERN_SUCCESS); 877 } 878 879 /* 880 * vm_fault_user_wire: 881 * 882 * Wire down a range of virtual addresses in a map. This 883 * is for user mode though, so we only ask for read access 884 * on currently read only sections. 885 */ 886 int 887 vm_fault_user_wire(map, start, end) 888 vm_map_t map; 889 vm_offset_t start, end; 890 { 891 892 register vm_offset_t va; 893 register pmap_t pmap; 894 int rv; 895 896 pmap = vm_map_pmap(map); 897 898 /* 899 * Inform the physical mapping system that the range of addresses may 900 * not fault, so that page tables and such can be locked down as well. 901 */ 902 pmap_pageable(pmap, start, end, FALSE); 903 904 /* 905 * We simulate a fault to get the page and enter it in the physical 906 * map. 907 */ 908 for (va = start; va < end; va += PAGE_SIZE) { 909 rv = vm_fault(map, va, VM_PROT_READ, VM_FAULT_USER_WIRE); 910 if (rv) { 911 if (va != start) 912 vm_fault_unwire(map, start, va); 913 return (rv); 914 } 915 } 916 return (KERN_SUCCESS); 917 } 918 919 920 /* 921 * vm_fault_unwire: 922 * 923 * Unwire a range of virtual addresses in a map. 924 */ 925 void 926 vm_fault_unwire(map, start, end) 927 vm_map_t map; 928 vm_offset_t start, end; 929 { 930 931 register vm_offset_t va, pa; 932 register pmap_t pmap; 933 934 pmap = vm_map_pmap(map); 935 936 /* 937 * Since the pages are wired down, we must be able to get their 938 * mappings from the physical map system. 939 */ 940 941 for (va = start; va < end; va += PAGE_SIZE) { 942 pa = pmap_extract(pmap, va); 943 if (pa != (vm_offset_t) 0) { 944 pmap_change_wiring(pmap, va, FALSE); 945 vm_page_unwire(PHYS_TO_VM_PAGE(pa)); 946 } 947 } 948 949 /* 950 * Inform the physical mapping system that the range of addresses may 951 * fault, so that page tables and such may be unwired themselves. 952 */ 953 954 pmap_pageable(pmap, start, end, TRUE); 955 956 } 957 958 /* 959 * Routine: 960 * vm_fault_copy_entry 961 * Function: 962 * Copy all of the pages from a wired-down map entry to another. 963 * 964 * In/out conditions: 965 * The source and destination maps must be locked for write. 966 * The source map entry must be wired down (or be a sharing map 967 * entry corresponding to a main map entry that is wired down). 968 */ 969 970 void 971 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) 972 vm_map_t dst_map; 973 vm_map_t src_map; 974 vm_map_entry_t dst_entry; 975 vm_map_entry_t src_entry; 976 { 977 vm_object_t dst_object; 978 vm_object_t src_object; 979 vm_ooffset_t dst_offset; 980 vm_ooffset_t src_offset; 981 vm_prot_t prot; 982 vm_offset_t vaddr; 983 vm_page_t dst_m; 984 vm_page_t src_m; 985 986 #ifdef lint 987 src_map++; 988 #endif /* lint */ 989 990 src_object = src_entry->object.vm_object; 991 src_offset = src_entry->offset; 992 993 /* 994 * Create the top-level object for the destination entry. (Doesn't 995 * actually shadow anything - we copy the pages directly.) 996 */ 997 dst_object = vm_object_allocate(OBJT_DEFAULT, 998 (vm_size_t) OFF_TO_IDX(dst_entry->end - dst_entry->start)); 999 1000 dst_entry->object.vm_object = dst_object; 1001 dst_entry->offset = 0; 1002 1003 prot = dst_entry->max_protection; 1004 1005 /* 1006 * Loop through all of the pages in the entry's range, copying each 1007 * one from the source object (it should be there) to the destination 1008 * object. 1009 */ 1010 for (vaddr = dst_entry->start, dst_offset = 0; 1011 vaddr < dst_entry->end; 1012 vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { 1013 1014 /* 1015 * Allocate a page in the destination object 1016 */ 1017 do { 1018 dst_m = vm_page_alloc(dst_object, 1019 OFF_TO_IDX(dst_offset), VM_ALLOC_NORMAL); 1020 if (dst_m == NULL) { 1021 VM_WAIT; 1022 } 1023 } while (dst_m == NULL); 1024 1025 /* 1026 * Find the page in the source object, and copy it in. 1027 * (Because the source is wired down, the page will be in 1028 * memory.) 1029 */ 1030 src_m = vm_page_lookup(src_object, 1031 OFF_TO_IDX(dst_offset + src_offset)); 1032 if (src_m == NULL) 1033 panic("vm_fault_copy_wired: page missing"); 1034 1035 vm_page_copy(src_m, dst_m); 1036 1037 /* 1038 * Enter it in the pmap... 1039 */ 1040 1041 dst_m->flags &= ~PG_ZERO; 1042 pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), 1043 prot, FALSE); 1044 dst_m->flags |= PG_WRITEABLE|PG_MAPPED; 1045 1046 /* 1047 * Mark it no longer busy, and put it on the active list. 1048 */ 1049 vm_page_activate(dst_m); 1050 PAGE_WAKEUP(dst_m); 1051 } 1052 } 1053 1054 1055 /* 1056 * This routine checks around the requested page for other pages that 1057 * might be able to be faulted in. This routine brackets the viable 1058 * pages for the pages to be paged in. 1059 * 1060 * Inputs: 1061 * m, rbehind, rahead 1062 * 1063 * Outputs: 1064 * marray (array of vm_page_t), reqpage (index of requested page) 1065 * 1066 * Return value: 1067 * number of pages in marray 1068 */ 1069 int 1070 vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) 1071 vm_page_t m; 1072 int rbehind; 1073 int rahead; 1074 vm_page_t *marray; 1075 int *reqpage; 1076 { 1077 int i; 1078 vm_object_t object; 1079 vm_pindex_t pindex, startpindex, endpindex, tpindex; 1080 vm_offset_t size; 1081 vm_page_t rtm; 1082 int treqpage; 1083 int cbehind, cahead; 1084 1085 object = m->object; 1086 pindex = m->pindex; 1087 1088 /* 1089 * we don't fault-ahead for device pager 1090 */ 1091 if (object->type == OBJT_DEVICE) { 1092 *reqpage = 0; 1093 marray[0] = m; 1094 return 1; 1095 } 1096 1097 /* 1098 * if the requested page is not available, then give up now 1099 */ 1100 1101 if (!vm_pager_has_page(object, 1102 OFF_TO_IDX(object->paging_offset) + pindex, &cbehind, &cahead)) 1103 return 0; 1104 1105 if ((cbehind == 0) && (cahead == 0)) { 1106 *reqpage = 0; 1107 marray[0] = m; 1108 return 1; 1109 } 1110 1111 if (rahead > cahead) { 1112 rahead = cahead; 1113 } 1114 1115 if (rbehind > cbehind) { 1116 rbehind = cbehind; 1117 } 1118 1119 /* 1120 * try to do any readahead that we might have free pages for. 1121 */ 1122 if ((rahead + rbehind) > 1123 ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) { 1124 pagedaemon_wakeup(); 1125 *reqpage = 0; 1126 marray[0] = m; 1127 return 1; 1128 } 1129 1130 /* 1131 * scan backward for the read behind pages -- in memory or on disk not 1132 * in same object 1133 */ 1134 tpindex = pindex - 1; 1135 if (tpindex < pindex) { 1136 if (rbehind > pindex) 1137 rbehind = pindex; 1138 startpindex = pindex - rbehind; 1139 while (tpindex >= startpindex) { 1140 if (vm_page_lookup( object, tpindex)) { 1141 startpindex = tpindex + 1; 1142 break; 1143 } 1144 if (tpindex == 0) 1145 break; 1146 tpindex -= 1; 1147 } 1148 } else { 1149 startpindex = pindex; 1150 } 1151 1152 /* 1153 * scan forward for the read ahead pages -- in memory or on disk not 1154 * in same object 1155 */ 1156 tpindex = pindex + 1; 1157 endpindex = pindex + (rahead + 1); 1158 if (endpindex > object->size) 1159 endpindex = object->size; 1160 while (tpindex < endpindex) { 1161 if ( vm_page_lookup(object, tpindex)) { 1162 break; 1163 } 1164 tpindex += 1; 1165 } 1166 endpindex = tpindex; 1167 1168 /* calculate number of bytes of pages */ 1169 size = endpindex - startpindex; 1170 1171 /* calculate the page offset of the required page */ 1172 treqpage = pindex - startpindex; 1173 1174 /* see if we have space (again) */ 1175 if ((cnt.v_free_count + cnt.v_cache_count) > 1176 (cnt.v_free_reserved + size)) { 1177 /* 1178 * get our pages and don't block for them 1179 */ 1180 for (i = 0; i < size; i++) { 1181 if (i != treqpage) { 1182 rtm = vm_page_alloc(object, 1183 startpindex + i, 1184 VM_ALLOC_NORMAL); 1185 if (rtm == NULL) { 1186 if (i < treqpage) { 1187 int j; 1188 for (j = 0; j < i; j++) { 1189 FREE_PAGE(marray[j]); 1190 } 1191 *reqpage = 0; 1192 marray[0] = m; 1193 return 1; 1194 } else { 1195 size = i; 1196 *reqpage = treqpage; 1197 return size; 1198 } 1199 } 1200 marray[i] = rtm; 1201 } else { 1202 marray[i] = m; 1203 } 1204 } 1205 1206 *reqpage = treqpage; 1207 return size; 1208 } 1209 *reqpage = 0; 1210 marray[0] = m; 1211 return 1; 1212 } 1213