/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_fault.c	8.4 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_fault.c,v 1.56 1996/07/30 03:08:07 dyson Exp $
 */

/*
 * Page fault handling module.
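 *
 * The central routine is vm_fault(), which resolves a fault by walking
 * the shadow chain from the top-level object toward its backing objects
 * until a resident or pageable copy of the page is found, performing
 * copy-on-write or zero-fill as required, and finally entering the page
 * into the physical map with pmap_enter().  vm_fault_wire() simulates
 * faults to wire down a range of addresses, vm_fault_unwire() releases
 * such a range, vm_fault_copy_entry() copies the pages of a wired-down
 * map entry, and vm_fault_additional_pages() computes the read-behind/
 * read-ahead cluster handed to the pager.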
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/resource.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *));

#define VM_FAULT_READ_AHEAD 4
#define VM_FAULT_READ_BEHIND 3
#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)

int vm_fault_free_1;
int vm_fault_copy_save_1;
int vm_fault_copy_save_2;

/*
 *	vm_fault:
 *
 *	Handle a page fault occurring at the given address,
 *	requiring the given permissions, in the map specified.
 *	If successful, the page is inserted into the
 *	associated physical map.
 *
 *	NOTE: the given address should be truncated to the
 *	proper page address.
 *
 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *	a standard error specifying why the fault is fatal is returned.
 *
 *
 *	The map in question must be referenced, and remains so.
 *	Caller may hold no locks.
 */
int
vm_fault(map, vaddr, fault_type, change_wiring)
	vm_map_t map;
	vm_offset_t vaddr;
	vm_prot_t fault_type;
	boolean_t change_wiring;
{
	vm_object_t first_object;
	vm_pindex_t first_pindex;
	vm_map_entry_t entry;
	register vm_object_t object;
	register vm_pindex_t pindex;
	vm_page_t m;
	vm_page_t first_m;
	vm_prot_t prot;
	int result;
	boolean_t wired;
	boolean_t su;
	boolean_t lookup_still_valid;
	vm_page_t old_m;
	vm_object_t next_object;
	vm_page_t marray[VM_FAULT_READ];
	int hardfault = 0;
	struct vnode *vp = NULL;

	cnt.v_vm_faults++;	/* needs lock XXX */
	/*
	 * Recovery actions
	 */
#define	FREE_PAGE(m)	{				\
	PAGE_WAKEUP(m);					\
	vm_page_free(m);				\
}

#define	RELEASE_PAGE(m)	{				\
	PAGE_WAKEUP(m);					\
	if (m->queue != PQ_ACTIVE) vm_page_activate(m);	\
}

#define	UNLOCK_MAP	{				\
	if (lookup_still_valid) {			\
		vm_map_lookup_done(map, entry);		\
		lookup_still_valid = FALSE;		\
	}						\
}

#define	UNLOCK_THINGS	{				\
	vm_object_pip_wakeup(object);			\
	if (object != first_object) {			\
		FREE_PAGE(first_m);			\
		vm_object_pip_wakeup(first_object);	\
	}						\
	UNLOCK_MAP;					\
	if (vp != NULL) VOP_UNLOCK(vp);			\
}

#define	UNLOCK_AND_DEALLOCATE	{			\
	UNLOCK_THINGS;					\
	vm_object_deallocate(first_object);		\
}


RetryFault:;

	/*
	 * Find the backing store object and offset into it to begin the
	 * search.
	 */

	if ((result = vm_map_lookup(&map, vaddr,
	    fault_type, &entry, &first_object,
	    &first_pindex, &prot, &wired, &su)) != KERN_SUCCESS) {
		return (result);
	}

	vp = vnode_pager_lock(first_object);

	lookup_still_valid = TRUE;

	if (wired)
		fault_type = prot;

	first_m = NULL;

	/*
	 * Make a reference to this object to prevent its disposal while we
	 * are messing with it.  Once we have the reference, the map is free
	 * to be diddled.
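	 * The reference is paired with a paging_in_progress increment below,
	 * which prevents vm_object_collapse() from ripping the object apart
	 * while the fault is in flight (see invariant 4 below).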
	 * Since objects reference their shadows (and copies),
	 * they will stay around as well.
	 */

	first_object->ref_count++;
	first_object->paging_in_progress++;

	/*
	 * INVARIANTS (through entire routine):
	 *
	 * 1)	At all times, we must either have the object lock or a busy
	 * page in some object to prevent some other process from trying to
	 * bring in the same page.
	 *
	 * Note that we cannot hold any locks during the pager access or when
	 * waiting for memory, so we use a busy page then.
	 *
	 * Note also that we aren't as concerned about more than one thread
	 * attempting to pager_data_unlock the same page at once, so we don't
	 * hold the page as busy then, but do record the highest unlock value
	 * so far. [Unlock requests may also be delivered out of order.]
	 *
	 * 2)	Once we have a busy page, we must remove it from the pageout
	 * queues, so that the pageout daemon will not grab it away.
	 *
	 * 3)	To prevent another process from racing us down the shadow chain
	 * and entering a new page in the top object before we do, we must
	 * keep a busy page in the top object while following the shadow
	 * chain.
	 *
	 * 4)	We must increment paging_in_progress on any object for which
	 * we have a busy page, to prevent vm_object_collapse from removing
	 * the busy page without our noticing.
	 */

	/*
	 * Search for the page at object/offset.
	 */

	object = first_object;
	pindex = first_pindex;

	/*
	 * See whether this page is resident
	 */

	while (TRUE) {
		m = vm_page_lookup(object, pindex);
		if (m != NULL) {
			int queue;
			/*
			 * If the page is being brought in, wait for it and
			 * then retry.
			 */
			if ((m->flags & PG_BUSY) || m->busy) {
				int s;

				UNLOCK_THINGS;
				s = splvm();
				if (((m->flags & PG_BUSY) || m->busy)) {
					m->flags |= PG_WANTED | PG_REFERENCED;
					cnt.v_intrans++;
					tsleep(m, PSWP, "vmpfw", 0);
				}
				splx(s);
				vm_object_deallocate(first_object);
				goto RetryFault;
			}

			queue = m->queue;
			vm_page_unqueue_nowakeup(m);

			/*
			 * Mark page busy for other processes, and the pagedaemon.
			 */
			if (((queue - m->pc) == PQ_CACHE) &&
			    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
				vm_page_activate(m);
				UNLOCK_AND_DEALLOCATE;
				VM_WAIT;
				goto RetryFault;
			}

			m->flags |= PG_BUSY;

			if (m->valid &&
				((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) &&
				m->object != kernel_object && m->object != kmem_object) {
				goto readrest;
			}
			break;
		}
		if (((object->type != OBJT_DEFAULT) && (!change_wiring || wired))
		    || (object == first_object)) {

			if (pindex >= object->size) {
				UNLOCK_AND_DEALLOCATE;
				return (KERN_PROTECTION_FAILURE);
			}

			/*
			 * Allocate a new page for this object/offset pair.
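			 * When the object has a vnode pager or a backing
			 * object, the page contents will be filled in below,
			 * so VM_ALLOC_NORMAL suffices; otherwise we ask for
			 * a prezeroed page with VM_ALLOC_ZERO.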
			 */
			m = vm_page_alloc(object, pindex,
				(vp || object->backing_object)?VM_ALLOC_NORMAL:VM_ALLOC_ZERO);

			if (m == NULL) {
				UNLOCK_AND_DEALLOCATE;
				VM_WAIT;
				goto RetryFault;
			}
		}
readrest:
		if (object->type != OBJT_DEFAULT && (!change_wiring || wired)) {
			int rv;
			int faultcount;
			int reqpage;
			int ahead, behind;

			ahead = VM_FAULT_READ_AHEAD;
			behind = VM_FAULT_READ_BEHIND;
			if (first_object->behavior == OBJ_RANDOM) {
				ahead = 0;
				behind = 0;
			}

			if ((first_object->type != OBJT_DEVICE) &&
				(first_object->behavior == OBJ_SEQUENTIAL)) {
				vm_pindex_t firstpindex, tmppindex;
				if (first_pindex <
					2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1))
					firstpindex = 0;
				else
					firstpindex = first_pindex -
						2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1);

				for(tmppindex = first_pindex - 1;
					tmppindex >= firstpindex;
					--tmppindex) {
					vm_page_t mt;
					mt = vm_page_lookup( first_object, tmppindex);
					if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL))
						break;
					if (mt->busy ||
						(mt->flags & (PG_BUSY|PG_FICTITIOUS)) ||
						mt->hold_count ||
						mt->wire_count)
						continue;
					if (mt->dirty == 0)
						vm_page_test_dirty(mt);
					if (mt->dirty) {
						vm_page_protect(mt, VM_PROT_NONE);
						vm_page_deactivate(mt);
					} else {
						vm_page_cache(mt);
					}
				}

				ahead += behind;
				behind = 0;
			}

			/*
			 * Now we find out if any other pages should be paged
			 * in at this time.  This routine checks to see if the
			 * pages surrounding this fault reside in the same
			 * object as the page for this fault.  If they do,
			 * then they are faulted in as well.  The array
			 * "marray" returned contains an array of vm_page_t
			 * structs where one of them is the vm_page_t passed
			 * to the routine.  The reqpage return value is the
			 * index into marray of the vm_page_t passed to the
			 * routine.
			 */
			faultcount = vm_fault_additional_pages(
			    m, behind, ahead, marray, &reqpage);

			/*
			 * Call the pager to retrieve the data, if any, after
			 * releasing the lock on the map.
			 */
			UNLOCK_MAP;

			rv = faultcount ?
			    vm_pager_get_pages(object, marray, faultcount,
				reqpage) : VM_PAGER_FAIL;

			if (rv == VM_PAGER_OK) {
				/*
				 * Found the page. Leave it busy while we play
				 * with it.
				 */

				/*
				 * Relookup in case pager changed page. Pager
				 * is responsible for disposition of old page
				 * if moved.
				 */
				m = vm_page_lookup(object, pindex);
				if( !m) {
					UNLOCK_AND_DEALLOCATE;
					goto RetryFault;
				}

				hardfault++;
				break;
			}
			/*
			 * Remove the bogus page (which does not exist at this
			 * object/offset); before doing so, we must get back
			 * our object lock to preserve our invariant.
			 *
			 * Also wake up any other process that may want to bring
			 * in this page.
			 *
			 * If this is the top-level object, we must leave the
			 * busy page to prevent another process from rushing
			 * past us, and inserting the page in that object at
			 * the same time that we are.
			 */

			if (rv == VM_PAGER_ERROR)
				printf("vm_fault: pager input (probably hardware) error, PID %d failure\n",
				    curproc->p_pid);
			/*
			 * Data outside the range of the pager or an I/O error
			 */
			/*
			 * XXX - the check for kernel_map is a kludge to work
			 * around having the machine panic on a kernel space
			 * fault w/ I/O error.
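			 * A VM_PAGER_ERROR on a user map is reported to the
			 * caller as KERN_FAILURE; VM_PAGER_BAD (the offset is
			 * outside what the pager can supply) is reported as
			 * KERN_PROTECTION_FAILURE.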
			 */
			if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) {
				FREE_PAGE(m);
				UNLOCK_AND_DEALLOCATE;
				return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE);
			}
			if (object != first_object) {
				FREE_PAGE(m);
				/*
				 * XXX - we cannot just fall out at this
				 * point, m has been freed and is invalid!
				 */
			}
		}
		/*
		 * We get here if the object has a default pager (or unwiring)
		 * or the pager doesn't have the page.
		 */
		if (object == first_object)
			first_m = m;

		/*
		 * Move on to the next object. Lock the next object before
		 * unlocking the current one.
		 */

		pindex += OFF_TO_IDX(object->backing_object_offset);
		next_object = object->backing_object;
		if (next_object == NULL) {
			/*
			 * If there's no object left, fill the page in the top
			 * object with zeros.
			 */
			if (object != first_object) {
				vm_object_pip_wakeup(object);

				object = first_object;
				pindex = first_pindex;
				m = first_m;
			}
			first_m = NULL;

			if ((m->flags & PG_ZERO) == 0)
				vm_page_zero_fill(m);
			cnt.v_zfod++;
			break;
		} else {
			if (object != first_object) {
				vm_object_pip_wakeup(object);
			}
			object = next_object;
			object->paging_in_progress++;
		}
	}

	if ((m->flags & PG_BUSY) == 0)
		panic("vm_fault: not busy after main loop");

	/*
	 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
	 * is held.]
	 */

	old_m = m;	/* save page that would be copied */

	/*
	 * If the page is being written, but isn't already owned by the
	 * top-level object, we have to copy it into a new page owned by the
	 * top-level object.
	 */

	if (object != first_object) {
		/*
		 * We only really need to copy if we want to write it.
		 */

		if (fault_type & VM_PROT_WRITE) {

			/*
			 * This allows pages to be virtually copied from a
			 * backing_object into the first_object, where the
			 * backing object has no other refs to it, and cannot
			 * gain any more refs.  Instead of a bcopy, we just
			 * move the page from the backing object to the
			 * first object.  Note that we must mark the page
			 * dirty in the first object so that it will go out
			 * to swap when needed.
			 */
			if (lookup_still_valid &&
				/*
				 * Only one shadow object
				 */
				(object->shadow_count == 1) &&
				/*
				 * No COW refs, except us
				 */
				(object->ref_count == 1) &&
				/*
				 * No one else can look this object up
				 */
				(object->handle == NULL) &&
				/*
				 * No other ways to look the object up
				 */
				((object->type == OBJT_DEFAULT) ||
				 (object->type == OBJT_SWAP)) &&
				/*
				 * We don't chase down the shadow chain
				 */
				(object == first_object->backing_object)) {

				/*
				 * get rid of the unnecessary page
				 */
				vm_page_protect(first_m, VM_PROT_NONE);
				PAGE_WAKEUP(first_m);
				vm_page_free(first_m);
				/*
				 * grab the page and put it into the
				 * process's object
				 */
				vm_page_rename(m, first_object, first_pindex);
				first_m = m;
				m->dirty = VM_PAGE_BITS_ALL;
				m = NULL;
				++vm_fault_copy_save_1;
			} else {
				/*
				 * Oh, well, let's copy it.
				 */
				vm_page_copy(m, first_m);
			}

			/*
			 * This code handles the case where there are two
			 * references to the backing object, and one
			 * reference is getting a copy of the page.
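			 * (This situation typically arises after a fork,
			 * when parent and child each shadow the same
			 * backing object.)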
			 * If the other reference is the only other object
			 * that points to the backing object, then perform a
			 * virtual copy from the backing object to the other
			 * object after the page is copied to the current
			 * first_object.  If the other object already has the
			 * page, we destroy it in the backing object,
			 * performing an optimized collapse-type operation.
			 * We don't bother removing the page from the backing
			 * object's swap space.
			 */
			if (lookup_still_valid &&
				/*
				 * make sure that we have two shadow objs
				 */
				(object->shadow_count == 2) &&
				/*
				 * And no COW refs -- note that there are sometimes
				 * temp refs to objs, but ignore that case -- we just
				 * punt.
				 */
				(object->ref_count == 2) &&
				/*
				 * No one else can look us up
				 */
				(object->handle == NULL) &&
				/*
				 * Not something that can be referenced elsewhere
				 */
				((object->type == OBJT_DEFAULT) ||
				 (object->type == OBJT_SWAP)) &&
				/*
				 * We don't bother chasing down the object chain
				 */
				(object == first_object->backing_object)) {

				vm_object_t other_object;
				vm_pindex_t other_pindex, other_pindex_offset;
				vm_page_t tm;

				other_object = TAILQ_FIRST(&object->shadow_head);
				if (other_object == first_object)
					other_object = TAILQ_NEXT(other_object, shadow_list);
				if (!other_object)
					panic("vm_fault: other object missing");
				if (other_object &&
					(other_object->type == OBJT_DEFAULT) &&
					(other_object->paging_in_progress == 0)) {
					other_pindex_offset =
						OFF_TO_IDX(other_object->backing_object_offset);
					if (pindex >= other_pindex_offset) {
						other_pindex = pindex - other_pindex_offset;
						/*
						 * If the other object has the page, just free it.
						 */
						if ((tm = vm_page_lookup(other_object, other_pindex))) {
							if ((tm->flags & PG_BUSY) == 0 &&
								tm->busy == 0 &&
								tm->valid == VM_PAGE_BITS_ALL) {
								/*
								 * get rid of the unnecessary page
								 */
								vm_page_protect(m, VM_PROT_NONE);
								PAGE_WAKEUP(m);
								vm_page_free(m);
								m = NULL;
								++vm_fault_free_1;
								tm->dirty = VM_PAGE_BITS_ALL;
								first_m->dirty = VM_PAGE_BITS_ALL;
							}
						} else {
							/*
							 * If the other object doesn't have the page,
							 * then we move it there.
							 */
							vm_page_rename(m, other_object, other_pindex);
							m->dirty = VM_PAGE_BITS_ALL;
							m->valid = VM_PAGE_BITS_ALL;
							++vm_fault_copy_save_2;
						}
					}
				}
			}

			if (m) {
				if (m->queue != PQ_ACTIVE)
					vm_page_activate(m);
				/*
				 * We no longer need the old page or object.
				 */
				PAGE_WAKEUP(m);
			}

			vm_object_pip_wakeup(object);
			/*
			 * Only use the new page below...
			 */

			cnt.v_cow_faults++;
			m = first_m;
			object = first_object;
			pindex = first_pindex;

			/*
			 * Now that we've gotten the copy out of the way,
			 * let's try to collapse the top object.
			 *
			 * But we have to play ugly games with
			 * paging_in_progress to do that...
			 */
			vm_object_pip_wakeup(object);
			vm_object_collapse(object);
			object->paging_in_progress++;
		} else {
			prot &= ~VM_PROT_WRITE;
		}
	}

	/*
	 * We must verify that the maps have not changed since our last
	 * lookup.
	 */

	if (!lookup_still_valid) {
		vm_object_t retry_object;
		vm_pindex_t retry_pindex;
		vm_prot_t retry_prot;

		/*
		 * Since map entries may be pageable, make sure we can take a
		 * page fault on them.
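		 * The busy page and the reference on first_object preserve
		 * our state across the relookup below (see invariant 1
		 * above).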
		 */

		/*
		 * To avoid trying to write_lock the map while another process
		 * has it read_locked (in vm_map_pageable), we do not try for
		 * write permission.  If the page is still writable, we will
		 * get write permission.  If it is not, or has been marked
		 * needs_copy, we enter the mapping without write permission,
		 * and will merely take another fault.
		 */
		result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE,
		    &entry, &retry_object, &retry_pindex, &retry_prot, &wired, &su);

		/*
		 * If we don't need the page any longer, put it on the active
		 * list (the easiest thing to do here). If no one needs it,
		 * pageout will grab it eventually.
		 */

		if (result != KERN_SUCCESS) {
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
			return (result);
		}
		lookup_still_valid = TRUE;

		if ((retry_object != first_object) ||
		    (retry_pindex != first_pindex)) {
			RELEASE_PAGE(m);
			UNLOCK_AND_DEALLOCATE;
			goto RetryFault;
		}
		/*
		 * Check whether the protection has changed or the object has
		 * been copied while we left the map unlocked. Changing from
		 * read to write permission is OK - we leave the page
		 * write-protected, and catch the write fault. Changing from
		 * write to read permission means that we can't mark the page
		 * write-enabled after all.
		 */
		prot &= retry_prot;
	}

	/*
	 * Put this page into the physical map. We had to do the unlock above
	 * because pmap_enter may cause other faults. We don't put the page
	 * back on the active queue until later so that the page-out daemon
	 * won't find us (yet).
	 */

	if (prot & VM_PROT_WRITE) {
		m->flags |= PG_WRITEABLE;
		m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY;
		/*
		 * If the fault is a write, we know that this page is being
		 * written NOW. This will save on the pmap_is_modified() calls
		 * later.
		 */
		if (fault_type & VM_PROT_WRITE) {
			m->dirty = VM_PAGE_BITS_ALL;
		}
	}

	UNLOCK_THINGS;
	m->valid = VM_PAGE_BITS_ALL;
	m->flags &= ~PG_ZERO;

	pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);
	if ((change_wiring == 0) && (wired == 0))
		pmap_prefault(map->pmap, vaddr, entry, first_object);

	m->flags |= PG_MAPPED|PG_REFERENCED;

	/*
	 * If the page is not wired down, then put it where the pageout daemon
	 * can find it.
	 */
	if (change_wiring) {
		if (wired)
			vm_page_wire(m);
		else
			vm_page_unwire(m);
	} else {
		if (m->queue != PQ_ACTIVE)
			vm_page_activate(m);
	}

	if (curproc && (curproc->p_flag & P_INMEM) && curproc->p_stats) {
		if (hardfault) {
			curproc->p_stats->p_ru.ru_majflt++;
		} else {
			curproc->p_stats->p_ru.ru_minflt++;
		}
	}

	/*
	 * Unlock everything, and return
	 */

	PAGE_WAKEUP(m);
	vm_object_deallocate(first_object);

	return (KERN_SUCCESS);

}

/*
 *	vm_fault_wire:
 *
 *	Wire down a range of virtual addresses in a map.
 */
int
vm_fault_wire(map, start, end)
	vm_map_t map;
	vm_offset_t start, end;
{

	register vm_offset_t va;
	register pmap_t pmap;
	int rv;

	pmap = vm_map_pmap(map);

	/*
	 * Inform the physical mapping system that the range of addresses may
	 * not fault, so that page tables and such can be locked down as well.
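	 * If any of the simulated faults below fails, the pages already
	 * wired are unwired again before the error is returned.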
	 */

	pmap_pageable(pmap, start, end, FALSE);

	/*
	 * We simulate a fault to get the page and enter it in the physical
	 * map.
	 */

	for (va = start; va < end; va += PAGE_SIZE) {
		rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE, TRUE);
		if (rv) {
			if (va != start)
				vm_fault_unwire(map, start, va);
			return (rv);
		}
	}
	return (KERN_SUCCESS);
}


/*
 *	vm_fault_unwire:
 *
 *	Unwire a range of virtual addresses in a map.
 */
void
vm_fault_unwire(map, start, end)
	vm_map_t map;
	vm_offset_t start, end;
{

	register vm_offset_t va, pa;
	register pmap_t pmap;

	pmap = vm_map_pmap(map);

	/*
	 * Since the pages are wired down, we must be able to get their
	 * mappings from the physical map system.
	 */

	for (va = start; va < end; va += PAGE_SIZE) {
		pa = pmap_extract(pmap, va);
		if (pa != (vm_offset_t) 0) {
			pmap_change_wiring(pmap, va, FALSE);
			vm_page_unwire(PHYS_TO_VM_PAGE(pa));
		}
	}

	/*
	 * Inform the physical mapping system that the range of addresses may
	 * fault, so that page tables and such may be unwired themselves.
	 */

	pmap_pageable(pmap, start, end, TRUE);

}

/*
 *	Routine:
 *		vm_fault_copy_entry
 *	Function:
 *		Copy all of the pages from a wired-down map entry to another.
 *
 *	In/out conditions:
 *		The source and destination maps must be locked for write.
 *		The source map entry must be wired down (or be a sharing map
 *		entry corresponding to a main map entry that is wired down).
 */

void
vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
	vm_map_t dst_map;
	vm_map_t src_map;
	vm_map_entry_t dst_entry;
	vm_map_entry_t src_entry;
{
	vm_object_t dst_object;
	vm_object_t src_object;
	vm_ooffset_t dst_offset;
	vm_ooffset_t src_offset;
	vm_prot_t prot;
	vm_offset_t vaddr;
	vm_page_t dst_m;
	vm_page_t src_m;

#ifdef	lint
	src_map++;
#endif	/* lint */

	src_object = src_entry->object.vm_object;
	src_offset = src_entry->offset;

	/*
	 * Create the top-level object for the destination entry. (Doesn't
	 * actually shadow anything - we copy the pages directly.)
	 */
	dst_object = vm_object_allocate(OBJT_DEFAULT,
	    (vm_size_t) OFF_TO_IDX(dst_entry->end - dst_entry->start));

	dst_entry->object.vm_object = dst_object;
	dst_entry->offset = 0;

	prot = dst_entry->max_protection;

	/*
	 * Loop through all of the pages in the entry's range, copying each
	 * one from the source object (it should be there) to the destination
	 * object.
	 */
	for (vaddr = dst_entry->start, dst_offset = 0;
	    vaddr < dst_entry->end;
	    vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {

		/*
		 * Allocate a page in the destination object
		 */
		do {
			dst_m = vm_page_alloc(dst_object,
				OFF_TO_IDX(dst_offset), VM_ALLOC_NORMAL);
			if (dst_m == NULL) {
				VM_WAIT;
			}
		} while (dst_m == NULL);

		/*
		 * Find the page in the source object, and copy it in.
		 * (Because the source is wired down, the page will be in
		 * memory.)
		 */
		src_m = vm_page_lookup(src_object,
			OFF_TO_IDX(dst_offset + src_offset));
		if (src_m == NULL)
			panic("vm_fault_copy_wired: page missing");

		vm_page_copy(src_m, dst_m);

		/*
		 * Enter it in the pmap...
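		 * prot is the entry's max_protection and the wired argument
		 * is FALSE, so the copy is entered as an ordinary, unwired
		 * mapping.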
		 */

		dst_m->flags &= ~PG_ZERO;
		pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m),
		    prot, FALSE);
		dst_m->flags |= PG_WRITEABLE|PG_MAPPED;

		/*
		 * Mark it no longer busy, and put it on the active list.
		 */
		vm_page_activate(dst_m);
		PAGE_WAKEUP(dst_m);
	}
}


/*
 * This routine checks around the requested page for other pages that
 * might be able to be faulted in.  It brackets the run of viable
 * pages to be paged in around the requested page.
 *
 * Inputs:
 *	m, rbehind, rahead
 *
 * Outputs:
 *	marray (array of vm_page_t), reqpage (index of requested page)
 *
 * Return value:
 *	number of pages in marray
 */
int
vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
	vm_page_t m;
	int rbehind;
	int rahead;
	vm_page_t *marray;
	int *reqpage;
{
	int i;
	vm_object_t object;
	vm_pindex_t pindex, startpindex, endpindex, tpindex;
	vm_offset_t size;
	vm_page_t rtm;
	int treqpage;
	int cbehind, cahead;

	object = m->object;
	pindex = m->pindex;

	/*
	 * we don't fault-ahead for device pager
	 */
	if (object->type == OBJT_DEVICE) {
		*reqpage = 0;
		marray[0] = m;
		return 1;
	}

	/*
	 * if the requested page is not available, then give up now
	 */

	if (!vm_pager_has_page(object,
		OFF_TO_IDX(object->paging_offset) + pindex, &cbehind, &cahead))
		return 0;

	if ((cbehind == 0) && (cahead == 0)) {
		*reqpage = 0;
		marray[0] = m;
		return 1;
	}

	if (rahead > cahead) {
		rahead = cahead;
	}

	if (rbehind > cbehind) {
		rbehind = cbehind;
	}

	/*
	 * try to do any readahead that we might have free pages for.
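	 * If granting the full read-behind/read-ahead window would drop the
	 * free page count below the reserve, fall back to just the requested
	 * page and wake the pagedaemon.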
	 */
	if ((rahead + rbehind) >
		((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) {
		pagedaemon_wakeup();
		*reqpage = 0;
		marray[0] = m;
		return 1;
	}

	/*
	 * scan backward for the read behind pages -- in memory or on disk not
	 * in same object
	 */
	tpindex = pindex - 1;
	if (tpindex < pindex) {
		if (rbehind > pindex)
			rbehind = pindex;
		startpindex = pindex - rbehind;
		while (tpindex >= startpindex) {
			if (vm_page_lookup( object, tpindex)) {
				startpindex = tpindex + 1;
				break;
			}
			if (tpindex == 0)
				break;
			tpindex -= 1;
		}
	} else {
		startpindex = pindex;
	}

	/*
	 * scan forward for the read ahead pages -- in memory or on disk not
	 * in same object
	 */
	tpindex = pindex + 1;
	endpindex = pindex + (rahead + 1);
	if (endpindex > object->size)
		endpindex = object->size;
	while (tpindex < endpindex) {
		if ( vm_page_lookup(object, tpindex)) {
			break;
		}
		tpindex += 1;
	}
	endpindex = tpindex;

	/* calculate number of pages to bring in */
	size = endpindex - startpindex;

	/* calculate the page offset of the required page */
	treqpage = pindex - startpindex;

	/* see if we have space (again) */
	if ((cnt.v_free_count + cnt.v_cache_count) >
		(cnt.v_free_reserved + size)) {
		/*
		 * get our pages and don't block for them
		 */
		for (i = 0; i < size; i++) {
			if (i != treqpage) {
				rtm = vm_page_alloc(object,
					startpindex + i,
					VM_ALLOC_NORMAL);
				if (rtm == NULL) {
					if (i < treqpage) {
						int j;
						for (j = 0; j < i; j++) {
							FREE_PAGE(marray[j]);
						}
						*reqpage = 0;
						marray[0] = m;
						return 1;
					} else {
						size = i;
						*reqpage = treqpage;
						return size;
					}
				}
				marray[i] = rtm;
			} else {
				marray[i] = m;
			}
		}

		*reqpage = treqpage;
		return size;
	}
	*reqpage = 0;
	marray[0] = m;
	return 1;
}