vm_fault.c: diff between revision 38f1b189cd839bd8aa122ae06cc084810ca1e395 (old) and revision 13458803f4111b552c573f20768353769ee401cd (new)
1/*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *

--- 104 unchanged lines hidden ---

113 -2 * PAGE_SIZE, 2 * PAGE_SIZE,
114 -3 * PAGE_SIZE, 3 * PAGE_SIZE,
115 -4 * PAGE_SIZE, 4 * PAGE_SIZE
116};
117
118static int vm_fault_additional_pages(vm_page_t, int, int, vm_page_t *, int *);
119static void vm_fault_prefault(pmap_t, vm_offset_t, vm_map_entry_t);
120
121#define VM_FAULT_READ_AHEAD 8
122#define VM_FAULT_READ_BEHIND 7
123#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)
121#define VM_FAULT_READ_BEHIND 8
122#define VM_FAULT_READ_MAX (1 + VM_FAULT_READ_AHEAD_MAX)
123#define VM_FAULT_NINCR (VM_FAULT_READ_MAX / VM_FAULT_READ_BEHIND)
124#define VM_FAULT_SUM (VM_FAULT_NINCR * (VM_FAULT_NINCR + 1) / 2)
125#define VM_FAULT_CACHE_BEHIND (VM_FAULT_READ_BEHIND * VM_FAULT_SUM)
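The new read-ahead macros replace the old fixed cluster of VM_FAULT_READ = 8 + 7 + 1 = 16 pages with an adaptive window. VM_FAULT_SUM is the arithmetic-series sum 1 + 2 + ... + VM_FAULT_NINCR, so VM_FAULT_CACHE_BEHIND is roughly the number of pages read in while the window ramped up to its maximum in VM_FAULT_READ_BEHIND-sized steps. A worked example, assuming VM_FAULT_READ_AHEAD_MAX is 31 (that constant is defined outside the lines shown here, so 31 is only illustrative):

	/* Illustrative arithmetic only; VM_FAULT_READ_AHEAD_MAX == 31 is an assumption. */
	/* VM_FAULT_READ_MAX     = 1 + 31          = 32 pages                  */
	/* VM_FAULT_NINCR        = 32 / 8          = 4 ramp-up steps           */
	/* VM_FAULT_SUM          = 4 * (4 + 1) / 2 = 10 (i.e. 1 + 2 + 3 + 4)   */
	/* VM_FAULT_CACHE_BEHIND = 8 * 10          = 80 pages swept behind     */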
124
125struct faultstate {
126 vm_page_t m;
127 vm_object_t object;
128 vm_pindex_t pindex;
129 vm_page_t first_m;
130 vm_object_t first_object;
131 vm_pindex_t first_pindex;
132 vm_map_t map;
133 vm_map_entry_t entry;
134 int lookup_still_valid;
135 struct vnode *vp;
136 int vfslocked;
137};
138
141static void vm_fault_cache_behind(const struct faultstate *fs, int distance);
142
139static inline void
140release_page(struct faultstate *fs)
141{
142
143 vm_page_wakeup(fs->m);
144 vm_page_lock(fs->m);
145 vm_page_deactivate(fs->m);
146 vm_page_unlock(fs->m);

--- 84 unchanged lines hidden ---

231 return (result);
232}
233
234int
235vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
236 int fault_flags, vm_page_t *m_hold)
237{
238 vm_prot_t prot;
239 int is_first_object_locked, result;
240 boolean_t growstack, wired;
243 long ahead, behind;
244 int alloc_req, era, faultcount, nera, reqpage, result;
245 boolean_t growstack, is_first_object_locked, wired;
241 int map_generation;
242 vm_object_t next_object;
243 vm_page_t marray[VM_FAULT_READ], mt, mt_prev;
248 vm_page_t marray[VM_FAULT_READ_MAX];
244 int hardfault;
245 int faultcount, ahead, behind, alloc_req;
246 struct faultstate fs;
247 struct vnode *vp;
248 int locked, error;
249
250 hardfault = 0;
251 growstack = TRUE;
252 PCPU_INC(cnt.v_vm_faults);
253 fs.vp = NULL;
254 fs.vfslocked = 0;
255 faultcount = behind = 0;
259 faultcount = reqpage = 0;
256
257RetryFault:;
258
259 /*
260 * Find the backing store object and offset into it to begin the
261 * search.
262 */
263 fs.map = map;

--- 191 unchanged lines hidden ---

455 * valid.
456 *
457 * Attempt to fault-in the page if there is a chance that the
458 * pager has it, and potentially fault in additional pages
459 * at the same time.
460 */
461 if (TRYPAGER) {
462 int rv;
463 int reqpage = 0;
464 u_char behavior = vm_map_entry_behavior(fs.entry);
465
466 if (behavior == MAP_ENTRY_BEHAV_RANDOM ||
467 P_KILLED(curproc)) {
471 behind = 0;
468 ahead = 0;
472 ahead = 0;
473 } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) {
469 behind = 0;
474 behind = 0;
475 ahead = atop(fs.entry->end - vaddr) - 1;
476 if (ahead > VM_FAULT_READ_AHEAD_MAX)
477 ahead = VM_FAULT_READ_AHEAD_MAX;
478 if (fs.pindex == fs.entry->next_read)
479 vm_fault_cache_behind(&fs,
480 VM_FAULT_READ_MAX);
470 } else {
481 } else {
471 behind = (vaddr - fs.entry->start) >> PAGE_SHIFT;
482 /*
483 * If this is a sequential page fault, then
484 * arithmetically increase the number of pages
485 * in the read-ahead window. Otherwise, reset
486 * the read-ahead window to its smallest size.
487 */
488 behind = atop(vaddr - fs.entry->start);
472 if (behind > VM_FAULT_READ_BEHIND)
473 behind = VM_FAULT_READ_BEHIND;
474
475 ahead = ((fs.entry->end - vaddr) >> PAGE_SHIFT) - 1;
476 if (ahead > VM_FAULT_READ_AHEAD)
477 ahead = VM_FAULT_READ_AHEAD;
491 ahead = atop(fs.entry->end - vaddr) - 1;
492 era = fs.entry->read_ahead;
493 if (fs.pindex == fs.entry->next_read) {
494 nera = era + behind;
495 if (nera > VM_FAULT_READ_AHEAD_MAX)
496 nera = VM_FAULT_READ_AHEAD_MAX;
497 behind = 0;
498 if (ahead > nera)
499 ahead = nera;
500 if (era == VM_FAULT_READ_AHEAD_MAX)
501 vm_fault_cache_behind(&fs,
502 VM_FAULT_CACHE_BEHIND);
503 } else if (ahead > VM_FAULT_READ_AHEAD_MIN)
504 ahead = VM_FAULT_READ_AHEAD_MIN;
505 if (era != ahead)
506 fs.entry->read_ahead = ahead;
478 }
507 }
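The final else branch of the new code above is the core of the change: when the faulting pindex equals fs.entry->next_read, the read-ahead window grows by the clamped behind count; otherwise it is reset to at most VM_FAULT_READ_AHEAD_MIN, and once the window has already been at VM_FAULT_READ_AHEAD_MAX for a whole fault the code starts reclaiming used pages behind the fault with vm_fault_cache_behind(). A minimal, self-contained sketch of the ramp-up, not taken from the source: VM_FAULT_READ_BEHIND = 8 comes from the diff, the maximum of 31 is only an assumed value, and the real code additionally clamps the window to the pages remaining in the map entry.

	#include <stdio.h>

	#define READ_BEHIND	8	/* VM_FAULT_READ_BEHIND, from the diff */
	#define READ_AHEAD_MAX	31	/* assumed value of VM_FAULT_READ_AHEAD_MAX */

	int
	main(void)
	{
		int era = 0;		/* the map entry's remembered read_ahead */
		int fault;

		/* Each iteration models one fault that hits entry->next_read. */
		for (fault = 1; fault <= 6; fault++) {
			int nera = era + READ_BEHIND;

			if (nera > READ_AHEAD_MAX)
				nera = READ_AHEAD_MAX;
			era = nera;
			printf("sequential fault %d: read-ahead window %d pages\n",
			    fault, era);
		}
		return (0);
	}

With those assumptions the window grows to 8, 16, 24, and 31 pages over the first four consecutive sequential faults and then stays at the maximum.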
479 is_first_object_locked = FALSE;
480 if ((behavior == MAP_ENTRY_BEHAV_SEQUENTIAL ||
481 (behavior != MAP_ENTRY_BEHAV_RANDOM &&
482 fs.pindex >= fs.entry->lastr &&
483 fs.pindex < fs.entry->lastr + VM_FAULT_READ)) &&
484 (fs.first_object == fs.object ||
485 (is_first_object_locked = VM_OBJECT_TRYLOCK(fs.first_object))) &&
486 fs.first_object->type != OBJT_DEVICE &&
487 fs.first_object->type != OBJT_PHYS &&
488 fs.first_object->type != OBJT_SG) {
489 vm_pindex_t firstpindex;
490
508
491 if (fs.first_pindex < 2 * VM_FAULT_READ)
492 firstpindex = 0;
493 else
494 firstpindex = fs.first_pindex - 2 * VM_FAULT_READ;
495 mt = fs.first_object != fs.object ?
496 fs.first_m : fs.m;
497 KASSERT(mt != NULL, ("vm_fault: missing mt"));
498 KASSERT((mt->oflags & VPO_BUSY) != 0,
499 ("vm_fault: mt %p not busy", mt));
500 mt_prev = vm_page_prev(mt);
501
502 /*
503 * note: partially valid pages cannot be
504 * included in the lookahead - NFS piecemeal
505 * writes will barf on it badly.
506 */
507 while ((mt = mt_prev) != NULL &&
508 mt->pindex >= firstpindex &&
509 mt->valid == VM_PAGE_BITS_ALL) {
510 mt_prev = vm_page_prev(mt);
511 if (mt->busy ||
512 (mt->oflags & VPO_BUSY))
513 continue;
514 vm_page_lock(mt);
515 if (mt->hold_count ||
516 mt->wire_count) {
517 vm_page_unlock(mt);
518 continue;
519 }
520 pmap_remove_all(mt);
521 if (mt->dirty != 0)
522 vm_page_deactivate(mt);
523 else
524 vm_page_cache(mt);
525 vm_page_unlock(mt);
526 }
527 ahead += behind;
528 behind = 0;
529 }
530 if (is_first_object_locked)
531 VM_OBJECT_UNLOCK(fs.first_object);
532
533 /*
534 * Call the pager to retrieve the data, if any, after
535 * releasing the lock on the map. We hold a ref on
536 * fs.object and the pages are VPO_BUSY'd.
537 */
538 unlock_map(&fs);
539
540vnode_lock:

--- 353 unchanged lines hidden ---

894 * actual set of pages that it read, this update is based on
895 * the requested set. Typically, the requested and actual
896 * sets are the same.
897 *
898 * XXX The following assignment modifies the map
899 * without holding a write lock on it.
900 */
901 if (hardfault)
902 fs.entry->lastr = fs.pindex + faultcount - behind;
878 fs.entry->next_read = fs.pindex + faultcount - reqpage;
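The per-entry hint also changes from lastr to next_read: the pindex immediately after the last page just read in, so the next fault is treated as sequential only if it lands exactly there. A worked example with illustrative numbers, not from the source:

	/* Illustrative only: pindex = 100, faultcount = 8, reqpage = 0
	 *   => next_read = 100 + 8 - 0 = 108
	 * A later fault at pindex 108 matches fs.entry->next_read and takes
	 * the window-growing path shown earlier. */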
903
904 if ((prot & VM_PROT_WRITE) != 0 ||
905 (fault_flags & VM_FAULT_DIRTY) != 0) {
906 vm_object_set_writeable_dirty(fs.object);
907
908 /*
909 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
910 * if the page is already dirty to prevent data written with

--- 76 unchanged lines hidden ---

987 curthread->td_ru.ru_majflt++;
988 else
989 curthread->td_ru.ru_minflt++;
990
991 return (KERN_SUCCESS);
992}
993
994/*
971 * Speed up the reclamation of up to "distance" pages that precede the
972 * faulting pindex within the first object of the shadow chain.
973 */
974static void
975vm_fault_cache_behind(const struct faultstate *fs, int distance)
976{
977 vm_object_t first_object, object;
978 vm_page_t m, m_prev;
979 vm_pindex_t pindex;
980
981 object = fs->object;
982 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
983 first_object = fs->first_object;
984 if (first_object != object) {
985 if (!VM_OBJECT_TRYLOCK(first_object)) {
986 VM_OBJECT_UNLOCK(object);
987 VM_OBJECT_LOCK(first_object);
988 VM_OBJECT_LOCK(object);
989 }
990 }
991 if (first_object->type != OBJT_DEVICE &&
992 first_object->type != OBJT_PHYS && first_object->type != OBJT_SG) {
993 if (fs->first_pindex < distance)
994 pindex = 0;
995 else
996 pindex = fs->first_pindex - distance;
997 if (pindex < OFF_TO_IDX(fs->entry->offset))
998 pindex = OFF_TO_IDX(fs->entry->offset);
999 m = first_object != object ? fs->first_m : fs->m;
1000 KASSERT((m->oflags & VPO_BUSY) != 0,
1001 ("vm_fault_cache_behind: page %p is not busy", m));
1002 m_prev = vm_page_prev(m);
1003 while ((m = m_prev) != NULL && m->pindex >= pindex &&
1004 m->valid == VM_PAGE_BITS_ALL) {
1005 m_prev = vm_page_prev(m);
1006 if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0)
1007 continue;
1008 vm_page_lock(m);
1009 if (m->hold_count == 0 && m->wire_count == 0) {
1010 pmap_remove_all(m);
1011 vm_page_aflag_clear(m, PGA_REFERENCED);
1012 if (m->dirty != 0)
1013 vm_page_deactivate(m);
1014 else
1015 vm_page_cache(m);
1016 }
1017 vm_page_unlock(m);
1018 }
1019 }
1020 if (first_object != object)
1021 VM_OBJECT_UNLOCK(first_object);
1022}
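vm_fault_cache_behind() may need the first object's lock in addition to the lock on fs->object that the caller already holds, so near the top of the function it tries the first object's lock opportunistically and, if the trylock fails, drops and reacquires both locks in shadow-chain order. A generic sketch of that trylock-or-reorder idiom, with POSIX mutexes standing in for the VM object locks (illustrative only, not the kernel API used above):

	#include <pthread.h>

	/*
	 * Acquire "wanted" while "held" is already owned.  Try it first; on
	 * failure release "held" and take both in their fixed lock order,
	 * which is what prevents a deadlock against a thread locking the
	 * same pair in the opposite direction.
	 */
	void
	lock_second(pthread_mutex_t *held, pthread_mutex_t *wanted)
	{
		if (pthread_mutex_trylock(wanted) != 0) {
			pthread_mutex_unlock(held);
			pthread_mutex_lock(wanted);	/* first in lock order */
			pthread_mutex_lock(held);
		}
	}

After the locks are settled, the sweep moves clean, fully valid pages directly to the cache queue while dirty ones are only deactivated, since they still have to be laundered before reuse.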
1023
1024/*
995 * vm_fault_prefault provides a quick way of clustering
996 * page faults into a process's address space. It is a "cousin"
997 * of vm_map_pmap_enter, except it runs at page fault time instead
998 * of mmap time.
999 */
1000static void
1001vm_fault_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
1002{

--- 504 unchanged lines hidden ---