xref: /freebsd/sys/i386/i386/vm_machdep.c (revision ce834215a70ff69e7e222827437116eee2f9ac6f)
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 *	$Id: vm_machdep.c,v 1.82 1997/06/25 19:49:45 tegge Exp $
 */

#include "npx.h"
#include "opt_bounce.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/reg.h>
#include <machine/md_var.h>
#include <machine/npx.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/user.h>

#ifdef PC98
#include <pc98/pc98/pc98.h>
#else
#include <i386/isa/isa.h>
#endif

#ifdef BOUNCE_BUFFERS
static vm_offset_t
		vm_bounce_kva __P((int size, int waitok));
static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
					int now));
static vm_offset_t
		vm_bounce_page_find __P((int count));
static void	vm_bounce_page_free __P((vm_offset_t pa, int count));

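/* number of entries queued in kvaf[] (below) awaiting a deferred kva free */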
static volatile int	kvasfreecnt;

caddr_t		bouncememory;
int		bouncepages;
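/*
 * bpwait	- a thread is sleeping for a free bounce page
 * bouncepa	- physical addresses of the bounce pages
 * bmwait	- a thread is sleeping for bounce kva (io_map space)
 * bmfreeing	- a deferred kva free pass is in progress
 */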
static int	bpwait;
static vm_offset_t	*bouncepa;
static int		bmwait, bmfreeing;

#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
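/*
 * Allocation bitmap for the bounce pages: a set bit marks a page that is
 * in use (or not usable).  vm_bounce_init() clears the bit for each good
 * page, vm_bounce_page_find() sets it and vm_bounce_page_free() clears it.
 */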
static int		bounceallocarraysize;
static unsigned	*bounceallocarray;
static int		bouncefree;

#if defined(PC98) && defined (EPSON_BOUNCEDMA)
#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
#else
#define SIXTEENMEG (4096*4096)
#endif
#define MAXBKVA 1024
int		maxbkva = MAXBKVA*PAGE_SIZE;

/* special list that can be used at interrupt time for eventual kva free */
static struct kvasfree {
	vm_offset_t addr;
	vm_offset_t size;
} kvaf[MAXBKVA];

/*
 * get bounce buffer pages (count physically contiguous)
 * (only count == 1 is implemented now)
 */
static vm_offset_t
vm_bounce_page_find(count)
	int count;
{
	int bit;
	int s,i;

	if (count != 1)
		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

	s = splbio();
retry:
	for (i = 0; i < bounceallocarraysize; i++) {
		if (bounceallocarray[i] != 0xffffffff) {
			bit = ffs(~bounceallocarray[i]);
			if (bit) {
				bounceallocarray[i] |= 1 << (bit - 1) ;
				bouncefree -= count;
				splx(s);
				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
			}
		}
	}
	bpwait = 1;
	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
	goto retry;
}

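/*
 * queue a bounce kva range for deferred freeing (the list may be appended
 * to at interrupt time); if "now" is set, flush the queue immediately by
 * calling vm_bounce_kva(0, 0)
 */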
static void
vm_bounce_kva_free(addr, size, now)
	vm_offset_t addr;
	vm_offset_t size;
	int now;
{
	int s = splbio();
	kvaf[kvasfreecnt].addr = addr;
	kvaf[kvasfreecnt].size = size;
	++kvasfreecnt;
	if( now) {
		/*
		 * this will do wakeups
		 */
		vm_bounce_kva(0,0);
	} else {
		if (bmwait) {
		/*
		 * if anyone is waiting on the bounce-map, then wakeup
		 */
			wakeup((caddr_t) io_map);
			bmwait = 0;
		}
	}
	splx(s);
}

/*
 * free count bounce buffer pages
 */
static void
vm_bounce_page_free(pa, count)
	vm_offset_t pa;
	int count;
{
	int allocindex;
	int index;
	int bit;

	if (count != 1)
		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");

	for(index=0;index<bouncepages;index++) {
		if( pa == bouncepa[index])
			break;
	}

	if( index == bouncepages)
		panic("vm_bounce_page_free: invalid bounce buffer");

	allocindex = index / BITS_IN_UNSIGNED;
	bit = index % BITS_IN_UNSIGNED;

	bounceallocarray[allocindex] &= ~(1 << bit);

	bouncefree += count;
	if (bpwait) {
		bpwait = 0;
		wakeup((caddr_t) &bounceallocarray);
	}
}

/*
 * allocate "size" bytes of bounce buffer kva (size is in bytes, not pages);
 * any pending deferred kva frees are processed first
 */
static vm_offset_t
vm_bounce_kva(size, waitok)
	int size;
	int waitok;
{
	int i;
	vm_offset_t kva = 0;
	vm_offset_t off;
	int s = splbio();
more:
	if (!bmfreeing && kvasfreecnt) {
		bmfreeing = 1;
		for (i = 0; i < kvasfreecnt; i++) {
			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
				pmap_kremove( kvaf[i].addr + off);
			}
			kmem_free_wakeup(io_map, kvaf[i].addr,
				kvaf[i].size);
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
		if( bmwait) {
			bmwait = 0;
			wakeup( (caddr_t) io_map);
		}
	}

	if( size == 0) {
		splx(s);
		return 0;
	}

	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
		if( !waitok) {
			splx(s);
			return 0;
		}
		bmwait = 1;
		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
		goto more;
	}
	splx(s);
	return kva;
}

/*
 * like vm_bounce_kva, but also allocates backing bounce pages
 * (takes a page count rather than a byte size as its argument)
 */
vm_offset_t
vm_bounce_kva_alloc(count)
int count;
{
	int i;
	vm_offset_t kva;
	vm_offset_t pa;
	if( bouncepages == 0) {
		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
		return kva;
	}
	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
	for(i=0;i<count;i++) {
		pa = vm_bounce_page_find(1);
		pmap_kenter(kva + i * PAGE_SIZE, pa);
	}
	return kva;
}

/*
 * like vm_bounce_kva_free, but also frees the backing bounce pages
 */
void
vm_bounce_kva_alloc_free(kva, count)
	vm_offset_t kva;
	int count;
{
	int i;
	vm_offset_t pa;
	if( bouncepages == 0) {
		free((caddr_t) kva, M_TEMP);
		return;
	}
	for(i = 0; i < count; i++) {
		pa = pmap_kextract(kva + i * PAGE_SIZE);
		vm_bounce_page_free(pa, 1);
	}
	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
}

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int i;

	if (bouncepages == 0)
		return;

	if (bp->b_flags & B_BOUNCE) {
		printf("vm_bounce_alloc: called recursively???\n");
		return;
	}

	if (bp->b_bufsize < bp->b_bcount) {
		printf(
		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
			bp->b_bufsize, bp->b_bcount);
		panic("vm_bounce_alloc");
	}

/*
 *  This is not really necessary
 *	if( bp->b_bufsize != bp->b_bcount) {
 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
 *	}
 */


	vastart = (vm_offset_t) bp->b_data;
	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;

	vapstart = trunc_page(vastart);
	vapend = round_page(vaend);
	countvmpg = (vapend - vapstart) / PAGE_SIZE;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		if( pa == 0)
			panic("vm_bounce_alloc: Unmapped page");
		va += PAGE_SIZE;
	}
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
#if 0
	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
		(bp->b_flags & B_READ) ? "read":"write",
			vapstart, vapend, countvmpg, kva);
#endif
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
#if 0
			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
#endif
			/*
			 * if we are writing, then copy the data into the page
			 */
			if ((bp->b_flags & B_READ) == 0) {
				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
			}
		} else {
			/*
			 * use original page
			 */
			pmap_kenter(kva + (PAGE_SIZE * i), pa);
		}
		va += PAGE_SIZE;
	}

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_data;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & PAGE_MASK));
#if 0
	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
#endif
	return;
}

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva, bouncekvaend;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

/*
 *  This check is not necessary
 *	if (bp->b_bufsize != bp->b_bcount) {
 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
 *			bp->b_bufsize, bp->b_bcount);
 *	}
 */

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_data;
/*
	printf("free: %d ", bp->b_bufsize);
*/

/*
 * check every page in the kva space for b_addr
 */
	for (i = 0; i < bp->b_bufsize; ) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

		copycount = round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_kextract(trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
			vm_offset_t tocopy = copycount;
			if (i + tocopy > bp->b_bufsize)
				tocopy = bp->b_bufsize - i;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
/*
 * free the bounce allocation
 */

/*
			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
*/
			vm_bounce_page_free(mybouncepa, 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		i += copycount;
	}

/*
	printf("\n");
*/
/*
 * add the old kva into the "to free" list
 */

	bouncekva= trunc_page((vm_offset_t) bp->b_data);
	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);

/*
	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
*/
	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
	bp->b_data = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}


/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
	int i;

	kvasfreecnt = 0;

	if (bouncepages == 0)
		return;

	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

	if (!bounceallocarray)
		panic("Cannot allocate bounce resource array");

	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
	if (!bouncepa)
		panic("Cannot allocate physical memory array");

	for(i=0;i<bounceallocarraysize;i++) {
		bounceallocarray[i] = 0xffffffff;
	}

	for(i=0;i<bouncepages;i++) {
		vm_offset_t pa;
		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG)
			panic("bounce memory out of range");
		if( pa == 0)
			panic("bounce memory not resident");
		bouncepa[i] = pa;
		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
	}
	bouncefree = bouncepages;

}
#endif /* BOUNCE_BUFFERS */

/*
 * quick version of vm_fault
 */
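/*
 * Touching the byte with fubyte()/subyte() lets the normal page fault
 * handler bring the page in (and, for writes, resolve any copy-on-write)
 * without calling vm_fault() directly.
 */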
void
vm_fault_quick(v, prot)
	caddr_t v;
	int prot;
{
	if (prot & VM_PROT_WRITE)
		subyte(v, fubyte(v));
	else
		fubyte(v);
}

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	struct pcb *pcb2 = &p2->p_addr->u_pcb;

	/* Ensure that p1's pcb is up to date. */
	if (npxproc == p1)
		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);

	/* Copy p1's pcb. */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies the user mode register values.
	 */
	p2->p_md.md_regs = (struct trapframe *)
			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
	*p2->p_md.md_regs = *p1->p_md.md_regs;

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register values.
	 */
	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
	pcb2->pcb_esi = (int)fork_return;
	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
	pcb2->pcb_ebx = (int)p2;
	pcb2->pcb_eip = (int)fork_trampoline;
	/*
	 * pcb2->pcb_ldt:	duplicated below, if necessary.
	 * pcb2->pcb_ldt_len:	cloned above.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above (always 0 here?).
	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
	 */

#ifdef USER_LDT
	/* Copy the LDT, if necessary. */
	if (pcb2->pcb_ldt != 0) {
		union descriptor *new_ldt;
		size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);

		new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
		bcopy(pcb2->pcb_ldt, new_ldt, len);
		pcb2->pcb_ldt = (caddr_t)new_ldt;
	}
#endif

	/*
	 * Now, cpu_switch() can schedule the new process.
	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to fork_trampoline(), which will have
	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
	 * will set up a stack to call fork_return(p, frame); to complete
	 * the return to user-mode.
	 */
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(p, func, arg)
	struct proc *p;
	void (*func) __P((void *));
	void *arg;
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:  func(arg, frame);
	 */
	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
}

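/*
 * Machine-dependent part of process exit: release the FPU state and any
 * user LDT, then switch away for the last time; cpu_switch() must not
 * return here.
 */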
void
cpu_exit(p)
	register struct proc *p;
{
#ifdef USER_LDT
	struct pcb *pcb;
#endif

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */
#ifdef USER_LDT
	pcb = &p->p_addr->u_pcb;
	if (pcb->pcb_ldt != 0) {
		if (pcb == curpcb)
			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
			pcb->pcb_ldt_len * sizeof(union descriptor));
		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
	}
#endif
	cnt.v_swtch++;
	cpu_switch(p);
	panic("cpu_exit");
}

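/*
 * Reclaim a dead process's machine-dependent resources after the parent
 * has waited for it: dispose of the upages/pcb and release the vmspace.
 */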
void
cpu_wait(p)
	struct proc *p;
{
	/* drop per-process resources */
	pmap_dispose_proc(p);
	vmspace_free(p->p_vmspace);
}

/*
 * Dump the machine specific header information at the start of a core dump.
 */
int
cpu_coredump(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{

	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
	    p));
}

#ifdef notyet
static void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}
#endif

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
	vm_offset_t va;

	va = pmap_kextract((vm_offset_t)addr);
	if (va == 0)
		panic("kvtop: zero page frame");
	return((int)va);
}

/*
 * Map an IO request into kernel virtual address space.
 *
 * All requests are (re)mapped into kernel VA space.
 * Notice that we use b_bufsize for the size of the buffer
 * to be mapped.  b_bcount might be modified by the driver.
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr, v, kva;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vmapbuf");

	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE, v += PAGE_SIZE) {
		/*
		 * Do the vm_fault if needed; do the copy-on-write thing
		 * when reading stuff off device into memory.
		 */
		vm_fault_quick(addr,
			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		if (pa == 0)
			panic("vmapbuf: page not present");
		vm_page_hold(PHYS_TO_VM_PAGE(pa));
		pmap_kenter((vm_offset_t) v, pa);
	}

	kva = bp->b_saveaddr;
	bp->b_saveaddr = bp->b_data;
	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");

	for (addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE) {
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		pmap_kremove((vm_offset_t) addr);
		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
	}

	bp->b_data = bp->b_saveaddr;
}

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset() {
#ifdef PC98
	/*
	 * Attempt to do a CPU reset via CPU reset port.
	 */
	asm("cli");
	outb(0x37, 0x0f);		/* SHUT0 = 0. */
	outb(0x37, 0x0b);		/* SHUT1 = 0. */
	outb(0xf0, 0x00);		/* Reset. */
#else
	/*
	 * Attempt to do a CPU reset via the keyboard controller,
	 * but do not turn off GateA20, as any machine that fails
	 * to do the reset here would then end up in no man's land.
	 */

#if !defined(BROKEN_KEYBOARD_RESET)
	outb(IO_KBD + 4, 0xFE);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */
	printf("Keyboard reset did not work, attempting CPU shutdown\n");
	DELAY(1000000);	/* wait 1 sec for printf to complete */
#endif
#endif /* PC98 */
	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, PAGE_SIZE);

	/* "good night, sweet prince .... <THUNK!>" */
	invltlb();
	/* NOTREACHED */
	while(1);
}

/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *	chunks of SGROWSIZ.
 */
int
grow(p, sp)
	struct proc *p;
	u_int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
	    return (1);

	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit. This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}

/*
 * Implement the pre-zeroed page mechanism.
 * This routine is called from the idle loop.
 */
int
vm_page_zero_idle()
{
	static int free_rover;
	vm_page_t m;
	int s;

#ifdef WRONG
	if (cnt.v_free_count <= cnt.v_interrupt_free_min)
		return (0);
#endif
	/*
	 * XXX
	 * We stop zeroing pages when there are sufficient prezeroed pages.
	 * This threshold isn't really needed, except we want to
	 * bypass unneeded calls to vm_page_list_find, and the
	 * associated cache flush and latency.  The pre-zero will
	 * still be called when there are significantly more
	 * non-prezeroed pages than zeroed pages.  The threshold
	 * of half the number of reserved pages is arbitrary, but
	 * approximately the right amount.  Eventually, we should
	 * perhaps interrupt the zero operation when a process
	 * is found to be ready to run.
	 */
	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
		return (0);
	s = splvm();
	enable_intr();
	m = vm_page_list_find(PQ_FREE, free_rover);
	if (m != NULL) {
		--(*vm_page_queues[m->queue].lcnt);
		TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
		splx(s);
		pmap_zero_page(VM_PAGE_TO_PHYS(m));
		(void)splvm();
		m->queue = PQ_ZERO + m->pc;
		++(*vm_page_queues[m->queue].lcnt);
		TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
		free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
		++vm_page_zero_count;
	}
	splx(s);
	disable_intr();
	return (1);
}