/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 *	$Id: vm_machdep.c,v 1.88 1997/09/10 12:31:28 joerg Exp $
 */

#include "npx.h"
#include "opt_bounce.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/user.h>

#ifdef PC98
#include <pc98/pc98/pc98.h>
#else
#include <i386/isa/isa.h>
#endif
75 
76 #ifdef BOUNCE_BUFFERS
77 static vm_offset_t
78 		vm_bounce_kva __P((int size, int waitok));
79 static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
80 					int now));
81 static vm_offset_t
82 		vm_bounce_page_find __P((int count));
83 static void	vm_bounce_page_free __P((vm_offset_t pa, int count));
84 
85 static volatile int	kvasfreecnt;
86 
87 caddr_t		bouncememory;
88 int		bouncepages;
89 static int	bpwait;
90 static vm_offset_t	*bouncepa;
91 static int		bmwait, bmfreeing;
92 
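/*
 * Allocation state for the bounce pages: bounceallocarray is a bitmap
 * with one bit per bounce page, where a set bit marks a page that is
 * allocated (or was never made available); bouncepa[] holds the
 * physical address of each bounce page, and bouncefree counts the
 * pages currently free.
 */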
#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
static int		bounceallocarraysize;
static unsigned	*bounceallocarray;
static int		bouncefree;

#if defined(PC98) && defined (EPSON_BOUNCEDMA)
#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
#else
#define SIXTEENMEG (4096*4096)
#endif
#define MAXBKVA 1024
int		maxbkva = MAXBKVA*PAGE_SIZE;

/* special list that can be used at interrupt time for eventual kva free */
static struct kvasfree {
	vm_offset_t addr;
	vm_offset_t size;
} kvaf[MAXBKVA];
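
/*
 * Entries queued on kvaf[] by vm_bounce_kva_free() are not unmapped
 * immediately; they are drained later by vm_bounce_kva() (a size of 0
 * just performs the drain and the associated wakeups), so the free can
 * safely be requested from interrupt context.
 */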

/*
 * get bounce buffer pages (count physically contiguous)
 * (only 1 implemented now)
 */
static vm_offset_t
vm_bounce_page_find(count)
	int count;
{
	int bit;
	int s,i;

	if (count != 1)
		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

	s = splbio();
retry:
	for (i = 0; i < bounceallocarraysize; i++) {
		if (bounceallocarray[i] != 0xffffffff) {
			bit = ffs(~bounceallocarray[i]);
			if (bit) {
				bounceallocarray[i] |= 1 << (bit - 1);
				bouncefree -= count;
				splx(s);
				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
			}
		}
	}
	bpwait = 1;
	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
	goto retry;
}

static void
vm_bounce_kva_free(addr, size, now)
	vm_offset_t addr;
	vm_offset_t size;
	int now;
{
	int s = splbio();
	kvaf[kvasfreecnt].addr = addr;
	kvaf[kvasfreecnt].size = size;
	++kvasfreecnt;
	if( now) {
		/*
		 * this will do wakeups
		 */
		vm_bounce_kva(0,0);
	} else {
		if (bmwait) {
			/*
			 * if anyone is waiting on the bounce-map, then wakeup
			 */
			wakeup((caddr_t) io_map);
			bmwait = 0;
		}
	}
	splx(s);
}

/*
 * free count bounce buffer pages
 */
static void
vm_bounce_page_free(pa, count)
	vm_offset_t pa;
	int count;
{
	int allocindex;
	int index;
	int bit;

	if (count != 1)
		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");

	for(index=0;index<bouncepages;index++) {
		if( pa == bouncepa[index])
			break;
	}

	if( index == bouncepages)
		panic("vm_bounce_page_free: invalid bounce buffer");

	allocindex = index / BITS_IN_UNSIGNED;
	bit = index % BITS_IN_UNSIGNED;

	bounceallocarray[allocindex] &= ~(1 << bit);

	bouncefree += count;
	if (bpwait) {
		bpwait = 0;
		wakeup((caddr_t) &bounceallocarray);
	}
}

/*
 * allocate count bounce buffer kva pages
 */
static vm_offset_t
vm_bounce_kva(size, waitok)
	int size;
	int waitok;
{
	int i;
	vm_offset_t kva = 0;
	vm_offset_t off;
	int s = splbio();
more:
	if (!bmfreeing && kvasfreecnt) {
		bmfreeing = 1;
		for (i = 0; i < kvasfreecnt; i++) {
			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
				pmap_kremove( kvaf[i].addr + off);
			}
			kmem_free_wakeup(io_map, kvaf[i].addr,
				kvaf[i].size);
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
		if( bmwait) {
			bmwait = 0;
			wakeup( (caddr_t) io_map);
		}
	}

	if( size == 0) {
		splx(s);
		return 0;
	}

	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
		if( !waitok) {
			splx(s);
			return 0;
		}
		bmwait = 1;
		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
		goto more;
	}
	splx(s);
	return kva;
}

/*
 * same as vm_bounce_kva -- but really allocates the backing pages
 * (takes a page count as its argument)
 */
vm_offset_t
vm_bounce_kva_alloc(count)
	int count;
{
	int i;
	vm_offset_t kva;
	vm_offset_t pa;
	if( bouncepages == 0) {
		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
		return kva;
	}
	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
	for(i=0;i<count;i++) {
		pa = vm_bounce_page_find(1);
		pmap_kenter(kva + i * PAGE_SIZE, pa);
	}
	return kva;
}
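
/*
 * Illustrative usage (a sketch, not taken from a real driver): a driver
 * needing a DMA-reachable scratch area might pair these calls as
 *
 *	vm_offset_t buf = vm_bounce_kva_alloc(1);
 *	... perform DMA below 16MB using buf ...
 *	vm_bounce_kva_alloc_free(buf, 1);
 *
 * When no bounce pages are configured (bouncepages == 0), both calls
 * quietly fall back to malloc()/free().
 */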

/*
 * same as vm_bounce_kva_free -- but really frees the pages
 */
void
vm_bounce_kva_alloc_free(kva, count)
	vm_offset_t kva;
	int count;
{
	int i;
	vm_offset_t pa;
	if( bouncepages == 0) {
		free((caddr_t) kva, M_TEMP);
		return;
	}
	for(i = 0; i < count; i++) {
		pa = pmap_kextract(kva + i * PAGE_SIZE);
		vm_bounce_page_free(pa, 1);
	}
	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
}

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int i;

	if (bouncepages == 0)
		return;

	if (bp->b_flags & B_BOUNCE) {
		printf("vm_bounce_alloc: called recursively???\n");
		return;
	}

	if (bp->b_bufsize < bp->b_bcount) {
		printf(
		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
			bp->b_bufsize, bp->b_bcount);
		panic("vm_bounce_alloc");
	}

/*
 *  This is not really necessary
 *	if( bp->b_bufsize != bp->b_bcount) {
 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
 *	}
 */

	vastart = (vm_offset_t) bp->b_data;
	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;

	vapstart = trunc_page(vastart);
	vapend = round_page(vaend);
	countvmpg = (vapend - vapstart) / PAGE_SIZE;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		if( pa == 0)
			panic("vm_bounce_alloc: Unmapped page");
		va += PAGE_SIZE;
	}
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
#if 0
	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
		(bp->b_flags & B_READ) ? "read":"write",
			vapstart, vapend, countvmpg, kva);
#endif
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
#if 0
			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
#endif
			/*
			 * if we are writing, then copy the data into the page
			 */
			if ((bp->b_flags & B_READ) == 0) {
				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
			}
		} else {
			/*
			 * use original page
			 */
			pmap_kenter(kva + (PAGE_SIZE * i), pa);
		}
		va += PAGE_SIZE;
	}

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_data;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & PAGE_MASK));
#if 0
	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
#endif
	return;
}
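
/*
 * Summary of the bounce round trip: vm_bounce_alloc() runs before the
 * I/O is started and remaps b_data so that every page the controller
 * sees lies below the 16MB ISA DMA limit, copying write data into the
 * bounce pages up front; vm_bounce_free() runs from biodone() and
 * copies read data back into the original buffer before releasing the
 * bounce pages and kva.
 */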

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva, bouncekvaend;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

/*
 *  This check is not necessary
 *	if (bp->b_bufsize != bp->b_bcount) {
 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
 *			bp->b_bufsize, bp->b_bcount);
 *	}
 */

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_data;
/*
	printf("free: %d ", bp->b_bufsize);
*/

/*
 * check every page in the kva space for b_addr
 */
	for (i = 0; i < bp->b_bufsize; ) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

		copycount = round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_kextract(trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
			vm_offset_t tocopy = copycount;
			if (i + tocopy > bp->b_bufsize)
				tocopy = bp->b_bufsize - i;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
/*
 * free the bounce allocation
 */

/*
			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
*/
			vm_bounce_page_free(mybouncepa, 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		i += copycount;
	}

/*
	printf("\n");
*/
/*
 * add the old kva into the "to free" list
 */

	bouncekva= trunc_page((vm_offset_t) bp->b_data);
	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);

/*
	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
*/
	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
	bp->b_data = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}
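
/*
 * Note the per-page test above: a page was bounced if and only if the
 * physical address behind the remapped kva differs from the physical
 * address behind the original kva; only those pages are copied back
 * (for reads) and returned to the bounce pool.
 */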

/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
	int i;

	kvasfreecnt = 0;

	if (bouncepages == 0)
		return;

	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

	if (!bounceallocarray)
		panic("Cannot allocate bounce resource array");

	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
	if (!bouncepa)
		panic("Cannot allocate physical memory array");

	for(i=0;i<bounceallocarraysize;i++) {
		bounceallocarray[i] = 0xffffffff;
	}

	for(i=0;i<bouncepages;i++) {
		vm_offset_t pa;
		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG)
			panic("bounce memory out of range");
		if( pa == 0)
			panic("bounce memory not resident");
		bouncepa[i] = pa;
		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
	}
	bouncefree = bouncepages;
}
#endif /* BOUNCE_BUFFERS */

/*
 * quick version of vm_fault
 */
void
vm_fault_quick(v, prot)
	caddr_t v;
	int prot;
{
	if (prot & VM_PROT_WRITE)
		subyte(v, fubyte(v));
	else
		fubyte(v);
}
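
/*
 * How the trick above works: fubyte() fetches one byte from user space,
 * forcing the page to be faulted in if it is not resident; writing the
 * same byte back with subyte() additionally forces any copy-on-write to
 * be resolved, so the page behind v is resident (and writable, if
 * requested) on return.
 */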

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	struct pcb *pcb2 = &p2->p_addr->u_pcb;

	/* Ensure that p1's pcb is up to date. */
	if (npxproc == p1)
		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);

	/* Copy p1's pcb. */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies the user mode register values.
	 */
	p2->p_md.md_regs = (struct trapframe *)
			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
	*p2->p_md.md_regs = *p1->p_md.md_regs;

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register values.
	 */
	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
	pcb2->pcb_esi = (int)fork_return;
	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
	pcb2->pcb_ebx = (int)p2;
	pcb2->pcb_eip = (int)fork_trampoline;
	/*
	 * pcb2->pcb_ldt:	duplicated below, if necessary.
	 * pcb2->pcb_ldt_len:	cloned above.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above (always 0 here?).
	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
	 */

#ifdef VM86
	/*
	 * XXX don't copy the i/o pages.  this should probably be fixed.
	 */
	pcb2->pcb_ext = 0;
#endif

#ifdef USER_LDT
	/* Copy the LDT, if necessary. */
	if (pcb2->pcb_ldt != 0) {
		union descriptor *new_ldt;
		size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);

		new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
		bcopy(pcb2->pcb_ldt, new_ldt, len);
		pcb2->pcb_ldt = (caddr_t)new_ldt;
	}
#endif

	/*
	 * Now, cpu_switch() can schedule the new process.
	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to proc_trampoline(), which will have
	 * %ebx loaded with the new proc's pointer.  proc_trampoline()
	 * will set up a stack to call fork_return(p, frame); to complete
	 * the return to user-mode.
	 */
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(p, func, arg)
	struct proc *p;
	void (*func) __P((void *));
	void *arg;
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:  func(arg, frame);
	 */
	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
}
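
/*
 * Illustrative use (a sketch; kthread_main is a hypothetical function):
 * a kernel-thread creator forks and then, before the child is first
 * scheduled, redirects it with
 *
 *	cpu_set_fork_handler(p2, kthread_main, NULL);
 *
 * so that fork_trampoline() ends up calling kthread_main(NULL, frame)
 * instead of fork_return(), and the child never returns to user mode.
 */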

void
cpu_exit(p)
	register struct proc *p;
{
#if defined(USER_LDT) || defined(VM86)
	struct pcb *pcb = &p->p_addr->u_pcb;
#endif

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */
#ifdef VM86
	if (pcb->pcb_ext != 0) {
		/*
		 * XXX do we need to move the TSS off the allocated pages
		 * before freeing them?  (not done here)
		 */
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
		    ctob(IOPAGES + 1));
		pcb->pcb_ext = 0;
	}
#endif
#ifdef USER_LDT
	if (pcb->pcb_ldt != 0) {
		if (pcb == curpcb)
			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
			pcb->pcb_ldt_len * sizeof(union descriptor));
		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
	}
#endif
	cnt.v_swtch++;
	cpu_switch(p);
	panic("cpu_exit");
}

void
cpu_wait(p)
	struct proc *p;
{
	/* drop per-process resources */
	pmap_dispose_proc(p);
	vmspace_free(p->p_vmspace);
}

/*
 * Dump the machine specific header information at the start of a core dump.
 */
int
cpu_coredump(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{

	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
	    p));
}

#ifdef notyet
static void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}
#endif

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
	vm_offset_t va;

	va = pmap_kextract((vm_offset_t)addr);
	if (va == 0)
		panic("kvtop: zero page frame");
	return((int)va);
}

/*
 * Map an IO request into kernel virtual address space.
 *
 * All requests are (re)mapped into kernel VA space.
 * Notice that we use b_bufsize for the size of the buffer
 * to be mapped.  b_bcount might be modified by the driver.
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr, v, kva;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vmapbuf");

	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE, v += PAGE_SIZE) {
		/*
		 * Do the vm_fault if needed; do the copy-on-write thing
		 * when reading stuff off device into memory.
		 */
		vm_fault_quick(addr,
			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		if (pa == 0)
			panic("vmapbuf: page not present");
		vm_page_hold(PHYS_TO_VM_PAGE(pa));
		pmap_kenter((vm_offset_t) v, pa);
	}

	kva = bp->b_saveaddr;
	bp->b_saveaddr = bp->b_data;
	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
}
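
/*
 * vmapbuf()/vunmapbuf() bracket raw (physio-style) transfers: each user
 * page is held (pinned) with vm_page_hold() and double-mapped at a
 * kernel address, the original address is parked in b_saveaddr, and
 * vunmapbuf() below undoes all of this once the I/O completes.
 */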

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");

	for (addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE) {
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		pmap_kremove((vm_offset_t) addr);
		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
	}

	bp->b_data = bp->b_saveaddr;
}

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset()
{
#ifdef PC98
	/*
	 * Attempt to do a CPU reset via CPU reset port.
	 */
	asm("cli");
	outb(0x37, 0x0f);		/* SHUT0 = 0. */
	outb(0x37, 0x0b);		/* SHUT1 = 0. */
	outb(0xf0, 0x00);		/* Reset. */
#else
	/*
	 * Attempt to do a CPU reset via the keyboard controller.
	 * Do not turn off GateA20, as any machine that fails to do
	 * the reset here would then end up in no man's land.
	 */

#if !defined(BROKEN_KEYBOARD_RESET)
	outb(IO_KBD + 4, 0xFE);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */
	printf("Keyboard reset did not work, attempting CPU shutdown\n");
	DELAY(1000000);	/* wait 1 sec for printf to complete */
#endif
#endif /* PC98 */
	/* force a shutdown by unmapping entire address space! */
	bzero((caddr_t) PTD, PAGE_SIZE);

	/* "good night, sweet prince .... <THUNK!>" */
	invltlb();
	/* NOTREACHED */
	while(1);
}

/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *	chunks of SGROWSIZ.
 */
int
grow(p, sp)
	struct proc *p;
	u_int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
		return (1);

	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit. This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size.
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}
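
/*
 * Worked example (assuming 4KB pages and an SGROWSIZ of 128KB, a
 * typical configuration of the era): with a current stack of 256KB, a
 * fault requiring nss = 268KB gives
 * grow_amount = roundup(268KB - 256KB, 128KB) = 128KB, so the stack map
 * is extended by a full 128KB chunk rather than just the 12KB needed;
 * this is the hysteresis the comment above refers to.
 */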

/*
 * Implement the pre-zeroed page mechanism.
 * This routine is called from the idle loop.
 */
int
vm_page_zero_idle()
{
	static int free_rover;
	vm_page_t m;
	int s;

#ifdef WRONG
	if (cnt.v_free_count <= cnt.v_interrupt_free_min)
		return (0);
#endif
	/*
	 * XXX
	 * We stop zeroing pages when there are sufficient prezeroed pages.
	 * This threshold isn't really needed, except we want to
	 * bypass unneeded calls to vm_page_list_find, and the
	 * associated cache flush and latency.  The pre-zero will
	 * still be called when there are significantly more
	 * non-prezeroed pages than zeroed pages.  The threshold
	 * of half the number of reserved pages is arbitrary, but
	 * approximately the right amount.  Eventually, we should
	 * perhaps interrupt the zero operation when a process
	 * is found to be ready to run.
	 */
	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
		return (0);
#ifdef SMP
	get_mplock();
#endif
	s = splvm();
	enable_intr();
	m = vm_page_list_find(PQ_FREE, free_rover);
	if (m != NULL) {
		--(*vm_page_queues[m->queue].lcnt);
		TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
		splx(s);
#ifdef SMP
		rel_mplock();
#endif
		pmap_zero_page(VM_PAGE_TO_PHYS(m));
#ifdef SMP
		get_mplock();
#endif
		(void)splvm();
		m->queue = PQ_ZERO + m->pc;
		++(*vm_page_queues[m->queue].lcnt);
		TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
		free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
		++vm_page_zero_count;
	}
	splx(s);
	disable_intr();
#ifdef SMP
	rel_mplock();
#endif
	return (1);
}
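
/*
 * Note on the locking dance above: the page is first removed from its
 * free queue at splvm, then the spl (and, on SMP, the giant lock) is
 * dropped while pmap_zero_page() runs, since zeroing a page is slow and
 * the page is by then private to us; the page is requeued on PQ_ZERO
 * only after the protection is reacquired.
 */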

/*
 * Tell whether this address is in some physical memory region.
 * Currently used by the kernel coredump code in order to avoid
 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
 * or other unpredictable behaviour.
 */

#include "isa.h"

int
is_physical_memory(addr)
	vm_offset_t addr;
{

#if NISA > 0
	/* The ISA ``memory hole''. */
	if (addr >= 0xa0000 && addr < 0x100000)
		return 0;
#endif

	/*
	 * stuff other tests for known memory-mapped devices (PCI?)
	 * here
	 */

	return 1;
}