xref: /freebsd/sys/amd64/amd64/mem.c (revision 6e8394b8baa7d5d9153ab90de6824bcd19b3b4e1)
/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and code derived from software contributed to
 * Berkeley by William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: Utah $Hdr: mem.c 1.13 89/10/08$
 *	from: @(#)mem.c	7.2 (Berkeley) 5/9/91
 *	$Id: mem.c,v 1.62 1999/05/30 16:52:04 phk Exp $
 */

/*
 * Memory special file
 */
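
/*
 * Minor device assignments (see memdevfs_init() and mmrw() below):
 *
 *	0	/dev/mem	physical memory
 *	1	/dev/kmem	kernel virtual memory
 *	2	/dev/null	EOF on read, rathole on write
 *	3	/dev/random	entropy pool; EOF once the pool is drained
 *	4	/dev/urandom	unlimited pseudo-random bytes
 *	12	/dev/zero	zeroes on read, rathole on write
 *	14	/dev/io		I/O privilege (IOPL) control
 *	32	/dev/perfmon	performance counters (PERFMON only)
 */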
47 
48 #include "opt_devfs.h"
49 #include "opt_perfmon.h"
50 
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/conf.h>
54 #include <sys/buf.h>
55 #ifdef DEVFS
56 #include <sys/devfsext.h>
57 #endif /* DEVFS */
58 #include <sys/kernel.h>
59 #include <sys/uio.h>
60 #include <sys/ioccom.h>
61 #include <sys/malloc.h>
62 #include <sys/memrange.h>
63 #include <sys/proc.h>
64 #include <sys/signalvar.h>
65 
66 #include <machine/frame.h>
67 #include <machine/md_var.h>
68 #include <machine/random.h>
69 #include <machine/psl.h>
70 #include <machine/specialreg.h>
71 #ifdef PERFMON
72 #include <machine/perfmon.h>
73 #endif
74 #include <i386/isa/intr_machdep.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_prot.h>
78 #include <vm/pmap.h>
79 #include <vm/vm_extern.h>
80 
81 
static	d_open_t	mmopen;
static	d_close_t	mmclose;
static	d_read_t	mmrw;
static	d_ioctl_t	mmioctl;
static	d_mmap_t	memmmap;
static	d_poll_t	mmpoll;

#define CDEV_MAJOR 2
static struct cdevsw mem_cdevsw = {
	/* open */	mmopen,
	/* close */	mmclose,
	/* read */	mmrw,
	/* write */	mmrw,
	/* ioctl */	mmioctl,
	/* stop */	nostop,
	/* reset */	noreset,
	/* devtotty */	nodevtotty,
	/* poll */	mmpoll,
	/* mmap */	memmmap,
	/* strategy */	nostrategy,
	/* name */	"mem",
	/* parms */	noparms,
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	0,
	/* maxio */	0,
	/* bmaj */	-1
};

static struct random_softc random_softc[16];
static caddr_t	zbuf;

MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
static int mem_ioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
static int random_ioctl __P((dev_t, u_long, caddr_t, int, struct proc *));

struct mem_range_softc mem_range_softc;
#ifdef DEVFS
static void *mem_devfs_token;
static void *kmem_devfs_token;
static void *null_devfs_token;
static void *random_devfs_token;
static void *urandom_devfs_token;
static void *zero_devfs_token;
static void *io_devfs_token;
#ifdef PERFMON
static void *perfmon_devfs_token;
#endif

static void memdevfs_init __P((void));

static void
memdevfs_init()
{
    mem_devfs_token =
	devfs_add_devswf(&mem_cdevsw, 0, DV_CHR,
			 UID_ROOT, GID_KMEM, 0640, "mem");
    kmem_devfs_token =
	devfs_add_devswf(&mem_cdevsw, 1, DV_CHR,
			 UID_ROOT, GID_KMEM, 0640, "kmem");
    null_devfs_token =
	devfs_add_devswf(&mem_cdevsw, 2, DV_CHR,
			 UID_ROOT, GID_WHEEL, 0666, "null");
    random_devfs_token =
	devfs_add_devswf(&mem_cdevsw, 3, DV_CHR,
			 UID_ROOT, GID_WHEEL, 0644, "random");
    urandom_devfs_token =
	devfs_add_devswf(&mem_cdevsw, 4, DV_CHR,
			 UID_ROOT, GID_WHEEL, 0644, "urandom");
    zero_devfs_token =
	devfs_add_devswf(&mem_cdevsw, 12, DV_CHR,
			 UID_ROOT, GID_WHEEL, 0666, "zero");
    io_devfs_token =
	devfs_add_devswf(&mem_cdevsw, 14, DV_CHR,
			 UID_ROOT, GID_WHEEL, 0600, "io");
#ifdef PERFMON
    perfmon_devfs_token =
	devfs_add_devswf(&mem_cdevsw, 32, DV_CHR,
			 UID_ROOT, GID_KMEM, 0640, "perfmon");
#endif /* PERFMON */
}
#endif /* DEVFS */

static int
mmclose(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	switch (minor(dev)) {
#ifdef PERFMON
	case 32:
		return perfmon_close(dev, flags, fmt, p);
#endif
	case 14:
		curproc->p_md.md_regs->tf_eflags &= ~PSL_IOPL;
		break;
	default:
		break;
	}
	return(0);
}

static int
mmopen(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	int error;

	switch (minor(dev)) {
	case 32:
#ifdef PERFMON
		return perfmon_open(dev, flags, fmt, p);
#else
		return ENODEV;
#endif
	case 14:
		error = suser(p);
		if (error != 0)
			return (error);
		if (securelevel > 0)
			return (EPERM);
		curproc->p_md.md_regs->tf_eflags |= PSL_IOPL;
		break;
	default:
		break;
	}
	return(0);
}
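
/*
 * Usage sketch (hypothetical userland code, for illustration only):
 * opening /dev/io sets PSL_IOPL so the process may issue inb()/outb()
 * directly; mmclose() above clears it again on close.
 *
 *	int fd = open("/dev/io", O_RDWR);
 *	...needs root and securelevel <= 0; port I/O now permitted...
 *	close(fd);
 */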

static int
mmrw(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	register int o;
	register u_int c, v;
	u_int poolsize;
	register struct iovec *iov;
	int error = 0;
	caddr_t buf = NULL;

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor(dev)) {

/* minor device 0 is physical memory */
		case 0:
			v = uio->uio_offset;
			pmap_enter(kernel_pmap, (vm_offset_t)ptvmmap, v,
				uio->uio_rw == UIO_READ ? VM_PROT_READ : VM_PROT_WRITE,
				TRUE);
			o = (int)uio->uio_offset & PAGE_MASK;
			c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK));
			c = min(c, (u_int)(PAGE_SIZE - o));
			c = min(c, (u_int)iov->iov_len);
			error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
			pmap_remove(kernel_pmap, (vm_offset_t)ptvmmap,
				    (vm_offset_t)&ptvmmap[PAGE_SIZE]);
			continue;

/* minor device 1 is kernel memory */
		case 1: {
			vm_offset_t addr, eaddr;
			c = iov->iov_len;

			/*
			 * Make sure that all of the pages are currently resident so
			 * that we don't create any zero-fill pages.
			 */
			addr = trunc_page(uio->uio_offset);
			eaddr = round_page(uio->uio_offset + c);

			if (addr < (vm_offset_t)VADDR(PTDPTDI, 0))
				return EFAULT;
			if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0))
				return EFAULT;
			for (; addr < eaddr; addr += PAGE_SIZE)
				if (pmap_extract(kernel_pmap, addr) == 0)
					return EFAULT;

			if (!kernacc((caddr_t)(int)uio->uio_offset, c,
			    uio->uio_rw == UIO_READ ? B_READ : B_WRITE))
				return(EFAULT);
			error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio);
			continue;
		}

/* minor device 2 is EOF/RATHOLE */
		case 2:
			if (uio->uio_rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			break;

/* minor device 3 (/dev/random) is source of filth on read, rathole on write */
		case 3:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random(buf, c);
			if (poolsize == 0) {
				if (buf)
					free(buf, M_TEMP);
				return (0);
			}
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

/* minor device 4 (/dev/urandom) is source of muck on read, rathole on write */
		case 4:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (CURSIG(curproc) != 0) {
				/*
				 * Use tsleep() to get the error code right.
				 * It should return immediately.
				 */
				error = tsleep(&random_softc[0],
				    PZERO | PCATCH, "urand", 1);
				if (error != 0 && error != EWOULDBLOCK)
					continue;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random_unlimited(buf, c);
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

/* minor device 12 (/dev/zero) is source of nulls on read, rathole on write */
		case 12:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (zbuf == NULL) {
				zbuf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
				bzero(zbuf, PAGE_SIZE);
			}
			c = min(iov->iov_len, PAGE_SIZE);
			error = uiomove(zbuf, (int)c, uio);
			continue;

#ifdef notyet
/* 386 I/O address space (/dev/ioport[bwl]) is read/write access to the
   separate I/O device address bus, distinct from the memory bus. The
   semantics differ from an ordinary read/write: if iov_len is a multiple
   of the port width, an implied string move from a single port is done.
   Note that lseek must be used to set the port number reliably. */
		case 14:
			if (iov->iov_len == 1) {
				u_char tmp;
				tmp = inb(uio->uio_offset);
				error = uiomove(&tmp, iov->iov_len, uio);
			} else {
				if (!useracc((caddr_t)iov->iov_base,
					iov->iov_len, uio->uio_rw))
					return (EFAULT);
				insb(uio->uio_offset, iov->iov_base,
					iov->iov_len);
			}
			break;
		case 15:
			if (iov->iov_len == sizeof (short)) {
				u_short tmp;
				tmp = inw(uio->uio_offset);
				error = uiomove(&tmp, iov->iov_len, uio);
			} else {
				if (!useracc((caddr_t)iov->iov_base,
					iov->iov_len, uio->uio_rw))
					return (EFAULT);
				insw(uio->uio_offset, iov->iov_base,
					iov->iov_len / sizeof (short));
			}
			break;
		case 16:
			if (iov->iov_len == sizeof (long)) {
				u_long tmp;
				tmp = inl(uio->uio_offset);
				error = uiomove(&tmp, iov->iov_len, uio);
			} else {
				if (!useracc((caddr_t)iov->iov_base,
					iov->iov_len, uio->uio_rw))
					return (EFAULT);
				insl(uio->uio_offset, iov->iov_base,
					iov->iov_len / sizeof (long));
			}
			break;
#endif

		default:
			return (ENXIO);
		}
		if (error)
			break;
		iov->iov_base += c;
		iov->iov_len -= c;
		uio->uio_offset += c;
		uio->uio_resid -= c;
	}
	if (buf)
		free(buf, M_TEMP);
	return (error);
}

/*******************************************************\
* allow user processes to MMAP some memory sections	*
* instead of going through read/write			*
\*******************************************************/
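
/*
 * Usage sketch (hypothetical userland code): mapping the first page of
 * physical memory through /dev/mem. memmmap() below converts the byte
 * offset into a physical page number for the pager.
 *
 *	int fd = open("/dev/mem", O_RDONLY);
 *	void *p = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
 */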
static int
memmmap(dev_t dev, vm_offset_t offset, int nprot)
{
	switch (minor(dev))
	{

/* minor device 0 is physical memory */
	case 0:
		return i386_btop(offset);

/* minor device 1 is kernel memory */
	case 1:
		return i386_btop(vtophys(offset));

	default:
		return -1;
	}
}

static int
mmioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{

	switch (minor(dev)) {
	case 0:
		return mem_ioctl(dev, cmd, data, flags, p);
	case 3:
	case 4:
		return random_ioctl(dev, cmd, data, flags, p);
#ifdef PERFMON
	case 32:
		return perfmon_ioctl(dev, cmd, data, flags, p);
#endif
	}
	return (ENODEV);
}

/*
 * Operations for changing memory attributes.
 *
 * This is basically just an ioctl shim for mem_range_attr_get
 * and mem_range_attr_set.
 */
static int
mem_ioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{
	int nd, error = 0;
	struct mem_range_op *mo = (struct mem_range_op *)data;
	struct mem_range_desc *md;

	/* is this for us? */
	if ((cmd != MEMRANGE_GET) &&
	    (cmd != MEMRANGE_SET))
		return(ENODEV);

	/* any chance we can handle this? */
	if (mem_range_softc.mr_op == NULL)
		return(EOPNOTSUPP);

	/* do we have any descriptors? */
	if (mem_range_softc.mr_ndesc == 0)
		return(ENXIO);

	switch(cmd) {
	case MEMRANGE_GET:
		nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc);
		if (nd > 0) {
			md = (struct mem_range_desc *)
				malloc(nd * sizeof(struct mem_range_desc),
				       M_MEMDESC, M_WAITOK);
			mem_range_attr_get(md, &nd);
			error = copyout(md, mo->mo_desc,
					nd * sizeof(struct mem_range_desc));
			free(md, M_MEMDESC);
		} else {
			nd = mem_range_softc.mr_ndesc;
		}
		mo->mo_arg[0] = nd;
		break;

	case MEMRANGE_SET:
		md = (struct mem_range_desc *)malloc(sizeof(struct mem_range_desc),
						    M_MEMDESC, M_WAITOK);
		error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc));
		/* make sure the owner string is NUL-terminated */
		md->mr_owner[sizeof(md->mr_owner) - 1] = 0;
		if (error == 0)
			error = mem_range_attr_set(md, &mo->mo_arg[0]);
		free(md, M_MEMDESC);
		break;

	default:
		error = EOPNOTSUPP;
	}
	return(error);
}
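
/*
 * Usage sketch (hypothetical userland code, similar in spirit to what
 * memcontrol(8) does): pass mo_arg[0] == 0 to learn how many descriptors
 * exist, then call again with a buffer of that size.
 *
 *	struct mem_range_op mo;
 *	mo.mo_arg[0] = 0;
 *	ioctl(memfd, MEMRANGE_GET, &mo);	...returns the count in mo_arg[0]
 *	mo.mo_desc = calloc(mo.mo_arg[0], sizeof(struct mem_range_desc));
 *	ioctl(memfd, MEMRANGE_GET, &mo);	...fills in the descriptors
 */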

/*
 * Implementation-neutral, kernel-callable functions for manipulating
 * memory range attributes.
 */
void
mem_range_attr_get(struct mem_range_desc *mrd, int *arg)
{
	if (*arg == 0) {
		*arg = mem_range_softc.mr_ndesc;
	} else {
		bcopy(mem_range_softc.mr_desc, mrd,
		      (*arg) * sizeof(struct mem_range_desc));
	}
}

int
mem_range_attr_set(struct mem_range_desc *mrd, int *arg)
{
	return(mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg));
}

#ifdef SMP
void
mem_range_AP_init(void)
{
	if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
		mem_range_softc.mr_op->initAP(&mem_range_softc);
}
#endif

static int
random_ioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{
	static intrmask_t interrupt_allowed;
	intrmask_t interrupt_mask;
	int error, intr;
	struct random_softc *sc;

	/*
	 * We're the random or urandom device.  The only ioctls are for
	 * selecting and inspecting which interrupts are used in the muck
	 * gathering business.
	 */
	if (cmd != MEM_SETIRQ && cmd != MEM_CLEARIRQ && cmd != MEM_RETURNIRQ)
		return (ENOTTY);

	/*
	 * Even inspecting the state is privileged, since it gives a hint
	 * about how easily the randomness might be guessed.
	 */
	error = suser(p);
	if (error != 0)
		return (error);

	/*
	 * XXX the data is 16-bit due to a historical botch, so we use
	 * magic 16's instead of ICU_LEN and can't support 24 interrupts
	 * under SMP.
	 */
	intr = *(int16_t *)data;
	if (cmd != MEM_RETURNIRQ && (intr < 0 || intr >= 16))
		return (EINVAL);

	interrupt_mask = 1 << intr;
	sc = &random_softc[intr];
	switch (cmd) {
	case MEM_SETIRQ:
		if (interrupt_allowed & interrupt_mask)
			break;
		interrupt_allowed |= interrupt_mask;
		sc->sc_intr = intr;
		disable_intr();
		sc->sc_handler = intr_handler[intr];
		intr_handler[intr] = add_interrupt_randomness;
		sc->sc_arg = intr_unit[intr];
		intr_unit[intr] = sc;
		enable_intr();
		break;
	case MEM_CLEARIRQ:
		if (!(interrupt_allowed & interrupt_mask))
			break;
		interrupt_allowed &= ~interrupt_mask;
		disable_intr();
		intr_handler[intr] = sc->sc_handler;
		intr_unit[intr] = sc->sc_arg;
		enable_intr();
		break;
	case MEM_RETURNIRQ:
		*(u_int16_t *)data = interrupt_allowed;
		break;
	default:
		return (ENOTTY);
	}
	return (0);
}
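
/*
 * Usage sketch (hypothetical userland code): hook irq 1 (the keyboard on
 * a PC) into the entropy gathering above.  The argument is 16-bit,
 * matching the historical botch noted in the XXX comment.
 *
 *	int16_t irq = 1;
 *	ioctl(randomfd, MEM_SETIRQ, &irq);
 */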

int
mmpoll(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{
	switch (minor(dev)) {
	case 3:		/* /dev/random */
		return random_poll(dev, events, p);
	case 4:		/* /dev/urandom */
	default:
		return seltrue(dev, events, p);
	}
}
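
/*
 * Usage sketch (hypothetical userland code): select(2) blocks on
 * /dev/random until entropy is available; /dev/urandom and the other
 * minors always report ready via seltrue().
 *
 *	fd_set rset;
 *	FD_ZERO(&rset);
 *	FD_SET(randomfd, &rset);
 *	select(randomfd + 1, &rset, NULL, NULL, NULL);
 */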

/*
 * Routine that identifies /dev/mem and /dev/kmem.
 *
 * A minimal stub routine can always return 0.
 */
int
iskmemdev(dev)
	dev_t dev;
{

	return ((major(dev) == mem_cdevsw.d_maj)
	      && (minor(dev) == 0 || minor(dev) == 1));
}

int
iszerodev(dev)
	dev_t dev;
{
	return ((major(dev) == mem_cdevsw.d_maj)
	  && minor(dev) == 12);
}

static int mem_devsw_installed;

static void
mem_drvinit(void *unused)
{

	/* Initialise memory range handling */
	if (mem_range_softc.mr_op != NULL)
		mem_range_softc.mr_op->init(&mem_range_softc);

	/* device registration */
	if (!mem_devsw_installed) {
		cdevsw_add(&mem_cdevsw);
		mem_devsw_installed = 1;
#ifdef DEVFS
		memdevfs_init();
#endif
	}
}

SYSINIT(memdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, mem_drvinit, NULL)