xref: /illumos-gate/usr/src/uts/common/io/mem.c (revision 24fe0b3bf671e123467ce1df0b67cadd3614c8e4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Memory special file
28  */
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/user.h>
33 #include <sys/buf.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/vm.h>
37 #include <sys/uio.h>
38 #include <sys/mman.h>
39 #include <sys/kmem.h>
40 #include <vm/seg.h>
41 #include <vm/page.h>
42 #include <sys/stat.h>
43 #include <sys/vmem.h>
44 #include <sys/memlist.h>
45 #include <sys/bootconf.h>
46 
47 #include <vm/seg_vn.h>
48 #include <vm/seg_dev.h>
49 #include <vm/seg_kmem.h>
50 #include <vm/seg_kp.h>
51 #include <vm/seg_kpm.h>
52 #include <vm/hat.h>
53 
54 #include <sys/conf.h>
55 #include <sys/mem.h>
56 #include <sys/types.h>
57 #include <sys/conf.h>
58 #include <sys/param.h>
59 #include <sys/systm.h>
60 #include <sys/errno.h>
61 #include <sys/modctl.h>
62 #include <sys/memlist.h>
63 #include <sys/ddi.h>
64 #include <sys/sunddi.h>
65 #include <sys/debug.h>
66 #include <sys/fm/protocol.h>
67 
68 #if defined(__sparc)
69 extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
70 extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
71     uint64_t *, int *, int *, int *);
72 extern size_t cpu_get_name_bufsize(void);
73 extern int cpu_get_mem_sid(char *, char *, int, int *);
74 extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
75 #elif defined(__x86)
76 #include <sys/cpu_module.h>
77 #endif	/* __sparc */
78 
79 /*
80  * Turn a byte length into a pagecount.  The DDI btop takes a
81  * 32-bit size on 32-bit machines; this macro handles 64-bit sizes
82  * for large physical-memory 32-bit machines.
83  */
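/*
 * For example, with 8K pages (_pageshift == 13), BTOP(0x200000000ULL)
 * (8GB) yields 0x100000 pages, which still fits in a 32-bit pgcnt_t.
 */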
84 #define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))
85 
86 static kmutex_t mm_lock;
87 static caddr_t mm_map;
88 
89 static dev_info_t *mm_dip;	/* private copy of devinfo pointer */
90 
91 static int mm_kmem_io_access;
92 
93 static int mm_kstat_update(kstat_t *ksp, int rw);
94 static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
95 
96 static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
97 
98 /*ARGSUSED1*/
99 static int
100 mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
101 {
102 	int i;
103 	struct mem_minor {
104 		char *name;
105 		minor_t minor;
106 		int privonly;
107 		const char *rdpriv;
108 		const char *wrpriv;
109 		mode_t priv_mode;
110 	} mm[] = {
111 		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
112 		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
113 		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
114 		{ "null",	M_NULL,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
115 		{ "zero",	M_ZERO, PRIVONLY_DEV,	NULL,	NULL,	0666 },
116 	};
117 	kstat_t *ksp;
118 
119 	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
120 	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
121 
122 	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
123 		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
124 		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
125 		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
126 		    DDI_FAILURE) {
127 			ddi_remove_minor_node(devi, NULL);
128 			return (DDI_FAILURE);
129 		}
130 	}
131 
132 	mm_dip = devi;
133 
134 	ksp = kstat_create("mm", 0, "phys_installed", "misc",
135 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
136 	if (ksp != NULL) {
137 		ksp->ks_update = mm_kstat_update;
138 		ksp->ks_snapshot = mm_kstat_snapshot;
139 		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
140 		kstat_install(ksp);
141 	}
142 
143 	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
144 	    "kmem_io_access", 0);
145 
146 	return (DDI_SUCCESS);
147 }
148 
149 /*ARGSUSED*/
150 static int
151 mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
152 {
153 	int error;
154 
155 	switch (infocmd) {
156 	case DDI_INFO_DEVT2DEVINFO:
157 		*result = (void *)mm_dip;
158 		error = DDI_SUCCESS;
159 		break;
160 	case DDI_INFO_DEVT2INSTANCE:
161 		*result = (void *)0;
162 		error = DDI_SUCCESS;
163 		break;
164 	default:
165 		error = DDI_FAILURE;
166 	}
167 	return (error);
168 }
169 
170 /*ARGSUSED1*/
171 static int
172 mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
173 {
174 	switch (getminor(*devp)) {
175 	case M_NULL:
176 	case M_ZERO:
177 	case M_MEM:
178 	case M_KMEM:
179 	case M_ALLKMEM:
180 		/* standard devices */
181 		break;
182 
183 	default:
184 		/* Unsupported or unknown minor device */
185 		return (EINVAL);
186 	}
187 	/* must be character device */
188 	if (typ != OTYP_CHR)
189 		return (EINVAL);
190 	return (0);
191 }
192 
193 struct pollhead	mm_pollhd;
194 
195 /*ARGSUSED*/
196 static int
197 mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
198     struct pollhead **phpp)
199 {
200 	switch (getminor(dev)) {
201 	case M_NULL:
202 	case M_ZERO:
203 	case M_MEM:
204 	case M_KMEM:
205 	case M_ALLKMEM:
206 		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
207 		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
208 		/*
209 		 * A non-NULL pollhead pointer must be returned in case
210 		 * the caller polls for 0 events.
211 		 */
212 		*phpp = !anyyet && !*reventsp ?
213 		    &mm_pollhd : (struct pollhead *)NULL;
214 		return (0);
215 	default:
216 		/* no other devices currently support polling */
217 		return (ENXIO);
218 	}
219 }
220 
221 static int
222 mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
223     char *name, caddr_t valuep, int *lengthp)
224 {
225 	/*
226 	 * Report a size of zero to reduce overhead (avoids two failing
227 	 * property lookups per stat).
228 	 */
229 	return (ddi_prop_op_size(dev, dip, prop_op,
230 	    flags, name, valuep, lengthp, 0));
231 }
232 
233 extern void mach_sync_icache_pa(caddr_t, size_t);
234 #pragma weak mach_sync_icache_pa
235 
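/*
 * Transfer at most one page worth of data between the caller's buffer
 * and the page frame 'pfn'.  Ordinary memory frames are accessed through
 * a kpm mapping when kpm is enabled, and through a transient
 * hat_devload() mapping at mm_map otherwise.  Non-memory (device) frames
 * are only touched when 'allowio' is set, and then via ddi_peekpokeio()
 * so that any faults are contained.
 */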
236 static int
237 mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
238     page_t *pp)
239 {
240 	int error = 0;
241 	int devload = 0;
242 	int is_memory = pf_is_memory(pfn);
243 	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
244 	    (size_t)uio->uio_iov->iov_len);
245 	caddr_t va = NULL;
246 
247 	mutex_enter(&mm_lock);
248 
249 	if (is_memory && kpm_enable) {
250 		if (pp)
251 			va = hat_kpm_mapin(pp, NULL);
252 		else
253 			va = hat_kpm_mapin_pfn(pfn);
254 	}
255 
256 	if (va == NULL) {
257 		hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
258 		    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
259 		    HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
260 		va = mm_map;
261 		devload = 1;
262 	}
263 
264 	if (!is_memory) {
265 		if (allowio) {
266 			size_t c = uio->uio_iov->iov_len;
267 
268 			if (ddi_peekpokeio(NULL, uio, rw,
269 			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
270 			    sizeof (int32_t)) != DDI_SUCCESS)
271 				error = EFAULT;
272 		} else
273 			error = EIO;
274 	} else {
275 		error = uiomove(va + pageoff, nbytes, rw, uio);
276 
277 		/*
278 		 * In case this has changed executable code,
279 		 * non-coherent I-caches must be flushed.
280 		 */
281 		if (rw != UIO_READ && &mach_sync_icache_pa != NULL) {
282 			mach_sync_icache_pa((caddr_t)ptob(pfn), PAGESIZE);
283 		}
284 	}
285 
286 	if (devload)
287 		hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
288 	else if (pp)
289 		hat_kpm_mapout(pp, NULL, va);
290 	else
291 		hat_kpm_mapout_pfn(pfn);
292 
293 	mutex_exit(&mm_lock);
294 	return (error);
295 }
296 
297 static int
298 mmpagelock(struct as *as, caddr_t va)
299 {
300 	struct seg *seg;
301 	int i;
302 
303 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
304 	seg = as_segat(as, va);
305 	i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
306 	AS_LOCK_EXIT(as, &as->a_lock);
307 
308 	return (i);
309 }
310 
311 #ifdef	__sparc
312 
313 #define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)
314 
315 #else	/* __i386, __amd64 */
316 
317 #define	NEED_LOCK_KVADDR(va)	0
318 
319 #endif	/* __sparc */
320 
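/*
 * Common read/write path for all of the mm minor devices.  The transfer
 * is handed to mmio() a page at a time for /dev/mem and /dev/[all]kmem;
 * reads of /dev/zero zero-fill the caller's buffer, and /dev/null
 * swallows writes and returns EOF on reads.
 */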
321 /*ARGSUSED3*/
322 static int
323 mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
324 {
325 	pfn_t v;
326 	struct iovec *iov;
327 	int error = 0;
328 	size_t c;
329 	ssize_t oresid = uio->uio_resid;
330 	minor_t minor = getminor(dev);
331 
332 	while (uio->uio_resid > 0 && error == 0) {
333 		iov = uio->uio_iov;
334 		if (iov->iov_len == 0) {
335 			uio->uio_iov++;
336 			uio->uio_iovcnt--;
337 			if (uio->uio_iovcnt < 0)
338 				panic("mmrw");
339 			continue;
340 		}
341 		switch (minor) {
342 
343 		case M_MEM:
344 			memlist_read_lock();
345 			if (!address_in_memlist(phys_install,
346 			    (uint64_t)uio->uio_loffset, 1)) {
347 				memlist_read_unlock();
348 				error = EFAULT;
349 				break;
350 			}
351 			memlist_read_unlock();
352 
353 			v = BTOP((u_offset_t)uio->uio_loffset);
354 			error = mmio(uio, rw, v,
355 			    uio->uio_loffset & PAGEOFFSET, 0, NULL);
356 			break;
357 
358 		case M_KMEM:
359 		case M_ALLKMEM:
360 			{
361 			page_t **ppp = NULL;
362 			caddr_t vaddr = (caddr_t)uio->uio_offset;
363 			int try_lock = NEED_LOCK_KVADDR(vaddr);
364 			int locked = 0;
365 
366 			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
367 				break;
368 
369 			/*
370 			 * If vaddr does not map a valid page, as_pagelock()
371 			 * will return failure. Hence we can't check the
372 			 * return value and return EFAULT here as we'd like.
373 			 * seg_kp and seg_kpm do not properly support
374 			 * as_pagelock() for this context so we avoid it
375 			 * using the try_lock set check above.  Some day when
376 			 * the kernel page locking gets redesigned all this
377 			 * muck can be cleaned up.
378 			 */
379 			if (try_lock)
380 				locked = (as_pagelock(&kas, &ppp, vaddr,
381 				    PAGESIZE, S_WRITE) == 0);
382 
383 			v = hat_getpfnum(kas.a_hat,
384 			    (caddr_t)(uintptr_t)uio->uio_loffset);
385 			if (v == PFN_INVALID) {
386 				if (locked)
387 					as_pageunlock(&kas, ppp, vaddr,
388 					    PAGESIZE, S_WRITE);
389 				error = EFAULT;
390 				break;
391 			}
392 
393 			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
394 			    minor == M_ALLKMEM || mm_kmem_io_access,
395 			    (locked && ppp) ? *ppp : NULL);
396 			if (locked)
397 				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
398 				    S_WRITE);
399 			}
400 
401 			break;
402 
403 		case M_ZERO:
404 			if (rw == UIO_READ) {
405 				label_t ljb;
406 
407 				if (on_fault(&ljb)) {
408 					no_fault();
409 					error = EFAULT;
410 					break;
411 				}
412 				uzero(iov->iov_base, iov->iov_len);
413 				no_fault();
414 				uio->uio_resid -= iov->iov_len;
415 				uio->uio_loffset += iov->iov_len;
416 				break;
417 			}
418 			/* else it's a write, fall through to NULL case */
419 			/*FALLTHROUGH*/
420 
421 		case M_NULL:
422 			if (rw == UIO_READ)
423 				return (0);
424 			c = iov->iov_len;
425 			iov->iov_base += c;
426 			iov->iov_len -= c;
427 			uio->uio_loffset += c;
428 			uio->uio_resid -= c;
429 			break;
430 
431 		}
432 	}
433 	return (uio->uio_resid == oresid ? error : 0);
434 }
435 
436 static int
437 mmread(dev_t dev, struct uio *uio, cred_t *cred)
438 {
439 	return (mmrw(dev, uio, UIO_READ, cred));
440 }
441 
442 static int
443 mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
444 {
445 	return (mmrw(dev, uio, UIO_WRITE, cred));
446 }
447 
448 /*
449  * Private ioctl for libkvm to support kvm_physaddr().
450  * Given an address space and a VA, compute the PA.
451  */
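/*
 * A hedged sketch of the caller's side (not part of this driver; the
 * mem_vtop_t fields are those defined in <sys/mem.h>, and the ioctl
 * must be issued on /dev/kmem, per mmioctl() below):
 *
 *	mem_vtop_t vtop;
 *	int fd = open("/dev/kmem", O_RDONLY);
 *
 *	vtop.m_as = NULL;	(NULL means the calling process's as)
 *	vtop.m_va = va;
 *	if (fd != -1 && ioctl(fd, MEM_VTOP, &vtop) == 0)
 *		pa = (uint64_t)vtop.m_pfn * PAGESIZE +
 *		    ((uintptr_t)va & PAGEOFFSET);
 */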
452 static int
453 mmioctl_vtop(intptr_t data)
454 {
455 #ifdef _SYSCALL32
456 	mem_vtop32_t vtop32;
457 #endif
458 	mem_vtop_t mem_vtop;
459 	proc_t *p;
460 	pfn_t pfn = (pfn_t)PFN_INVALID;
461 	pid_t pid = 0;
462 	struct as *as;
463 	struct seg *seg;
464 
465 	if (get_udatamodel() == DATAMODEL_NATIVE) {
466 		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
467 			return (EFAULT);
468 	}
469 #ifdef _SYSCALL32
470 	else {
471 		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
472 			return (EFAULT);
473 		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
474 		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;
475 
476 		if (mem_vtop.m_as != NULL)
477 			return (EINVAL);
478 	}
479 #endif
480 
481 	if (mem_vtop.m_as == &kas) {
482 		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
483 	} else {
484 		if (mem_vtop.m_as == NULL) {
485 			/*
486 			 * Assume the calling process's address space if the
487 			 * caller didn't specify one.
488 			 */
489 			p = curthread->t_procp;
490 			if (p == NULL)
491 				return (EIO);
492 			mem_vtop.m_as = p->p_as;
493 		}
494 
495 		mutex_enter(&pidlock);
496 		for (p = practive; p != NULL; p = p->p_next) {
497 			if (p->p_as == mem_vtop.m_as) {
498 				pid = p->p_pid;
499 				break;
500 			}
501 		}
502 		mutex_exit(&pidlock);
503 		if (p == NULL)
504 			return (EIO);
505 		p = sprlock(pid);
506 		if (p == NULL)
507 			return (EIO);
508 		as = p->p_as;
509 		if (as == mem_vtop.m_as) {
510 			mutex_exit(&p->p_lock);
511 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
512 			for (seg = AS_SEGFIRST(as); seg != NULL;
513 			    seg = AS_SEGNEXT(as, seg))
514 				if ((uintptr_t)mem_vtop.m_va -
515 				    (uintptr_t)seg->s_base < seg->s_size)
516 					break;
517 			if (seg != NULL)
518 				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
519 			AS_LOCK_EXIT(as, &as->a_lock);
520 			mutex_enter(&p->p_lock);
521 		}
522 		sprunlock(p);
523 	}
524 	mem_vtop.m_pfn = pfn;
525 	if (pfn == PFN_INVALID)
526 		return (EIO);
527 
528 	if (get_udatamodel() == DATAMODEL_NATIVE) {
529 		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
530 			return (EFAULT);
531 	}
532 #ifdef _SYSCALL32
533 	else {
534 		vtop32.m_pfn = mem_vtop.m_pfn;
535 		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
536 			return (EFAULT);
537 	}
538 #endif
539 
540 	return (0);
541 }
542 
543 /*
544  * Given a PA, execute the given page retire command on it.
545  */
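/*
 * These commands arrive via ioctl() on /dev/mem (see mmioctl() below)
 * with the physical address as their sole argument; a hedged sketch of
 * an FMA-style caller:
 *
 *	uint64_t pa = ...;
 *	int err = ioctl(memfd, MEM_PAGE_RETIRE, &pa);
 */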
546 static int
547 mmioctl_page_retire(int cmd, intptr_t data)
548 {
549 	extern int page_retire_test(void);
550 	uint64_t pa;
551 
552 	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
553 		return (EFAULT);
554 	}
555 
556 	switch (cmd) {
557 	case MEM_PAGE_ISRETIRED:
558 		return (page_retire_check(pa, NULL));
559 
560 	case MEM_PAGE_UNRETIRE:
561 		return (page_unretire(pa));
562 
563 	case MEM_PAGE_RETIRE:
564 		return (page_retire(pa, PR_FMA));
565 
566 	case MEM_PAGE_RETIRE_MCE:
567 		return (page_retire(pa, PR_MCE));
568 
569 	case MEM_PAGE_RETIRE_UE:
570 		return (page_retire(pa, PR_UE));
571 
572 	case MEM_PAGE_GETERRORS:
573 		{
574 			uint64_t page_errors;
575 			int rc = page_retire_check(pa, &page_errors);
576 			if (copyout(&page_errors, (void *)data,
577 			    sizeof (uint64_t))) {
578 				return (EFAULT);
579 			}
580 			return (rc);
581 		}
582 
583 	case MEM_PAGE_RETIRE_TEST:
584 		return (page_retire_test());
585 
586 	}
587 
588 	return (EINVAL);
589 }
590 
591 #ifdef __sparc
592 /*
593  * Given a syndrome, syndrome type, and address, return the
594  * associated memory name in the provided data buffer.
595  */
596 static int
597 mmioctl_get_mem_name(intptr_t data)
598 {
599 	mem_name_t mem_name;
600 	void *buf;
601 	size_t bufsize;
602 	int len, err;
603 
604 	if ((bufsize = cpu_get_name_bufsize()) == 0)
605 		return (ENOTSUP);
606 
607 	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
608 		return (err);
609 
610 	buf = kmem_alloc(bufsize, KM_SLEEP);
611 
612 	/*
613 	 * Call into cpu specific code to do the lookup.
614 	 */
615 	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
616 	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
617 		kmem_free(buf, bufsize);
618 		return (err);
619 	}
620 
621 	if (len >= mem_name.m_namelen) {
622 		kmem_free(buf, bufsize);
623 		return (ENOSPC);
624 	}
625 
626 	if (copyoutstr(buf, (char *)mem_name.m_name,
627 	    mem_name.m_namelen, NULL) != 0) {
628 		kmem_free(buf, bufsize);
629 		return (EFAULT);
630 	}
631 
632 	kmem_free(buf, bufsize);
633 	return (0);
634 }
635 
636 /*
637  * Given a syndrome and address, return information about the associated memory.
638  */
639 static int
640 mmioctl_get_mem_info(intptr_t data)
641 {
642 	mem_info_t mem_info;
643 	int err;
644 
645 	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
646 		return (EFAULT);
647 
648 	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
649 	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
650 	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
651 		return (err);
652 
653 	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
654 		return (EFAULT);
655 
656 	return (0);
657 }
658 
659 /*
660  * Given a memory name, return its associated serial id.
661  */
662 static int
663 mmioctl_get_mem_sid(intptr_t data)
664 {
665 	mem_name_t mem_name;
666 	void *buf;
667 	void *name;
668 	size_t	name_len;
669 	size_t bufsize;
670 	int len, err;
671 
672 	if ((bufsize = cpu_get_name_bufsize()) == 0)
673 		return (ENOTSUP);
674 
675 	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
676 		return (err);
677 
678 	buf = kmem_alloc(bufsize, KM_SLEEP);
679 
680 	if (mem_name.m_namelen > 1024)
681 		mem_name.m_namelen = 1024; /* cap at 1024 bytes */
682 
683 	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);
684 
685 	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
686 	    mem_name.m_namelen, &name_len)) != 0) {
687 		kmem_free(buf, bufsize);
688 		kmem_free(name, mem_name.m_namelen);
689 		return (err);
690 	}
691 
692 	/*
693 	 * Call into cpu specific code to do the lookup.
694 	 */
695 	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
696 		kmem_free(buf, bufsize);
697 		kmem_free(name, mem_name.m_namelen);
698 		return (err);
699 	}
700 
701 	if (len > mem_name.m_sidlen) {
702 		kmem_free(buf, bufsize);
703 		kmem_free(name, mem_name.m_namelen);
704 		return (ENAMETOOLONG);
705 	}
706 
707 	if (copyoutstr(buf, (char *)mem_name.m_sid,
708 	    mem_name.m_sidlen, NULL) != 0) {
709 		kmem_free(buf, bufsize);
710 		kmem_free(name, mem_name.m_namelen);
711 		return (EFAULT);
712 	}
713 
714 	kmem_free(buf, bufsize);
715 	kmem_free(name, mem_name.m_namelen);
716 	return (0);
717 }
718 #endif	/* __sparc */
719 
720 /*
721  * Private ioctls for:
722  *	libkvm, to support kvm_physaddr();
723  *	FMA, to support page_retire() and memory attribute information.
724  */
725 /*ARGSUSED*/
726 static int
727 mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
728 {
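	/*
	 * MEM_VTOP is only honored on /dev/kmem; all of the other
	 * private ioctls are only honored on /dev/mem.
	 */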
729 	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
730 	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
731 		return (ENXIO);
732 
733 	switch (cmd) {
734 	case MEM_VTOP:
735 		return (mmioctl_vtop(data));
736 
737 	case MEM_PAGE_RETIRE:
738 	case MEM_PAGE_ISRETIRED:
739 	case MEM_PAGE_UNRETIRE:
740 	case MEM_PAGE_RETIRE_MCE:
741 	case MEM_PAGE_RETIRE_UE:
742 	case MEM_PAGE_GETERRORS:
743 	case MEM_PAGE_RETIRE_TEST:
744 		return (mmioctl_page_retire(cmd, data));
745 
746 #ifdef __sparc
747 	case MEM_NAME:
748 		return (mmioctl_get_mem_name(data));
749 
750 	case MEM_INFO:
751 		return (mmioctl_get_mem_info(data));
752 
753 	case MEM_SID:
754 		return (mmioctl_get_mem_sid(data));
755 #else
756 	case MEM_NAME:
757 	case MEM_INFO:
758 	case MEM_SID:
759 		return (ENOTSUP);
760 #endif	/* __sparc */
761 	}
762 	return (ENXIO);
763 }
764 
765 /*ARGSUSED2*/
766 static int
767 mmmmap(dev_t dev, off_t off, int prot)
768 {
769 	pfn_t pf;
770 	struct memlist *pmem;
771 	minor_t minor = getminor(dev);
772 
773 	switch (minor) {
774 	case M_MEM:
775 		pf = btop(off);
776 		memlist_read_lock();
777 		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
778 			if (pf >= BTOP(pmem->address) &&
779 			    pf < BTOP(pmem->address + pmem->size)) {
780 				memlist_read_unlock();
781 				return (impl_obmem_pfnum(pf));
782 			}
783 		}
784 		memlist_read_unlock();
785 		break;
786 
787 	case M_KMEM:
788 	case M_ALLKMEM:
789 		/* no longer supported with KPR */
790 		return (-1);
791 
792 	case M_ZERO:
793 		/*
794 		 * We shouldn't be mmap'ing to /dev/zero here as
795 		 * mmsegmap() should have already converted
796 		 * a mapping request for this device to a mapping
797 		 * using seg_vn for anonymous memory.
798 		 */
799 		break;
800 
801 	}
802 	return (-1);
803 }
804 
805 /*
806  * This function is called when a memory device is mmap'ed.
807  * Set up the mapping to the appropriate segment driver.
808  */
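/*
 * From user context the difference looks roughly like this (a hedged
 * sketch, not part of this driver):
 *
 *	zfd = open("/dev/zero", O_RDWR);
 *	addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE,
 *	    zfd, 0);			anonymous memory via seg_vn
 *
 *	mfd = open("/dev/mem", O_RDWR);
 *	addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    mfd, (off_t)pa);		physical pages via seg_dev;
 *					MAP_PRIVATE is rejected with EINVAL
 */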
809 static int
810 mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
811     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
812 {
813 	struct segvn_crargs vn_a;
814 	struct segdev_crargs dev_a;
815 	int error;
816 	minor_t minor;
817 	off_t i;
818 
819 	minor = getminor(dev);
820 
821 	as_rangelock(as);
822 	/*
823 	 * No need to worry about vac alignment on /dev/zero
824 	 * since this is a "clone" object that doesn't yet exist.
825 	 */
826 	error = choose_addr(as, addrp, len, off,
827 	    (minor == M_MEM) || (minor == M_KMEM), flags);
828 	if (error != 0) {
829 		as_rangeunlock(as);
830 		return (error);
831 	}
832 
833 	switch (minor) {
834 	case M_MEM:
835 		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
836 		if ((flags & MAP_TYPE) != MAP_SHARED) {
837 			as_rangeunlock(as);
838 			return (EINVAL);
839 		}
840 
841 		/*
842 		 * Check to ensure that the entire range is
843 		 * legal and we are not trying to map in
844 		 * more than the device will let us.
845 		 */
846 		for (i = 0; i < len; i += PAGESIZE) {
847 			if (mmmmap(dev, off + i, maxprot) == -1) {
848 				as_rangeunlock(as);
849 				return (ENXIO);
850 			}
851 		}
852 
853 		/*
854 		 * Use seg_dev segment driver for /dev/mem mapping.
855 		 */
856 		dev_a.mapfunc = mmmmap;
857 		dev_a.dev = dev;
858 		dev_a.offset = off;
859 		dev_a.type = (flags & MAP_TYPE);
860 		dev_a.prot = (uchar_t)prot;
861 		dev_a.maxprot = (uchar_t)maxprot;
862 		dev_a.hat_attr = 0;
863 
864 		/*
865 		 * Make /dev/mem mappings non-consistent since we can't
866 		 * alias pages that don't have page structs behind them,
867 		 * such as kernel stack pages.  If someone mmap()s a kernel
868 		 * stack page and we give them a TTE with the cv bit set, a
869 		 * line from that page can get into both pages of the
870 		 * spitfire d$, but a snoop from another processor will only
871 		 * invalidate the first page.  This once caused the kernel
872 		 * (xc_attention) to go into an infinite loop at pil 13,
873 		 * with no interrupts able to come in.  See 1203630.
874 		 *
875 		 */
876 		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
877 		dev_a.devmap_data = NULL;
878 
879 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
880 		break;
881 
882 	case M_ZERO:
883 		/*
884 		 * Use seg_vn segment driver for /dev/zero mapping.
885 		 * Passing in a NULL amp gives us the "cloning" effect.
886 		 */
887 		vn_a.vp = NULL;
888 		vn_a.offset = 0;
889 		vn_a.type = (flags & MAP_TYPE);
890 		vn_a.prot = prot;
891 		vn_a.maxprot = maxprot;
892 		vn_a.flags = flags & ~MAP_TYPE;
893 		vn_a.cred = cred;
894 		vn_a.amp = NULL;
895 		vn_a.szc = 0;
896 		vn_a.lgrp_mem_policy_flags = 0;
897 		error = as_map(as, *addrp, len, segvn_create, &vn_a);
898 		break;
899 
900 	case M_KMEM:
901 	case M_ALLKMEM:
902 		/* No longer supported with KPR. */
903 		error = ENXIO;
904 		break;
905 
906 	case M_NULL:
907 		/*
908 		 * Use seg_dev segment driver for /dev/null mapping.
909 		 */
910 		dev_a.mapfunc = mmmmap;
911 		dev_a.dev = dev;
912 		dev_a.offset = off;
913 		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
914 		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
915 		dev_a.hat_attr = 0;
916 		dev_a.hat_flags = 0;
917 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
918 		break;
919 
920 	default:
921 		error = ENXIO;
922 	}
923 
924 	as_rangeunlock(as);
925 	return (error);
926 }
927 
928 static struct cb_ops mm_cb_ops = {
929 	mmopen,			/* open */
930 	nulldev,		/* close */
931 	nodev,			/* strategy */
932 	nodev,			/* print */
933 	nodev,			/* dump */
934 	mmread,			/* read */
935 	mmwrite,		/* write */
936 	mmioctl,		/* ioctl */
937 	nodev,			/* devmap */
938 	mmmmap,			/* mmap */
939 	mmsegmap,		/* segmap */
940 	mmchpoll,		/* poll */
941 	mmpropop,		/* prop_op */
942 	0,			/* streamtab  */
943 	D_NEW | D_MP | D_64BIT | D_U64BIT
944 };
945 
946 static struct dev_ops mm_ops = {
947 	DEVO_REV,		/* devo_rev, */
948 	0,			/* refcnt  */
949 	mm_info,		/* get_dev_info */
950 	nulldev,		/* identify */
951 	nulldev,		/* probe */
952 	mm_attach,		/* attach */
953 	nodev,			/* detach */
954 	nodev,			/* reset */
955 	&mm_cb_ops,		/* driver operations */
956 	(struct bus_ops *)0,	/* bus operations */
957 	NULL,			/* power */
958 	ddi_quiesce_not_needed,		/* quiesce */
959 };
960 
961 static struct modldrv modldrv = {
962 	&mod_driverops, "memory driver", &mm_ops,
963 };
964 
965 static struct modlinkage modlinkage = {
966 	MODREV_1, &modldrv, NULL
967 };
968 
969 int
970 _init(void)
971 {
972 	return (mod_install(&modlinkage));
973 }
974 
975 int
976 _info(struct modinfo *modinfop)
977 {
978 	return (mod_info(&modlinkage, modinfop));
979 }
980 
981 int
982 _fini(void)
983 {
984 	return (mod_remove(&modlinkage));
985 }
986 
987 static int
988 mm_kstat_update(kstat_t *ksp, int rw)
989 {
990 	struct memlist *pmem;
991 	uint_t count;
992 
993 	if (rw == KSTAT_WRITE)
994 		return (EACCES);
995 
996 	count = 0;
997 	memlist_read_lock();
998 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
999 		count++;
1000 	}
1001 	memlist_read_unlock();
1002 
1003 	ksp->ks_ndata = count;
1004 	ksp->ks_data_size = count * 2 * sizeof (uint64_t);
1005 
1006 	return (0);
1007 }
1008 
1009 static int
1010 mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
1011 {
1012 	struct memlist *pmem;
1013 	struct memunit {
1014 		uint64_t address;
1015 		uint64_t size;
1016 	} *kspmem;
1017 
1018 	if (rw == KSTAT_WRITE)
1019 		return (EACCES);
1020 
1021 	ksp->ks_snaptime = gethrtime();
1022 
1023 	kspmem = (struct memunit *)buf;
1024 	memlist_read_lock();
1025 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
1026 		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
1027 			break;
1028 		kspmem->address = pmem->address;
1029 		kspmem->size = pmem->size;
1030 	}
1031 	memlist_read_unlock();
1032 
1033 	return (0);
1034 }
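/*
 * A hedged sketch of a libkstat consumer of the kstat published above
 * (module "mm", instance 0, name "phys_installed"); the consumer's
 * memunit layout mirrors the struct used in mm_kstat_snapshot():
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "mm", 0, "phys_installed");
 *
 *	if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1) {
 *		struct memunit *mu = ksp->ks_data;
 *		for (uint_t i = 0; i < ksp->ks_ndata; i++)
 *			(void) printf("0x%llx 0x%llx\n",
 *			    (u_longlong_t)mu[i].address,
 *			    (u_longlong_t)mu[i].size);
 *	}
 *	(void) kstat_close(kc);
 */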
1035 
1036 /*
1037  * Read a mem_name_t from user-space and store it in the mem_name_t
1038  * pointed to by the mem_name argument.
1039  */
1040 static int
1041 mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
1042 {
1043 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1044 		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
1045 			return (EFAULT);
1046 	}
1047 #ifdef	_SYSCALL32
1048 	else {
1049 		mem_name32_t mem_name32;
1050 
1051 		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
1052 			return (EFAULT);
1053 		mem_name->m_addr = mem_name32.m_addr;
1054 		mem_name->m_synd = mem_name32.m_synd;
1055 		mem_name->m_type[0] = mem_name32.m_type[0];
1056 		mem_name->m_type[1] = mem_name32.m_type[1];
1057 		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
1058 		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
1059 		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
1060 		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
1061 	}
1062 #endif	/* _SYSCALL32 */
1063 
1064 	return (0);
1065 }
1066