xref: /titanic_41/usr/src/uts/common/io/mem.c (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Memory special file
31  */
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/user.h>
36 #include <sys/buf.h>
37 #include <sys/systm.h>
38 #include <sys/cred.h>
39 #include <sys/vm.h>
40 #include <sys/uio.h>
41 #include <sys/mman.h>
42 #include <sys/kmem.h>
43 #include <vm/seg.h>
44 #include <vm/page.h>
45 #include <sys/stat.h>
46 #include <sys/vmem.h>
47 #include <sys/memlist.h>
48 #include <sys/bootconf.h>
49 
50 #include <vm/seg_vn.h>
51 #include <vm/seg_dev.h>
52 #include <vm/seg_kmem.h>
53 #include <vm/seg_kp.h>
54 #include <vm/seg_kpm.h>
55 #include <vm/hat.h>
56 
57 #include <sys/conf.h>
58 #include <sys/mem.h>
59 #include <sys/types.h>
60 #include <sys/conf.h>
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/errno.h>
64 #include <sys/modctl.h>
65 #include <sys/memlist.h>
66 #include <sys/ddi.h>
67 #include <sys/sunddi.h>
68 #include <sys/debug.h>
69 
70 #ifdef __sparc
71 extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
72 extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
73     uint64_t *, int *, int *, int *);
74 extern size_t cpu_get_name_bufsize(void);
75 #endif
76 
/*
 * Turn a byte length into a pagecount.  The DDI btop takes a
 * 32-bit size on 32-bit machines; this handles 64-bit sizes for
 * large physical-memory 32-bit machines.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))

/*
 * mm_lock serializes use of the mm_map window in mmio() and is also
 * installed as the lock of the "phys_installed" kstat in mm_attach().
 * mm_map is one page of kernel virtual address space allocated in
 * mm_attach(); mmio() temporarily maps the target page frame there.
 */
static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

/*
 * Value of the "kmem_io_access" driver property, read in mm_attach().
 * When non-zero, mmrw() lets /dev/kmem touch non-memory page frames.
 */
static int mm_kmem_io_access;

/* kstat callbacks for the "phys_installed" kstat; defined at end of file. */
static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
93 
94 /*ARGSUSED1*/
95 static int
96 mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
97 {
98 	int i;
99 	struct mem_minor {
100 		char *name;
101 		minor_t minor;
102 		int privonly;
103 		const char *rdpriv;
104 		const char *wrpriv;
105 		mode_t priv_mode;
106 	} mm[] = {
107 		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
108 		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
109 		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
110 		{ "null",	M_NULL,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
111 		{ "zero",	M_ZERO, PRIVONLY_DEV,	NULL,	NULL,	0666 },
112 	};
113 	kstat_t *ksp;
114 
115 	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
116 	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
117 
118 	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
119 		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
120 		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
121 		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
122 		    DDI_FAILURE) {
123 			ddi_remove_minor_node(devi, NULL);
124 			return (DDI_FAILURE);
125 		}
126 	}
127 
128 	mm_dip = devi;
129 
130 	ksp = kstat_create("mm", 0, "phys_installed", "misc",
131 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
132 	if (ksp != NULL) {
133 		ksp->ks_update = mm_kstat_update;
134 		ksp->ks_snapshot = mm_kstat_snapshot;
135 		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
136 		kstat_install(ksp);
137 	}
138 
139 	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
140 	    "kmem_io_access", 0);
141 
142 	return (DDI_SUCCESS);
143 }
144 
145 /*ARGSUSED*/
146 static int
147 mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
148 {
149 	register int error;
150 
151 	switch (infocmd) {
152 	case DDI_INFO_DEVT2DEVINFO:
153 		*result = (void *)mm_dip;
154 		error = DDI_SUCCESS;
155 		break;
156 	case DDI_INFO_DEVT2INSTANCE:
157 		*result = (void *)0;
158 		error = DDI_SUCCESS;
159 		break;
160 	default:
161 		error = DDI_FAILURE;
162 	}
163 	return (error);
164 }
165 
166 /*ARGSUSED1*/
167 static int
168 mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
169 {
170 	switch (getminor(*devp)) {
171 	case M_NULL:
172 	case M_ZERO:
173 	case M_MEM:
174 	case M_KMEM:
175 	case M_ALLKMEM:
176 		/* standard devices */
177 		break;
178 
179 	default:
180 		/* Unsupported or unknown type */
181 		return (EINVAL);
182 	}
183 	return (0);
184 }
185 
186 struct pollhead	mm_pollhd;
187 
188 /*ARGSUSED*/
189 static int
190 mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
191     struct pollhead **phpp)
192 {
193 	switch (getminor(dev)) {
194 	case M_NULL:
195 	case M_ZERO:
196 	case M_MEM:
197 	case M_KMEM:
198 	case M_ALLKMEM:
199 		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
200 			POLLWRNORM | POLLRDBAND | POLLWRBAND);
201 		/*
202 		 * A non NULL pollhead pointer should be returned in case
203 		 * user polls for 0 events.
204 		 */
205 		*phpp = !anyyet && !*reventsp ?
206 		    &mm_pollhd : (struct pollhead *)NULL;
207 		return (0);
208 	default:
209 		/* no other devices currently support polling */
210 		return (ENXIO);
211 	}
212 }
213 
/*
 * prop_op entry point.  Forwards the request unchanged to
 * ddi_prop_op_size(), passing a fixed size of 0 as the final argument.
 */
static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * implement zero size to reduce overhead (avoid two failing
	 * property lookups per stat).
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}
225 
226 static int
227 mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
228 {
229 	int error = 0;
230 	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
231 	    (size_t)uio->uio_iov->iov_len);
232 
233 	mutex_enter(&mm_lock);
234 	hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
235 	    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
236 	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
237 
238 	if (!pf_is_memory(pfn)) {
239 		if (allowio) {
240 			size_t c = uio->uio_iov->iov_len;
241 
242 			if (ddi_peekpokeio(NULL, uio, rw,
243 			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
244 			    sizeof (int32_t)) != DDI_SUCCESS)
245 				error = EFAULT;
246 		} else
247 			error = EIO;
248 	} else
249 		error = uiomove(&mm_map[pageoff], nbytes, rw, uio);
250 
251 	hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
252 	mutex_exit(&mm_lock);
253 	return (error);
254 }
255 
/*
 * NEED_LOCK_KVADDR(va) tells mmrw() whether it should try as_pagelock()
 * on a kernel virtual address before translating it.  On sparc the
 * answer is "yes" unless the address lies inside segkpm (when
 * kpm_enable is set) or segkp; on other platforms it is always 0.
 */
#ifdef	__sparc

#define	IS_KPM_VA(va)							\
	(kpm_enable && (va) >= segkpm->s_base &&			\
	(va) < (segkpm->s_base + segkpm->s_size))
#define	IS_KP_VA(va)							\
	((va) >= segkp->s_base && (va) < segkp->s_base + segkp->s_size)
#define	NEED_LOCK_KVADDR(va)	(!IS_KPM_VA(va) && !IS_KP_VA(va))

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */
270 
271 /*ARGSUSED3*/
272 static int
273 mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
274 {
275 	pfn_t v;
276 	struct iovec *iov;
277 	int error = 0;
278 	size_t c;
279 	ssize_t oresid = uio->uio_resid;
280 	minor_t minor = getminor(dev);
281 
282 	while (uio->uio_resid > 0 && error == 0) {
283 		iov = uio->uio_iov;
284 		if (iov->iov_len == 0) {
285 			uio->uio_iov++;
286 			uio->uio_iovcnt--;
287 			if (uio->uio_iovcnt < 0)
288 				panic("mmrw");
289 			continue;
290 		}
291 		switch (minor) {
292 
293 		case M_MEM:
294 			memlist_read_lock();
295 			if (!address_in_memlist(phys_install,
296 			    (uint64_t)uio->uio_loffset, 1)) {
297 				memlist_read_unlock();
298 				error = EFAULT;
299 				break;
300 			}
301 			memlist_read_unlock();
302 
303 			v = BTOP((u_offset_t)uio->uio_loffset);
304 			error = mmio(uio, rw, v,
305 			    uio->uio_loffset & PAGEOFFSET, 0);
306 			break;
307 
308 		case M_KMEM:
309 		case M_ALLKMEM:
310 			{
311 			page_t **ppp;
312 			caddr_t vaddr = (caddr_t)uio->uio_offset;
313 			int try_lock = NEED_LOCK_KVADDR(vaddr);
314 			int locked = 0;
315 
316 			/*
317 			 * If vaddr does not map a valid page, as_pagelock()
318 			 * will return failure. Hence we can't check the
319 			 * return value and return EFAULT here as we'd like.
320 			 * seg_kp and seg_kpm do not properly support
321 			 * as_pagelock() for this context so we avoid it
322 			 * using the try_lock set check above.  Some day when
323 			 * the kernel page locking gets redesigned all this
324 			 * muck can be cleaned up.
325 			 */
326 			if (try_lock)
327 				locked = (as_pagelock(&kas, &ppp, vaddr,
328 				    PAGESIZE, S_WRITE) == 0);
329 
330 			v = hat_getpfnum(kas.a_hat, (caddr_t)uio->uio_loffset);
331 			if (v == PFN_INVALID) {
332 				if (locked)
333 					as_pageunlock(&kas, ppp, vaddr,
334 					    PAGESIZE, S_WRITE);
335 				error = EFAULT;
336 				break;
337 			}
338 
339 			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
340 			    minor == M_ALLKMEM || mm_kmem_io_access);
341 			if (locked)
342 				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
343 				    S_WRITE);
344 			}
345 
346 			break;
347 
348 		case M_ZERO:
349 			if (rw == UIO_READ) {
350 				label_t ljb;
351 
352 				if (on_fault(&ljb)) {
353 					no_fault();
354 					error = EFAULT;
355 					break;
356 				}
357 				uzero(iov->iov_base, iov->iov_len);
358 				no_fault();
359 				uio->uio_resid -= iov->iov_len;
360 				uio->uio_loffset += iov->iov_len;
361 				break;
362 			}
363 			/* else it's a write, fall through to NULL case */
364 			/*FALLTHROUGH*/
365 
366 		case M_NULL:
367 			if (rw == UIO_READ)
368 				return (0);
369 			c = iov->iov_len;
370 			iov->iov_base += c;
371 			iov->iov_len -= c;
372 			uio->uio_loffset += c;
373 			uio->uio_resid -= c;
374 			break;
375 
376 		}
377 	}
378 	return (uio->uio_resid == oresid ? error : 0);
379 }
380 
/*
 * read entry point: runs mmrw() in the UIO_READ direction and returns
 * its result.
 */
static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}
386 
/*
 * write entry point: runs mmrw() in the UIO_WRITE direction and returns
 * its result.
 */
static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}
392 
393 /*
394  * Private ioctl for libkvm to support kvm_physaddr().
395  * Given an address space and a VA, compute the PA.
396  */
397 static int
398 mmioctl_vtop(intptr_t data)
399 {
400 	mem_vtop_t mem_vtop;
401 	proc_t *p;
402 	pfn_t pfn = (pfn_t)PFN_INVALID;
403 	pid_t pid = 0;
404 	struct as *as;
405 	struct seg *seg;
406 
407 	if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
408 		return (EFAULT);
409 	if (mem_vtop.m_as == &kas) {
410 		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
411 	} else if (mem_vtop.m_as == NULL) {
412 		return (EIO);
413 	} else {
414 		mutex_enter(&pidlock);
415 		for (p = practive; p != NULL; p = p->p_next) {
416 			if (p->p_as == mem_vtop.m_as) {
417 				pid = p->p_pid;
418 				break;
419 			}
420 		}
421 		mutex_exit(&pidlock);
422 		if (p == NULL)
423 			return (EIO);
424 		p = sprlock(pid);
425 		if (p == NULL)
426 			return (EIO);
427 		as = p->p_as;
428 		if (as == mem_vtop.m_as) {
429 			mutex_exit(&p->p_lock);
430 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
431 			for (seg = AS_SEGFIRST(as); seg != NULL;
432 			    seg = AS_SEGNEXT(as, seg))
433 				if ((uintptr_t)mem_vtop.m_va -
434 				    (uintptr_t)seg->s_base < seg->s_size)
435 					break;
436 			if (seg != NULL)
437 				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
438 			AS_LOCK_EXIT(as, &as->a_lock);
439 			mutex_enter(&p->p_lock);
440 		}
441 		sprunlock(p);
442 	}
443 	mem_vtop.m_pfn = pfn;
444 	if (pfn == PFN_INVALID)
445 		return (EIO);
446 	if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
447 		return (EFAULT);
448 
449 	return (0);
450 }
451 
452 /*
453  * Given a PA, retire that page or check whether it has already been retired.
454  */
455 static int
456 mmioctl_page_retire(int cmd, intptr_t data)
457 {
458 	uint64_t pa;
459 	pfn_t pfn;
460 	page_t *pp;
461 
462 	if (copyin((void *)data, &pa, sizeof (uint64_t)))
463 		return (EFAULT);
464 
465 	pfn = pa >> MMU_PAGESHIFT;
466 
467 	if (!pf_is_memory(pfn) || (pp = page_numtopp_nolock(pfn)) == NULL)
468 		return (EINVAL);
469 
470 	/*
471 	 * If we're checking, see if the page is retired; if not, confirm that
472 	 * its status is at least set to be failing.  If neither, return EIO.
473 	 */
474 	if (cmd == MEM_PAGE_ISRETIRED) {
475 		if (page_isretired(pp))
476 			return (0);
477 
478 		if (!page_isfailing(pp))
479 			return (EIO);
480 
481 		return (EAGAIN);
482 	}
483 
484 	/*
485 	 * Try to retire the page. If the retire fails, it will be scheduled to
486 	 * occur when the page is freed.  If this page is out of circulation
487 	 * already, or is in the process of being retired, we fail.
488 	 */
489 	if (page_isretired(pp) || page_isfailing(pp))
490 		return (EIO);
491 
492 	page_settoxic(pp, PAGE_IS_FAULTY);
493 	return (page_retire(pp, PAGE_IS_FAILING) ? EAGAIN : 0);
494 }
495 
496 #ifdef __sparc
497 /*
498  * Given a syndrome, syndrome type, and address return the
499  * associated memory name in the provided data buffer.
500  */
501 static int
502 mmioctl_get_mem_name(intptr_t data)
503 {
504 	mem_name_t mem_name;
505 #ifdef	_SYSCALL32
506 	mem_name32_t mem_name32;
507 #endif
508 	void *buf;
509 	size_t bufsize;
510 	int len, err;
511 
512 	if ((bufsize = cpu_get_name_bufsize()) == 0)
513 		return (ENOTSUP);
514 
515 	if (get_udatamodel() == DATAMODEL_NATIVE) {
516 		if (copyin((void *)data, &mem_name, sizeof (mem_name_t)))
517 			return (EFAULT);
518 	}
519 #ifdef	_SYSCALL32
520 	else {
521 		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
522 			return (EFAULT);
523 		mem_name.m_addr = mem_name32.m_addr;
524 		mem_name.m_synd = mem_name32.m_synd;
525 		mem_name.m_type[0] = mem_name32.m_type[0];
526 		mem_name.m_type[1] = mem_name32.m_type[1];
527 		mem_name.m_name = (caddr_t)mem_name32.m_name;
528 		mem_name.m_namelen = (size_t)mem_name32.m_namelen;
529 	}
530 #endif	/* _SYSCALL32 */
531 
532 	buf = kmem_alloc(bufsize, KM_SLEEP);
533 
534 	/*
535 	 * Call into cpu specific code to do the lookup.
536 	 */
537 	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
538 	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
539 		kmem_free(buf, bufsize);
540 		return (err);
541 	}
542 
543 	if (len >= mem_name.m_namelen) {
544 		kmem_free(buf, bufsize);
545 		return (ENAMETOOLONG);
546 	}
547 
548 	if (copyoutstr(buf, (char *)mem_name.m_name,
549 	    mem_name.m_namelen, NULL) != 0) {
550 		kmem_free(buf, bufsize);
551 		return (EFAULT);
552 	}
553 
554 	kmem_free(buf, bufsize);
555 	return (0);
556 }
557 
558 /*
559  * Given a syndrome and address return information about the associated memory.
560  */
561 static int
562 mmioctl_get_mem_info(intptr_t data)
563 {
564 	mem_info_t mem_info;
565 	int err;
566 
567 	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
568 		return (EFAULT);
569 
570 	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
571 	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
572 	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
573 		return (err);
574 
575 	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
576 		return (EFAULT);
577 
578 	return (0);
579 }
580 #endif	/* __sparc */
581 
582 /*
583  * Private ioctls for
584  *	libkvm to support kvm_physaddr().
585  *	FMA support for page_retire() and memory attribute information.
586  */
587 /*ARGSUSED*/
588 static int
589 mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
590 {
591 	switch (cmd) {
592 	case MEM_VTOP:
593 		if (getminor(dev) != M_KMEM)
594 			return (ENXIO);
595 		return (mmioctl_vtop(data));
596 
597 	case MEM_PAGE_RETIRE:
598 	case MEM_PAGE_ISRETIRED:
599 		if (getminor(dev) != M_MEM)
600 			return (ENXIO);
601 		return (mmioctl_page_retire(cmd, data));
602 
603 	case MEM_NAME:
604 		if (getminor(dev) != M_MEM)
605 			return (ENXIO);
606 #ifdef __sparc
607 		return (mmioctl_get_mem_name(data));
608 #else
609 		return (ENOTSUP);
610 #endif
611 
612 	case MEM_INFO:
613 		if (getminor(dev) != M_MEM)
614 			return (ENXIO);
615 #ifdef __sparc
616 		return (mmioctl_get_mem_info(data));
617 #else
618 		return (ENOTSUP);
619 #endif
620 	}
621 	return (ENXIO);
622 }
623 
624 /*ARGSUSED2*/
625 static int
626 mmmmap(dev_t dev, off_t off, int prot)
627 {
628 	pfn_t pf;
629 	struct memlist *pmem;
630 	minor_t minor = getminor(dev);
631 
632 	switch (minor) {
633 	case M_MEM:
634 		pf = btop(off);
635 		memlist_read_lock();
636 		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
637 			if (pf >= BTOP(pmem->address) &&
638 			    pf < BTOP(pmem->address + pmem->size)) {
639 				memlist_read_unlock();
640 				return (impl_obmem_pfnum(pf));
641 			}
642 		}
643 		memlist_read_unlock();
644 		break;
645 
646 	case M_KMEM:
647 	case M_ALLKMEM:
648 		/* no longer supported with KPR */
649 		return (-1);
650 
651 	case M_ZERO:
652 		/*
653 		 * We shouldn't be mmap'ing to /dev/zero here as
654 		 * mmsegmap() should have already converted
655 		 * a mapping request for this device to a mapping
656 		 * using seg_vn for anonymous memory.
657 		 */
658 		break;
659 
660 	}
661 	return (-1);
662 }
663 
664 /*
665  * This function is called when a memory device is mmap'ed.
666  * Set up the mapping to the correct device driver.
667  */
668 static int
669 mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
670     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
671 {
672 	struct segvn_crargs vn_a;
673 	struct segdev_crargs dev_a;
674 	int error;
675 	minor_t minor;
676 	off_t i;
677 
678 	minor = getminor(dev);
679 
680 	as_rangelock(as);
681 	if ((flags & MAP_FIXED) == 0) {
682 		/*
683 		 * No need to worry about vac alignment on /dev/zero
684 		 * since this is a "clone" object that doesn't yet exist.
685 		 */
686 		map_addr(addrp, len, (offset_t)off,
687 				(minor == M_MEM) || (minor == M_KMEM), flags);
688 
689 		if (*addrp == NULL) {
690 			as_rangeunlock(as);
691 			return (ENOMEM);
692 		}
693 	} else {
694 		/*
695 		 * User specified address -
696 		 * Blow away any previous mappings.
697 		 */
698 		(void) as_unmap(as, *addrp, len);
699 	}
700 
701 	switch (minor) {
702 	case M_MEM:
703 		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
704 		if ((flags & MAP_TYPE) != MAP_SHARED) {
705 			as_rangeunlock(as);
706 			return (EINVAL);
707 		}
708 
709 		/*
710 		 * Check to ensure that the entire range is
711 		 * legal and we are not trying to map in
712 		 * more than the device will let us.
713 		 */
714 		for (i = 0; i < len; i += PAGESIZE) {
715 			if (mmmmap(dev, off + i, maxprot) == -1) {
716 				as_rangeunlock(as);
717 				return (ENXIO);
718 			}
719 		}
720 
721 		/*
722 		 * Use seg_dev segment driver for /dev/mem mapping.
723 		 */
724 		dev_a.mapfunc = mmmmap;
725 		dev_a.dev = dev;
726 		dev_a.offset = off;
727 		dev_a.type = (flags & MAP_TYPE);
728 		dev_a.prot = (uchar_t)prot;
729 		dev_a.maxprot = (uchar_t)maxprot;
730 		dev_a.hat_attr = 0;
731 
732 		/*
733 		 * Make /dev/mem mappings non-consistent since we can't
734 		 * alias pages that don't have page structs behind them,
735 		 * such as kernel stack pages. If someone mmap()s a kernel
736 		 * stack page and if we give him a tte with cv, a line from
737 		 * that page can get into both pages of the spitfire d$.
738 		 * But snoop from another processor will only invalidate
739 		 * the first page. This later caused kernel (xc_attention)
740 		 * to go into an infinite loop at pil 13 and no interrupts
741 		 * could come in. See 1203630.
742 		 *
743 		 */
744 		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
745 		dev_a.devmap_data = NULL;
746 
747 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
748 		break;
749 
750 	case M_ZERO:
751 		/*
752 		 * Use seg_vn segment driver for /dev/zero mapping.
753 		 * Passing in a NULL amp gives us the "cloning" effect.
754 		 */
755 		vn_a.vp = NULL;
756 		vn_a.offset = 0;
757 		vn_a.type = (flags & MAP_TYPE);
758 		vn_a.prot = prot;
759 		vn_a.maxprot = maxprot;
760 		vn_a.flags = flags & ~MAP_TYPE;
761 		vn_a.cred = cred;
762 		vn_a.amp = NULL;
763 		vn_a.szc = 0;
764 		vn_a.lgrp_mem_policy_flags = 0;
765 		error = as_map(as, *addrp, len, segvn_create, &vn_a);
766 		break;
767 
768 	case M_KMEM:
769 	case M_ALLKMEM:
770 		/* No longer supported with KPR. */
771 		error = ENXIO;
772 		break;
773 
774 	case M_NULL:
775 		/*
776 		 * Use seg_dev segment driver for /dev/null mapping.
777 		 */
778 		dev_a.mapfunc = mmmmap;
779 		dev_a.dev = dev;
780 		dev_a.offset = off;
781 		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
782 		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
783 		dev_a.hat_attr = 0;
784 		dev_a.hat_flags = 0;
785 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
786 		break;
787 
788 	default:
789 		error = ENXIO;
790 	}
791 
792 	as_rangeunlock(as);
793 	return (error);
794 }
795 
/*
 * Character/block entry points for the memory driver.  Operations the
 * driver does not implement are filled with nodev; close is nulldev.
 */
static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab  */
	/* driver flags (presumably the cb_flag member; confirm in cb_ops) */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};
813 
/*
 * Device operations for the memory driver.  Only getinfo and attach
 * are implemented; detach and reset are nodev.
 */
static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0	/* bus operations */
};
826 
/*
 * Loadable-module linkage: modldrv ties mm_ops to the driver module
 * type, and modlinkage is what _init(), _info() and _fini() pass to
 * the module framework.
 */
static struct modldrv modldrv = {
	&mod_driverops, "memory driver %I%", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};
834 
/*
 * Module load entry point: passes modlinkage to mod_install() and
 * returns its result.
 */
int
_init(void)
{
	return (mod_install(&modlinkage));
}
840 
/*
 * Module information entry point: passes modlinkage and the caller's
 * modinfo buffer to mod_info() and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
846 
/*
 * Module unload entry point: passes modlinkage to mod_remove() and
 * returns its result.
 */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
852 
853 static int
854 mm_kstat_update(kstat_t *ksp, int rw)
855 {
856 	struct memlist *pmem;
857 	uint_t count;
858 
859 	if (rw == KSTAT_WRITE)
860 		return (EACCES);
861 
862 	count = 0;
863 	memlist_read_lock();
864 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
865 		count++;
866 	}
867 	memlist_read_unlock();
868 
869 	ksp->ks_ndata = count;
870 	ksp->ks_data_size = count * 2 * sizeof (uint64_t);
871 
872 	return (0);
873 }
874 
875 static int
876 mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
877 {
878 	struct memlist *pmem;
879 	struct memunit {
880 		uint64_t address;
881 		uint64_t size;
882 	} *kspmem;
883 
884 	if (rw == KSTAT_WRITE)
885 		return (EACCES);
886 
887 	ksp->ks_snaptime = gethrtime();
888 
889 	kspmem = (struct memunit *)buf;
890 	memlist_read_lock();
891 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
892 		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
893 			break;
894 		kspmem->address = pmem->address;
895 		kspmem->size = pmem->size;
896 	}
897 	memlist_read_unlock();
898 
899 	return (0);
900 }
901