xref: /titanic_50/usr/src/uts/common/io/mem.c (revision 4b908718db419b27633010608bf691c20684c0e2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Memory special file
31  */
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/user.h>
36 #include <sys/buf.h>
37 #include <sys/systm.h>
38 #include <sys/cred.h>
39 #include <sys/vm.h>
40 #include <sys/uio.h>
41 #include <sys/mman.h>
42 #include <sys/kmem.h>
43 #include <vm/seg.h>
44 #include <vm/page.h>
45 #include <sys/stat.h>
46 #include <sys/vmem.h>
47 #include <sys/memlist.h>
48 #include <sys/bootconf.h>
49 
50 #include <vm/seg_vn.h>
51 #include <vm/seg_dev.h>
52 #include <vm/seg_kmem.h>
53 #include <vm/seg_kp.h>
54 #include <vm/seg_kpm.h>
55 #include <vm/hat.h>
56 
57 #include <sys/conf.h>
58 #include <sys/mem.h>
59 #include <sys/types.h>
60 #include <sys/conf.h>
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/errno.h>
64 #include <sys/modctl.h>
65 #include <sys/memlist.h>
66 #include <sys/ddi.h>
67 #include <sys/sunddi.h>
68 #include <sys/debug.h>
69 
#ifdef __sparc
/*
 * CPU-specific lookup routines defined outside this file.  They are called
 * by mmioctl_get_mem_name() and mmioctl_get_mem_info() below.
 */
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
#endif

/*
 * Turn a byte length into a page count.  The DDI btop takes a
 * 32-bit size on 32-bit machines; this macro handles 64-bit sizes for
 * large physical-memory 32-bit machines.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))

/*
 * mm_lock is held by mmio() around each use of mm_map and is also installed
 * as the lock of the "phys_installed" kstat.  mm_map is one page of kernel
 * virtual address space allocated in mm_attach().
 */
static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

/* Value of the "kmem_io_access" property, read once in mm_attach(). */
static int mm_kmem_io_access;

/* kstat callbacks, defined at the end of this file. */
static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
93 
94 /*ARGSUSED1*/
95 static int
96 mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
97 {
98 	int i;
99 	struct mem_minor {
100 		char *name;
101 		minor_t minor;
102 		int privonly;
103 		const char *rdpriv;
104 		const char *wrpriv;
105 		mode_t priv_mode;
106 	} mm[] = {
107 		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
108 		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
109 		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
110 		{ "null",	M_NULL,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
111 		{ "zero",	M_ZERO, PRIVONLY_DEV,	NULL,	NULL,	0666 },
112 	};
113 	kstat_t *ksp;
114 
115 	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
116 	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
117 
118 	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
119 		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
120 		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
121 		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
122 		    DDI_FAILURE) {
123 			ddi_remove_minor_node(devi, NULL);
124 			return (DDI_FAILURE);
125 		}
126 	}
127 
128 	mm_dip = devi;
129 
130 	ksp = kstat_create("mm", 0, "phys_installed", "misc",
131 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
132 	if (ksp != NULL) {
133 		ksp->ks_update = mm_kstat_update;
134 		ksp->ks_snapshot = mm_kstat_snapshot;
135 		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
136 		kstat_install(ksp);
137 	}
138 
139 	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
140 	    "kmem_io_access", 0);
141 
142 	return (DDI_SUCCESS);
143 }
144 
145 /*ARGSUSED*/
146 static int
147 mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
148 {
149 	register int error;
150 
151 	switch (infocmd) {
152 	case DDI_INFO_DEVT2DEVINFO:
153 		*result = (void *)mm_dip;
154 		error = DDI_SUCCESS;
155 		break;
156 	case DDI_INFO_DEVT2INSTANCE:
157 		*result = (void *)0;
158 		error = DDI_SUCCESS;
159 		break;
160 	default:
161 		error = DDI_FAILURE;
162 	}
163 	return (error);
164 }
165 
166 /*ARGSUSED1*/
167 static int
168 mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
169 {
170 	switch (getminor(*devp)) {
171 	case M_NULL:
172 	case M_ZERO:
173 	case M_MEM:
174 	case M_KMEM:
175 	case M_ALLKMEM:
176 		/* standard devices */
177 		break;
178 
179 	default:
180 		/* Unsupported or unknown type */
181 		return (EINVAL);
182 	}
183 	return (0);
184 }
185 
186 struct pollhead	mm_pollhd;
187 
188 /*ARGSUSED*/
189 static int
190 mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
191     struct pollhead **phpp)
192 {
193 	switch (getminor(dev)) {
194 	case M_NULL:
195 	case M_ZERO:
196 	case M_MEM:
197 	case M_KMEM:
198 	case M_ALLKMEM:
199 		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
200 			POLLWRNORM | POLLRDBAND | POLLWRBAND);
201 		/*
202 		 * A non NULL pollhead pointer should be returned in case
203 		 * user polls for 0 events.
204 		 */
205 		*phpp = !anyyet && !*reventsp ?
206 		    &mm_pollhd : (struct pollhead *)NULL;
207 		return (0);
208 	default:
209 		/* no other devices currently support polling */
210 		return (ENXIO);
211 	}
212 }
213 
214 static int
215 mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
216     char *name, caddr_t valuep, int *lengthp)
217 {
218 	/*
219 	 * implement zero size to reduce overhead (avoid two failing
220 	 * property lookups per stat).
221 	 */
222 	return (ddi_prop_op_size(dev, dip, prop_op,
223 	    flags, name, valuep, lengthp, 0));
224 }
225 
226 static int
227 mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
228 {
229 	int error = 0;
230 	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
231 	    (size_t)uio->uio_iov->iov_len);
232 
233 	mutex_enter(&mm_lock);
234 	hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
235 	    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
236 	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
237 
238 	if (!pf_is_memory(pfn)) {
239 		if (allowio) {
240 			size_t c = uio->uio_iov->iov_len;
241 
242 			if (ddi_peekpokeio(NULL, uio, rw,
243 			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
244 			    sizeof (int32_t)) != DDI_SUCCESS)
245 				error = EFAULT;
246 		} else
247 			error = EIO;
248 	} else
249 		error = uiomove(&mm_map[pageoff], nbytes, rw, uio);
250 
251 	hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
252 	mutex_exit(&mm_lock);
253 	return (error);
254 }
255 
/*
 * NEED_LOCK_KVADDR(va) tells mmrw() whether it should try as_pagelock() on
 * a kernel virtual address before accessing it.  On sparc that is the case
 * only for addresses lying in neither segkpm (when kpm_enable is set) nor
 * segkp; on the other platforms it is never the case.
 */
#ifdef	__sparc

#define	IS_KPM_VA(va)							\
	(kpm_enable && (va) >= segkpm->s_base &&			\
	(va) < (segkpm->s_base + segkpm->s_size))
#define	IS_KP_VA(va)							\
	((va) >= segkp->s_base && (va) < segkp->s_base + segkp->s_size)
#define	NEED_LOCK_KVADDR(va)	(!IS_KPM_VA(va) && !IS_KP_VA(va))

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */
270 
271 /*ARGSUSED3*/
272 static int
273 mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
274 {
275 	pfn_t v;
276 	struct iovec *iov;
277 	int error = 0;
278 	size_t c;
279 	ssize_t oresid = uio->uio_resid;
280 	minor_t minor = getminor(dev);
281 
282 	while (uio->uio_resid > 0 && error == 0) {
283 		iov = uio->uio_iov;
284 		if (iov->iov_len == 0) {
285 			uio->uio_iov++;
286 			uio->uio_iovcnt--;
287 			if (uio->uio_iovcnt < 0)
288 				panic("mmrw");
289 			continue;
290 		}
291 		switch (minor) {
292 
293 		case M_MEM:
294 			memlist_read_lock();
295 			if (!address_in_memlist(phys_install,
296 			    (uint64_t)uio->uio_loffset, 1)) {
297 				memlist_read_unlock();
298 				error = EFAULT;
299 				break;
300 			}
301 			memlist_read_unlock();
302 
303 			v = BTOP((u_offset_t)uio->uio_loffset);
304 			error = mmio(uio, rw, v,
305 			    uio->uio_loffset & PAGEOFFSET, 0);
306 			break;
307 
308 		case M_KMEM:
309 		case M_ALLKMEM:
310 			{
311 			page_t **ppp;
312 			caddr_t vaddr = (caddr_t)uio->uio_offset;
313 			int try_lock = NEED_LOCK_KVADDR(vaddr);
314 			int locked = 0;
315 
316 			/*
317 			 * If vaddr does not map a valid page, as_pagelock()
318 			 * will return failure. Hence we can't check the
319 			 * return value and return EFAULT here as we'd like.
320 			 * seg_kp and seg_kpm do not properly support
321 			 * as_pagelock() for this context so we avoid it
322 			 * using the try_lock set check above.  Some day when
323 			 * the kernel page locking gets redesigned all this
324 			 * muck can be cleaned up.
325 			 */
326 			if (try_lock)
327 				locked = (as_pagelock(&kas, &ppp, vaddr,
328 				    PAGESIZE, S_WRITE) == 0);
329 
330 			v = hat_getpfnum(kas.a_hat,
331 			    (caddr_t)(uintptr_t)uio->uio_loffset);
332 			if (v == PFN_INVALID) {
333 				if (locked)
334 					as_pageunlock(&kas, ppp, vaddr,
335 					    PAGESIZE, S_WRITE);
336 				error = EFAULT;
337 				break;
338 			}
339 
340 			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
341 			    minor == M_ALLKMEM || mm_kmem_io_access);
342 			if (locked)
343 				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
344 				    S_WRITE);
345 			}
346 
347 			break;
348 
349 		case M_ZERO:
350 			if (rw == UIO_READ) {
351 				label_t ljb;
352 
353 				if (on_fault(&ljb)) {
354 					no_fault();
355 					error = EFAULT;
356 					break;
357 				}
358 				uzero(iov->iov_base, iov->iov_len);
359 				no_fault();
360 				uio->uio_resid -= iov->iov_len;
361 				uio->uio_loffset += iov->iov_len;
362 				break;
363 			}
364 			/* else it's a write, fall through to NULL case */
365 			/*FALLTHROUGH*/
366 
367 		case M_NULL:
368 			if (rw == UIO_READ)
369 				return (0);
370 			c = iov->iov_len;
371 			iov->iov_base += c;
372 			iov->iov_len -= c;
373 			uio->uio_loffset += c;
374 			uio->uio_resid -= c;
375 			break;
376 
377 		}
378 	}
379 	return (uio->uio_resid == oresid ? error : 0);
380 }
381 
382 static int
383 mmread(dev_t dev, struct uio *uio, cred_t *cred)
384 {
385 	return (mmrw(dev, uio, UIO_READ, cred));
386 }
387 
388 static int
389 mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
390 {
391 	return (mmrw(dev, uio, UIO_WRITE, cred));
392 }
393 
394 /*
395  * Private ioctl for libkvm to support kvm_physaddr().
396  * Given an address space and a VA, compute the PA.
397  */
398 static int
399 mmioctl_vtop(intptr_t data)
400 {
401 	mem_vtop_t mem_vtop;
402 	proc_t *p;
403 	pfn_t pfn = (pfn_t)PFN_INVALID;
404 	pid_t pid = 0;
405 	struct as *as;
406 	struct seg *seg;
407 
408 	if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
409 		return (EFAULT);
410 	if (mem_vtop.m_as == &kas) {
411 		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
412 	} else if (mem_vtop.m_as == NULL) {
413 		return (EIO);
414 	} else {
415 		mutex_enter(&pidlock);
416 		for (p = practive; p != NULL; p = p->p_next) {
417 			if (p->p_as == mem_vtop.m_as) {
418 				pid = p->p_pid;
419 				break;
420 			}
421 		}
422 		mutex_exit(&pidlock);
423 		if (p == NULL)
424 			return (EIO);
425 		p = sprlock(pid);
426 		if (p == NULL)
427 			return (EIO);
428 		as = p->p_as;
429 		if (as == mem_vtop.m_as) {
430 			mutex_exit(&p->p_lock);
431 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
432 			for (seg = AS_SEGFIRST(as); seg != NULL;
433 			    seg = AS_SEGNEXT(as, seg))
434 				if ((uintptr_t)mem_vtop.m_va -
435 				    (uintptr_t)seg->s_base < seg->s_size)
436 					break;
437 			if (seg != NULL)
438 				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
439 			AS_LOCK_EXIT(as, &as->a_lock);
440 			mutex_enter(&p->p_lock);
441 		}
442 		sprunlock(p);
443 	}
444 	mem_vtop.m_pfn = pfn;
445 	if (pfn == PFN_INVALID)
446 		return (EIO);
447 	if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
448 		return (EFAULT);
449 
450 	return (0);
451 }
452 
453 /*
454  * Given a PA, retire that page or check whether it has already been retired.
455  */
456 static int
457 mmioctl_page_retire(int cmd, intptr_t data)
458 {
459 	uint64_t pa;
460 	pfn_t pfn;
461 	page_t *pp;
462 
463 	if (copyin((void *)data, &pa, sizeof (uint64_t)))
464 		return (EFAULT);
465 
466 	pfn = pa >> MMU_PAGESHIFT;
467 
468 	if (!pf_is_memory(pfn) || (pp = page_numtopp_nolock(pfn)) == NULL)
469 		return (EINVAL);
470 
471 	/*
472 	 * If we're checking, see if the page is retired; if not, confirm that
473 	 * its status is at least set to be failing.  If neither, return EIO.
474 	 */
475 	if (cmd == MEM_PAGE_ISRETIRED) {
476 		if (page_isretired(pp))
477 			return (0);
478 
479 		if (!page_isfailing(pp))
480 			return (EIO);
481 
482 		return (EAGAIN);
483 	}
484 
485 	/*
486 	 * Try to retire the page. If the retire fails, it will be scheduled to
487 	 * occur when the page is freed.  If this page is out of circulation
488 	 * already, or is in the process of being retired, we fail.
489 	 */
490 	if (page_isretired(pp) || page_isfailing(pp))
491 		return (EIO);
492 
493 	page_settoxic(pp, PAGE_IS_FAULTY);
494 	return (page_retire(pp, PAGE_IS_FAILING) ? EAGAIN : 0);
495 }
496 
497 #ifdef __sparc
498 /*
499  * Given a syndrome, syndrome type, and address return the
500  * associated memory name in the provided data buffer.
501  */
502 static int
503 mmioctl_get_mem_name(intptr_t data)
504 {
505 	mem_name_t mem_name;
506 #ifdef	_SYSCALL32
507 	mem_name32_t mem_name32;
508 #endif
509 	void *buf;
510 	size_t bufsize;
511 	int len, err;
512 
513 	if ((bufsize = cpu_get_name_bufsize()) == 0)
514 		return (ENOTSUP);
515 
516 	if (get_udatamodel() == DATAMODEL_NATIVE) {
517 		if (copyin((void *)data, &mem_name, sizeof (mem_name_t)))
518 			return (EFAULT);
519 	}
520 #ifdef	_SYSCALL32
521 	else {
522 		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
523 			return (EFAULT);
524 		mem_name.m_addr = mem_name32.m_addr;
525 		mem_name.m_synd = mem_name32.m_synd;
526 		mem_name.m_type[0] = mem_name32.m_type[0];
527 		mem_name.m_type[1] = mem_name32.m_type[1];
528 		mem_name.m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
529 		mem_name.m_namelen = (size_t)mem_name32.m_namelen;
530 	}
531 #endif	/* _SYSCALL32 */
532 
533 	buf = kmem_alloc(bufsize, KM_SLEEP);
534 
535 	/*
536 	 * Call into cpu specific code to do the lookup.
537 	 */
538 	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
539 	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
540 		kmem_free(buf, bufsize);
541 		return (err);
542 	}
543 
544 	if (len >= mem_name.m_namelen) {
545 		kmem_free(buf, bufsize);
546 		return (ENAMETOOLONG);
547 	}
548 
549 	if (copyoutstr(buf, (char *)mem_name.m_name,
550 	    mem_name.m_namelen, NULL) != 0) {
551 		kmem_free(buf, bufsize);
552 		return (EFAULT);
553 	}
554 
555 	kmem_free(buf, bufsize);
556 	return (0);
557 }
558 
559 /*
560  * Given a syndrome and address return information about the associated memory.
561  */
562 static int
563 mmioctl_get_mem_info(intptr_t data)
564 {
565 	mem_info_t mem_info;
566 	int err;
567 
568 	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
569 		return (EFAULT);
570 
571 	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
572 	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
573 	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
574 		return (err);
575 
576 	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
577 		return (EFAULT);
578 
579 	return (0);
580 }
581 #endif	/* __sparc */
582 
583 /*
584  * Private ioctls for
585  *	libkvm to support kvm_physaddr().
586  *	FMA support for page_retire() and memory attribute information.
587  */
588 /*ARGSUSED*/
589 static int
590 mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
591 {
592 	switch (cmd) {
593 	case MEM_VTOP:
594 		if (getminor(dev) != M_KMEM)
595 			return (ENXIO);
596 		return (mmioctl_vtop(data));
597 
598 	case MEM_PAGE_RETIRE:
599 	case MEM_PAGE_ISRETIRED:
600 		if (getminor(dev) != M_MEM)
601 			return (ENXIO);
602 		return (mmioctl_page_retire(cmd, data));
603 
604 	case MEM_NAME:
605 		if (getminor(dev) != M_MEM)
606 			return (ENXIO);
607 #ifdef __sparc
608 		return (mmioctl_get_mem_name(data));
609 #else
610 		return (ENOTSUP);
611 #endif
612 
613 	case MEM_INFO:
614 		if (getminor(dev) != M_MEM)
615 			return (ENXIO);
616 #ifdef __sparc
617 		return (mmioctl_get_mem_info(data));
618 #else
619 		return (ENOTSUP);
620 #endif
621 	}
622 	return (ENXIO);
623 }
624 
625 /*ARGSUSED2*/
626 static int
627 mmmmap(dev_t dev, off_t off, int prot)
628 {
629 	pfn_t pf;
630 	struct memlist *pmem;
631 	minor_t minor = getminor(dev);
632 
633 	switch (minor) {
634 	case M_MEM:
635 		pf = btop(off);
636 		memlist_read_lock();
637 		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
638 			if (pf >= BTOP(pmem->address) &&
639 			    pf < BTOP(pmem->address + pmem->size)) {
640 				memlist_read_unlock();
641 				return (impl_obmem_pfnum(pf));
642 			}
643 		}
644 		memlist_read_unlock();
645 		break;
646 
647 	case M_KMEM:
648 	case M_ALLKMEM:
649 		/* no longer supported with KPR */
650 		return (-1);
651 
652 	case M_ZERO:
653 		/*
654 		 * We shouldn't be mmap'ing to /dev/zero here as
655 		 * mmsegmap() should have already converted
656 		 * a mapping request for this device to a mapping
657 		 * using seg_vn for anonymous memory.
658 		 */
659 		break;
660 
661 	}
662 	return (-1);
663 }
664 
665 /*
666  * This function is called when a memory device is mmap'ed.
667  * Set up the mapping to the correct device driver.
668  */
669 static int
670 mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
671     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
672 {
673 	struct segvn_crargs vn_a;
674 	struct segdev_crargs dev_a;
675 	int error;
676 	minor_t minor;
677 	off_t i;
678 
679 	minor = getminor(dev);
680 
681 	as_rangelock(as);
682 	if ((flags & MAP_FIXED) == 0) {
683 		/*
684 		 * No need to worry about vac alignment on /dev/zero
685 		 * since this is a "clone" object that doesn't yet exist.
686 		 */
687 		map_addr(addrp, len, (offset_t)off,
688 				(minor == M_MEM) || (minor == M_KMEM), flags);
689 
690 		if (*addrp == NULL) {
691 			as_rangeunlock(as);
692 			return (ENOMEM);
693 		}
694 	} else {
695 		/*
696 		 * User specified address -
697 		 * Blow away any previous mappings.
698 		 */
699 		(void) as_unmap(as, *addrp, len);
700 	}
701 
702 	switch (minor) {
703 	case M_MEM:
704 		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
705 		if ((flags & MAP_TYPE) != MAP_SHARED) {
706 			as_rangeunlock(as);
707 			return (EINVAL);
708 		}
709 
710 		/*
711 		 * Check to ensure that the entire range is
712 		 * legal and we are not trying to map in
713 		 * more than the device will let us.
714 		 */
715 		for (i = 0; i < len; i += PAGESIZE) {
716 			if (mmmmap(dev, off + i, maxprot) == -1) {
717 				as_rangeunlock(as);
718 				return (ENXIO);
719 			}
720 		}
721 
722 		/*
723 		 * Use seg_dev segment driver for /dev/mem mapping.
724 		 */
725 		dev_a.mapfunc = mmmmap;
726 		dev_a.dev = dev;
727 		dev_a.offset = off;
728 		dev_a.type = (flags & MAP_TYPE);
729 		dev_a.prot = (uchar_t)prot;
730 		dev_a.maxprot = (uchar_t)maxprot;
731 		dev_a.hat_attr = 0;
732 
733 		/*
734 		 * Make /dev/mem mappings non-consistent since we can't
735 		 * alias pages that don't have page structs behind them,
736 		 * such as kernel stack pages. If someone mmap()s a kernel
737 		 * stack page and if we give him a tte with cv, a line from
738 		 * that page can get into both pages of the spitfire d$.
739 		 * But snoop from another processor will only invalidate
740 		 * the first page. This later caused kernel (xc_attention)
741 		 * to go into an infinite loop at pil 13 and no interrupts
742 		 * could come in. See 1203630.
743 		 *
744 		 */
745 		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
746 		dev_a.devmap_data = NULL;
747 
748 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
749 		break;
750 
751 	case M_ZERO:
752 		/*
753 		 * Use seg_vn segment driver for /dev/zero mapping.
754 		 * Passing in a NULL amp gives us the "cloning" effect.
755 		 */
756 		vn_a.vp = NULL;
757 		vn_a.offset = 0;
758 		vn_a.type = (flags & MAP_TYPE);
759 		vn_a.prot = prot;
760 		vn_a.maxprot = maxprot;
761 		vn_a.flags = flags & ~MAP_TYPE;
762 		vn_a.cred = cred;
763 		vn_a.amp = NULL;
764 		vn_a.szc = 0;
765 		vn_a.lgrp_mem_policy_flags = 0;
766 		error = as_map(as, *addrp, len, segvn_create, &vn_a);
767 		break;
768 
769 	case M_KMEM:
770 	case M_ALLKMEM:
771 		/* No longer supported with KPR. */
772 		error = ENXIO;
773 		break;
774 
775 	case M_NULL:
776 		/*
777 		 * Use seg_dev segment driver for /dev/null mapping.
778 		 */
779 		dev_a.mapfunc = mmmmap;
780 		dev_a.dev = dev;
781 		dev_a.offset = off;
782 		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
783 		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
784 		dev_a.hat_attr = 0;
785 		dev_a.hat_flags = 0;
786 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
787 		break;
788 
789 	default:
790 		error = ENXIO;
791 	}
792 
793 	as_rangeunlock(as);
794 	return (error);
795 }
796 
/*
 * Character/block entry points for the driver.  Entry points the driver
 * does not implement are filled with nodev, and close with nulldev.
 */
static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};
814 
/*
 * Device operations for the driver.  Only getinfo and attach have real
 * implementations; detach and reset are nodev.
 */
static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0	/* bus operations */
};
827 
/*
 * Module linkage: a single driver module described by modldrv, which ties
 * mod_driverops and the description string to mm_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops, "memory driver %I%", &mm_ops,
};

/* Passed to mod_install(), mod_info() and mod_remove() below. */
static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};
835 
836 int
837 _init(void)
838 {
839 	return (mod_install(&modlinkage));
840 }
841 
842 int
843 _info(struct modinfo *modinfop)
844 {
845 	return (mod_info(&modlinkage, modinfop));
846 }
847 
848 int
849 _fini(void)
850 {
851 	return (mod_remove(&modlinkage));
852 }
853 
854 static int
855 mm_kstat_update(kstat_t *ksp, int rw)
856 {
857 	struct memlist *pmem;
858 	uint_t count;
859 
860 	if (rw == KSTAT_WRITE)
861 		return (EACCES);
862 
863 	count = 0;
864 	memlist_read_lock();
865 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
866 		count++;
867 	}
868 	memlist_read_unlock();
869 
870 	ksp->ks_ndata = count;
871 	ksp->ks_data_size = count * 2 * sizeof (uint64_t);
872 
873 	return (0);
874 }
875 
876 static int
877 mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
878 {
879 	struct memlist *pmem;
880 	struct memunit {
881 		uint64_t address;
882 		uint64_t size;
883 	} *kspmem;
884 
885 	if (rw == KSTAT_WRITE)
886 		return (EACCES);
887 
888 	ksp->ks_snaptime = gethrtime();
889 
890 	kspmem = (struct memunit *)buf;
891 	memlist_read_lock();
892 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
893 		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
894 			break;
895 		kspmem->address = pmem->address;
896 		kspmem->size = pmem->size;
897 	}
898 	memlist_read_unlock();
899 
900 	return (0);
901 }
902