xref: /titanic_52/usr/src/uts/common/io/mem.c (revision 505d05c73a6e56769f263d4803b22eddd168ee24)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Memory special file
31  */
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/user.h>
36 #include <sys/buf.h>
37 #include <sys/systm.h>
38 #include <sys/cred.h>
39 #include <sys/vm.h>
40 #include <sys/uio.h>
41 #include <sys/mman.h>
42 #include <sys/kmem.h>
43 #include <vm/seg.h>
44 #include <vm/page.h>
45 #include <sys/stat.h>
46 #include <sys/vmem.h>
47 #include <sys/memlist.h>
48 #include <sys/bootconf.h>
49 
50 #include <vm/seg_vn.h>
51 #include <vm/seg_dev.h>
52 #include <vm/seg_kmem.h>
53 #include <vm/seg_kp.h>
54 #include <vm/seg_kpm.h>
55 #include <vm/hat.h>
56 
57 #include <sys/conf.h>
58 #include <sys/mem.h>
59 #include <sys/types.h>
60 #include <sys/conf.h>
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/errno.h>
64 #include <sys/modctl.h>
65 #include <sys/memlist.h>
66 #include <sys/ddi.h>
67 #include <sys/sunddi.h>
68 #include <sys/debug.h>
69 
70 #ifdef __sparc
71 extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
72 extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
73     uint64_t *, int *, int *, int *);
74 extern size_t cpu_get_name_bufsize(void);
75 #endif
76 
77 /*
78  * Turn a byte length into a pagecount.  The DDI btop takes a
79  * 32-bit size on 32-bit machines, this handles 64-bit sizes for
80  * large physical-memory 32-bit machines.
81  */
82 #define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))
83 
84 static kmutex_t mm_lock;
85 static caddr_t mm_map;
86 
87 static dev_info_t *mm_dip;	/* private copy of devinfo pointer */
88 
89 static int mm_kmem_io_access;
90 
91 static int mm_kstat_update(kstat_t *ksp, int rw);
92 static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
93 
94 /*ARGSUSED1*/
95 static int
96 mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
97 {
98 	int i;
99 	struct mem_minor {
100 		char *name;
101 		minor_t minor;
102 		int privonly;
103 		const char *rdpriv;
104 		const char *wrpriv;
105 		mode_t priv_mode;
106 	} mm[] = {
107 		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
108 		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
109 		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
110 		{ "null",	M_NULL,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
111 		{ "zero",	M_ZERO, PRIVONLY_DEV,	NULL,	NULL,	0666 },
112 	};
113 	kstat_t *ksp;
114 
115 	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
116 	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
117 
118 	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
119 		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
120 		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
121 		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
122 		    DDI_FAILURE) {
123 			ddi_remove_minor_node(devi, NULL);
124 			return (DDI_FAILURE);
125 		}
126 	}
127 
128 	mm_dip = devi;
129 
130 	ksp = kstat_create("mm", 0, "phys_installed", "misc",
131 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
132 	if (ksp != NULL) {
133 		ksp->ks_update = mm_kstat_update;
134 		ksp->ks_snapshot = mm_kstat_snapshot;
135 		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
136 		kstat_install(ksp);
137 	}
138 
139 	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
140 	    "kmem_io_access", 0);
141 
142 	return (DDI_SUCCESS);
143 }
144 
145 /*ARGSUSED*/
146 static int
147 mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
148 {
149 	register int error;
150 
151 	switch (infocmd) {
152 	case DDI_INFO_DEVT2DEVINFO:
153 		*result = (void *)mm_dip;
154 		error = DDI_SUCCESS;
155 		break;
156 	case DDI_INFO_DEVT2INSTANCE:
157 		*result = (void *)0;
158 		error = DDI_SUCCESS;
159 		break;
160 	default:
161 		error = DDI_FAILURE;
162 	}
163 	return (error);
164 }
165 
166 /*ARGSUSED1*/
167 static int
168 mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
169 {
170 	switch (getminor(*devp)) {
171 	case M_NULL:
172 	case M_ZERO:
173 	case M_MEM:
174 	case M_KMEM:
175 	case M_ALLKMEM:
176 		/* standard devices */
177 		break;
178 
179 	default:
180 		/* Unsupported or unknown type */
181 		return (EINVAL);
182 	}
183 	return (0);
184 }
185 
186 struct pollhead	mm_pollhd;
187 
188 /*ARGSUSED*/
189 static int
190 mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
191     struct pollhead **phpp)
192 {
193 	switch (getminor(dev)) {
194 	case M_NULL:
195 	case M_ZERO:
196 	case M_MEM:
197 	case M_KMEM:
198 	case M_ALLKMEM:
199 		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
200 			POLLWRNORM | POLLRDBAND | POLLWRBAND);
201 		/*
202 		 * A non NULL pollhead pointer should be returned in case
203 		 * user polls for 0 events.
204 		 */
205 		*phpp = !anyyet && !*reventsp ?
206 		    &mm_pollhd : (struct pollhead *)NULL;
207 		return (0);
208 	default:
209 		/* no other devices currently support polling */
210 		return (ENXIO);
211 	}
212 }
213 
214 static int
215 mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
216     char *name, caddr_t valuep, int *lengthp)
217 {
218 	/*
219 	 * implement zero size to reduce overhead (avoid two failing
220 	 * property lookups per stat).
221 	 */
222 	return (ddi_prop_op_size(dev, dip, prop_op,
223 	    flags, name, valuep, lengthp, 0));
224 }
225 
226 static int
227 mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
228 {
229 	int error = 0;
230 	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
231 	    (size_t)uio->uio_iov->iov_len);
232 
233 	mutex_enter(&mm_lock);
234 	hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
235 	    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
236 	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
237 
238 	if (!pf_is_memory(pfn)) {
239 		if (allowio) {
240 			size_t c = uio->uio_iov->iov_len;
241 
242 			if (ddi_peekpokeio(NULL, uio, rw,
243 			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
244 			    sizeof (int32_t)) != DDI_SUCCESS)
245 				error = EFAULT;
246 		} else
247 			error = EIO;
248 	} else
249 		error = uiomove(&mm_map[pageoff], nbytes, rw, uio);
250 
251 	hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
252 	mutex_exit(&mm_lock);
253 	return (error);
254 }
255 
#ifdef	__sparc

/*
 * Report whether the segment of `as' containing `va' advertises the
 * S_CAPABILITY_NOMINFLT capability.  Returns 0 when no segment maps
 * `va'.  The address space lock is held as reader for the lookup.
 */
static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int capable = 0;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, va);
	if (seg != NULL)
		capable = SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT);
	AS_LOCK_EXIT(as, &as->a_lock);

	return (capable);
}

/* Should mmrw() try as_pagelock() on this kernel virtual address? */
#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

/* Never attempt as_pagelock() on these platforms. */
#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */
279 
280 /*ARGSUSED3*/
281 static int
282 mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
283 {
284 	pfn_t v;
285 	struct iovec *iov;
286 	int error = 0;
287 	size_t c;
288 	ssize_t oresid = uio->uio_resid;
289 	minor_t minor = getminor(dev);
290 
291 	while (uio->uio_resid > 0 && error == 0) {
292 		iov = uio->uio_iov;
293 		if (iov->iov_len == 0) {
294 			uio->uio_iov++;
295 			uio->uio_iovcnt--;
296 			if (uio->uio_iovcnt < 0)
297 				panic("mmrw");
298 			continue;
299 		}
300 		switch (minor) {
301 
302 		case M_MEM:
303 			memlist_read_lock();
304 			if (!address_in_memlist(phys_install,
305 			    (uint64_t)uio->uio_loffset, 1)) {
306 				memlist_read_unlock();
307 				error = EFAULT;
308 				break;
309 			}
310 			memlist_read_unlock();
311 
312 			v = BTOP((u_offset_t)uio->uio_loffset);
313 			error = mmio(uio, rw, v,
314 			    uio->uio_loffset & PAGEOFFSET, 0);
315 			break;
316 
317 		case M_KMEM:
318 		case M_ALLKMEM:
319 			{
320 			page_t **ppp;
321 			caddr_t vaddr = (caddr_t)uio->uio_offset;
322 			int try_lock = NEED_LOCK_KVADDR(vaddr);
323 			int locked = 0;
324 
325 			/*
326 			 * If vaddr does not map a valid page, as_pagelock()
327 			 * will return failure. Hence we can't check the
328 			 * return value and return EFAULT here as we'd like.
329 			 * seg_kp and seg_kpm do not properly support
330 			 * as_pagelock() for this context so we avoid it
331 			 * using the try_lock set check above.  Some day when
332 			 * the kernel page locking gets redesigned all this
333 			 * muck can be cleaned up.
334 			 */
335 			if (try_lock)
336 				locked = (as_pagelock(&kas, &ppp, vaddr,
337 				    PAGESIZE, S_WRITE) == 0);
338 
339 			v = hat_getpfnum(kas.a_hat,
340 			    (caddr_t)(uintptr_t)uio->uio_loffset);
341 			if (v == PFN_INVALID) {
342 				if (locked)
343 					as_pageunlock(&kas, ppp, vaddr,
344 					    PAGESIZE, S_WRITE);
345 				error = EFAULT;
346 				break;
347 			}
348 
349 			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
350 			    minor == M_ALLKMEM || mm_kmem_io_access);
351 			if (locked)
352 				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
353 				    S_WRITE);
354 			}
355 
356 			break;
357 
358 		case M_ZERO:
359 			if (rw == UIO_READ) {
360 				label_t ljb;
361 
362 				if (on_fault(&ljb)) {
363 					no_fault();
364 					error = EFAULT;
365 					break;
366 				}
367 				uzero(iov->iov_base, iov->iov_len);
368 				no_fault();
369 				uio->uio_resid -= iov->iov_len;
370 				uio->uio_loffset += iov->iov_len;
371 				break;
372 			}
373 			/* else it's a write, fall through to NULL case */
374 			/*FALLTHROUGH*/
375 
376 		case M_NULL:
377 			if (rw == UIO_READ)
378 				return (0);
379 			c = iov->iov_len;
380 			iov->iov_base += c;
381 			iov->iov_len -= c;
382 			uio->uio_loffset += c;
383 			uio->uio_resid -= c;
384 			break;
385 
386 		}
387 	}
388 	return (uio->uio_resid == oresid ? error : 0);
389 }
390 
391 static int
392 mmread(dev_t dev, struct uio *uio, cred_t *cred)
393 {
394 	return (mmrw(dev, uio, UIO_READ, cred));
395 }
396 
397 static int
398 mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
399 {
400 	return (mmrw(dev, uio, UIO_WRITE, cred));
401 }
402 
403 /*
404  * Private ioctl for libkvm to support kvm_physaddr().
405  * Given an address space and a VA, compute the PA.
406  */
407 static int
408 mmioctl_vtop(intptr_t data)
409 {
410 	mem_vtop_t mem_vtop;
411 	proc_t *p;
412 	pfn_t pfn = (pfn_t)PFN_INVALID;
413 	pid_t pid = 0;
414 	struct as *as;
415 	struct seg *seg;
416 
417 	if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
418 		return (EFAULT);
419 	if (mem_vtop.m_as == &kas) {
420 		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
421 	} else if (mem_vtop.m_as == NULL) {
422 		return (EIO);
423 	} else {
424 		mutex_enter(&pidlock);
425 		for (p = practive; p != NULL; p = p->p_next) {
426 			if (p->p_as == mem_vtop.m_as) {
427 				pid = p->p_pid;
428 				break;
429 			}
430 		}
431 		mutex_exit(&pidlock);
432 		if (p == NULL)
433 			return (EIO);
434 		p = sprlock(pid);
435 		if (p == NULL)
436 			return (EIO);
437 		as = p->p_as;
438 		if (as == mem_vtop.m_as) {
439 			mutex_exit(&p->p_lock);
440 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
441 			for (seg = AS_SEGFIRST(as); seg != NULL;
442 			    seg = AS_SEGNEXT(as, seg))
443 				if ((uintptr_t)mem_vtop.m_va -
444 				    (uintptr_t)seg->s_base < seg->s_size)
445 					break;
446 			if (seg != NULL)
447 				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
448 			AS_LOCK_EXIT(as, &as->a_lock);
449 			mutex_enter(&p->p_lock);
450 		}
451 		sprunlock(p);
452 	}
453 	mem_vtop.m_pfn = pfn;
454 	if (pfn == PFN_INVALID)
455 		return (EIO);
456 	if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
457 		return (EFAULT);
458 
459 	return (0);
460 }
461 
462 /*
463  * Given a PA, retire that page or check whether it has already been retired.
464  */
465 static int
466 mmioctl_page_retire(int cmd, intptr_t data)
467 {
468 	uint64_t pa;
469 	pfn_t pfn;
470 	page_t *pp;
471 
472 	if (copyin((void *)data, &pa, sizeof (uint64_t)))
473 		return (EFAULT);
474 
475 	pfn = pa >> MMU_PAGESHIFT;
476 
477 	if (!pf_is_memory(pfn) || (pp = page_numtopp_nolock(pfn)) == NULL)
478 		return (EINVAL);
479 
480 	/*
481 	 * If we're checking, see if the page is retired; if not, confirm that
482 	 * its status is at least set to be failing.  If neither, return EIO.
483 	 */
484 	if (cmd == MEM_PAGE_ISRETIRED) {
485 		if (page_isretired(pp))
486 			return (0);
487 
488 		if (!page_isfailing(pp))
489 			return (EIO);
490 
491 		return (EAGAIN);
492 	}
493 
494 	/*
495 	 * Try to retire the page. If the retire fails, it will be scheduled to
496 	 * occur when the page is freed.  If this page is out of circulation
497 	 * already, or is in the process of being retired, we fail.
498 	 */
499 	if (page_isretired(pp) || page_isfailing(pp))
500 		return (EIO);
501 
502 	page_settoxic(pp, PAGE_IS_FAULTY);
503 	return (page_retire(pp, PAGE_IS_FAILING) ? EAGAIN : 0);
504 }
505 
506 #ifdef __sparc
507 /*
508  * Given a syndrome, syndrome type, and address return the
509  * associated memory name in the provided data buffer.
510  */
511 static int
512 mmioctl_get_mem_name(intptr_t data)
513 {
514 	mem_name_t mem_name;
515 #ifdef	_SYSCALL32
516 	mem_name32_t mem_name32;
517 #endif
518 	void *buf;
519 	size_t bufsize;
520 	int len, err;
521 
522 	if ((bufsize = cpu_get_name_bufsize()) == 0)
523 		return (ENOTSUP);
524 
525 	if (get_udatamodel() == DATAMODEL_NATIVE) {
526 		if (copyin((void *)data, &mem_name, sizeof (mem_name_t)))
527 			return (EFAULT);
528 	}
529 #ifdef	_SYSCALL32
530 	else {
531 		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
532 			return (EFAULT);
533 		mem_name.m_addr = mem_name32.m_addr;
534 		mem_name.m_synd = mem_name32.m_synd;
535 		mem_name.m_type[0] = mem_name32.m_type[0];
536 		mem_name.m_type[1] = mem_name32.m_type[1];
537 		mem_name.m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
538 		mem_name.m_namelen = (size_t)mem_name32.m_namelen;
539 	}
540 #endif	/* _SYSCALL32 */
541 
542 	buf = kmem_alloc(bufsize, KM_SLEEP);
543 
544 	/*
545 	 * Call into cpu specific code to do the lookup.
546 	 */
547 	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
548 	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
549 		kmem_free(buf, bufsize);
550 		return (err);
551 	}
552 
553 	if (len >= mem_name.m_namelen) {
554 		kmem_free(buf, bufsize);
555 		return (ENAMETOOLONG);
556 	}
557 
558 	if (copyoutstr(buf, (char *)mem_name.m_name,
559 	    mem_name.m_namelen, NULL) != 0) {
560 		kmem_free(buf, bufsize);
561 		return (EFAULT);
562 	}
563 
564 	kmem_free(buf, bufsize);
565 	return (0);
566 }
567 
568 /*
569  * Given a syndrome and address return information about the associated memory.
570  */
571 static int
572 mmioctl_get_mem_info(intptr_t data)
573 {
574 	mem_info_t mem_info;
575 	int err;
576 
577 	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
578 		return (EFAULT);
579 
580 	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
581 	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
582 	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
583 		return (err);
584 
585 	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
586 		return (EFAULT);
587 
588 	return (0);
589 }
590 #endif	/* __sparc */
591 
592 /*
593  * Private ioctls for
594  *	libkvm to support kvm_physaddr().
595  *	FMA support for page_retire() and memory attribute information.
596  */
597 /*ARGSUSED*/
598 static int
599 mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
600 {
601 	switch (cmd) {
602 	case MEM_VTOP:
603 		if (getminor(dev) != M_KMEM)
604 			return (ENXIO);
605 		return (mmioctl_vtop(data));
606 
607 	case MEM_PAGE_RETIRE:
608 	case MEM_PAGE_ISRETIRED:
609 		if (getminor(dev) != M_MEM)
610 			return (ENXIO);
611 		return (mmioctl_page_retire(cmd, data));
612 
613 	case MEM_NAME:
614 		if (getminor(dev) != M_MEM)
615 			return (ENXIO);
616 #ifdef __sparc
617 		return (mmioctl_get_mem_name(data));
618 #else
619 		return (ENOTSUP);
620 #endif
621 
622 	case MEM_INFO:
623 		if (getminor(dev) != M_MEM)
624 			return (ENXIO);
625 #ifdef __sparc
626 		return (mmioctl_get_mem_info(data));
627 #else
628 		return (ENOTSUP);
629 #endif
630 	}
631 	return (ENXIO);
632 }
633 
634 /*ARGSUSED2*/
635 static int
636 mmmmap(dev_t dev, off_t off, int prot)
637 {
638 	pfn_t pf;
639 	struct memlist *pmem;
640 	minor_t minor = getminor(dev);
641 
642 	switch (minor) {
643 	case M_MEM:
644 		pf = btop(off);
645 		memlist_read_lock();
646 		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
647 			if (pf >= BTOP(pmem->address) &&
648 			    pf < BTOP(pmem->address + pmem->size)) {
649 				memlist_read_unlock();
650 				return (impl_obmem_pfnum(pf));
651 			}
652 		}
653 		memlist_read_unlock();
654 		break;
655 
656 	case M_KMEM:
657 	case M_ALLKMEM:
658 		/* no longer supported with KPR */
659 		return (-1);
660 
661 	case M_ZERO:
662 		/*
663 		 * We shouldn't be mmap'ing to /dev/zero here as
664 		 * mmsegmap() should have already converted
665 		 * a mapping request for this device to a mapping
666 		 * using seg_vn for anonymous memory.
667 		 */
668 		break;
669 
670 	}
671 	return (-1);
672 }
673 
674 /*
675  * This function is called when a memory device is mmap'ed.
676  * Set up the mapping to the correct device driver.
677  */
678 static int
679 mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
680     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
681 {
682 	struct segvn_crargs vn_a;
683 	struct segdev_crargs dev_a;
684 	int error;
685 	minor_t minor;
686 	off_t i;
687 
688 	minor = getminor(dev);
689 
690 	as_rangelock(as);
691 	if ((flags & MAP_FIXED) == 0) {
692 		/*
693 		 * No need to worry about vac alignment on /dev/zero
694 		 * since this is a "clone" object that doesn't yet exist.
695 		 */
696 		map_addr(addrp, len, (offset_t)off,
697 				(minor == M_MEM) || (minor == M_KMEM), flags);
698 
699 		if (*addrp == NULL) {
700 			as_rangeunlock(as);
701 			return (ENOMEM);
702 		}
703 	} else {
704 		/*
705 		 * User specified address -
706 		 * Blow away any previous mappings.
707 		 */
708 		(void) as_unmap(as, *addrp, len);
709 	}
710 
711 	switch (minor) {
712 	case M_MEM:
713 		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
714 		if ((flags & MAP_TYPE) != MAP_SHARED) {
715 			as_rangeunlock(as);
716 			return (EINVAL);
717 		}
718 
719 		/*
720 		 * Check to ensure that the entire range is
721 		 * legal and we are not trying to map in
722 		 * more than the device will let us.
723 		 */
724 		for (i = 0; i < len; i += PAGESIZE) {
725 			if (mmmmap(dev, off + i, maxprot) == -1) {
726 				as_rangeunlock(as);
727 				return (ENXIO);
728 			}
729 		}
730 
731 		/*
732 		 * Use seg_dev segment driver for /dev/mem mapping.
733 		 */
734 		dev_a.mapfunc = mmmmap;
735 		dev_a.dev = dev;
736 		dev_a.offset = off;
737 		dev_a.type = (flags & MAP_TYPE);
738 		dev_a.prot = (uchar_t)prot;
739 		dev_a.maxprot = (uchar_t)maxprot;
740 		dev_a.hat_attr = 0;
741 
742 		/*
743 		 * Make /dev/mem mappings non-consistent since we can't
744 		 * alias pages that don't have page structs behind them,
745 		 * such as kernel stack pages. If someone mmap()s a kernel
746 		 * stack page and if we give him a tte with cv, a line from
747 		 * that page can get into both pages of the spitfire d$.
748 		 * But snoop from another processor will only invalidate
749 		 * the first page. This later caused kernel (xc_attention)
750 		 * to go into an infinite loop at pil 13 and no interrupts
751 		 * could come in. See 1203630.
752 		 *
753 		 */
754 		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
755 		dev_a.devmap_data = NULL;
756 
757 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
758 		break;
759 
760 	case M_ZERO:
761 		/*
762 		 * Use seg_vn segment driver for /dev/zero mapping.
763 		 * Passing in a NULL amp gives us the "cloning" effect.
764 		 */
765 		vn_a.vp = NULL;
766 		vn_a.offset = 0;
767 		vn_a.type = (flags & MAP_TYPE);
768 		vn_a.prot = prot;
769 		vn_a.maxprot = maxprot;
770 		vn_a.flags = flags & ~MAP_TYPE;
771 		vn_a.cred = cred;
772 		vn_a.amp = NULL;
773 		vn_a.szc = 0;
774 		vn_a.lgrp_mem_policy_flags = 0;
775 		error = as_map(as, *addrp, len, segvn_create, &vn_a);
776 		break;
777 
778 	case M_KMEM:
779 	case M_ALLKMEM:
780 		/* No longer supported with KPR. */
781 		error = ENXIO;
782 		break;
783 
784 	case M_NULL:
785 		/*
786 		 * Use seg_dev segment driver for /dev/null mapping.
787 		 */
788 		dev_a.mapfunc = mmmmap;
789 		dev_a.dev = dev;
790 		dev_a.offset = off;
791 		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
792 		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
793 		dev_a.hat_attr = 0;
794 		dev_a.hat_flags = 0;
795 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
796 		break;
797 
798 	default:
799 		error = ENXIO;
800 	}
801 
802 	as_rangeunlock(as);
803 	return (error);
804 }
805 
806 static struct cb_ops mm_cb_ops = {
807 	mmopen,			/* open */
808 	nulldev,		/* close */
809 	nodev,			/* strategy */
810 	nodev,			/* print */
811 	nodev,			/* dump */
812 	mmread,			/* read */
813 	mmwrite,		/* write */
814 	mmioctl,		/* ioctl */
815 	nodev,			/* devmap */
816 	mmmmap,			/* mmap */
817 	mmsegmap,		/* segmap */
818 	mmchpoll,		/* poll */
819 	mmpropop,		/* prop_op */
820 	0,			/* streamtab  */
821 	D_NEW | D_MP | D_64BIT | D_U64BIT
822 };
823 
824 static struct dev_ops mm_ops = {
825 	DEVO_REV,		/* devo_rev, */
826 	0,			/* refcnt  */
827 	mm_info,		/* get_dev_info */
828 	nulldev,		/* identify */
829 	nulldev,		/* probe */
830 	mm_attach,		/* attach */
831 	nodev,			/* detach */
832 	nodev,			/* reset */
833 	&mm_cb_ops,		/* driver operations */
834 	(struct bus_ops *)0	/* bus operations */
835 };
836 
837 static struct modldrv modldrv = {
838 	&mod_driverops, "memory driver %I%", &mm_ops,
839 };
840 
841 static struct modlinkage modlinkage = {
842 	MODREV_1, &modldrv, NULL
843 };
844 
845 int
846 _init(void)
847 {
848 	return (mod_install(&modlinkage));
849 }
850 
851 int
852 _info(struct modinfo *modinfop)
853 {
854 	return (mod_info(&modlinkage, modinfop));
855 }
856 
857 int
858 _fini(void)
859 {
860 	return (mod_remove(&modlinkage));
861 }
862 
863 static int
864 mm_kstat_update(kstat_t *ksp, int rw)
865 {
866 	struct memlist *pmem;
867 	uint_t count;
868 
869 	if (rw == KSTAT_WRITE)
870 		return (EACCES);
871 
872 	count = 0;
873 	memlist_read_lock();
874 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
875 		count++;
876 	}
877 	memlist_read_unlock();
878 
879 	ksp->ks_ndata = count;
880 	ksp->ks_data_size = count * 2 * sizeof (uint64_t);
881 
882 	return (0);
883 }
884 
885 static int
886 mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
887 {
888 	struct memlist *pmem;
889 	struct memunit {
890 		uint64_t address;
891 		uint64_t size;
892 	} *kspmem;
893 
894 	if (rw == KSTAT_WRITE)
895 		return (EACCES);
896 
897 	ksp->ks_snaptime = gethrtime();
898 
899 	kspmem = (struct memunit *)buf;
900 	memlist_read_lock();
901 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
902 		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
903 			break;
904 		kspmem->address = pmem->address;
905 		kspmem->size = pmem->size;
906 	}
907 	memlist_read_unlock();
908 
909 	return (0);
910 }
911