xref: /titanic_51/usr/src/uts/common/io/mem.c (revision fb9f9b975cb9214fec5dab37d461199adab9b964)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Memory special file
31  */
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/user.h>
36 #include <sys/buf.h>
37 #include <sys/systm.h>
38 #include <sys/cred.h>
39 #include <sys/vm.h>
40 #include <sys/uio.h>
41 #include <sys/mman.h>
42 #include <sys/kmem.h>
43 #include <vm/seg.h>
44 #include <vm/page.h>
45 #include <sys/stat.h>
46 #include <sys/vmem.h>
47 #include <sys/memlist.h>
48 #include <sys/bootconf.h>
49 
50 #include <vm/seg_vn.h>
51 #include <vm/seg_dev.h>
52 #include <vm/seg_kmem.h>
53 #include <vm/seg_kp.h>
54 #include <vm/seg_kpm.h>
55 #include <vm/hat.h>
56 
57 #include <sys/conf.h>
58 #include <sys/mem.h>
59 #include <sys/types.h>
60 #include <sys/conf.h>
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/errno.h>
64 #include <sys/modctl.h>
65 #include <sys/memlist.h>
66 #include <sys/ddi.h>
67 #include <sys/sunddi.h>
68 #include <sys/debug.h>
69 
70 #ifdef __sparc
71 extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
72 extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
73     uint64_t *, int *, int *, int *);
74 extern size_t cpu_get_name_bufsize(void);
75 #endif
76 
/*
 * Convert a byte count into a page count.  The DDI btop() takes a
 * 32-bit size on 32-bit machines; this macro simply shifts whatever
 * value it is handed, so it also copes with the 64-bit sizes seen on
 * 32-bit machines that have large physical memory.
 */
#define	BTOP(nbytes)	((pgcnt_t)((nbytes) >> _pageshift))
83 
84 static kmutex_t mm_lock;
85 static caddr_t mm_map;
86 
87 static dev_info_t *mm_dip;	/* private copy of devinfo pointer */
88 
89 static int mm_kmem_io_access;
90 
91 static int mm_kstat_update(kstat_t *ksp, int rw);
92 static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
93 
94 /*ARGSUSED1*/
95 static int
96 mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
97 {
98 	int i;
99 	struct mem_minor {
100 		char *name;
101 		minor_t minor;
102 		int privonly;
103 		const char *rdpriv;
104 		const char *wrpriv;
105 		mode_t priv_mode;
106 	} mm[] = {
107 		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
108 		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
109 		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
110 		{ "null",	M_NULL,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
111 		{ "zero",	M_ZERO, PRIVONLY_DEV,	NULL,	NULL,	0666 },
112 	};
113 	kstat_t *ksp;
114 
115 	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
116 	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
117 
118 	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
119 		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
120 		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
121 		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
122 		    DDI_FAILURE) {
123 			ddi_remove_minor_node(devi, NULL);
124 			return (DDI_FAILURE);
125 		}
126 	}
127 
128 	mm_dip = devi;
129 
130 	ksp = kstat_create("mm", 0, "phys_installed", "misc",
131 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
132 	if (ksp != NULL) {
133 		ksp->ks_update = mm_kstat_update;
134 		ksp->ks_snapshot = mm_kstat_snapshot;
135 		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
136 		kstat_install(ksp);
137 	}
138 
139 	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
140 	    "kmem_io_access", 0);
141 
142 	return (DDI_SUCCESS);
143 }
144 
145 /*ARGSUSED*/
146 static int
147 mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
148 {
149 	register int error;
150 
151 	switch (infocmd) {
152 	case DDI_INFO_DEVT2DEVINFO:
153 		*result = (void *)mm_dip;
154 		error = DDI_SUCCESS;
155 		break;
156 	case DDI_INFO_DEVT2INSTANCE:
157 		*result = (void *)0;
158 		error = DDI_SUCCESS;
159 		break;
160 	default:
161 		error = DDI_FAILURE;
162 	}
163 	return (error);
164 }
165 
166 /*ARGSUSED1*/
167 static int
168 mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
169 {
170 	switch (getminor(*devp)) {
171 	case M_NULL:
172 	case M_ZERO:
173 	case M_MEM:
174 	case M_KMEM:
175 	case M_ALLKMEM:
176 		/* standard devices */
177 		break;
178 
179 	default:
180 		/* Unsupported or unknown type */
181 		return (EINVAL);
182 	}
183 	return (0);
184 }
185 
186 struct pollhead	mm_pollhd;
187 
188 /*ARGSUSED*/
189 static int
190 mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
191     struct pollhead **phpp)
192 {
193 	switch (getminor(dev)) {
194 	case M_NULL:
195 	case M_ZERO:
196 	case M_MEM:
197 	case M_KMEM:
198 	case M_ALLKMEM:
199 		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
200 			POLLWRNORM | POLLRDBAND | POLLWRBAND);
201 		/*
202 		 * A non NULL pollhead pointer should be returned in case
203 		 * user polls for 0 events.
204 		 */
205 		*phpp = !anyyet && !*reventsp ?
206 		    &mm_pollhd : (struct pollhead *)NULL;
207 		return (0);
208 	default:
209 		/* no other devices currently support polling */
210 		return (ENXIO);
211 	}
212 }
213 
214 static int
215 mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
216     char *name, caddr_t valuep, int *lengthp)
217 {
218 	/*
219 	 * implement zero size to reduce overhead (avoid two failing
220 	 * property lookups per stat).
221 	 */
222 	return (ddi_prop_op_size(dev, dip, prop_op,
223 	    flags, name, valuep, lengthp, 0));
224 }
225 
226 static int
227 mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
228 {
229 	int error = 0;
230 	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
231 	    (size_t)uio->uio_iov->iov_len);
232 
233 	mutex_enter(&mm_lock);
234 	hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
235 	    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
236 	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
237 
238 	if (!pf_is_memory(pfn)) {
239 		if (allowio) {
240 			size_t c = uio->uio_iov->iov_len;
241 
242 			if (ddi_peekpokeio(NULL, uio, rw,
243 			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
244 			    sizeof (int32_t)) != DDI_SUCCESS)
245 				error = EFAULT;
246 		} else
247 			error = EIO;
248 	} else
249 		error = uiomove(&mm_map[pageoff], nbytes, rw, uio);
250 
251 	hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
252 	mutex_exit(&mm_lock);
253 	return (error);
254 }
255 
#ifdef	__sparc

/*
 * Look up the segment of 'as' that contains 'va' (under the address
 * space lock, taken as reader) and return the result of asking that
 * segment for the S_CAPABILITY_NOMINFLT capability.  Returns 0 when no
 * segment covers 'va'.
 */
static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int capable = 0;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, va);
	if (seg != NULL)
		capable = SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT);
	AS_LOCK_EXIT(as, &as->a_lock);

	return (capable);
}

/* Non-zero when mmrw() should as_pagelock() the kernel address 'kva'. */
#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

/* Kernel addresses are never page-locked by mmrw() on these platforms. */
#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */
279 
280 /*ARGSUSED3*/
281 static int
282 mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
283 {
284 	pfn_t v;
285 	struct iovec *iov;
286 	int error = 0;
287 	size_t c;
288 	ssize_t oresid = uio->uio_resid;
289 	minor_t minor = getminor(dev);
290 
291 	while (uio->uio_resid > 0 && error == 0) {
292 		iov = uio->uio_iov;
293 		if (iov->iov_len == 0) {
294 			uio->uio_iov++;
295 			uio->uio_iovcnt--;
296 			if (uio->uio_iovcnt < 0)
297 				panic("mmrw");
298 			continue;
299 		}
300 		switch (minor) {
301 
302 		case M_MEM:
303 			memlist_read_lock();
304 			if (!address_in_memlist(phys_install,
305 			    (uint64_t)uio->uio_loffset, 1)) {
306 				memlist_read_unlock();
307 				error = EFAULT;
308 				break;
309 			}
310 			memlist_read_unlock();
311 
312 			v = BTOP((u_offset_t)uio->uio_loffset);
313 			error = mmio(uio, rw, v,
314 			    uio->uio_loffset & PAGEOFFSET, 0);
315 			break;
316 
317 		case M_KMEM:
318 		case M_ALLKMEM:
319 			{
320 			page_t **ppp;
321 			caddr_t vaddr = (caddr_t)uio->uio_offset;
322 			int try_lock = NEED_LOCK_KVADDR(vaddr);
323 			int locked = 0;
324 
325 			/*
326 			 * If vaddr does not map a valid page, as_pagelock()
327 			 * will return failure. Hence we can't check the
328 			 * return value and return EFAULT here as we'd like.
329 			 * seg_kp and seg_kpm do not properly support
330 			 * as_pagelock() for this context so we avoid it
331 			 * using the try_lock set check above.  Some day when
332 			 * the kernel page locking gets redesigned all this
333 			 * muck can be cleaned up.
334 			 */
335 			if (try_lock)
336 				locked = (as_pagelock(&kas, &ppp, vaddr,
337 				    PAGESIZE, S_WRITE) == 0);
338 
339 			v = hat_getpfnum(kas.a_hat,
340 			    (caddr_t)(uintptr_t)uio->uio_loffset);
341 			if (v == PFN_INVALID) {
342 				if (locked)
343 					as_pageunlock(&kas, ppp, vaddr,
344 					    PAGESIZE, S_WRITE);
345 				error = EFAULT;
346 				break;
347 			}
348 
349 			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
350 			    minor == M_ALLKMEM || mm_kmem_io_access);
351 			if (locked)
352 				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
353 				    S_WRITE);
354 			}
355 
356 			break;
357 
358 		case M_ZERO:
359 			if (rw == UIO_READ) {
360 				label_t ljb;
361 
362 				if (on_fault(&ljb)) {
363 					no_fault();
364 					error = EFAULT;
365 					break;
366 				}
367 				uzero(iov->iov_base, iov->iov_len);
368 				no_fault();
369 				uio->uio_resid -= iov->iov_len;
370 				uio->uio_loffset += iov->iov_len;
371 				break;
372 			}
373 			/* else it's a write, fall through to NULL case */
374 			/*FALLTHROUGH*/
375 
376 		case M_NULL:
377 			if (rw == UIO_READ)
378 				return (0);
379 			c = iov->iov_len;
380 			iov->iov_base += c;
381 			iov->iov_len -= c;
382 			uio->uio_loffset += c;
383 			uio->uio_resid -= c;
384 			break;
385 
386 		}
387 	}
388 	return (uio->uio_resid == oresid ? error : 0);
389 }
390 
391 static int
392 mmread(dev_t dev, struct uio *uio, cred_t *cred)
393 {
394 	return (mmrw(dev, uio, UIO_READ, cred));
395 }
396 
397 static int
398 mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
399 {
400 	return (mmrw(dev, uio, UIO_WRITE, cred));
401 }
402 
403 /*
404  * Private ioctl for libkvm to support kvm_physaddr().
405  * Given an address space and a VA, compute the PA.
406  */
407 static int
408 mmioctl_vtop(intptr_t data)
409 {
410 	mem_vtop_t mem_vtop;
411 	proc_t *p;
412 	pfn_t pfn = (pfn_t)PFN_INVALID;
413 	pid_t pid = 0;
414 	struct as *as;
415 	struct seg *seg;
416 
417 	if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
418 		return (EFAULT);
419 	if (mem_vtop.m_as == &kas) {
420 		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
421 	} else if (mem_vtop.m_as == NULL) {
422 		return (EIO);
423 	} else {
424 		mutex_enter(&pidlock);
425 		for (p = practive; p != NULL; p = p->p_next) {
426 			if (p->p_as == mem_vtop.m_as) {
427 				pid = p->p_pid;
428 				break;
429 			}
430 		}
431 		mutex_exit(&pidlock);
432 		if (p == NULL)
433 			return (EIO);
434 		p = sprlock(pid);
435 		if (p == NULL)
436 			return (EIO);
437 		as = p->p_as;
438 		if (as == mem_vtop.m_as) {
439 			mutex_exit(&p->p_lock);
440 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
441 			for (seg = AS_SEGFIRST(as); seg != NULL;
442 			    seg = AS_SEGNEXT(as, seg))
443 				if ((uintptr_t)mem_vtop.m_va -
444 				    (uintptr_t)seg->s_base < seg->s_size)
445 					break;
446 			if (seg != NULL)
447 				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
448 			AS_LOCK_EXIT(as, &as->a_lock);
449 			mutex_enter(&p->p_lock);
450 		}
451 		sprunlock(p);
452 	}
453 	mem_vtop.m_pfn = pfn;
454 	if (pfn == PFN_INVALID)
455 		return (EIO);
456 	if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
457 		return (EFAULT);
458 
459 	return (0);
460 }
461 
462 /*
463  * Given a PA, execute the given page retire command on it.
464  */
465 static int
466 mmioctl_page_retire(int cmd, intptr_t data)
467 {
468 	extern int page_retire_test(void);
469 	uint64_t pa;
470 
471 	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
472 		return (EFAULT);
473 	}
474 
475 	switch (cmd) {
476 	case MEM_PAGE_ISRETIRED:
477 		return (page_retire_check(pa, NULL));
478 
479 	case MEM_PAGE_UNRETIRE:
480 		return (page_unretire(pa));
481 
482 	case MEM_PAGE_RETIRE:
483 		return (page_retire(pa, PR_FMA));
484 
485 	case MEM_PAGE_RETIRE_MCE:
486 		return (page_retire(pa, PR_MCE));
487 
488 	case MEM_PAGE_RETIRE_UE:
489 		return (page_retire(pa, PR_UE));
490 
491 	case MEM_PAGE_GETERRORS:
492 		{
493 			uint64_t page_errors;
494 			int rc = page_retire_check(pa, &page_errors);
495 			if (copyout(&page_errors, (void *)data,
496 			    sizeof (uint64_t))) {
497 				return (EFAULT);
498 			}
499 			return (rc);
500 		}
501 
502 	case MEM_PAGE_RETIRE_TEST:
503 		return (page_retire_test());
504 
505 	}
506 
507 	return (EINVAL);
508 }
509 
510 #ifdef __sparc
511 /*
512  * Given a syndrome, syndrome type, and address return the
513  * associated memory name in the provided data buffer.
514  */
515 static int
516 mmioctl_get_mem_name(intptr_t data)
517 {
518 	mem_name_t mem_name;
519 #ifdef	_SYSCALL32
520 	mem_name32_t mem_name32;
521 #endif
522 	void *buf;
523 	size_t bufsize;
524 	int len, err;
525 
526 	if ((bufsize = cpu_get_name_bufsize()) == 0)
527 		return (ENOTSUP);
528 
529 	if (get_udatamodel() == DATAMODEL_NATIVE) {
530 		if (copyin((void *)data, &mem_name, sizeof (mem_name_t)))
531 			return (EFAULT);
532 	}
533 #ifdef	_SYSCALL32
534 	else {
535 		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
536 			return (EFAULT);
537 		mem_name.m_addr = mem_name32.m_addr;
538 		mem_name.m_synd = mem_name32.m_synd;
539 		mem_name.m_type[0] = mem_name32.m_type[0];
540 		mem_name.m_type[1] = mem_name32.m_type[1];
541 		mem_name.m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
542 		mem_name.m_namelen = (size_t)mem_name32.m_namelen;
543 	}
544 #endif	/* _SYSCALL32 */
545 
546 	buf = kmem_alloc(bufsize, KM_SLEEP);
547 
548 	/*
549 	 * Call into cpu specific code to do the lookup.
550 	 */
551 	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
552 	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
553 		kmem_free(buf, bufsize);
554 		return (err);
555 	}
556 
557 	if (len >= mem_name.m_namelen) {
558 		kmem_free(buf, bufsize);
559 		return (ENAMETOOLONG);
560 	}
561 
562 	if (copyoutstr(buf, (char *)mem_name.m_name,
563 	    mem_name.m_namelen, NULL) != 0) {
564 		kmem_free(buf, bufsize);
565 		return (EFAULT);
566 	}
567 
568 	kmem_free(buf, bufsize);
569 	return (0);
570 }
571 
572 /*
573  * Given a syndrome and address return information about the associated memory.
574  */
575 static int
576 mmioctl_get_mem_info(intptr_t data)
577 {
578 	mem_info_t mem_info;
579 	int err;
580 
581 	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
582 		return (EFAULT);
583 
584 	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
585 	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
586 	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
587 		return (err);
588 
589 	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
590 		return (EFAULT);
591 
592 	return (0);
593 }
594 #endif	/* __sparc */
595 
596 /*
597  * Private ioctls for
598  *	libkvm to support kvm_physaddr().
599  *	FMA support for page_retire() and memory attribute information.
600  */
601 /*ARGSUSED*/
602 static int
603 mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
604 {
605 	switch (cmd) {
606 	case MEM_VTOP:
607 		if (getminor(dev) != M_KMEM)
608 			return (ENXIO);
609 		return (mmioctl_vtop(data));
610 
611 	case MEM_PAGE_RETIRE:
612 	case MEM_PAGE_ISRETIRED:
613 	case MEM_PAGE_UNRETIRE:
614 	case MEM_PAGE_RETIRE_MCE:
615 	case MEM_PAGE_RETIRE_UE:
616 	case MEM_PAGE_GETERRORS:
617 	case MEM_PAGE_RETIRE_TEST:
618 		if (getminor(dev) != M_MEM)
619 			return (ENXIO);
620 		return (mmioctl_page_retire(cmd, data));
621 
622 	case MEM_NAME:
623 		if (getminor(dev) != M_MEM)
624 			return (ENXIO);
625 #ifdef __sparc
626 		return (mmioctl_get_mem_name(data));
627 #else
628 		return (ENOTSUP);
629 #endif
630 
631 	case MEM_INFO:
632 		if (getminor(dev) != M_MEM)
633 			return (ENXIO);
634 #ifdef __sparc
635 		return (mmioctl_get_mem_info(data));
636 #else
637 		return (ENOTSUP);
638 #endif
639 	}
640 	return (ENXIO);
641 }
642 
643 /*ARGSUSED2*/
644 static int
645 mmmmap(dev_t dev, off_t off, int prot)
646 {
647 	pfn_t pf;
648 	struct memlist *pmem;
649 	minor_t minor = getminor(dev);
650 
651 	switch (minor) {
652 	case M_MEM:
653 		pf = btop(off);
654 		memlist_read_lock();
655 		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
656 			if (pf >= BTOP(pmem->address) &&
657 			    pf < BTOP(pmem->address + pmem->size)) {
658 				memlist_read_unlock();
659 				return (impl_obmem_pfnum(pf));
660 			}
661 		}
662 		memlist_read_unlock();
663 		break;
664 
665 	case M_KMEM:
666 	case M_ALLKMEM:
667 		/* no longer supported with KPR */
668 		return (-1);
669 
670 	case M_ZERO:
671 		/*
672 		 * We shouldn't be mmap'ing to /dev/zero here as
673 		 * mmsegmap() should have already converted
674 		 * a mapping request for this device to a mapping
675 		 * using seg_vn for anonymous memory.
676 		 */
677 		break;
678 
679 	}
680 	return (-1);
681 }
682 
683 /*
684  * This function is called when a memory device is mmap'ed.
685  * Set up the mapping to the correct device driver.
686  */
687 static int
688 mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
689     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
690 {
691 	struct segvn_crargs vn_a;
692 	struct segdev_crargs dev_a;
693 	int error;
694 	minor_t minor;
695 	off_t i;
696 
697 	minor = getminor(dev);
698 
699 	as_rangelock(as);
700 	if ((flags & MAP_FIXED) == 0) {
701 		/*
702 		 * No need to worry about vac alignment on /dev/zero
703 		 * since this is a "clone" object that doesn't yet exist.
704 		 */
705 		map_addr(addrp, len, (offset_t)off,
706 				(minor == M_MEM) || (minor == M_KMEM), flags);
707 
708 		if (*addrp == NULL) {
709 			as_rangeunlock(as);
710 			return (ENOMEM);
711 		}
712 	} else {
713 		/*
714 		 * User specified address -
715 		 * Blow away any previous mappings.
716 		 */
717 		(void) as_unmap(as, *addrp, len);
718 	}
719 
720 	switch (minor) {
721 	case M_MEM:
722 		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
723 		if ((flags & MAP_TYPE) != MAP_SHARED) {
724 			as_rangeunlock(as);
725 			return (EINVAL);
726 		}
727 
728 		/*
729 		 * Check to ensure that the entire range is
730 		 * legal and we are not trying to map in
731 		 * more than the device will let us.
732 		 */
733 		for (i = 0; i < len; i += PAGESIZE) {
734 			if (mmmmap(dev, off + i, maxprot) == -1) {
735 				as_rangeunlock(as);
736 				return (ENXIO);
737 			}
738 		}
739 
740 		/*
741 		 * Use seg_dev segment driver for /dev/mem mapping.
742 		 */
743 		dev_a.mapfunc = mmmmap;
744 		dev_a.dev = dev;
745 		dev_a.offset = off;
746 		dev_a.type = (flags & MAP_TYPE);
747 		dev_a.prot = (uchar_t)prot;
748 		dev_a.maxprot = (uchar_t)maxprot;
749 		dev_a.hat_attr = 0;
750 
751 		/*
752 		 * Make /dev/mem mappings non-consistent since we can't
753 		 * alias pages that don't have page structs behind them,
754 		 * such as kernel stack pages. If someone mmap()s a kernel
755 		 * stack page and if we give him a tte with cv, a line from
756 		 * that page can get into both pages of the spitfire d$.
757 		 * But snoop from another processor will only invalidate
758 		 * the first page. This later caused kernel (xc_attention)
759 		 * to go into an infinite loop at pil 13 and no interrupts
760 		 * could come in. See 1203630.
761 		 *
762 		 */
763 		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
764 		dev_a.devmap_data = NULL;
765 
766 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
767 		break;
768 
769 	case M_ZERO:
770 		/*
771 		 * Use seg_vn segment driver for /dev/zero mapping.
772 		 * Passing in a NULL amp gives us the "cloning" effect.
773 		 */
774 		vn_a.vp = NULL;
775 		vn_a.offset = 0;
776 		vn_a.type = (flags & MAP_TYPE);
777 		vn_a.prot = prot;
778 		vn_a.maxprot = maxprot;
779 		vn_a.flags = flags & ~MAP_TYPE;
780 		vn_a.cred = cred;
781 		vn_a.amp = NULL;
782 		vn_a.szc = 0;
783 		vn_a.lgrp_mem_policy_flags = 0;
784 		error = as_map(as, *addrp, len, segvn_create, &vn_a);
785 		break;
786 
787 	case M_KMEM:
788 	case M_ALLKMEM:
789 		/* No longer supported with KPR. */
790 		error = ENXIO;
791 		break;
792 
793 	case M_NULL:
794 		/*
795 		 * Use seg_dev segment driver for /dev/null mapping.
796 		 */
797 		dev_a.mapfunc = mmmmap;
798 		dev_a.dev = dev;
799 		dev_a.offset = off;
800 		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
801 		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
802 		dev_a.hat_attr = 0;
803 		dev_a.hat_flags = 0;
804 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
805 		break;
806 
807 	default:
808 		error = ENXIO;
809 	}
810 
811 	as_rangeunlock(as);
812 	return (error);
813 }
814 
815 static struct cb_ops mm_cb_ops = {
816 	mmopen,			/* open */
817 	nulldev,		/* close */
818 	nodev,			/* strategy */
819 	nodev,			/* print */
820 	nodev,			/* dump */
821 	mmread,			/* read */
822 	mmwrite,		/* write */
823 	mmioctl,		/* ioctl */
824 	nodev,			/* devmap */
825 	mmmmap,			/* mmap */
826 	mmsegmap,		/* segmap */
827 	mmchpoll,		/* poll */
828 	mmpropop,		/* prop_op */
829 	0,			/* streamtab  */
830 	D_NEW | D_MP | D_64BIT | D_U64BIT
831 };
832 
833 static struct dev_ops mm_ops = {
834 	DEVO_REV,		/* devo_rev, */
835 	0,			/* refcnt  */
836 	mm_info,		/* get_dev_info */
837 	nulldev,		/* identify */
838 	nulldev,		/* probe */
839 	mm_attach,		/* attach */
840 	nodev,			/* detach */
841 	nodev,			/* reset */
842 	&mm_cb_ops,		/* driver operations */
843 	(struct bus_ops *)0	/* bus operations */
844 };
845 
846 static struct modldrv modldrv = {
847 	&mod_driverops, "memory driver %I%", &mm_ops,
848 };
849 
850 static struct modlinkage modlinkage = {
851 	MODREV_1, &modldrv, NULL
852 };
853 
854 int
855 _init(void)
856 {
857 	return (mod_install(&modlinkage));
858 }
859 
860 int
861 _info(struct modinfo *modinfop)
862 {
863 	return (mod_info(&modlinkage, modinfop));
864 }
865 
866 int
867 _fini(void)
868 {
869 	return (mod_remove(&modlinkage));
870 }
871 
872 static int
873 mm_kstat_update(kstat_t *ksp, int rw)
874 {
875 	struct memlist *pmem;
876 	uint_t count;
877 
878 	if (rw == KSTAT_WRITE)
879 		return (EACCES);
880 
881 	count = 0;
882 	memlist_read_lock();
883 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
884 		count++;
885 	}
886 	memlist_read_unlock();
887 
888 	ksp->ks_ndata = count;
889 	ksp->ks_data_size = count * 2 * sizeof (uint64_t);
890 
891 	return (0);
892 }
893 
894 static int
895 mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
896 {
897 	struct memlist *pmem;
898 	struct memunit {
899 		uint64_t address;
900 		uint64_t size;
901 	} *kspmem;
902 
903 	if (rw == KSTAT_WRITE)
904 		return (EACCES);
905 
906 	ksp->ks_snaptime = gethrtime();
907 
908 	kspmem = (struct memunit *)buf;
909 	memlist_read_lock();
910 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
911 		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
912 			break;
913 		kspmem->address = pmem->address;
914 		kspmem->size = pmem->size;
915 	}
916 	memlist_read_unlock();
917 
918 	return (0);
919 }
920