/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>
#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif	/* __sparc */

/*
 * Turn a byte length into a page count.  The DDI btop() takes a
 * 32-bit size on 32-bit machines; this macro handles 64-bit sizes
 * on 32-bit machines with large physical memory.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))

static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);

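/*
 * Attach entry point: create the /dev/{mem,kmem,allkmem,null,zero}
 * minor nodes, reserve one page of kernel VA (mm_map) for temporary
 * mappings, and install the "phys_installed" kstat.
 */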
/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	struct mem_minor {
		char *name;
		minor_t minor;
		int privonly;
		const char *rdpriv;
		const char *wrpriv;
		mode_t priv_mode;
	} mm[] = {
		{ "mem", M_MEM, 0, NULL, "all", 0640 },
		{ "kmem", M_KMEM, 0, NULL, "all", 0640 },
		{ "allkmem", M_ALLKMEM, 0, "all", "all", 0600 },
		{ "null", M_NULL, PRIVONLY_DEV, NULL, NULL, 0666 },
		{ "zero", M_ZERO, PRIVONLY_DEV, NULL, NULL, 0666 },
	};
	kstat_t *ksp;

	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
		    DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	mm_dip = devi;

	ksp = kstat_create("mm", 0, "phys_installed", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = mm_kstat_update;
		ksp->ks_snapshot = mm_kstat_snapshot;
		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
		kstat_install(ksp);
	}

	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "kmem_io_access", 0);

	return (DDI_SUCCESS);
}

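/*
 * getinfo(9E) entry point: report our devinfo pointer and instance 0.
 */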
/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)mm_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

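/*
 * open(9E) entry point: only the known minors may be opened, and only
 * as character devices.
 */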
/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
	switch (getminor(*devp)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		/* standard devices */
		break;

	default:
		/* Unsupported or unknown type */
		return (EINVAL);
	}
	/* must be character device */
	if (typ != OTYP_CHR)
		return (EINVAL);
	return (0);
}

struct pollhead mm_pollhd;

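/*
 * chpoll(9E) entry point: the memory devices never block, so all
 * requested events are asserted immediately.
 */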
/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	switch (getminor(dev)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
		/*
		 * A non-NULL pollhead pointer must be returned in case
		 * the user polls for zero events.
		 */
		*phpp = !anyyet && !*reventsp ?
		    &mm_pollhd : (struct pollhead *)NULL;
		return (0);
	default:
		/* no other devices currently support polling */
		return (ENXIO);
	}
}

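/*
 * prop_op(9E) entry point; see the comment below on why a zero size
 * is reported.
 */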
static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * Implement zero size to reduce overhead: this avoids two
	 * failing property lookups per stat().
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}

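/*
 * Common single-page I/O routine: map the target page frame into
 * kernel VA (through kpm when available, otherwise via a locked
 * hat_devload() of the reserved mm_map page) and uiomove() through it.
 * Non-memory (device) pfns are accessed with ddi_peekpokeio() instead,
 * and only when allowio permits.
 */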
static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
    page_t *pp)
{
	int error = 0;
	int devload = 0;
	int is_memory = pf_is_memory(pfn);
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);
	caddr_t va = NULL;

	mutex_enter(&mm_lock);

	if (is_memory && kpm_enable) {
		if (pp)
			va = hat_kpm_mapin(pp, NULL);
		else
			va = hat_kpm_mapin_pfn(pfn);
	}

	if (va == NULL) {
		hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
		    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
		    HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
		va = mm_map;
		devload = 1;
	}

	if (!is_memory) {
		if (allowio) {
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else
		error = uiomove(va + pageoff, nbytes, rw, uio);

	if (devload)
		hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	else if (pp)
		hat_kpm_mapout(pp, NULL, va);
	else
		hat_kpm_mapout_pfn(pfn);

	mutex_exit(&mm_lock);
	return (error);
}

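/*
 * Return nonzero if the segment backing the given kernel VA reports
 * S_CAPABILITY_NOMINFLT, i.e. it can be page-locked without risking
 * a minor fault.
 */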
static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int i;

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_segat(as, va);
	i = (seg != NULL) ? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
	AS_LOCK_EXIT(as);

	return (i);
}

#ifdef __sparc

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */

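/*
 * Common read/write loop for all memory device minors.  Each chunk is
 * validated against the appropriate backing store (phys_install for
 * /dev/mem, the kernel HAT for /dev/[all]kmem) and then handed to
 * mmio() one page at a time.
 */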
/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0, NULL);
			break;

		case M_KMEM:
		case M_ALLKMEM: {
			page_t **ppp = NULL;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
				break;

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure. Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context so we avoid it
			 * using the try_lock set check above. Some day when
			 * the kernel page locking gets redesigned all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access,
			    (locked && ppp) ? *ppp : NULL);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
			break;
		}

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;
		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}

/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
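/*
 * A user-level consumer would use it roughly as in this hypothetical
 * sketch, where fd is an open descriptor for /dev/kmem and a NULL
 * m_as means "the calling process's address space":
 *
 *	mem_vtop_t vtop;
 *	vtop.m_as = NULL;
 *	vtop.m_va = (void *)va;
 *	if (ioctl(fd, MEM_VTOP, &vtop) == 0)
 *		... vtop.m_pfn now holds the page frame number ...
 */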
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}

/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
	extern int page_retire_test(void);
	uint64_t pa;

	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
		return (EFAULT);
	}

	switch (cmd) {
	case MEM_PAGE_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_UNRETIRE:
		return (page_unretire(pa));

	case MEM_PAGE_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_RETIRE_MCE:
		return (page_retire(pa, PR_MCE));

	case MEM_PAGE_RETIRE_UE:
		return (page_retire(pa, PR_UE));

	case MEM_PAGE_GETERRORS:
	{
		uint64_t page_errors;
		int rc = page_retire_check(pa, &page_errors);
		if (copyout(&page_errors, (void *)data,
		    sizeof (uint64_t))) {
			return (EFAULT);
		}
		return (rc);
	}

	case MEM_PAGE_RETIRE_TEST:
		return (page_retire_test());
	}

	return (EINVAL);
}

#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address, return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) != 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		return (err);
	}

	if (len >= mem_name.m_namelen) {
		kmem_free(buf, bufsize);
		return (ENOSPC);
	}

	if (copyoutstr(buf, (char *)mem_name.m_name,
	    mem_name.m_namelen, NULL) != 0) {
		kmem_free(buf, bufsize);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	return (0);
}

/*
 * Given a syndrome and address, return information about the
 * associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
	mem_info_t mem_info;
	int err;

	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
		return (EFAULT);

	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
		return (err);

	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Given a memory name, return its associated serial id.
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	void *name;
	size_t name_len;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) != 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	if (mem_name.m_namelen > 1024)
		mem_name.m_namelen = 1024;	/* cap at 1024 bytes */

	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
	    mem_name.m_namelen, &name_len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	if (len > mem_name.m_sidlen) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_sid,
	    mem_name.m_sidlen, NULL) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	kmem_free(name, mem_name.m_namelen);
	return (0);
}
#endif	/* __sparc */

/*
 * Private ioctls: libkvm support for kvm_physaddr(), and FMA support
 * for page_retire() and memory attribute information.
 */
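/*
 * For example, an FMA agent could check whether a physical page has
 * been retired with something like this hypothetical sketch, where fd
 * is an open descriptor for /dev/mem:
 *
 *	uint64_t pa = ...;
 *	int rc = ioctl(fd, MEM_PAGE_ISRETIRED, &pa);
 */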
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
		return (ENXIO);

	switch (cmd) {
	case MEM_VTOP:
		return (mmioctl_vtop(data));

	case MEM_PAGE_RETIRE:
	case MEM_PAGE_ISRETIRED:
	case MEM_PAGE_UNRETIRE:
	case MEM_PAGE_RETIRE_MCE:
	case MEM_PAGE_RETIRE_UE:
	case MEM_PAGE_GETERRORS:
	case MEM_PAGE_RETIRE_TEST:
		return (mmioctl_page_retire(cmd, data));

#ifdef __sparc
	case MEM_NAME:
		return (mmioctl_get_mem_name(data));

	case MEM_INFO:
		return (mmioctl_get_mem_info(data));

	case MEM_SID:
		return (mmioctl_get_mem_sid(data));
#else
	case MEM_NAME:
	case MEM_INFO:
	case MEM_SID:
		return (ENOTSUP);
#endif	/* __sparc */
	}
	return (ENXIO);
}

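/*
 * mmap(9E) entry point: for /dev/mem, translate an offset that falls
 * within phys_install into a page frame number.  Everything else is
 * unmappable here.
 */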
/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
	pfn_t pf;
	struct memlist *pmem;
	minor_t minor = getminor(dev);

	switch (minor) {
	case M_MEM:
		pf = btop(off);
		memlist_read_lock();
		for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
			if (pf >= BTOP(pmem->ml_address) &&
			    pf < BTOP(pmem->ml_address + pmem->ml_size)) {
				memlist_read_unlock();
				return (impl_obmem_pfnum(pf));
			}
		}
		memlist_read_unlock();
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* no longer supported with KPR */
		return (-1);

	case M_ZERO:
		/*
		 * We shouldn't be mmap'ing to /dev/zero here as
		 * mmsegmap() should have already converted
		 * a mapping request for this device to a mapping
		 * using seg_vn for anonymous memory.
		 */
		break;
	}
	return (-1);
}

792
793 /*
794 * This function is called when a memory device is mmap'ed.
795 * Set up the mapping to the correct device driver.
796 */
797 static int
mmsegmap(dev_t dev,off_t off,struct as * as,caddr_t * addrp,off_t len,uint_t prot,uint_t maxprot,uint_t flags,struct cred * cred)798 mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
799 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
800 {
801 struct segvn_crargs vn_a;
802 struct segdev_crargs dev_a;
803 int error;
804 minor_t minor;
805 off_t i;
806
807 minor = getminor(dev);
808
809 as_rangelock(as);
810 /*
811 * No need to worry about vac alignment on /dev/zero
812 * since this is a "clone" object that doesn't yet exist.
813 */
814 error = choose_addr(as, addrp, len, off,
815 (minor == M_MEM) || (minor == M_KMEM), flags);
816 if (error != 0) {
817 as_rangeunlock(as);
818 return (error);
819 }
820
821 switch (minor) {
822 case M_MEM:
823 /* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
824 if ((flags & MAP_TYPE) != MAP_SHARED) {
825 as_rangeunlock(as);
826 return (EINVAL);
827 }
828
829 /*
830 * Check to ensure that the entire range is
831 * legal and we are not trying to map in
832 * more than the device will let us.
833 */
834 for (i = 0; i < len; i += PAGESIZE) {
835 if (mmmmap(dev, off + i, maxprot) == -1) {
836 as_rangeunlock(as);
837 return (ENXIO);
838 }
839 }
840
841 /*
842 * Use seg_dev segment driver for /dev/mem mapping.
843 */
844 dev_a.mapfunc = mmmmap;
845 dev_a.dev = dev;
846 dev_a.offset = off;
847 dev_a.type = (flags & MAP_TYPE);
848 dev_a.prot = (uchar_t)prot;
849 dev_a.maxprot = (uchar_t)maxprot;
850 dev_a.hat_attr = 0;
851
		/*
		 * Make /dev/mem mappings non-consistent since we can't
		 * alias pages that don't have page structs behind them,
		 * such as kernel stack pages. If someone mmap()s a kernel
		 * stack page and we give him a tte with cv, a line from
		 * that page can get into both pages of the spitfire d$.
		 * But a snoop from another processor will only invalidate
		 * the first page. This once caused the kernel (xc_attention)
		 * to go into an infinite loop at pil 13 with no interrupts
		 * able to come in. See 1203630.
		 */
		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
		dev_a.devmap_data = NULL;

		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	case M_ZERO:
		/*
		 * Use seg_vn segment driver for /dev/zero mapping.
		 * Passing in a NULL amp gives us the "cloning" effect.
		 */
		vn_a.vp = NULL;
		vn_a.offset = 0;
		vn_a.type = (flags & MAP_TYPE);
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.cred = cred;
		vn_a.amp = NULL;
		vn_a.szc = 0;
		vn_a.lgrp_mem_policy_flags = 0;
		error = as_map(as, *addrp, len, segvn_create, &vn_a);
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* No longer supported with KPR. */
		error = ENXIO;
		break;

	case M_NULL:
		/*
		 * Use seg_dev segment driver for /dev/null mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
		dev_a.hat_attr = 0;
		dev_a.hat_flags = 0;
		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	default:
		error = ENXIO;
	}

	as_rangeunlock(as);
	return (error);
}

static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops, "memory driver", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

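/*
 * Loadable module entry points.
 */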
int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

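/*
 * kstat update: size the snapshot as one (address, size) pair per
 * phys_install memlist entry.
 */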
static int
mm_kstat_update(kstat_t *ksp, int rw)
{
	struct memlist *pmem;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
		count++;
	}
	memlist_read_unlock();

	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

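/*
 * kstat snapshot: copy the phys_install memlist into the buffer sized
 * by mm_kstat_update() above.
 */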
static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct memlist *pmem;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL;
	    pmem = pmem->ml_next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;
		kspmem->address = pmem->ml_address;
		kspmem->size = pmem->ml_size;
	}
	memlist_read_unlock();

	return (0);
}

/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		mem_name32_t mem_name32;

		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
			return (EFAULT);
		mem_name->m_addr = mem_name32.m_addr;
		mem_name->m_synd = mem_name32.m_synd;
		mem_name->m_type[0] = mem_name32.m_type[0];
		mem_name->m_type[1] = mem_name32.m_type[1];
		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
	}
#endif	/* _SYSCALL32 */

	return (0);
}