/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * Copyright 2017 Joyent, Inc.
 * Copyright 2017 James S Blachly, MD <james.blachly@gmail.com>
 */

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>
#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif	/* __sparc */

/*
 * Turn a byte length into a pagecount.  The DDI btop takes a
 * 32-bit size on 32-bit machines; this handles 64-bit sizes for
 * large physical-memory 32-bit machines.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))

static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

#define	MM_KMEMLOG_NENTRIES	64

static int mm_kmemlogent;
static mm_logentry_t mm_kmemlog[MM_KMEMLOG_NENTRIES];

/*
 * On kmem/allkmem writes, we log information that might be useful in the
 * event that a write is errant (that is, due to operator error) and
 * induces a later problem. Note that (in particular) in the event of such
 * operator-induced corruption, a search over the kernel address space for
 * the corrupted address will yield the ring buffer entry that recorded
 * the write. And should it seem baroque or otherwise unnecessary, yes, we
 * need this kind of auditing facility and yes, we learned that the hard
 * way: disturbingly, there exist recommendations for "tuning" the system
 * that involve writing to kernel memory addresses via the kernel
 * debugger, and -- as we discovered -- these can easily be applied
 * incorrectly or unsafely, yielding an entirely undebuggable "can't
 * happen" kind of panic.
 */
static void
mm_logkmem(struct uio *uio)
{
	mm_logentry_t *ent;
	proc_t *p = curthread->t_procp;

	mutex_enter(&mm_lock);

	ent = &mm_kmemlog[mm_kmemlogent++];

	if (mm_kmemlogent == MM_KMEMLOG_NENTRIES)
		mm_kmemlogent = 0;

	ent->mle_vaddr = (uintptr_t)uio->uio_loffset;
	ent->mle_len = uio->uio_resid;
	gethrestime(&ent->mle_hrestime);
	ent->mle_hrtime = gethrtime();
	ent->mle_pid = p->p_pidp->pid_id;

	(void) strncpy(ent->mle_psargs,
	    p->p_user.u_psargs, sizeof (ent->mle_psargs));

	mutex_exit(&mm_lock);
}

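/*
 * Attach: create the minor nodes (mem, kmem, allkmem, null, zero, full),
 * reserve a page of kernel virtual address space for transient mappings,
 * install the raw "phys_installed" kstat, and note whether the
 * kmem_io_access property permits /dev/kmem access to I/O space.
 */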
/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	struct mem_minor {
		char *name;
		minor_t minor;
		int privonly;
		const char *rdpriv;
		const char *wrpriv;
		mode_t priv_mode;
	} mm[] = {
		{ "mem", M_MEM, 0, NULL, "all", 0640 },
		{ "kmem", M_KMEM, 0, NULL, "all", 0640 },
		{ "allkmem", M_ALLKMEM, 0, "all", "all", 0600 },
		{ "null", M_NULL, PRIVONLY_DEV, NULL, NULL, 0666 },
		{ "zero", M_ZERO, PRIVONLY_DEV, NULL, NULL, 0666 },
		{ "full", M_FULL, PRIVONLY_DEV, NULL, NULL, 0666 },
	};
	kstat_t *ksp;

	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
		    DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	mm_dip = devi;

	ksp = kstat_create("mm", 0, "phys_installed", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = mm_kstat_update;
		ksp->ks_snapshot = mm_kstat_snapshot;
		ksp->ks_lock = &mm_lock;	/* XXX - not really needed */
		kstat_install(ksp);
	}

	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "kmem_io_access", 0);

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	register int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)mm_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
	switch (getminor(*devp)) {
	case M_NULL:
	case M_ZERO:
	case M_FULL:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		/* standard devices */
		break;

	default:
		/* Unsupported or unknown type */
		return (EINVAL);
	}
	/* must be character device */
	if (typ != OTYP_CHR)
		return (EINVAL);
	return (0);
}

struct pollhead mm_pollhd;

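/*
 * Poll entry point: the memory special files never block, so all of the
 * requested read/write events are reported as immediately ready; the
 * shared pollhead is returned only to zero-event and edge-triggered
 * (POLLET) callers.
 */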
/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	switch (getminor(dev)) {
	case M_NULL:
	case M_ZERO:
	case M_FULL:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
		/*
		 * A non-NULL pollhead pointer should be returned in case the
		 * user polls for 0 events or is doing an edge-triggered poll.
		 */
		if ((!*reventsp && !anyyet) || (events & POLLET)) {
			*phpp = &mm_pollhd;
		}
		return (0);
	default:
		/* no other devices currently support polling */
		return (ENXIO);
	}
}

static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * implement zero size to reduce overhead (avoid two failing
	 * property lookups per stat).
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}

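/*
 * Perform the I/O to a single page: map the target pfn -- through
 * seg_kpm when possible, otherwise via a transient hat_devload() of the
 * reserved mm_map page -- and copy with uiomove(). Non-memory (device)
 * pfns are accessed through ddi_peekpokeio() when allowio permits, so
 * that faults are contained.
 */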
static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
    page_t *pp)
{
	int error = 0;
	int devload = 0;
	int is_memory = pf_is_memory(pfn);
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);
	caddr_t va = NULL;

	mutex_enter(&mm_lock);

	if (is_memory && kpm_enable) {
		if (pp)
			va = hat_kpm_mapin(pp, NULL);
		else
			va = hat_kpm_mapin_pfn(pfn);
	}

	if (va == NULL) {
		hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
		    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
		    HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
		va = mm_map;
		devload = 1;
	}

	if (!is_memory) {
		if (allowio) {
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else
		error = uiomove(va + pageoff, nbytes, rw, uio);

	if (devload)
		hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	else if (pp)
		hat_kpm_mapout(pp, NULL, va);
	else
		hat_kpm_mapout_pfn(pfn);

	mutex_exit(&mm_lock);
	return (error);
}

#ifdef __sparc

static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int i;

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_segat(as, va);
	i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
	AS_LOCK_EXIT(as);

	return (i);
}

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */

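/*
 * Common read/write path for the memory special files. The uio is
 * consumed one iovec at a time, dispatching on the minor: /dev/mem
 * offsets are bounds-checked against the phys_install memlist,
 * /dev/[all]kmem addresses are translated to pfns (page-locked first
 * where the platform requires it), and the null/zero/full semantics are
 * implemented inline.
 */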
/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0, NULL);
			break;

		case M_KMEM:
		case M_ALLKMEM:
		{
			page_t **ppp = NULL;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
				break;

			if (rw == UIO_WRITE)
				mm_logkmem(uio);

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure. Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context so we avoid it
			 * using the try_lock set check above. Some day when
			 * the kernel page locking gets redesigned all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access,
			    (locked && ppp) ? *ppp : NULL);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
		}

			break;

		case M_FULL:
			if (rw == UIO_WRITE) {
				error = ENOSPC;
				break;
			}
			/* else it's a read, fall through to zero case */
			/*FALLTHROUGH*/

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;

		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}

/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}

/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
	extern int page_retire_test(void);
	uint64_t pa;

	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
		return (EFAULT);
	}

	switch (cmd) {
	case MEM_PAGE_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_UNRETIRE:
		return (page_unretire(pa));

	case MEM_PAGE_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_RETIRE_MCE:
		return (page_retire(pa, PR_MCE));

	case MEM_PAGE_RETIRE_UE:
		return (page_retire(pa, PR_UE));

	case MEM_PAGE_GETERRORS:
		{
			uint64_t page_errors;
			int rc = page_retire_check(pa, &page_errors);
			if (copyout(&page_errors, (void *)data,
			    sizeof (uint64_t))) {
				return (EFAULT);
			}
			return (rc);
		}

	case MEM_PAGE_RETIRE_TEST:
		return (page_retire_test());

	}

	return (EINVAL);
}

#ifdef __sparc
/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		mem_name32_t mem_name32;

		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
			return (EFAULT);
		mem_name->m_addr = mem_name32.m_addr;
		mem_name->m_synd = mem_name32.m_synd;
		mem_name->m_type[0] = mem_name32.m_type[0];
		mem_name->m_type[1] = mem_name32.m_type[1];
		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
	}
#endif	/* _SYSCALL32 */

	return (0);
}

/*
 * Given a syndrome, syndrome type, and address return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) != 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		return (err);
	}

	if (len >= mem_name.m_namelen) {
		kmem_free(buf, bufsize);
		return (ENOSPC);
	}

	if (copyoutstr(buf, (char *)mem_name.m_name,
	    mem_name.m_namelen, NULL) != 0) {
		kmem_free(buf, bufsize);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	return (0);
}

/*
 * Given a syndrome and address return information about the associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
	mem_info_t mem_info;
	int err;

	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
		return (EFAULT);

	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
		return (err);

	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Given a memory name, return its associated serial id
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	void *name;
	size_t name_len;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) != 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	if (mem_name.m_namelen > 1024)
		mem_name.m_namelen = 1024;	/* cap at 1024 bytes */

	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
	    mem_name.m_namelen, &name_len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	if (len > mem_name.m_sidlen) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_sid,
	    mem_name.m_sidlen, NULL) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	kmem_free(name, mem_name.m_namelen);
	return (0);
}
#endif	/* __sparc */

/*
 * Private ioctls for
 *	libkvm to support kvm_physaddr().
 *	FMA support for page_retire() and memory attribute information.
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
		return (ENXIO);

	switch (cmd) {
	case MEM_VTOP:
		return (mmioctl_vtop(data));

	case MEM_PAGE_RETIRE:
	case MEM_PAGE_ISRETIRED:
	case MEM_PAGE_UNRETIRE:
	case MEM_PAGE_RETIRE_MCE:
	case MEM_PAGE_RETIRE_UE:
	case MEM_PAGE_GETERRORS:
	case MEM_PAGE_RETIRE_TEST:
		return (mmioctl_page_retire(cmd, data));

#ifdef __sparc
	case MEM_NAME:
		return (mmioctl_get_mem_name(data));

	case MEM_INFO:
		return (mmioctl_get_mem_info(data));

	case MEM_SID:
		return (mmioctl_get_mem_sid(data));
#else
	case MEM_NAME:
	case MEM_INFO:
	case MEM_SID:
		return (ENOTSUP);
#endif	/* __sparc */
	}
	return (ENXIO);
}

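/*
 * mmap entry point: for /dev/mem, translate a physical offset into a
 * page frame number, provided it falls within installed physical
 * memory; every other minor (and any out-of-range offset) fails with -1.
 */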
/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
	pfn_t pf;
	struct memlist *pmem;
	minor_t minor = getminor(dev);

	switch (minor) {
	case M_MEM:
		pf = btop(off);
		memlist_read_lock();
		for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
			if (pf >= BTOP(pmem->ml_address) &&
			    pf < BTOP(pmem->ml_address + pmem->ml_size)) {
				memlist_read_unlock();
				return (impl_obmem_pfnum(pf));
			}
		}
		memlist_read_unlock();
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* no longer supported with KPR */
		return (-1);

	case M_FULL:
	case M_ZERO:
		/*
		 * We shouldn't be mmap'ing to /dev/zero here as
		 * mmsegmap() should have already converted
		 * a mapping request for this device to a mapping
		 * using seg_vn for anonymous memory.
		 */
		break;

	}
	return (-1);
}

/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct segdev_crargs dev_a;
	int error;
	minor_t minor;
	off_t i;

	minor = getminor(dev);

	as_rangelock(as);
	/*
	 * No need to worry about vac alignment on /dev/zero
	 * since this is a "clone" object that doesn't yet exist.
	 */
	error = choose_addr(as, addrp, len, off,
	    (minor == M_MEM) || (minor == M_KMEM), flags);
	if (error != 0) {
		as_rangeunlock(as);
		return (error);
	}

	switch (minor) {
	case M_MEM:
		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
		if ((flags & MAP_TYPE) != MAP_SHARED) {
			as_rangeunlock(as);
			return (EINVAL);
		}

		/*
		 * Check to ensure that the entire range is
		 * legal and we are not trying to map in
		 * more than the device will let us.
		 */
		for (i = 0; i < len; i += PAGESIZE) {
			if (mmmmap(dev, off + i, maxprot) == -1) {
				as_rangeunlock(as);
				return (ENXIO);
			}
		}

		/*
		 * Use seg_dev segment driver for /dev/mem mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = (flags & MAP_TYPE);
		dev_a.prot = (uchar_t)prot;
		dev_a.maxprot = (uchar_t)maxprot;
		dev_a.hat_attr = 0;

		/*
		 * Make /dev/mem mappings non-consistent since we can't
		 * alias pages that don't have page structs behind them,
		 * such as kernel stack pages. If someone mmap()s a kernel
		 * stack page and if we give them a tte with cv, a line from
		 * that page can get into both pages of the spitfire d$.
		 * But snoop from another processor will only invalidate
		 * the first page. This later caused kernel (xc_attention)
		 * to go into an infinite loop at pil 13 and no interrupts
		 * could come in. See 1203630.
		 */
		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
		dev_a.devmap_data = NULL;

		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	case M_ZERO:
		/*
		 * Use seg_vn segment driver for /dev/zero mapping.
		 * Passing in a NULL amp gives us the "cloning" effect.
		 */
		vn_a.vp = NULL;
		vn_a.offset = 0;
		vn_a.type = (flags & MAP_TYPE);
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.cred = cred;
		vn_a.amp = NULL;
		vn_a.szc = 0;
		vn_a.lgrp_mem_policy_flags = 0;
		error = as_map(as, *addrp, len, segvn_create, &vn_a);
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* No longer supported with KPR. */
		error = ENXIO;
		break;

	case M_NULL:
		/*
		 * Use seg_dev segment driver for /dev/null mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
		dev_a.hat_attr = 0;
		dev_a.hat_flags = 0;
		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	default:
		error = ENXIO;
	}

	as_rangeunlock(as);
	return (error);
}

static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops, "memory driver", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

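/*
 * kstat update: size the raw "phys_installed" kstat as one
 * (address, size) pair per phys_install memlist entry.
 */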
static int
mm_kstat_update(kstat_t *ksp, int rw)
{
	struct memlist *pmem;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
		count++;
	}
	memlist_read_unlock();

	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

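/*
 * kstat snapshot: copy the (address, size) pairs of the phys_install
 * memlist into the caller's buffer, stopping at the sized limit.
 */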
static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct memlist *pmem;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL;
	    pmem = pmem->ml_next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;
		kspmem->address = pmem->ml_address;
		kspmem->size = pmem->ml_size;
	}
	memlist_read_unlock();

	return (0);
}