/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
 */

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>

#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/memlist.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif	/* __sparc */

/*
 * Turn a byte length into a pagecount.  The DDI btop takes a
 * 32-bit size on 32-bit machines, this handles 64-bit sizes for
 * large physical-memory 32-bit machines.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))

static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);

#define	MM_KMEMLOG_NENTRIES	64

static int mm_kmemlogent;
static mm_logentry_t mm_kmemlog[MM_KMEMLOG_NENTRIES];

/*
 * On kmem/allmem writes, we log information that might be useful in the event
 * that a write is errant (that is, due to operator error) and induces a later
 * problem.  Note that (in particular) in the event of such operator-induced
 * corruption, a search over the kernel address space for the corrupted
 * address will yield the ring buffer entry that recorded the write.  And
 * should it seem baroque or otherwise unnecessary, yes, we need this kind of
 * auditing facility and yes, we learned that the hard way: disturbingly,
 * there exist recommendations for "tuning" the system that involve writing to
 * kernel memory addresses via the kernel debugger, and -- as we discovered --
 * these can easily be applied incorrectly or unsafely, yielding an entirely
 * undebuggable "can't happen" kind of panic.
 */
static void
mm_logkmem(struct uio *uio)
{
	mm_logentry_t *ent;
	proc_t *p = curthread->t_procp;

	mutex_enter(&mm_lock);

	ent = &mm_kmemlog[mm_kmemlogent++];

	if (mm_kmemlogent == MM_KMEMLOG_NENTRIES)
		mm_kmemlogent = 0;

	ent->mle_vaddr = (uintptr_t)uio->uio_loffset;
	ent->mle_len = uio->uio_resid;
	gethrestime(&ent->mle_hrestime);
	ent->mle_hrtime = gethrtime();
	ent->mle_pid = p->p_pidp->pid_id;

	(void) strncpy(ent->mle_psargs,
	    p->p_user.u_psargs, sizeof (ent->mle_psargs));

	mutex_exit(&mm_lock);
}

/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	struct mem_minor {
		char *name;
		minor_t minor;
		int privonly;
		const char *rdpriv;
		const char *wrpriv;
		mode_t priv_mode;
	} mm[] = {
		{ "mem", M_MEM, 0, NULL, "all", 0640 },
		{ "kmem", M_KMEM, 0, NULL, "all", 0640 },
		{ "allkmem", M_ALLKMEM, 0, "all", "all", 0600 },
		{ "null", M_NULL, PRIVONLY_DEV, NULL, NULL, 0666 },
		{ "zero", M_ZERO, PRIVONLY_DEV, NULL, NULL, 0666 },
	};
	kstat_t *ksp;

	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
		    DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	mm_dip = devi;

	ksp = kstat_create("mm", 0, "phys_installed", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = mm_kstat_update;
		ksp->ks_snapshot = mm_kstat_snapshot;
		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
		kstat_install(ksp);
	}

	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "kmem_io_access", 0);

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	register int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)mm_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
	switch (getminor(*devp)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		/* standard devices */
		break;

	default:
		/* Unsupported or unknown type */
		return (EINVAL);
	}
	/* must be character device */
	if (typ != OTYP_CHR)
		return (EINVAL);
	return (0);
}

struct pollhead mm_pollhd;

/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	switch (getminor(dev)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
		/*
		 * A non NULL pollhead pointer should be returned in case
		 * user polls for 0 events.
		 */
		*phpp = !anyyet && !*reventsp ?
		    &mm_pollhd : (struct pollhead *)NULL;
		return (0);
	default:
		/* no other devices currently support polling */
		return (ENXIO);
	}
}

static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * implement zero size to reduce overhead (avoid two failing
	 * property lookups per stat).
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}

static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
    page_t *pp)
{
	int error = 0;
	int devload = 0;
	int is_memory = pf_is_memory(pfn);
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);
	caddr_t va = NULL;

	mutex_enter(&mm_lock);

	if (is_memory && kpm_enable) {
		if (pp)
			va = hat_kpm_mapin(pp, NULL);
		else
			va = hat_kpm_mapin_pfn(pfn);
	}

	if (va == NULL) {
		hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
		    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
		    HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
		va = mm_map;
		devload = 1;
	}

	if (!is_memory) {
		if (allowio) {
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else
		error = uiomove(va + pageoff, nbytes, rw, uio);

	if (devload)
		hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	else if (pp)
		hat_kpm_mapout(pp, NULL, va);
	else
		hat_kpm_mapout_pfn(pfn);

	mutex_exit(&mm_lock);
	return (error);
}

static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int i;

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_segat(as, va);
	i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
	AS_LOCK_EXIT(as);

	return (i);
}

#ifdef __sparc

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */

/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0, NULL);
			break;

		case M_KMEM:
		case M_ALLKMEM:
		{
			page_t **ppp = NULL;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
				break;

			if (rw == UIO_WRITE)
				mm_logkmem(uio);

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure. Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context so we avoid it
			 * using the try_lock set check above. Some day when
			 * the kernel page locking gets redesigned all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access,
			    (locked && ppp) ? *ppp : NULL);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
		}

			break;

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;

		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}

/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}

/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
	extern int page_retire_test(void);
	uint64_t pa;

	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
		return (EFAULT);
	}

	switch (cmd) {
	case MEM_PAGE_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_UNRETIRE:
		return (page_unretire(pa));

	case MEM_PAGE_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_RETIRE_MCE:
		return (page_retire(pa, PR_MCE));

	case MEM_PAGE_RETIRE_UE:
		return (page_retire(pa, PR_UE));

	case MEM_PAGE_GETERRORS:
		{
			uint64_t page_errors;
			int rc = page_retire_check(pa, &page_errors);
			if (copyout(&page_errors, (void *)data,
			    sizeof (uint64_t))) {
				return (EFAULT);
			}
			return (rc);
		}

	case MEM_PAGE_RETIRE_TEST:
		return (page_retire_test());

	}

	return (EINVAL);
}

#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		return (err);
	}

	if (len >= mem_name.m_namelen) {
		kmem_free(buf, bufsize);
		return (ENOSPC);
	}

	if (copyoutstr(buf, (char *)mem_name.m_name,
	    mem_name.m_namelen, NULL) != 0) {
		kmem_free(buf, bufsize);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	return (0);
}

/*
 * Given a syndrome and address return information about the associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
	mem_info_t mem_info;
	int err;

	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
		return (EFAULT);

	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
		return (err);

	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Given a memory name, return its associated serial id
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	void *name;
	size_t name_len;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	if (mem_name.m_namelen > 1024)
		mem_name.m_namelen = 1024; /* cap at 1024 bytes */

	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
	    mem_name.m_namelen, &name_len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	if (len > mem_name.m_sidlen) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_sid,
	    mem_name.m_sidlen, NULL) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	kmem_free(name, mem_name.m_namelen);
	return (0);
}
#endif	/* __sparc */

/*
 * Private ioctls for
 *	libkvm to support kvm_physaddr().
 *	FMA support for page_retire() and memory attribute information.
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
		return (ENXIO);

	switch (cmd) {
	case MEM_VTOP:
		return (mmioctl_vtop(data));

	case MEM_PAGE_RETIRE:
	case MEM_PAGE_ISRETIRED:
	case MEM_PAGE_UNRETIRE:
	case MEM_PAGE_RETIRE_MCE:
	case MEM_PAGE_RETIRE_UE:
	case MEM_PAGE_GETERRORS:
	case MEM_PAGE_RETIRE_TEST:
		return (mmioctl_page_retire(cmd, data));

#ifdef __sparc
	case MEM_NAME:
		return (mmioctl_get_mem_name(data));

	case MEM_INFO:
		return (mmioctl_get_mem_info(data));

	case MEM_SID:
		return (mmioctl_get_mem_sid(data));
#else
	case MEM_NAME:
	case MEM_INFO:
	case MEM_SID:
		return (ENOTSUP);
#endif	/* __sparc */
	}
	return (ENXIO);
}

/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
	pfn_t pf;
	struct memlist *pmem;
	minor_t minor = getminor(dev);

	switch (minor) {
	case M_MEM:
		pf = btop(off);
		memlist_read_lock();
		for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
			if (pf >= BTOP(pmem->ml_address) &&
			    pf < BTOP(pmem->ml_address + pmem->ml_size)) {
				memlist_read_unlock();
				return (impl_obmem_pfnum(pf));
			}
		}
		memlist_read_unlock();
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* no longer supported with KPR */
		return (-1);

	case M_ZERO:
		/*
		 * We shouldn't be mmap'ing to /dev/zero here as
		 * mmsegmap() should have already converted
		 * a mapping request for this device to a mapping
		 * using seg_vn for anonymous memory.
		 */
		break;

	}
	return (-1);
}

/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct segdev_crargs dev_a;
	int error;
	minor_t minor;
	off_t i;

	minor = getminor(dev);

	as_rangelock(as);
	/*
	 * No need to worry about vac alignment on /dev/zero
	 * since this is a "clone" object that doesn't yet exist.
	 */
	error = choose_addr(as, addrp, len, off,
	    (minor == M_MEM) || (minor == M_KMEM), flags);
	if (error != 0) {
		as_rangeunlock(as);
		return (error);
	}

	switch (minor) {
	case M_MEM:
		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
		if ((flags & MAP_TYPE) != MAP_SHARED) {
			as_rangeunlock(as);
			return (EINVAL);
		}

		/*
		 * Check to ensure that the entire range is
		 * legal and we are not trying to map in
		 * more than the device will let us.
		 */
		for (i = 0; i < len; i += PAGESIZE) {
			if (mmmmap(dev, off + i, maxprot) == -1) {
				as_rangeunlock(as);
				return (ENXIO);
			}
		}

		/*
		 * Use seg_dev segment driver for /dev/mem mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = (flags & MAP_TYPE);
		dev_a.prot = (uchar_t)prot;
		dev_a.maxprot = (uchar_t)maxprot;
		dev_a.hat_attr = 0;

		/*
		 * Make /dev/mem mappings non-consistent since we can't
		 * alias pages that don't have page structs behind them,
		 * such as kernel stack pages. If someone mmap()s a kernel
		 * stack page and if we give them a tte with cv, a line from
		 * that page can get into both pages of the spitfire d$.
		 * But snoop from another processor will only invalidate
		 * the first page. This later caused kernel (xc_attention)
		 * to go into an infinite loop at pil 13 and no interrupts
		 * could come in. See 1203630.
		 *
		 */
		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
		dev_a.devmap_data = NULL;

		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	case M_ZERO:
		/*
		 * Use seg_vn segment driver for /dev/zero mapping.
		 * Passing in a NULL amp gives us the "cloning" effect.
		 */
		vn_a.vp = NULL;
		vn_a.offset = 0;
		vn_a.type = (flags & MAP_TYPE);
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.cred = cred;
		vn_a.amp = NULL;
		vn_a.szc = 0;
		vn_a.lgrp_mem_policy_flags = 0;
		error = as_map(as, *addrp, len, segvn_create, &vn_a);
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* No longer supported with KPR. */
		error = ENXIO;
		break;

	case M_NULL:
		/*
		 * Use seg_dev segment driver for /dev/null mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
		dev_a.hat_attr = 0;
		dev_a.hat_flags = 0;
		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	default:
		error = ENXIO;
	}

	as_rangeunlock(as);
	return (error);
}

static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops, "memory driver", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

static int
mm_kstat_update(kstat_t *ksp, int rw)
{
	struct memlist *pmem;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
		count++;
	}
	memlist_read_unlock();

	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct memlist *pmem;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL;
	    pmem = pmem->ml_next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;
		kspmem->address = pmem->ml_address;
		kspmem->size = pmem->ml_size;
	}
	memlist_read_unlock();

	return (0);
}

/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32
	else {
		mem_name32_t mem_name32;

		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
			return (EFAULT);
		mem_name->m_addr = mem_name32.m_addr;
		mem_name->m_synd = mem_name32.m_synd;
		mem_name->m_type[0] = mem_name32.m_type[0];
		mem_name->m_type[1] = mem_name32.m_type[1];
		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
	}
#endif	/* _SYSCALL32 */

	return (0);
}