/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>

#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif	/* __sparc */

/*
 * Turn a byte length into a pagecount.  The DDI btop takes a
 * 32-bit size on 32-bit machines; this macro handles 64-bit sizes for
 * large physical-memory 32-bit machines.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))
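/*
 * Added commentary (illustrative, not from the original source): with
 * 4KB pages (_pageshift == 12), a 6GB offset -- too big for a 32-bit
 * size_t, so the DDI btop() would truncate it -- still converts
 * correctly when widened first:
 *
 *	BTOP((u_offset_t)0x180000000ULL) == 0x180000
 */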
static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
static int mm_read_mem_page(intptr_t data, mem_page_t *mpage);
static int mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl);
static int mm_get_paddr(nvlist_t *nvl, uint64_t *paddr);

/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	struct mem_minor {
		char *name;
		minor_t minor;
		int privonly;
		const char *rdpriv;
		const char *wrpriv;
		mode_t priv_mode;
	} mm[] = {
		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
		{ "null",	M_NULL,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
		{ "zero",	M_ZERO,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
	};
	kstat_t *ksp;

	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
		    DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	mm_dip = devi;

	ksp = kstat_create("mm", 0, "phys_installed", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = mm_kstat_update;
		ksp->ks_snapshot = mm_kstat_snapshot;
		ksp->ks_lock = &mm_lock;	/* XXX - not really needed */
		kstat_install(ksp);
	}

	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "kmem_io_access", 0);

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)mm_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
	switch (getminor(*devp)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		/* standard devices */
		break;

	default:
		/* Unsupported or unknown type */
		return (EINVAL);
	}
	/* must be character device */
	if (typ != OTYP_CHR)
		return (EINVAL);
	return (0);
}

struct pollhead mm_pollhd;

/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	switch (getminor(dev)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
		/*
		 * A non-NULL pollhead pointer must be returned in case
		 * the user polls for 0 events.
		 */
		*phpp = !anyyet && !*reventsp ?
		    &mm_pollhd : (struct pollhead *)NULL;
		return (0);
	default:
		/* no other devices currently support polling */
		return (ENXIO);
	}
}
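/*
 * Added commentary (hypothetical userland view, not from the original
 * source): since every supported minor is always ready for I/O, a call
 * such as
 *
 *	struct pollfd pfd = { fd, POLLIN | POLLOUT, 0 };
 *	(void) poll(&pfd, 1, -1);
 *
 * returns immediately with POLLIN and POLLOUT set in pfd.revents.
 */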
static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * Report a size of zero to reduce overhead (this avoids two
	 * failing property lookups per stat).
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}

static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
{
	int error = 0;
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);

	mutex_enter(&mm_lock);
	hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
	    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

	if (!pf_is_memory(pfn)) {
		if (allowio) {
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else
		error = uiomove(&mm_map[pageoff], nbytes, rw, uio);

	hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	mutex_exit(&mm_lock);
	return (error);
}

static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int i;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, va);
	i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
	AS_LOCK_EXIT(as, &as->a_lock);

	return (i);
}

#ifdef __sparc

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */
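/*
 * Added commentary: mmio() above transfers at most one page per call
 * (nbytes is clipped at the page boundary), so mmrw() below loops,
 * re-deriving the pfn for each page until the uio is drained or an
 * error occurs.
 */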
/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0);
			break;

		case M_KMEM:
		case M_ALLKMEM:
			{
			page_t **ppp;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
				break;

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure.  Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context so we avoid it
			 * using the try_lock set check above.  Some day when
			 * the kernel page locking gets redesigned all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
			}

			break;

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;

		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}
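/*
 * Added commentary (hypothetical libkvm-style usage of MEM_VTOP, not
 * from the original source); the ioctl is only accepted on /dev/kmem:
 *
 *	mem_vtop_t vtop;
 *
 *	vtop.m_as = NULL;	-- NULL means the caller's own as
 *	vtop.m_va = va;
 *	if (ioctl(kmem_fd, MEM_VTOP, &vtop) == 0)
 *		pa = ptob(vtop.m_pfn) + ((uintptr_t)va & PAGEOFFSET);
 */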
/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}

/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
	extern int page_retire_test(void);
	uint64_t pa;

	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
		return (EFAULT);
	}

	switch (cmd) {
	case MEM_PAGE_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_UNRETIRE:
		return (page_unretire(pa));

	case MEM_PAGE_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_RETIRE_MCE:
		return (page_retire(pa, PR_MCE));

	case MEM_PAGE_RETIRE_UE:
		return (page_retire(pa, PR_UE));

	case MEM_PAGE_GETERRORS:
		{
			uint64_t page_errors;
			int rc = page_retire_check(pa, &page_errors);
			if (copyout(&page_errors, (void *)data,
			    sizeof (uint64_t))) {
				return (EFAULT);
			}
			return (rc);
		}

	case MEM_PAGE_RETIRE_TEST:
		return (page_retire_test());

	}

	return (EINVAL);
}

/*
 * Given a mem-scheme FMRI for a page, execute the given page retire
 * command on it.
 */
static int
mmioctl_page_fmri_retire(int cmd, intptr_t data)
{
	mem_page_t mpage;
	uint64_t pa;
	nvlist_t *nvl;
	int err;

	if ((err = mm_read_mem_page(data, &mpage)) < 0)
		return (err);

	if ((err = mm_get_mem_fmri(&mpage, &nvl)) != 0)
		return (err);

	if ((err = mm_get_paddr(nvl, &pa)) != 0) {
		nvlist_free(nvl);
		return (err);
	}

	nvlist_free(nvl);

	switch (cmd) {
	case MEM_PAGE_FMRI_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_FMRI_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_FMRI_UNRETIRE:
		return (page_unretire(pa));
	}

	return (EINVAL);
}
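/*
 * Added commentary (hypothetical FMA-agent usage, not from the
 * original source); the retire ioctls are only accepted on /dev/mem:
 *
 *	uint64_t pa = ...;	-- physical address of the bad page
 *	int rc = ioctl(mem_fd, MEM_PAGE_RETIRE, &pa);
 *
 * For MEM_PAGE_GETERRORS the same buffer is reused: the PA is copied
 * in and the error mask is copied back out over it.
 */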
#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address, return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		return (err);
	}

	if (len >= mem_name.m_namelen) {
		kmem_free(buf, bufsize);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_name,
	    mem_name.m_namelen, NULL) != 0) {
		kmem_free(buf, bufsize);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	return (0);
}

/*
 * Given a syndrome and address, return information about the
 * associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
	mem_info_t mem_info;
	int err;

	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
		return (EFAULT);

	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
		return (err);

	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Given a memory name, return its associated serial id.
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	void *name;
	size_t name_len;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	if (mem_name.m_namelen > 1024)
		mem_name.m_namelen = 1024;	/* cap at 1024 bytes */

	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
	    mem_name.m_namelen, &name_len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	if (len > mem_name.m_sidlen) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_sid,
	    mem_name.m_sidlen, NULL) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	kmem_free(name, mem_name.m_namelen);
	return (0);
}
#endif	/* __sparc */
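/*
 * Added commentary: the name and SID lookups above share a pattern --
 * size a scratch buffer with cpu_get_name_bufsize(), let the CPU
 * module fill it in, and fail with ENAMETOOLONG rather than truncate
 * when the caller's buffer is too small.
 */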
/*
 * Private ioctls for
 *	libkvm, to support kvm_physaddr();
 *	FMA, to support page_retire() and memory attribute information.
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
		return (ENXIO);

	switch (cmd) {
	case MEM_VTOP:
		return (mmioctl_vtop(data));

	case MEM_PAGE_RETIRE:
	case MEM_PAGE_ISRETIRED:
	case MEM_PAGE_UNRETIRE:
	case MEM_PAGE_RETIRE_MCE:
	case MEM_PAGE_RETIRE_UE:
	case MEM_PAGE_GETERRORS:
	case MEM_PAGE_RETIRE_TEST:
		return (mmioctl_page_retire(cmd, data));

	case MEM_PAGE_FMRI_RETIRE:
	case MEM_PAGE_FMRI_ISRETIRED:
	case MEM_PAGE_FMRI_UNRETIRE:
		return (mmioctl_page_fmri_retire(cmd, data));

#ifdef __sparc
	case MEM_NAME:
		return (mmioctl_get_mem_name(data));

	case MEM_INFO:
		return (mmioctl_get_mem_info(data));

	case MEM_SID:
		return (mmioctl_get_mem_sid(data));
#else
	case MEM_NAME:
	case MEM_INFO:
	case MEM_SID:
		return (ENOTSUP);
#endif	/* __sparc */
	}
	return (ENXIO);
}

/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
	pfn_t pf;
	struct memlist *pmem;
	minor_t minor = getminor(dev);

	switch (minor) {
	case M_MEM:
		pf = btop(off);
		memlist_read_lock();
		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
			if (pf >= BTOP(pmem->address) &&
			    pf < BTOP(pmem->address + pmem->size)) {
				memlist_read_unlock();
				return (impl_obmem_pfnum(pf));
			}
		}
		memlist_read_unlock();
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* no longer supported with KPR */
		return (-1);

	case M_ZERO:
		/*
		 * We shouldn't be mmap'ing to /dev/zero here as
		 * mmsegmap() should have already converted
		 * a mapping request for this device to a mapping
		 * using seg_vn for anonymous memory.
		 */
		break;

	}
	return (-1);
}
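/*
 * Added commentary: mmmmap() follows the old-style mmap(9E)
 * convention -- a page frame number cookie on success, -1 on failure
 * -- which is why mmsegmap() below can validate an entire range by
 * probing it one page at a time.
 */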
/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct segdev_crargs dev_a;
	int error;
	minor_t minor;
	off_t i;

	minor = getminor(dev);

	as_rangelock(as);
	/*
	 * No need to worry about vac alignment on /dev/zero
	 * since this is a "clone" object that doesn't yet exist.
	 */
	error = choose_addr(as, addrp, len, off,
	    (minor == M_MEM) || (minor == M_KMEM), flags);
	if (error != 0) {
		as_rangeunlock(as);
		return (error);
	}

	switch (minor) {
	case M_MEM:
		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
		if ((flags & MAP_TYPE) != MAP_SHARED) {
			as_rangeunlock(as);
			return (EINVAL);
		}

		/*
		 * Check to ensure that the entire range is
		 * legal and we are not trying to map in
		 * more than the device will let us.
		 */
		for (i = 0; i < len; i += PAGESIZE) {
			if (mmmmap(dev, off + i, maxprot) == -1) {
				as_rangeunlock(as);
				return (ENXIO);
			}
		}

		/*
		 * Use seg_dev segment driver for /dev/mem mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = (flags & MAP_TYPE);
		dev_a.prot = (uchar_t)prot;
		dev_a.maxprot = (uchar_t)maxprot;
		dev_a.hat_attr = 0;

		/*
		 * Make /dev/mem mappings non-consistent since we can't
		 * alias pages that don't have page structs behind them,
		 * such as kernel stack pages.  If someone mmap()s a kernel
		 * stack page and we give them a tte with cv, a line from
		 * that page can get into both pages of the spitfire d$.
		 * A snoop from another processor will only invalidate
		 * the first page, which once caused the kernel
		 * (xc_attention) to go into an infinite loop at pil 13
		 * with no interrupts able to come in.  See bug 1203630.
		 */
		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
		dev_a.devmap_data = NULL;

		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	case M_ZERO:
		/*
		 * Use seg_vn segment driver for /dev/zero mapping.
		 * Passing in a NULL amp gives us the "cloning" effect.
		 */
		vn_a.vp = NULL;
		vn_a.offset = 0;
		vn_a.type = (flags & MAP_TYPE);
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.cred = cred;
		vn_a.amp = NULL;
		vn_a.szc = 0;
		vn_a.lgrp_mem_policy_flags = 0;
		error = as_map(as, *addrp, len, segvn_create, &vn_a);
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* No longer supported with KPR. */
		error = ENXIO;
		break;

	case M_NULL:
		/*
		 * Use seg_dev segment driver for /dev/null mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
		dev_a.hat_attr = 0;
		dev_a.hat_flags = 0;
		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	default:
		error = ENXIO;
	}

	as_rangeunlock(as);
	return (error);
}

static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0	/* bus operations */
};

static struct modldrv modldrv = {
	&mod_driverops, "memory driver %I%", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
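/*
 * Added commentary: because the phys_installed kstat is created with
 * KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE, the framework first calls
 * mm_kstat_update() to size the data (one address/size pair per
 * memlist entry), then mm_kstat_snapshot() to copy it out; the driver
 * never allocates ks_data itself.
 */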
static int
mm_kstat_update(kstat_t *ksp, int rw)
{
	struct memlist *pmem;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
		count++;
	}
	memlist_read_unlock();

	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct memlist *pmem;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;
		kspmem->address = pmem->address;
		kspmem->size = pmem->size;
	}
	memlist_read_unlock();

	return (0);
}

/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		mem_name32_t mem_name32;

		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
			return (EFAULT);
		mem_name->m_addr = mem_name32.m_addr;
		mem_name->m_synd = mem_name32.m_synd;
		mem_name->m_type[0] = mem_name32.m_type[0];
		mem_name->m_type[1] = mem_name32.m_type[1];
		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
	}
#endif	/* _SYSCALL32 */

	return (0);
}

/*
 * Read a mem_page_t from user-space and store it in the mem_page_t
 * pointed to by the mpage argument.
 */
static int
mm_read_mem_page(intptr_t data, mem_page_t *mpage)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mpage, sizeof (mem_page_t)) != 0)
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		mem_page32_t mpage32;

		if (copyin((void *)data, &mpage32, sizeof (mem_page32_t)) != 0)
			return (EFAULT);

		mpage->m_fmri = (caddr_t)(uintptr_t)mpage32.m_fmri;
		mpage->m_fmrisz = mpage32.m_fmrisz;
	}
#endif	/* _SYSCALL32 */

	return (0);
}
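/*
 * Added commentary: mm_read_mem_name() and mm_read_mem_page() above
 * normalize ILP32 callers to the native structures by checking
 * get_udatamodel() and widening the 32-bit pointer fields through
 * (caddr_t)(uintptr_t) casts.
 */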
/*
 * Expand an FMRI from a mem_page_t.
 */
static int
mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl)
{
	char *buf;
	int err;

	if (mpage->m_fmri == NULL || mpage->m_fmrisz > MEM_FMRI_MAX_BUFSIZE)
		return (EINVAL);

	buf = kmem_alloc(mpage->m_fmrisz, KM_SLEEP);
	if (copyin(mpage->m_fmri, buf, mpage->m_fmrisz) != 0) {
		kmem_free(buf, mpage->m_fmrisz);
		return (EFAULT);
	}

	err = nvlist_unpack(buf, mpage->m_fmrisz, nvl, KM_SLEEP);
	kmem_free(buf, mpage->m_fmrisz);

	return (err);
}

static int
mm_get_paddr(nvlist_t *nvl, uint64_t *paddr)
{
	uint8_t version;
	uint64_t pa;
	char *scheme;
	int err;
#ifdef __sparc
	uint64_t offset;
	char *unum;
	char **serids;
	uint_t nserids;
#endif

	/* Verify FMRI scheme name and version number */
	if ((nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme) != 0) ||
	    (strcmp(scheme, FM_FMRI_SCHEME_MEM) != 0) ||
	    (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0) ||
	    version > FM_MEM_SCHEME_VERSION) {
		return (EINVAL);
	}

	/*
	 * There are two ways a physical address can be obtained from a mem
	 * scheme FMRI.  One way is to use the "offset" and "serial"
	 * members, if they are present, together with the "unum" member to
	 * calculate a physical address.  This is the preferred way since
	 * it is independent of possible changes to the programming of
	 * underlying hardware registers that may change the physical address.
	 * If the "offset" member is not present, then the address is
	 * retrieved from the "physaddr" member.
	 */
#if defined(__sparc)
	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &offset) != 0) {
		if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &pa) !=
		    0) {
			return (EINVAL);
		}
	} else if (nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0 ||
	    nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &serids,
	    &nserids) != 0) {
		return (EINVAL);
	} else {
		err = cpu_get_mem_addr(unum, serids[0], offset, &pa);
		if (err != 0) {
			if (err == ENOTSUP) {
				/* Fall back to physaddr */
				if (nvlist_lookup_uint64(nvl,
				    FM_FMRI_MEM_PHYSADDR, &pa) != 0)
					return (EINVAL);
			} else
				return (err);
		}
	}
#elif defined(__x86)
	if ((err = cmi_mc_unumtopa(NULL, nvl, &pa)) != CMI_SUCCESS &&
	    err != CMIERR_MC_PARTIALUNUMTOPA)
		return (EINVAL);
#else
#error "port me"
#endif	/* __sparc */

	*paddr = pa;
	return (0);
}
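/*
 * Added commentary (hypothetical construction of a mem-scheme FMRI
 * for the MEM_PAGE_FMRI_* ioctls, not from the original source):
 *
 *	nvlist_t *nvl;
 *
 *	(void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
 *	(void) nvlist_add_string(nvl, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM);
 *	(void) nvlist_add_uint8(nvl, FM_VERSION, FM_MEM_SCHEME_VERSION);
 *	(void) nvlist_add_uint64(nvl, FM_FMRI_MEM_PHYSADDR, pa);
 *
 * The list is then nvlist_pack()ed and its buffer and size passed in
 * a mem_page_t; per the comment in mm_get_paddr(), "physaddr" is
 * accepted when no "offset" member is present.
 */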