/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>

#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif	/* __sparc */

/*
 * Turn a byte length into a page count.  The DDI btop() takes a
 * 32-bit size on 32-bit machines; this macro handles 64-bit sizes
 * on 32-bit machines with large physical memory.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))
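/*
 * For example, with 4K pages (_pageshift == 12; illustrative only, the
 * value is platform-dependent), BTOP(0x180000000ULL) turns a 6 GB byte
 * count, which does not fit in a 32-bit size_t, into the page count
 * 0x180000.
 */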
static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
static int mm_read_mem_page(intptr_t data, mem_page_t *mpage);
static int mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl);
static int mm_get_paddr(nvlist_t *nvl, uint64_t *paddr);

/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	struct mem_minor {
		char *name;
		minor_t minor;
		int privonly;
		const char *rdpriv;
		const char *wrpriv;
		mode_t priv_mode;
	} mm[] = {
		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
		{ "null",	M_NULL,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
		{ "zero",	M_ZERO,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
	};
	kstat_t *ksp;

	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
		    DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	mm_dip = devi;

	ksp = kstat_create("mm", 0, "phys_installed", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = mm_kstat_update;
		ksp->ks_snapshot = mm_kstat_snapshot;
		ksp->ks_lock = &mm_lock;	/* XXX - not really needed */
		kstat_install(ksp);
	}

	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "kmem_io_access", 0);

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)mm_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
	switch (getminor(*devp)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		/* standard devices */
		break;

	default:
		/* Unsupported or unknown type */
		return (EINVAL);
	}
	return (0);
}

struct pollhead mm_pollhd;

/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	switch (getminor(dev)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
		/*
		 * A non-NULL pollhead pointer should be returned in case
		 * the user polls for 0 events.
		 */
		*phpp = !anyyet && !*reventsp ?
		    &mm_pollhd : (struct pollhead *)NULL;
		return (0);
	default:
		/* no other devices currently support polling */
		return (ENXIO);
	}
}

static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * Implement zero size to reduce overhead (avoid two failing
	 * property lookups per stat).
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}
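/*
 * Common read/write engine for the memory minors.  The page containing
 * the target pfn is transiently mapped at mm_map, a single page of
 * kernel VA reserved in mm_attach(), with mm_lock serializing use of
 * that window.  Ordinary memory pages are copied with uiomove(); pfns
 * that are not memory (e.g. device ranges) are accessed through
 * ddi_peekpokeio() instead, and only when the caller is allowed I/O
 * access.
 */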
static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
{
	int error = 0;
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);

	mutex_enter(&mm_lock);
	hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
	    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

	if (!pf_is_memory(pfn)) {
		if (allowio) {
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else
		error = uiomove(&mm_map[pageoff], nbytes, rw, uio);

	hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	mutex_exit(&mm_lock);
	return (error);
}

static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int i;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, va);
	i = (seg != NULL) ? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
	AS_LOCK_EXIT(as, &as->a_lock);

	return (i);
}

#ifdef __sparc

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */

/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0);
			break;

		case M_KMEM:
		case M_ALLKMEM:
		{
			page_t **ppp;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
				break;

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure.  Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context, so we avoid it
			 * using the try_lock check above.  Some day, when
			 * kernel page locking gets redesigned, all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
		}

			break;

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;

		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}
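/*
 * Illustrative userland use (not part of this driver): with sufficient
 * privilege, physical memory is read by seeking /dev/mem to a physical
 * address, e.g.
 *
 *	int fd = open("/dev/mem", O_RDONLY);
 *	uint32_t word;
 *	if (pread(fd, &word, sizeof (word), (off_t)pa) == sizeof (word))
 *		(word now holds the 32 bits at physical address pa)
 *
 * For /dev/kmem the file offset is interpreted as a kernel virtual
 * address instead.
 */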
/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}
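/*
 * Illustrative caller's view of MEM_VTOP (not part of this driver).
 * The ioctl must be issued on the /dev/kmem minor; a NULL m_as means
 * "use the calling process's address space":
 *
 *	mem_vtop_t v;
 *	v.m_as = NULL;
 *	v.m_va = some_mapped_address;
 *	if (ioctl(kmem_fd, MEM_VTOP, &v) == 0)
 *		pa = ((uint64_t)v.m_pfn << pageshift) |
 *		    ((uintptr_t)some_mapped_address & pageoffset);
 *
 * where pageshift/pageoffset are the user-level page-size values,
 * e.g. derived from sysconf(_SC_PAGESIZE).
 */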
/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
	extern int page_retire_test(void);
	uint64_t pa;

	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
		return (EFAULT);
	}

	switch (cmd) {
	case MEM_PAGE_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_UNRETIRE:
		return (page_unretire(pa));

	case MEM_PAGE_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_RETIRE_MCE:
		return (page_retire(pa, PR_MCE));

	case MEM_PAGE_RETIRE_UE:
		return (page_retire(pa, PR_UE));

	case MEM_PAGE_GETERRORS:
	{
		uint64_t page_errors;
		int rc = page_retire_check(pa, &page_errors);
		if (copyout(&page_errors, (void *)data,
		    sizeof (uint64_t))) {
			return (EFAULT);
		}
		return (rc);
	}

	case MEM_PAGE_RETIRE_TEST:
		return (page_retire_test());

	}

	return (EINVAL);
}
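/*
 * Illustrative caller's view (not part of this driver): the retire
 * commands are issued on the /dev/mem minor with the PA passed by
 * reference,
 *
 *	uint64_t pa = ...;
 *	int err = ioctl(mem_fd, MEM_PAGE_RETIRE, &pa);
 *
 * MEM_PAGE_GETERRORS overwrites the same uint64_t with the page's
 * error status on the way out.
 */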
/*
 * Given a mem-scheme FMRI for a page, execute the given page retire
 * command on it.
 */
static int
mmioctl_page_fmri_retire(int cmd, intptr_t data)
{
	mem_page_t mpage;
	uint64_t pa;
	nvlist_t *nvl;
	int err;

	if ((err = mm_read_mem_page(data, &mpage)) < 0)
		return (err);

	if ((err = mm_get_mem_fmri(&mpage, &nvl)) != 0)
		return (err);

	if ((err = mm_get_paddr(nvl, &pa)) != 0) {
		nvlist_free(nvl);
		return (err);
	}

	nvlist_free(nvl);

	switch (cmd) {
	case MEM_PAGE_FMRI_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_FMRI_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_FMRI_UNRETIRE:
		return (page_unretire(pa));
	}

	return (EINVAL);
}

#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address, return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		return (err);
	}

	if (len >= mem_name.m_namelen) {
		kmem_free(buf, bufsize);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_name,
	    mem_name.m_namelen, NULL) != 0) {
		kmem_free(buf, bufsize);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	return (0);
}

/*
 * Given a syndrome and address, return information about the
 * associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
	mem_info_t mem_info;
	int err;

	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
		return (EFAULT);

	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
		return (err);

	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Given a memory name, return its associated serial id.
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	void *name;
	size_t name_len;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	if (mem_name.m_namelen > 1024)
		mem_name.m_namelen = 1024;	/* cap at 1024 bytes */

	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
	    mem_name.m_namelen, &name_len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	if (len > mem_name.m_sidlen) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_sid,
	    mem_name.m_sidlen, NULL) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	kmem_free(name, mem_name.m_namelen);
	return (0);
}
#endif	/* __sparc */

/*
 * Private ioctls for:
 *	libkvm, to support kvm_physaddr();
 *	FMA, for page_retire() and memory attribute information.
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
		return (ENXIO);

	switch (cmd) {
	case MEM_VTOP:
		return (mmioctl_vtop(data));

	case MEM_PAGE_RETIRE:
	case MEM_PAGE_ISRETIRED:
	case MEM_PAGE_UNRETIRE:
	case MEM_PAGE_RETIRE_MCE:
	case MEM_PAGE_RETIRE_UE:
	case MEM_PAGE_GETERRORS:
	case MEM_PAGE_RETIRE_TEST:
		return (mmioctl_page_retire(cmd, data));

	case MEM_PAGE_FMRI_RETIRE:
	case MEM_PAGE_FMRI_ISRETIRED:
	case MEM_PAGE_FMRI_UNRETIRE:
		return (mmioctl_page_fmri_retire(cmd, data));

#ifdef __sparc
	case MEM_NAME:
		return (mmioctl_get_mem_name(data));

	case MEM_INFO:
		return (mmioctl_get_mem_info(data));

	case MEM_SID:
		return (mmioctl_get_mem_sid(data));
#else
	case MEM_NAME:
	case MEM_INFO:
	case MEM_SID:
		return (ENOTSUP);
#endif	/* __sparc */
	}
	return (ENXIO);
}

/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
	pfn_t pf;
	struct memlist *pmem;
	minor_t minor = getminor(dev);

	switch (minor) {
	case M_MEM:
		pf = btop(off);
		memlist_read_lock();
		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
			if (pf >= BTOP(pmem->address) &&
			    pf < BTOP(pmem->address + pmem->size)) {
				memlist_read_unlock();
				return (impl_obmem_pfnum(pf));
			}
		}
		memlist_read_unlock();
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* no longer supported with KPR */
		return (-1);

	case M_ZERO:
		/*
		 * We shouldn't be mmap'ing to /dev/zero here as
		 * mmsegmap() should have already converted
		 * a mapping request for this device to a mapping
		 * using seg_vn for anonymous memory.
		 */
		break;

	}
	return (-1);
}

/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct segdev_crargs dev_a;
	int error;
	minor_t minor;
	off_t i;

	minor = getminor(dev);

	as_rangelock(as);
	/*
	 * No need to worry about vac alignment on /dev/zero
	 * since this is a "clone" object that doesn't yet exist.
	 */
	error = choose_addr(as, addrp, len, off,
	    (minor == M_MEM) || (minor == M_KMEM), flags);
	if (error != 0) {
		as_rangeunlock(as);
		return (error);
	}

	switch (minor) {
	case M_MEM:
		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
		if ((flags & MAP_TYPE) != MAP_SHARED) {
			as_rangeunlock(as);
			return (EINVAL);
		}

		/*
		 * Check to ensure that the entire range is
		 * legal and that we are not trying to map in
		 * more than the device will let us.
		 */
		for (i = 0; i < len; i += PAGESIZE) {
			if (mmmmap(dev, off + i, maxprot) == -1) {
				as_rangeunlock(as);
				return (ENXIO);
			}
		}

		/*
		 * Use the seg_dev segment driver for the /dev/mem mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = (flags & MAP_TYPE);
		dev_a.prot = (uchar_t)prot;
		dev_a.maxprot = (uchar_t)maxprot;
		dev_a.hat_attr = 0;

		/*
		 * Make /dev/mem mappings non-consistent since we can't
		 * alias pages that don't have page structs behind them,
		 * such as kernel stack pages.  If someone mmap()s a kernel
		 * stack page and if we give him a tte with cv, a line from
		 * that page can get into both pages of the spitfire d$.
		 * But snoop from another processor will only invalidate
		 * the first page.  This later caused the kernel
		 * (xc_attention) to go into an infinite loop at pil 13
		 * and no interrupts could come in.  See 1203630.
		 */
		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
		dev_a.devmap_data = NULL;

		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	case M_ZERO:
		/*
		 * Use the seg_vn segment driver for the /dev/zero mapping.
		 * Passing in a NULL amp gives us the "cloning" effect.
		 */
		vn_a.vp = NULL;
		vn_a.offset = 0;
		vn_a.type = (flags & MAP_TYPE);
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.cred = cred;
		vn_a.amp = NULL;
		vn_a.szc = 0;
		vn_a.lgrp_mem_policy_flags = 0;
		error = as_map(as, *addrp, len, segvn_create, &vn_a);
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* No longer supported with KPR. */
		error = ENXIO;
		break;

	case M_NULL:
		/*
		 * Use the seg_dev segment driver for the /dev/null mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
		dev_a.hat_attr = 0;
		dev_a.hat_flags = 0;
		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	default:
		error = ENXIO;
	}

	as_rangeunlock(as);
	return (error);
}
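/*
 * Illustrative userland use (not part of this driver): a MAP_PRIVATE
 * mapping of /dev/zero is the classic way to obtain anonymous memory,
 * served above by the M_ZERO/segvn_create path:
 *
 *	int fd = open("/dev/zero", O_RDWR);
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE, fd, 0);
 */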
static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0	/* bus operations */
};

static struct modldrv modldrv = {
	&mod_driverops, "memory driver %I%", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

static int
mm_kstat_update(kstat_t *ksp, int rw)
{
	struct memlist *pmem;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
		count++;
	}
	memlist_read_unlock();

	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct memlist *pmem;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;
		kspmem->address = pmem->address;
		kspmem->size = pmem->size;
	}
	memlist_read_unlock();

	return (0);
}
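/*
 * Illustrative userland use (not part of this driver): the
 * mm:phys_installed kstat exported by the routines above is a raw
 * array of { address, size } pairs, one per phys_install entry, and
 * can be fetched with libkstat, e.g.
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "mm", 0, "phys_installed");
 *	if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1)
 *		(ksp->ks_data holds ks_ndata address/size pairs)
 */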
/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		mem_name32_t mem_name32;

		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
			return (EFAULT);
		mem_name->m_addr = mem_name32.m_addr;
		mem_name->m_synd = mem_name32.m_synd;
		mem_name->m_type[0] = mem_name32.m_type[0];
		mem_name->m_type[1] = mem_name32.m_type[1];
		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
	}
#endif	/* _SYSCALL32 */

	return (0);
}

/*
 * Read a mem_page_t from user-space and store it in the mem_page_t
 * pointed to by the mpage argument.
 */
static int
mm_read_mem_page(intptr_t data, mem_page_t *mpage)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mpage, sizeof (mem_page_t)) != 0)
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		mem_page32_t mpage32;

		if (copyin((void *)data, &mpage32, sizeof (mem_page32_t)) != 0)
			return (EFAULT);

		mpage->m_fmri = (caddr_t)(uintptr_t)mpage32.m_fmri;
		mpage->m_fmrisz = mpage32.m_fmrisz;
	}
#endif	/* _SYSCALL32 */

	return (0);
}

/*
 * Expand an FMRI from a mem_page_t.
 */
static int
mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl)
{
	char *buf;
	int err;

	if (mpage->m_fmri == NULL || mpage->m_fmrisz > MEM_FMRI_MAX_BUFSIZE)
		return (EINVAL);

	buf = kmem_alloc(mpage->m_fmrisz, KM_SLEEP);
	if (copyin(mpage->m_fmri, buf, mpage->m_fmrisz) != 0) {
		kmem_free(buf, mpage->m_fmrisz);
		return (EFAULT);
	}

	err = nvlist_unpack(buf, mpage->m_fmrisz, nvl, KM_SLEEP);
	kmem_free(buf, mpage->m_fmrisz);

	return (err);
}

static int
mm_get_paddr(nvlist_t *nvl, uint64_t *paddr)
{
	uint8_t version;
	uint64_t pa;
	char *scheme;
	int err;
#ifdef __sparc
	uint64_t offset;
	char *unum;
	char **serids;
	uint_t nserids;
#endif

	/* Verify FMRI scheme name and version number */
	if ((nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme) != 0) ||
	    (strcmp(scheme, FM_FMRI_SCHEME_MEM) != 0) ||
	    (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0) ||
	    version > FM_MEM_SCHEME_VERSION) {
		return (EINVAL);
	}

	/*
	 * There are two ways a physical address can be obtained from a mem
	 * scheme FMRI.  One way is to use the "offset" and "serial"
	 * members, if they are present, together with the "unum" member to
	 * calculate a physical address.  This is the preferred way since
	 * it is independent of possible changes to the programming of
	 * underlying hardware registers that may change the physical address.
	 * If the "offset" member is not present, then the address is
	 * retrieved from the "physaddr" member.
	 */
#if defined(__sparc)
	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &offset) != 0) {
		if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &pa) !=
		    0) {
			return (EINVAL);
		}
	} else if (nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0 ||
	    nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &serids,
	    &nserids) != 0) {
		return (EINVAL);
	} else {
		err = cpu_get_mem_addr(unum, serids[0], offset, &pa);
		if (err != 0) {
			if (err == ENOTSUP) {
				/* Fall back to physaddr */
				if (nvlist_lookup_uint64(nvl,
				    FM_FMRI_MEM_PHYSADDR, &pa) != 0)
					return (EINVAL);
			} else
				return (err);
		}
	}
#elif defined(__x86)
	if ((err = cmi_mc_unumtopa(NULL, nvl, &pa)) != CMI_SUCCESS &&
	    err != CMIERR_MC_PARTIALUNUMTOPA)
		return (EINVAL);
#else
#error "port me"
#endif	/* __sparc */

	*paddr = pa;
	return (0);
}