/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>

#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif /* __sparc */

/*
 * Turn a byte length into a page count.  The DDI btop() takes a
 * 32-bit size on 32-bit machines; this macro handles 64-bit sizes
 * correctly on 32-bit machines with large physical memory.
 */
#define BTOP(x) ((pgcnt_t)((x) >> _pageshift))
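
/*
 * Illustration (not used by the driver): with 4K pages (_pageshift == 12),
 * a 6 GB length that would overflow a 32-bit btop() is still computed
 * correctly in 64-bit arithmetic:
 *
 *      BTOP(0x180000000ULL) == (pgcnt_t)(0x180000000ULL >> 12) == 0x180000
 *
 * i.e. 1572864 pages.
 */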

static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;      /* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
static int mm_read_mem_page(intptr_t data, mem_page_t *mpage);
static int mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl);
static int mm_get_paddr(nvlist_t *nvl, uint64_t *paddr);

/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        int i;
        struct mem_minor {
                char *name;
                minor_t minor;
                int privonly;
                const char *rdpriv;
                const char *wrpriv;
                mode_t priv_mode;
        } mm[] = {
                { "mem", M_MEM, 0, NULL, "all", 0640 },
                { "kmem", M_KMEM, 0, NULL, "all", 0640 },
                { "allkmem", M_ALLKMEM, 0, "all", "all", 0600 },
                { "null", M_NULL, PRIVONLY_DEV, NULL, NULL, 0666 },
                { "zero", M_ZERO, PRIVONLY_DEV, NULL, NULL, 0666 },
        };
        kstat_t *ksp;

        mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
        mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

        for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
                if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
                    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
                    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
                    DDI_FAILURE) {
                        ddi_remove_minor_node(devi, NULL);
                        return (DDI_FAILURE);
                }
        }

        mm_dip = devi;

        ksp = kstat_create("mm", 0, "phys_installed", "misc",
            KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
        if (ksp != NULL) {
                ksp->ks_update = mm_kstat_update;
                ksp->ks_snapshot = mm_kstat_snapshot;
                ksp->ks_lock = &mm_lock; /* XXX - not really needed */
                kstat_install(ksp);
        }

        mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
            "kmem_io_access", 0);

        return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
        register int error;

        switch (infocmd) {
        case DDI_INFO_DEVT2DEVINFO:
                *result = (void *)mm_dip;
                error = DDI_SUCCESS;
                break;
        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)0;
                error = DDI_SUCCESS;
                break;
        default:
                error = DDI_FAILURE;
        }
        return (error);
}

/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
        switch (getminor(*devp)) {
        case M_NULL:
        case M_ZERO:
        case M_MEM:
        case M_KMEM:
        case M_ALLKMEM:
                /* standard devices */
                break;

        default:
                /* Unsupported or unknown type */
                return (EINVAL);
        }
        return (0);
}

struct pollhead mm_pollhd;

/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
        switch (getminor(dev)) {
        case M_NULL:
        case M_ZERO:
        case M_MEM:
        case M_KMEM:
        case M_ALLKMEM:
                *reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
                    POLLWRNORM | POLLRDBAND | POLLWRBAND);
                /*
                 * A non-NULL pollhead pointer must be returned in case
                 * the user polls for 0 events.
                 */
                *phpp = !anyyet && !*reventsp ?
                    &mm_pollhd : (struct pollhead *)NULL;
                return (0);
        default:
                /* no other devices currently support polling */
                return (ENXIO);
        }
}

static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
        /*
         * Implement a zero-size property to reduce overhead (this avoids
         * two failing property lookups per stat).
         */
        return (ddi_prop_op_size(dev, dip, prop_op,
            flags, name, valuep, lengthp, 0));
}

static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
{
        int error = 0;
        size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
            (size_t)uio->uio_iov->iov_len);

        mutex_enter(&mm_lock);
        hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
            (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
            HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

        if (!pf_is_memory(pfn)) {
                if (allowio) {
                        size_t c = uio->uio_iov->iov_len;

                        if (ddi_peekpokeio(NULL, uio, rw,
                            (caddr_t)(uintptr_t)uio->uio_loffset, c,
                            sizeof (int32_t)) != DDI_SUCCESS)
                                error = EFAULT;
                } else
                        error = EIO;
        } else
                error = uiomove(&mm_map[pageoff], nbytes, rw, uio);

        hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
        mutex_exit(&mm_lock);
        return (error);
}
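
/*
 * Worked example (for illustration only, not used by the code): a 10-byte
 * read from /dev/mem at offset 0x1ffe with 4K pages arrives here with
 * pfn == 1 and pageoff == 0xffe, so nbytes is clamped to
 * MIN(PAGESIZE - 0xffe, iov_len) == 2.  mmrw() below then loops and calls
 * mmio() a second time for the remaining 8 bytes, which live in the page
 * at pfn == 2.
 */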

static int
mmpagelock(struct as *as, caddr_t va)
{
        struct seg *seg;
        int i;

        AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
        seg = as_segat(as, va);
        i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
        AS_LOCK_EXIT(as, &as->a_lock);

        return (i);
}

#ifdef __sparc

#define NEED_LOCK_KVADDR(kva)   mmpagelock(&kas, kva)

#else /* __i386, __amd64 */

#define NEED_LOCK_KVADDR(va)    0

#endif /* __sparc */

/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
        pfn_t v;
        struct iovec *iov;
        int error = 0;
        size_t c;
        ssize_t oresid = uio->uio_resid;
        minor_t minor = getminor(dev);

        while (uio->uio_resid > 0 && error == 0) {
                iov = uio->uio_iov;
                if (iov->iov_len == 0) {
                        uio->uio_iov++;
                        uio->uio_iovcnt--;
                        if (uio->uio_iovcnt < 0)
                                panic("mmrw");
                        continue;
                }
                switch (minor) {

                case M_MEM:
                        memlist_read_lock();
                        if (!address_in_memlist(phys_install,
                            (uint64_t)uio->uio_loffset, 1)) {
                                memlist_read_unlock();
                                error = EFAULT;
                                break;
                        }
                        memlist_read_unlock();

                        v = BTOP((u_offset_t)uio->uio_loffset);
                        error = mmio(uio, rw, v,
                            uio->uio_loffset & PAGEOFFSET, 0);
                        break;

                case M_KMEM:
                case M_ALLKMEM:
                {
                        page_t **ppp;
                        caddr_t vaddr = (caddr_t)uio->uio_offset;
                        int try_lock = NEED_LOCK_KVADDR(vaddr);
                        int locked = 0;

                        if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
                                break;

                        /*
                         * If vaddr does not map a valid page, as_pagelock()
                         * will return failure.  Hence we can't check the
                         * return value and return EFAULT here as we'd like.
                         * seg_kp and seg_kpm do not properly support
                         * as_pagelock() for this context, so we avoid it
                         * by using the try_lock check above.  Some day,
                         * when kernel page locking is redesigned, all this
                         * muck can be cleaned up.
                         */
                        if (try_lock)
                                locked = (as_pagelock(&kas, &ppp, vaddr,
                                    PAGESIZE, S_WRITE) == 0);

                        v = hat_getpfnum(kas.a_hat,
                            (caddr_t)(uintptr_t)uio->uio_loffset);
                        if (v == PFN_INVALID) {
                                if (locked)
                                        as_pageunlock(&kas, ppp, vaddr,
                                            PAGESIZE, S_WRITE);
                                error = EFAULT;
                                break;
                        }

                        error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
                            minor == M_ALLKMEM || mm_kmem_io_access);
                        if (locked)
                                as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
                                    S_WRITE);
                }

                        break;

                case M_ZERO:
                        if (rw == UIO_READ) {
                                label_t ljb;

                                if (on_fault(&ljb)) {
                                        no_fault();
                                        error = EFAULT;
                                        break;
                                }
                                uzero(iov->iov_base, iov->iov_len);
                                no_fault();
                                uio->uio_resid -= iov->iov_len;
                                uio->uio_loffset += iov->iov_len;
                                break;
                        }
                        /* else it's a write, fall through to NULL case */
                        /*FALLTHROUGH*/

                case M_NULL:
                        if (rw == UIO_READ)
                                return (0);
                        c = iov->iov_len;
                        iov->iov_base += c;
                        iov->iov_len -= c;
                        uio->uio_loffset += c;
                        uio->uio_resid -= c;
                        break;

                }
        }
        return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
        return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
        return (mmrw(dev, uio, UIO_WRITE, cred));
}

/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
        mem_vtop32_t vtop32;
#endif
        mem_vtop_t mem_vtop;
        proc_t *p;
        pfn_t pfn = (pfn_t)PFN_INVALID;
        pid_t pid = 0;
        struct as *as;
        struct seg *seg;

        if (get_udatamodel() == DATAMODEL_NATIVE) {
                if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
                        return (EFAULT);
        }
#ifdef _SYSCALL32
        else {
                if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
                        return (EFAULT);
                mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
                mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

                if (mem_vtop.m_as != NULL)
                        return (EINVAL);
        }
#endif

        if (mem_vtop.m_as == &kas) {
                pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
        } else {
                if (mem_vtop.m_as == NULL) {
                        /*
                         * Assume the calling process's address space if the
                         * caller didn't specify one.
                         */
                        p = curthread->t_procp;
                        if (p == NULL)
                                return (EIO);
                        mem_vtop.m_as = p->p_as;
                }

                mutex_enter(&pidlock);
                for (p = practive; p != NULL; p = p->p_next) {
                        if (p->p_as == mem_vtop.m_as) {
                                pid = p->p_pid;
                                break;
                        }
                }
                mutex_exit(&pidlock);
                if (p == NULL)
                        return (EIO);
                p = sprlock(pid);
                if (p == NULL)
                        return (EIO);
                as = p->p_as;
                if (as == mem_vtop.m_as) {
                        mutex_exit(&p->p_lock);
                        AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
                        for (seg = AS_SEGFIRST(as); seg != NULL;
                            seg = AS_SEGNEXT(as, seg))
                                if ((uintptr_t)mem_vtop.m_va -
                                    (uintptr_t)seg->s_base < seg->s_size)
                                        break;
                        if (seg != NULL)
                                pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
                        AS_LOCK_EXIT(as, &as->a_lock);
                        mutex_enter(&p->p_lock);
                }
                sprunlock(p);
        }
        mem_vtop.m_pfn = pfn;
        if (pfn == PFN_INVALID)
                return (EIO);

        if (get_udatamodel() == DATAMODEL_NATIVE) {
                if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
                        return (EFAULT);
        }
#ifdef _SYSCALL32
        else {
                vtop32.m_pfn = mem_vtop.m_pfn;
                if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
                        return (EFAULT);
        }
#endif

        return (0);
}
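
/*
 * A minimal sketch of a user-level MEM_VTOP caller (illustrative only;
 * libkvm's kvm_physaddr() is the real consumer of this ioctl).  The file
 * descriptor must come from /dev/kmem, per the dispatch check in mmioctl()
 * below; "some_va" is a hypothetical name:
 *
 *      mem_vtop_t vtop;
 *      int fd = open("/dev/kmem", O_RDONLY);
 *
 *      vtop.m_as = NULL;       (NULL means "the caller's address space")
 *      vtop.m_va = (void *)some_va;
 *      if (ioctl(fd, MEM_VTOP, &vtop) == 0)
 *              pa = ptob(vtop.m_pfn) + (some_va & PAGEOFFSET);
 */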

/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
        extern int page_retire_test(void);
        uint64_t pa;

        if (copyin((void *)data, &pa, sizeof (uint64_t))) {
                return (EFAULT);
        }

        switch (cmd) {
        case MEM_PAGE_ISRETIRED:
                return (page_retire_check(pa, NULL));

        case MEM_PAGE_UNRETIRE:
                return (page_unretire(pa));

        case MEM_PAGE_RETIRE:
                return (page_retire(pa, PR_FMA));

        case MEM_PAGE_RETIRE_MCE:
                return (page_retire(pa, PR_MCE));

        case MEM_PAGE_RETIRE_UE:
                return (page_retire(pa, PR_UE));

        case MEM_PAGE_GETERRORS:
                {
                        uint64_t page_errors;
                        int rc = page_retire_check(pa, &page_errors);
                        if (copyout(&page_errors, (void *)data,
                            sizeof (uint64_t))) {
                                return (EFAULT);
                        }
                        return (rc);
                }

        case MEM_PAGE_RETIRE_TEST:
                return (page_retire_test());

        }

        return (EINVAL);
}

/*
 * Given a mem-scheme FMRI for a page, execute the given page retire
 * command on it.
 */
static int
mmioctl_page_fmri_retire(int cmd, intptr_t data)
{
        mem_page_t mpage;
        uint64_t pa;
        nvlist_t *nvl;
        int err;

        if ((err = mm_read_mem_page(data, &mpage)) < 0)
                return (err);

        if ((err = mm_get_mem_fmri(&mpage, &nvl)) != 0)
                return (err);

        if ((err = mm_get_paddr(nvl, &pa)) != 0) {
                nvlist_free(nvl);
                return (err);
        }

        nvlist_free(nvl);

        switch (cmd) {
        case MEM_PAGE_FMRI_ISRETIRED:
                return (page_retire_check(pa, NULL));

        case MEM_PAGE_FMRI_RETIRE:
                return (page_retire(pa, PR_FMA));
        }

        return (EINVAL);
}

#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address, return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
        mem_name_t mem_name;
        void *buf;
        size_t bufsize;
        int len, err;

        if ((bufsize = cpu_get_name_bufsize()) == 0)
                return (ENOTSUP);

        if ((err = mm_read_mem_name(data, &mem_name)) < 0)
                return (err);

        buf = kmem_alloc(bufsize, KM_SLEEP);

        /*
         * Call into cpu specific code to do the lookup.
         */
        if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
            mem_name.m_addr, buf, bufsize, &len)) != 0) {
                kmem_free(buf, bufsize);
                return (err);
        }

        if (len >= mem_name.m_namelen) {
                kmem_free(buf, bufsize);
                return (ENAMETOOLONG);
        }

        if (copyoutstr(buf, (char *)mem_name.m_name,
            mem_name.m_namelen, NULL) != 0) {
                kmem_free(buf, bufsize);
                return (EFAULT);
        }

        kmem_free(buf, bufsize);
        return (0);
}

/*
 * Given a syndrome and address, return information about the
 * associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
        mem_info_t mem_info;
        int err;

        if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
                return (EFAULT);

        if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
            &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
            &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
                return (err);

        if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
                return (EFAULT);

        return (0);
}

/*
 * Given a memory name, return its associated serial ID.
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
        mem_name_t mem_name;
        void *buf;
        void *name;
        size_t name_len;
        size_t bufsize;
        int len, err;

        if ((bufsize = cpu_get_name_bufsize()) == 0)
                return (ENOTSUP);

        if ((err = mm_read_mem_name(data, &mem_name)) < 0)
                return (err);

        buf = kmem_alloc(bufsize, KM_SLEEP);

        if (mem_name.m_namelen > 1024)
                mem_name.m_namelen = 1024;      /* cap at 1024 bytes */

        name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

        if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
            mem_name.m_namelen, &name_len)) != 0) {
                kmem_free(buf, bufsize);
                kmem_free(name, mem_name.m_namelen);
                return (err);
        }

        /*
         * Call into cpu specific code to do the lookup.
         */
        if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
                kmem_free(buf, bufsize);
                kmem_free(name, mem_name.m_namelen);
                return (err);
        }

        if (len > mem_name.m_sidlen) {
                kmem_free(buf, bufsize);
                kmem_free(name, mem_name.m_namelen);
                return (ENAMETOOLONG);
        }

        if (copyoutstr(buf, (char *)mem_name.m_sid,
            mem_name.m_sidlen, NULL) != 0) {
                kmem_free(buf, bufsize);
                kmem_free(name, mem_name.m_namelen);
                return (EFAULT);
        }

        kmem_free(buf, bufsize);
        kmem_free(name, mem_name.m_namelen);
        return (0);
}
#endif /* __sparc */

/*
 * Private ioctls for
 *      libkvm to support kvm_physaddr().
 *      FMA support for page_retire() and memory attribute information.
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
        if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
            (cmd != MEM_VTOP && getminor(dev) != M_MEM))
                return (ENXIO);

        switch (cmd) {
        case MEM_VTOP:
                return (mmioctl_vtop(data));

        case MEM_PAGE_RETIRE:
        case MEM_PAGE_ISRETIRED:
        case MEM_PAGE_UNRETIRE:
        case MEM_PAGE_RETIRE_MCE:
        case MEM_PAGE_RETIRE_UE:
        case MEM_PAGE_GETERRORS:
        case MEM_PAGE_RETIRE_TEST:
                return (mmioctl_page_retire(cmd, data));

        case MEM_PAGE_FMRI_RETIRE:
        case MEM_PAGE_FMRI_ISRETIRED:
                return (mmioctl_page_fmri_retire(cmd, data));

#ifdef __sparc
        case MEM_NAME:
                return (mmioctl_get_mem_name(data));

        case MEM_INFO:
                return (mmioctl_get_mem_info(data));

        case MEM_SID:
                return (mmioctl_get_mem_sid(data));
#else
        case MEM_NAME:
        case MEM_INFO:
        case MEM_SID:
                return (ENOTSUP);
#endif /* __sparc */
        }
        return (ENXIO);
}

/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
        pfn_t pf;
        struct memlist *pmem;
        minor_t minor = getminor(dev);

        switch (minor) {
        case M_MEM:
                pf = btop(off);
                memlist_read_lock();
                for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
                        if (pf >= BTOP(pmem->address) &&
                            pf < BTOP(pmem->address + pmem->size)) {
                                memlist_read_unlock();
                                return (impl_obmem_pfnum(pf));
                        }
                }
                memlist_read_unlock();
                break;

        case M_KMEM:
        case M_ALLKMEM:
                /* no longer supported with KPR */
                return (-1);

        case M_ZERO:
                /*
                 * We shouldn't be mmap'ing to /dev/zero here as
                 * mmsegmap() should have already converted
                 * a mapping request for this device to a mapping
                 * using seg_vn for anonymous memory.
                 */
                break;

        }
        return (-1);
}

/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
        struct segvn_crargs vn_a;
        struct segdev_crargs dev_a;
        int error;
        minor_t minor;
        off_t i;

        minor = getminor(dev);

        as_rangelock(as);
        /*
         * No need to worry about vac alignment on /dev/zero
         * since this is a "clone" object that doesn't yet exist.
         */
        error = choose_addr(as, addrp, len, off,
            (minor == M_MEM) || (minor == M_KMEM), flags);
        if (error != 0) {
                as_rangeunlock(as);
                return (error);
        }

        switch (minor) {
        case M_MEM:
                /* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
                if ((flags & MAP_TYPE) != MAP_SHARED) {
                        as_rangeunlock(as);
                        return (EINVAL);
                }

                /*
                 * Check to ensure that the entire range is
                 * legal and that we are not trying to map in
                 * more than the device will let us.
                 */
                for (i = 0; i < len; i += PAGESIZE) {
                        if (mmmmap(dev, off + i, maxprot) == -1) {
                                as_rangeunlock(as);
                                return (ENXIO);
                        }
                }

                /*
                 * Use the seg_dev segment driver for the /dev/mem mapping.
                 */
                dev_a.mapfunc = mmmmap;
                dev_a.dev = dev;
                dev_a.offset = off;
                dev_a.type = (flags & MAP_TYPE);
                dev_a.prot = (uchar_t)prot;
                dev_a.maxprot = (uchar_t)maxprot;
                dev_a.hat_attr = 0;

                /*
                 * Make /dev/mem mappings non-consistent since we can't
                 * alias pages that don't have page structs behind them,
                 * such as kernel stack pages.  If someone mmap()s a kernel
                 * stack page and we give them a tte with cv, a line from
                 * that page can get into both pages of the spitfire d$,
                 * but a snoop from another processor will only invalidate
                 * the first page.  This once caused the kernel
                 * (xc_attention) to go into an infinite loop at pil 13
                 * with no interrupts able to come in.  See bug 1203630.
                 */
                dev_a.hat_flags = HAT_LOAD_NOCONSIST;
                dev_a.devmap_data = NULL;

                error = as_map(as, *addrp, len, segdev_create, &dev_a);
                break;

        case M_ZERO:
                /*
                 * Use the seg_vn segment driver for the /dev/zero mapping.
                 * Passing in a NULL amp gives us the "cloning" effect.
                 */
                vn_a.vp = NULL;
                vn_a.offset = 0;
                vn_a.type = (flags & MAP_TYPE);
                vn_a.prot = prot;
                vn_a.maxprot = maxprot;
                vn_a.flags = flags & ~MAP_TYPE;
                vn_a.cred = cred;
                vn_a.amp = NULL;
                vn_a.szc = 0;
                vn_a.lgrp_mem_policy_flags = 0;
                error = as_map(as, *addrp, len, segvn_create, &vn_a);
                break;

        case M_KMEM:
        case M_ALLKMEM:
                /* No longer supported with KPR. */
                error = ENXIO;
                break;

        case M_NULL:
                /*
                 * Use the seg_dev segment driver for the /dev/null mapping.
                 */
                dev_a.mapfunc = mmmmap;
                dev_a.dev = dev;
                dev_a.offset = off;
                dev_a.type = 0;         /* neither PRIVATE nor SHARED */
                dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
                dev_a.hat_attr = 0;
                dev_a.hat_flags = 0;
                error = as_map(as, *addrp, len, segdev_create, &dev_a);
                break;

        default:
                error = ENXIO;
        }

        as_rangeunlock(as);
        return (error);
}
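
/*
 * For reference (illustrative only, not part of the driver): the common
 * anonymous-memory idiom below reaches the M_ZERO arm of mmsegmap()
 * above, which hands the request to segvn_create() with a NULL amp:
 *
 *      int zfd = open("/dev/zero", O_RDWR);
 *      void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *          MAP_PRIVATE, zfd, 0);
 */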

static struct cb_ops mm_cb_ops = {
        mmopen,                 /* open */
        nulldev,                /* close */
        nodev,                  /* strategy */
        nodev,                  /* print */
        nodev,                  /* dump */
        mmread,                 /* read */
        mmwrite,                /* write */
        mmioctl,                /* ioctl */
        nodev,                  /* devmap */
        mmmmap,                 /* mmap */
        mmsegmap,               /* segmap */
        mmchpoll,               /* poll */
        mmpropop,               /* prop_op */
        0,                      /* streamtab */
        D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* refcnt */
        mm_info,                /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        mm_attach,              /* attach */
        nodev,                  /* detach */
        nodev,                  /* reset */
        &mm_cb_ops,             /* driver operations */
        (struct bus_ops *)0     /* bus operations */
};

static struct modldrv modldrv = {
        &mod_driverops, "memory driver %I%", &mm_ops,
};

static struct modlinkage modlinkage = {
        MODREV_1, &modldrv, NULL
};

int
_init(void)
{
        return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
        return (mod_remove(&modlinkage));
}

static int
mm_kstat_update(kstat_t *ksp, int rw)
{
        struct memlist *pmem;
        uint_t count;

        if (rw == KSTAT_WRITE)
                return (EACCES);

        count = 0;
        memlist_read_lock();
        for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
                count++;
        }
        memlist_read_unlock();

        ksp->ks_ndata = count;
        ksp->ks_data_size = count * 2 * sizeof (uint64_t);

        return (0);
}

static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
        struct memlist *pmem;
        struct memunit {
                uint64_t address;
                uint64_t size;
        } *kspmem;

        if (rw == KSTAT_WRITE)
                return (EACCES);

        ksp->ks_snaptime = gethrtime();

        kspmem = (struct memunit *)buf;
        memlist_read_lock();
        for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
                if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
                        break;
                kspmem->address = pmem->address;
                kspmem->size = pmem->size;
        }
        memlist_read_unlock();

        return (0);
}
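
/*
 * Sketch of a user-level libkstat consumer of the "phys_installed" kstat
 * (illustrative only; variable names are hypothetical).  Each entry is an
 * (address, size) pair laid out as in struct memunit above:
 *
 *      kstat_ctl_t *kc = kstat_open();
 *      kstat_t *ksp = kstat_lookup(kc, "mm", 0, "phys_installed");
 *
 *      if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1) {
 *              uint64_t *mu = ksp->ks_data;
 *              for (uint_t i = 0; i < ksp->ks_ndata; i++, mu += 2)
 *                      printf("%llx..%llx\n", (u_longlong_t)mu[0],
 *                          (u_longlong_t)(mu[0] + mu[1]));
 *      }
 */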

/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
        if (get_udatamodel() == DATAMODEL_NATIVE) {
                if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
                        return (EFAULT);
        }
#ifdef _SYSCALL32
        else {
                mem_name32_t mem_name32;

                if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
                        return (EFAULT);
                mem_name->m_addr = mem_name32.m_addr;
                mem_name->m_synd = mem_name32.m_synd;
                mem_name->m_type[0] = mem_name32.m_type[0];
                mem_name->m_type[1] = mem_name32.m_type[1];
                mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
                mem_name->m_namelen = (size_t)mem_name32.m_namelen;
                mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
                mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
        }
#endif /* _SYSCALL32 */

        return (0);
}

/*
 * Read a mem_page_t from user-space and store it in the mem_page_t
 * pointed to by the mpage argument.
 */
static int
mm_read_mem_page(intptr_t data, mem_page_t *mpage)
{
        if (get_udatamodel() == DATAMODEL_NATIVE) {
                if (copyin((void *)data, mpage, sizeof (mem_page_t)) != 0)
                        return (EFAULT);
        }
#ifdef _SYSCALL32
        else {
                mem_page32_t mpage32;

                if (copyin((void *)data, &mpage32, sizeof (mem_page32_t)) != 0)
                        return (EFAULT);

                mpage->m_fmri = (caddr_t)(uintptr_t)mpage32.m_fmri;
                mpage->m_fmrisz = mpage32.m_fmrisz;
        }
#endif /* _SYSCALL32 */

        return (0);
}

/*
 * Expand an FMRI from a mem_page_t.
 */
static int
mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl)
{
        char *buf;
        int err;

        if (mpage->m_fmri == NULL || mpage->m_fmrisz > MEM_FMRI_MAX_BUFSIZE)
                return (EINVAL);

        buf = kmem_alloc(mpage->m_fmrisz, KM_SLEEP);
        if (copyin(mpage->m_fmri, buf, mpage->m_fmrisz) != 0) {
                kmem_free(buf, mpage->m_fmrisz);
                return (EFAULT);
        }

        err = nvlist_unpack(buf, mpage->m_fmrisz, nvl, KM_SLEEP);
        kmem_free(buf, mpage->m_fmrisz);

        return (err);
}

static int
mm_get_paddr(nvlist_t *nvl, uint64_t *paddr)
{
        uint8_t version;
        uint64_t pa;
        char *scheme;
        int err;
#ifdef __sparc
        uint64_t offset;
        char *unum;
        char **serids;
        uint_t nserids;
#endif

        /* Verify FMRI scheme name and version number */
        if ((nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme) != 0) ||
            (strcmp(scheme, FM_FMRI_SCHEME_MEM) != 0) ||
            (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0) ||
            version > FM_MEM_SCHEME_VERSION) {
                return (EINVAL);
        }

        /*
         * There are two ways a physical address can be obtained from a mem
         * scheme FMRI.  One way is to use the "offset" and "serial"
         * members, if they are present, together with the "unum" member to
         * calculate a physical address.  This is the preferred way since
         * it is independent of possible changes to the programming of
         * underlying hardware registers that may change the physical address.
         * If the "offset" member is not present, then the address is
         * retrieved from the "physaddr" member.
         */
#if defined(__sparc)
        if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &offset) != 0) {
                if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &pa) !=
                    0) {
                        return (EINVAL);
                }
        } else if (nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0 ||
            nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &serids,
            &nserids) != 0) {
                return (EINVAL);
        } else {
                err = cpu_get_mem_addr(unum, serids[0], offset, &pa);
                if (err != 0) {
                        if (err == ENOTSUP) {
                                /* Fall back to physaddr */
                                if (nvlist_lookup_uint64(nvl,
                                    FM_FMRI_MEM_PHYSADDR, &pa) != 0)
                                        return (EINVAL);
                        } else
                                return (err);
                }
        }
#elif defined(__x86)
        if ((err = cmi_mc_unumtopa(NULL, nvl, &pa)) != CMI_SUCCESS &&
            err != CMIERR_MC_PARTIALUNUMTOPA)
                return (EINVAL);
#else
#error "port me"
#endif /* __sparc */

        *paddr = pa;
        return (0);
}
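
/*
 * Example (illustrative only; the unum and serial values are hypothetical):
 * a mem-scheme FMRI whose nvlist carries
 *
 *      scheme = "mem", version = 0,
 *      unum = "/SYS/MB/CMP0/BR0/CH0/D0", serial = [ "12345678" ],
 *      offset = 0x1000
 *
 * is resolved through cpu_get_mem_addr() on sparc (the preferred path,
 * since it survives reprogramming of the underlying hardware registers),
 * while a payload carrying only "physaddr" yields that member directly.
 */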