/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>

#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif	/* __sparc */

/*
 * Turn a byte length into a pagecount.  The DDI btop takes a
 * 32-bit size on 32-bit machines; this handles 64-bit sizes for
 * large physical-memory 32-bit machines.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))
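
/*
 * Illustrative only (hypothetical values): with 4K pages, a 64-bit
 * byte count survives BTOP() intact, where a 32-bit btop(9F) on an
 * ILP32 kernel would truncate it:
 *
 *	uint64_t bytes = 0x180000000ULL;	6 GB, hypothetical
 *	pgcnt_t pages = BTOP(bytes);		0x180000 4K pages
 */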

static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
static int mm_read_mem_page(intptr_t data, mem_page_t *mpage);
static int mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl);
static int mm_get_paddr(nvlist_t *nvl, uint64_t *paddr);

/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	struct mem_minor {
		char *name;
		minor_t minor;
		int privonly;
		const char *rdpriv;
		const char *wrpriv;
		mode_t priv_mode;
	} mm[] = {
		{ "mem",	M_MEM,		0,		NULL,	"all",	0640 },
		{ "kmem",	M_KMEM,		0,		NULL,	"all",	0640 },
		{ "allkmem",	M_ALLKMEM,	0,		"all",	"all",	0600 },
		{ "null",	M_NULL,		PRIVONLY_DEV,	NULL,	NULL,	0666 },
		{ "zero",	M_ZERO,		PRIVONLY_DEV,	NULL,	NULL,	0666 },
	};
	kstat_t *ksp;

	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
		    DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	mm_dip = devi;

	ksp = kstat_create("mm", 0, "phys_installed", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = mm_kstat_update;
		ksp->ks_snapshot = mm_kstat_snapshot;
		ksp->ks_lock = &mm_lock;	/* XXX - not really needed */
		kstat_install(ksp);
	}

	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "kmem_io_access", 0);

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	register int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)mm_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
	switch (getminor(*devp)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		/* standard devices */
		break;

	default:
		/* Unsupported or unknown type */
		return (EINVAL);
	}
	return (0);
}

struct pollhead	mm_pollhd;

/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	switch (getminor(dev)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
		/*
		 * A non-NULL pollhead pointer must be returned in case
		 * the user polls for 0 events.
		 */
		*phpp = !anyyet && !*reventsp ?
		    &mm_pollhd : (struct pollhead *)NULL;
		return (0);
	default:
		/* no other devices currently support polling */
		return (ENXIO);
	}
}
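
/*
 * Illustrative only: because mmchpoll() reports every requested event
 * as immediately ready, a userland poll such as
 *
 *	struct pollfd pfd = { fd, POLLIN | POLLOUT, 0 };
 *	(void) poll(&pfd, 1, -1);
 *
 * returns at once for any of these minors; mm_pollhd comes into play
 * only when a caller polls for zero events.
 */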

static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * implement zero size to reduce overhead (avoid two failing
	 * property lookups per stat).
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}

static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
{
	int error = 0;
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);

	mutex_enter(&mm_lock);
	hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
	    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

	if (!pf_is_memory(pfn)) {
		if (allowio) {
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else
		error = uiomove(&mm_map[pageoff], nbytes, rw, uio);

	hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	mutex_exit(&mm_lock);
	return (error);
}

static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int i;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, va);
	i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
	AS_LOCK_EXIT(as, &as->a_lock);

	return (i);
}

#ifdef	__sparc

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */
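
/*
 * Common read/write path for all of the memory special files.  The
 * minor number selects the backing object: physical memory for
 * /dev/mem, kernel virtual memory for /dev/kmem and /dev/allkmem,
 * and the synthetic zero and null devices.
 */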
/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0);
			break;

		case M_KMEM:
		case M_ALLKMEM:
			{
			page_t **ppp;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
				break;

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure.  Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context, so we avoid it
			 * by using the try_lock check set above.  Some day
			 * when the kernel page locking gets redesigned all
			 * this muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
			}

			break;

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;

		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}
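
/*
 * Illustrative only: through the entry points above, the devices show
 * their familiar userland behavior, e.g.
 *
 *	dd if=/dev/zero of=/tmp/zeroes bs=8k count=1	reads zeroes
 *	cat /etc/motd > /dev/null			data discarded
 */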

/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}

/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
	extern int page_retire_test(void);
	uint64_t pa;

	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
		return (EFAULT);
	}

	switch (cmd) {
	case MEM_PAGE_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_UNRETIRE:
		return (page_unretire(pa));

	case MEM_PAGE_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_RETIRE_MCE:
		return (page_retire(pa, PR_MCE));

	case MEM_PAGE_RETIRE_UE:
		return (page_retire(pa, PR_UE));

	case MEM_PAGE_GETERRORS:
		{
			uint64_t page_errors;
			int rc = page_retire_check(pa, &page_errors);
			if (copyout(&page_errors, (void *)data,
			    sizeof (uint64_t))) {
				return (EFAULT);
			}
			return (rc);
		}

	case MEM_PAGE_RETIRE_TEST:
		return (page_retire_test());

	}

	return (EINVAL);
}

/*
 * Given a mem-scheme FMRI for a page, execute the given page retire
 * command on it.
 */
static int
mmioctl_page_fmri_retire(int cmd, intptr_t data)
{
	mem_page_t mpage;
	uint64_t pa;
	nvlist_t *nvl;
	int err;

	if ((err = mm_read_mem_page(data, &mpage)) < 0)
		return (err);

	if ((err = mm_get_mem_fmri(&mpage, &nvl)) != 0)
		return (err);

	if ((err = mm_get_paddr(nvl, &pa)) != 0) {
		nvlist_free(nvl);
		return (err);
	}

	nvlist_free(nvl);

	switch (cmd) {
	case MEM_PAGE_FMRI_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_FMRI_RETIRE:
		return (page_retire(pa, PR_FMA));
	}

	return (EINVAL);
}
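
/*
 * Illustrative sketch (userland, hypothetical variables): a privileged
 * consumer such as an FMA agent might probe a page's retire state with
 *
 *	uint64_t pa = ...;			physical address
 *	int fd = open("/dev/mem", O_RDONLY);
 *	int rv = ioctl(fd, MEM_PAGE_ISRETIRED, &pa);
 *
 * where the ioctl result reflects page_retire_check()'s return value
 * for pa.
 */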

#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address, return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		return (err);
	}

	if (len >= mem_name.m_namelen) {
		kmem_free(buf, bufsize);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_name,
	    mem_name.m_namelen, NULL) != 0) {
		kmem_free(buf, bufsize);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	return (0);
}

/*
 * Given a syndrome and address, return information about the
 * associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
	mem_info_t mem_info;
	int err;

	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
		return (EFAULT);

	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
		return (err);

	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Given a memory name, return its associated serial id.
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	void *name;
	size_t name_len;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	if (mem_name.m_namelen > 1024)
		mem_name.m_namelen = 1024;	/* cap at 1024 bytes */

	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
	    mem_name.m_namelen, &name_len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	if (len > mem_name.m_sidlen) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_sid,
	    mem_name.m_sidlen, NULL) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	kmem_free(name, mem_name.m_namelen);
	return (0);
}
#endif	/* __sparc */

/*
 * Private ioctls for libkvm to support kvm_physaddr(), and for FMA
 * to support page_retire() and memory attribute information.
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
		return (ENXIO);

	switch (cmd) {
	case MEM_VTOP:
		return (mmioctl_vtop(data));

	case MEM_PAGE_RETIRE:
	case MEM_PAGE_ISRETIRED:
	case MEM_PAGE_UNRETIRE:
	case MEM_PAGE_RETIRE_MCE:
	case MEM_PAGE_RETIRE_UE:
	case MEM_PAGE_GETERRORS:
	case MEM_PAGE_RETIRE_TEST:
		return (mmioctl_page_retire(cmd, data));

	case MEM_PAGE_FMRI_RETIRE:
	case MEM_PAGE_FMRI_ISRETIRED:
		return (mmioctl_page_fmri_retire(cmd, data));

#ifdef __sparc
	case MEM_NAME:
		return (mmioctl_get_mem_name(data));

	case MEM_INFO:
		return (mmioctl_get_mem_info(data));

	case MEM_SID:
		return (mmioctl_get_mem_sid(data));
#else
	case MEM_NAME:
	case MEM_INFO:
	case MEM_SID:
		return (ENOTSUP);
#endif	/* __sparc */
	}
	return (ENXIO);
}

/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
	pfn_t pf;
	struct memlist *pmem;
	minor_t minor = getminor(dev);

	switch (minor) {
	case M_MEM:
		pf = btop(off);
		memlist_read_lock();
		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
			if (pf >= BTOP(pmem->address) &&
			    pf < BTOP(pmem->address + pmem->size)) {
				memlist_read_unlock();
				return (impl_obmem_pfnum(pf));
			}
		}
		memlist_read_unlock();
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* no longer supported with KPR */
		return (-1);

	case M_ZERO:
		/*
		 * We shouldn't be mmap'ing to /dev/zero here as
		 * mmsegmap() should have already converted
		 * a mapping request for this device to a mapping
		 * using seg_vn for anonymous memory.
		 */
		break;

	}
	return (-1);
}
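
/*
 * Illustrative only: mapping /dev/zero is the classic way to obtain
 * anonymous memory without malloc(), e.g.
 *
 *	int fd = open("/dev/zero", O_RDWR);
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE, fd, 0);
 *
 * mmsegmap() below turns such a request into a seg_vn anonymous
 * mapping, which is why the M_ZERO case above never expects to be
 * reached through the mmap entry point.
 */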

/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct segdev_crargs dev_a;
	int error;
	minor_t minor;
	off_t i;

	minor = getminor(dev);

	as_rangelock(as);
	if ((flags & MAP_FIXED) == 0) {
		/*
		 * No need to worry about vac alignment on /dev/zero
		 * since this is a "clone" object that doesn't yet exist.
		 */
		map_addr(addrp, len, (offset_t)off,
		    (minor == M_MEM) || (minor == M_KMEM), flags);

		if (*addrp == NULL) {
			as_rangeunlock(as);
			return (ENOMEM);
		}
	} else {
		/*
		 * User specified address -
		 * Blow away any previous mappings.
		 */
		(void) as_unmap(as, *addrp, len);
	}

	switch (minor) {
	case M_MEM:
		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
		if ((flags & MAP_TYPE) != MAP_SHARED) {
			as_rangeunlock(as);
			return (EINVAL);
		}

		/*
		 * Check to ensure that the entire range is
		 * legal and we are not trying to map in
		 * more than the device will let us.
		 */
		for (i = 0; i < len; i += PAGESIZE) {
			if (mmmmap(dev, off + i, maxprot) == -1) {
				as_rangeunlock(as);
				return (ENXIO);
			}
		}

		/*
		 * Use seg_dev segment driver for /dev/mem mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = (flags & MAP_TYPE);
		dev_a.prot = (uchar_t)prot;
		dev_a.maxprot = (uchar_t)maxprot;
		dev_a.hat_attr = 0;

		/*
		 * Make /dev/mem mappings non-consistent since we can't
		 * alias pages that don't have page structs behind them,
		 * such as kernel stack pages.  If someone mmap()s a kernel
		 * stack page and if we give him a tte with cv, a line from
		 * that page can get into both pages of the spitfire d$.
		 * But snoop from another processor will only invalidate
		 * the first page.  This later caused kernel (xc_attention)
		 * to go into an infinite loop at pil 13 and no interrupts
		 * could come in.  See 1203630.
		 */
		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
		dev_a.devmap_data = NULL;

		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	case M_ZERO:
		/*
		 * Use seg_vn segment driver for /dev/zero mapping.
		 * Passing in a NULL amp gives us the "cloning" effect.
		 */
		vn_a.vp = NULL;
		vn_a.offset = 0;
		vn_a.type = (flags & MAP_TYPE);
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.cred = cred;
		vn_a.amp = NULL;
		vn_a.szc = 0;
		vn_a.lgrp_mem_policy_flags = 0;
		error = as_map(as, *addrp, len, segvn_create, &vn_a);
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* No longer supported with KPR. */
		error = ENXIO;
		break;

	case M_NULL:
		/*
		 * Use seg_dev segment driver for /dev/null mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
		dev_a.hat_attr = 0;
		dev_a.hat_flags = 0;
		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	default:
		error = ENXIO;
	}

	as_rangeunlock(as);
	return (error);
}

static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0	/* bus operations */
};

static struct modldrv modldrv = {
	&mod_driverops, "memory driver %I%", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
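
/*
 * kstat support for the "mm:phys_installed" kstat created in
 * mm_attach().  mm_kstat_update() sizes the buffer as one
 * (address, size) pair per phys_install memlist entry;
 * mm_kstat_snapshot() then copies the pairs out.
 */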
static int
mm_kstat_update(kstat_t *ksp, int rw)
{
	struct memlist *pmem;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
		count++;
	}
	memlist_read_unlock();

	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct memlist *pmem;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;
		kspmem->address = pmem->address;
		kspmem->size = pmem->size;
	}
	memlist_read_unlock();

	return (0);
}

/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32
	else {
		mem_name32_t mem_name32;

		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
			return (EFAULT);
		mem_name->m_addr = mem_name32.m_addr;
		mem_name->m_synd = mem_name32.m_synd;
		mem_name->m_type[0] = mem_name32.m_type[0];
		mem_name->m_type[1] = mem_name32.m_type[1];
		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
	}
#endif	/* _SYSCALL32 */

	return (0);
}

/*
 * Read a mem_page_t from user-space and store it in the mem_page_t
 * pointed to by the mpage argument.
 */
static int
mm_read_mem_page(intptr_t data, mem_page_t *mpage)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mpage, sizeof (mem_page_t)) != 0)
			return (EFAULT);
	}
#ifdef	_SYSCALL32
	else {
		mem_page32_t mpage32;

		if (copyin((void *)data, &mpage32, sizeof (mem_page32_t)) != 0)
			return (EFAULT);

		mpage->m_fmri = (caddr_t)(uintptr_t)mpage32.m_fmri;
		mpage->m_fmrisz = mpage32.m_fmrisz;
	}
#endif	/* _SYSCALL32 */

	return (0);
}

/*
 * Expand an FMRI from a mem_page_t.
 */
static int
mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl)
{
	char *buf;
	int err;

	if (mpage->m_fmri == NULL || mpage->m_fmrisz > MEM_FMRI_MAX_BUFSIZE)
		return (EINVAL);

	buf = kmem_alloc(mpage->m_fmrisz, KM_SLEEP);
	if (copyin(mpage->m_fmri, buf, mpage->m_fmrisz) != 0) {
		kmem_free(buf, mpage->m_fmrisz);
		return (EFAULT);
	}

	err = nvlist_unpack(buf, mpage->m_fmrisz, nvl, KM_SLEEP);
	kmem_free(buf, mpage->m_fmrisz);

	return (err);
}

static int
mm_get_paddr(nvlist_t *nvl, uint64_t *paddr)
{
	uint8_t version;
	uint64_t pa;
	char *scheme;
	int err;
#ifdef __sparc
	uint64_t offset;
	char *unum;
	char **serids;
	uint_t nserids;
#endif

	/* Verify FMRI scheme name and version number */
	if ((nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme) != 0) ||
	    (strcmp(scheme, FM_FMRI_SCHEME_MEM) != 0) ||
	    (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0) ||
	    version > FM_MEM_SCHEME_VERSION) {
		return (EINVAL);
	}

	/*
	 * There are two ways a physical address can be obtained from a mem
	 * scheme FMRI.  One way is to use the "offset" and "serial"
	 * members, if they are present, together with the "unum" member to
	 * calculate a physical address.  This is the preferred way since
	 * it is independent of possible changes to the programming of
	 * underlying hardware registers that may change the physical address.
	 * If the "offset" member is not present, then the address is
	 * retrieved from the "physaddr" member.
	 */
#if defined(__sparc)
	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &offset) != 0) {
		if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &pa) !=
		    0) {
			return (EINVAL);
		}
	} else if (nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0 ||
	    nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &serids,
	    &nserids) != 0) {
		return (EINVAL);
	} else {
		err = cpu_get_mem_addr(unum, serids[0], offset, &pa);
		if (err != 0) {
			if (err == ENOTSUP) {
				/* Fall back to physaddr */
				if (nvlist_lookup_uint64(nvl,
				    FM_FMRI_MEM_PHYSADDR, &pa) != 0)
					return (EINVAL);
			} else
				return (err);
		}
	}
#elif defined(__x86)
	if ((err = cmi_mc_unumtopa(NULL, nvl, &pa)) != CMI_SUCCESS &&
	    err != CMIERR_MC_PARTIALUNUMTOPA)
		return (EINVAL);
#else
#error "port me"
#endif	/* __sparc */

	*paddr = pa;
	return (0);
}