/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>

#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__i386) || defined(__amd64)
#include <sys/cpu_module.h>
#endif	/* __sparc */

/*
 * Turn a byte length into a pagecount.  The DDI btop takes a
 * 32-bit size on 32-bit machines; this macro handles 64-bit sizes
 * for large physical-memory 32-bit machines.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))
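/*
 * Worked example (assuming 4K pages, so _pageshift == 12): a byte
 * length of 8GB, 0x200000000, does not fit in a 32-bit size_t, but
 * BTOP(0x200000000ULL) still yields the correct 0x200000 (2M) pages,
 * whereas a 32-bit btop() would have truncated its argument.
 */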
static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
static int mm_read_mem_page(intptr_t data, mem_page_t *mpage);
static int mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl);
static int mm_get_paddr(nvlist_t *nvl, uint64_t *paddr);

/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	struct mem_minor {
		char *name;
		minor_t minor;
		int privonly;
		const char *rdpriv;
		const char *wrpriv;
		mode_t priv_mode;
	} mm[] = {
		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
		{ "null",	M_NULL,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
		{ "zero",	M_ZERO,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
	};
	kstat_t *ksp;

	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
		    DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	mm_dip = devi;

	ksp = kstat_create("mm", 0, "phys_installed", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = mm_kstat_update;
		ksp->ks_snapshot = mm_kstat_snapshot;
		ksp->ks_lock = &mm_lock;	/* XXX - not really needed */
		kstat_install(ksp);
	}

	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "kmem_io_access", 0);

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)mm_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
	switch (getminor(*devp)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		/* standard devices */
		break;

	default:
		/* Unsupported or unknown type */
		return (EINVAL);
	}
	return (0);
}

struct pollhead mm_pollhd;

/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	switch (getminor(dev)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
		/*
		 * A non-NULL pollhead pointer should be returned in case
		 * the user polls for 0 events.
		 */
		*phpp = !anyyet && !*reventsp ?
		    &mm_pollhd : (struct pollhead *)NULL;
		return (0);
	default:
		/* no other devices currently support polling */
		return (ENXIO);
	}
}
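/*
 * These minors are always ready for I/O, so a user-level poll(2) on
 * them returns immediately.  A hypothetical caller polling /dev/zero:
 *
 *	struct pollfd pfd = { fd_zero, POLLIN, 0 };
 *	(void) poll(&pfd, 1, -1);	(returns at once with POLLIN set)
 */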
static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * Implement zero size to reduce overhead (avoid two failing
	 * property lookups per stat).
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}

/*
 * Transfer up to one page's worth of data to or from the temporary
 * kernel window mm_map: map the target pfn, move the bytes with
 * uiomove() (or, for non-memory pages, ddi_peekpokeio()), then tear
 * the mapping back down.
 */
static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
{
	int error = 0;
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);

	mutex_enter(&mm_lock);
	hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
	    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

	if (!pf_is_memory(pfn)) {
		if (allowio) {
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else
		error = uiomove(&mm_map[pageoff], nbytes, rw, uio);

	hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	mutex_exit(&mm_lock);
	return (error);
}
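/*
 * Note that mmio() never transfers across a page boundary: a
 * hypothetical read of 100 bytes starting 10 bytes before a page
 * boundary moves only the 10 bytes that fit (nbytes is clamped to
 * PAGESIZE - pageoff), and the loop in mmrw() below comes back
 * around for the remaining 90.
 */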
#ifdef __sparc

static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int i;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, va);
	i = (seg != NULL) ? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
	AS_LOCK_EXIT(as, &as->a_lock);

	return (i);
}

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */

/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0);
			break;

		case M_KMEM:
		case M_ALLKMEM:
		{
			page_t **ppp;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure.  Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context, so we avoid it
			 * using the try_lock check above.  Some day, when
			 * kernel page locking gets redesigned, all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
		}

			break;

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to the NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;
		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}
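/*
 * A rough sketch of how a libkvm-style consumer is expected to use
 * MEM_VTOP (fd_kmem and va are hypothetical); the ioctl is only
 * accepted on /dev/kmem, and a NULL m_as means "the calling
 * process's address space":
 *
 *	mem_vtop_t vtop;
 *	vtop.m_as = NULL;
 *	vtop.m_va = va;
 *	if (ioctl(fd_kmem, MEM_VTOP, &vtop) == 0)
 *		pfn = vtop.m_pfn;
 */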
/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)vtop32.m_as;
		mem_vtop.m_va = (void *)vtop32.m_va;

		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}

/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
	extern int page_retire_test(void);
	uint64_t pa;

	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
		return (EFAULT);
	}

	switch (cmd) {
	case MEM_PAGE_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_UNRETIRE:
		return (page_unretire(pa));

	case MEM_PAGE_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_RETIRE_MCE:
		return (page_retire(pa, PR_MCE));

	case MEM_PAGE_RETIRE_UE:
		return (page_retire(pa, PR_UE));

	case MEM_PAGE_GETERRORS:
	{
		uint64_t page_errors;
		int rc = page_retire_check(pa, &page_errors);

		if (copyout(&page_errors, (void *)data,
		    sizeof (uint64_t))) {
			return (EFAULT);
		}
		return (rc);
	}

	case MEM_PAGE_RETIRE_TEST:
		return (page_retire_test());
	}

	return (EINVAL);
}

/*
 * Given a mem-scheme FMRI for a page, execute the given page retire
 * command on it.
 */
static int
mmioctl_page_fmri_retire(int cmd, intptr_t data)
{
	mem_page_t mpage;
	uint64_t pa;
	nvlist_t *nvl;
	int err;

	if ((err = mm_read_mem_page(data, &mpage)) != 0)
		return (err);

	if ((err = mm_get_mem_fmri(&mpage, &nvl)) != 0)
		return (err);

	if ((err = mm_get_paddr(nvl, &pa)) != 0) {
		nvlist_free(nvl);
		return (err);
	}

	nvlist_free(nvl);

	switch (cmd) {
	case MEM_PAGE_FMRI_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_FMRI_RETIRE:
		return (page_retire(pa, PR_FMA));
	}

	return (EINVAL);
}
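/*
 * A rough sketch of the FMA side of these ioctls (fd_mem and pa are
 * hypothetical): all of the page retire commands are accepted only
 * on /dev/mem and take a pointer to the 64-bit physical address:
 *
 *	uint64_t pa = ...;		(PA of the faulty page)
 *	if (ioctl(fd_mem, MEM_PAGE_RETIRE, &pa) == 0)
 *		... the page is now retired ...
 */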
#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address, return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) != 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	/*
	 * Call into CPU-specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		return (err);
	}

	if (len >= mem_name.m_namelen) {
		kmem_free(buf, bufsize);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_name,
	    mem_name.m_namelen, NULL) != 0) {
		kmem_free(buf, bufsize);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	return (0);
}

/*
 * Given a syndrome and address, return information about the
 * associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
	mem_info_t mem_info;
	int err;

	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
		return (EFAULT);

	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
		return (err);

	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Given a memory name, return its associated serial id.
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	void *name;
	size_t name_len;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) != 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	if (mem_name.m_namelen > 1024)
		mem_name.m_namelen = 1024;	/* cap at 1024 bytes */

	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
	    mem_name.m_namelen, &name_len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	/*
	 * Call into CPU-specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	if (len > mem_name.m_sidlen) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_sid,
	    mem_name.m_sidlen, NULL) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	kmem_free(name, mem_name.m_namelen);
	return (0);
}
#endif	/* __sparc */
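/*
 * A rough sketch of a MEM_SID caller (fd_mem, unum, and sid are
 * hypothetical): the memory name goes in and the serial id comes
 * back through the same mem_name_t:
 *
 *	mem_name_t mn;
 *	mn.m_name = unum;	mn.m_namelen = strlen(unum) + 1;
 *	mn.m_sid = sid;		mn.m_sidlen = sizeof (sid);
 *	err = ioctl(fd_mem, MEM_SID, &mn);
 */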
/*
 * Private ioctls for libkvm (kvm_physaddr() support) and for FMA
 * (page_retire() and memory attribute information).
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
		return (ENXIO);

	switch (cmd) {
	case MEM_VTOP:
		return (mmioctl_vtop(data));

	case MEM_PAGE_RETIRE:
	case MEM_PAGE_ISRETIRED:
	case MEM_PAGE_UNRETIRE:
	case MEM_PAGE_RETIRE_MCE:
	case MEM_PAGE_RETIRE_UE:
	case MEM_PAGE_GETERRORS:
	case MEM_PAGE_RETIRE_TEST:
		return (mmioctl_page_retire(cmd, data));

	case MEM_PAGE_FMRI_RETIRE:
	case MEM_PAGE_FMRI_ISRETIRED:
		return (mmioctl_page_fmri_retire(cmd, data));

#ifdef __sparc
	case MEM_NAME:
		return (mmioctl_get_mem_name(data));

	case MEM_INFO:
		return (mmioctl_get_mem_info(data));

	case MEM_SID:
		return (mmioctl_get_mem_sid(data));
#else
	case MEM_NAME:
	case MEM_INFO:
	case MEM_SID:
		return (ENOTSUP);
#endif	/* __sparc */
	}
	return (ENXIO);
}

/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
	pfn_t pf;
	struct memlist *pmem;
	minor_t minor = getminor(dev);

	switch (minor) {
	case M_MEM:
		pf = btop(off);
		memlist_read_lock();
		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
			if (pf >= BTOP(pmem->address) &&
			    pf < BTOP(pmem->address + pmem->size)) {
				memlist_read_unlock();
				return (impl_obmem_pfnum(pf));
			}
		}
		memlist_read_unlock();
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* no longer supported with KPR */
		return (-1);

	case M_ZERO:
		/*
		 * We shouldn't be mmap'ing to /dev/zero here as
		 * mmsegmap() should have already converted
		 * a mapping request for this device to a mapping
		 * using seg_vn for anonymous memory.
		 */
		break;
	}
	return (-1);
}
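/*
 * A rough sketch of the classic user-level idiom served here
 * (fd_zero is hypothetical): mapping /dev/zero MAP_PRIVATE yields
 * zero-filled anonymous memory, which mmsegmap() below converts
 * into a seg_vn "cloning" mapping rather than a device mapping:
 *
 *	caddr_t p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE, fd_zero, 0);
 */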
/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct segdev_crargs dev_a;
	int error;
	minor_t minor;
	off_t i;

	minor = getminor(dev);

	as_rangelock(as);
	if ((flags & MAP_FIXED) == 0) {
		/*
		 * No need to worry about vac alignment on /dev/zero
		 * since this is a "clone" object that doesn't yet exist.
		 */
		map_addr(addrp, len, (offset_t)off,
		    (minor == M_MEM) || (minor == M_KMEM), flags);

		if (*addrp == NULL) {
			as_rangeunlock(as);
			return (ENOMEM);
		}
	} else {
		/*
		 * User specified address -
		 * Blow away any previous mappings.
		 */
		(void) as_unmap(as, *addrp, len);
	}

	switch (minor) {
	case M_MEM:
		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
		if ((flags & MAP_TYPE) != MAP_SHARED) {
			as_rangeunlock(as);
			return (EINVAL);
		}

		/*
		 * Check to ensure that the entire range is
		 * legal and that we are not trying to map in
		 * more than the device will let us.
		 */
		for (i = 0; i < len; i += PAGESIZE) {
			if (mmmmap(dev, off + i, maxprot) == -1) {
				as_rangeunlock(as);
				return (ENXIO);
			}
		}

		/*
		 * Use the seg_dev segment driver for the /dev/mem mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = (flags & MAP_TYPE);
		dev_a.prot = (uchar_t)prot;
		dev_a.maxprot = (uchar_t)maxprot;
		dev_a.hat_attr = 0;

		/*
		 * Make /dev/mem mappings non-consistent since we can't
		 * alias pages that don't have page structs behind them,
		 * such as kernel stack pages.  If someone mmap()s a kernel
		 * stack page and we give him a tte with cv, a line from
		 * that page can get into both pages of the spitfire d$.
		 * A snoop from another processor will then invalidate
		 * only the first page, which once caused the kernel
		 * (xc_attention) to go into an infinite loop at pil 13
		 * with no interrupts able to come in.  See 1203630.
		 */
		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
		dev_a.devmap_data = NULL;

		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	case M_ZERO:
		/*
		 * Use the seg_vn segment driver for the /dev/zero mapping.
		 * Passing in a NULL amp gives us the "cloning" effect.
		 */
		vn_a.vp = NULL;
		vn_a.offset = 0;
		vn_a.type = (flags & MAP_TYPE);
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.cred = cred;
		vn_a.amp = NULL;
		vn_a.szc = 0;
		vn_a.lgrp_mem_policy_flags = 0;
		error = as_map(as, *addrp, len, segvn_create, &vn_a);
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* No longer supported with KPR. */
		error = ENXIO;
		break;

	case M_NULL:
		/*
		 * Use the seg_dev segment driver for the /dev/null mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
		dev_a.hat_attr = 0;
		dev_a.hat_flags = 0;
		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	default:
		error = ENXIO;
	}

	as_rangeunlock(as);
	return (error);
}

static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0	/* bus operations */
};

static struct modldrv modldrv = {
	&mod_driverops, "memory driver %I%", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
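/*
 * The two routines below implement the "mm:0:phys_installed" kstat,
 * which exports phys_install as an array of (address, size) pairs.
 * A rough user-level sketch of reading it with libkstat (all names
 * hypothetical):
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "mm", 0, "phys_installed");
 *	(void) kstat_read(kc, ksp, NULL);
 *	uint64_t *mem = ksp->ks_data;	(ks_ndata pairs follow)
 */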
static int
mm_kstat_update(kstat_t *ksp, int rw)
{
	struct memlist *pmem;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
		count++;
	}
	memlist_read_unlock();

	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct memlist *pmem;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;
		kspmem->address = pmem->address;
		kspmem->size = pmem->size;
	}
	memlist_read_unlock();

	return (0);
}

/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		mem_name32_t mem_name32;

		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
			return (EFAULT);
		mem_name->m_addr = mem_name32.m_addr;
		mem_name->m_synd = mem_name32.m_synd;
		mem_name->m_type[0] = mem_name32.m_type[0];
		mem_name->m_type[1] = mem_name32.m_type[1];
		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
	}
#endif	/* _SYSCALL32 */

	return (0);
}

/*
 * Read a mem_page_t from user-space and store it in the mem_page_t
 * pointed to by the mpage argument.
 */
static int
mm_read_mem_page(intptr_t data, mem_page_t *mpage)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mpage, sizeof (mem_page_t)) != 0)
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		mem_page32_t mpage32;

		if (copyin((void *)data, &mpage32, sizeof (mem_page32_t)) != 0)
			return (EFAULT);

		mpage->m_fmri = (caddr_t)(uintptr_t)mpage32.m_fmri;
		mpage->m_fmrisz = mpage32.m_fmrisz;
	}
#endif	/* _SYSCALL32 */

	return (0);
}
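/*
 * mm_get_mem_fmri() below expects m_fmri to point at a packed nvlist
 * of at most MEM_FMRI_MAX_BUFSIZE bytes.  A rough sketch of how a
 * caller might produce one with libnvpair (names hypothetical):
 *
 *	char *buf = NULL;
 *	size_t sz = 0;
 *	(void) nvlist_pack(fmri_nvl, &buf, &sz, NV_ENCODE_NATIVE, 0);
 *	mpage.m_fmri = buf;
 *	mpage.m_fmrisz = sz;
 */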
/*
 * Expand an FMRI from a mem_page_t.
 */
static int
mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl)
{
	char *buf;
	int err;

	if (mpage->m_fmri == NULL || mpage->m_fmrisz > MEM_FMRI_MAX_BUFSIZE)
		return (EINVAL);

	buf = kmem_alloc(mpage->m_fmrisz, KM_SLEEP);
	if (copyin(mpage->m_fmri, buf, mpage->m_fmrisz) != 0) {
		kmem_free(buf, mpage->m_fmrisz);
		return (EFAULT);
	}

	err = nvlist_unpack(buf, mpage->m_fmrisz, nvl, KM_SLEEP);
	kmem_free(buf, mpage->m_fmrisz);

	return (err);
}

static int
mm_get_paddr(nvlist_t *nvl, uint64_t *paddr)
{
	uint8_t version;
	uint64_t pa;
	char *scheme;
#ifdef __sparc
	uint64_t offset;
	char *unum;
	char **serids;
	uint_t nserids;
	int err;
#endif

	/* Verify FMRI scheme name and version number */
	if ((nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme) != 0) ||
	    (strcmp(scheme, FM_FMRI_SCHEME_MEM) != 0) ||
	    (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0) ||
	    version > FM_MEM_SCHEME_VERSION) {
		return (EINVAL);
	}

	/*
	 * There are two ways a physical address can be obtained from a mem
	 * scheme FMRI.  One way is to use the "offset" and "serial"
	 * members, if they are present, together with the "unum" member to
	 * calculate a physical address.  This is the preferred way since
	 * it is independent of possible changes to the programming of
	 * underlying hardware registers that may change the physical address.
	 * If the "offset" member is not present, then the address is
	 * retrieved from the "physaddr" member.
	 */
#if defined(__sparc)
	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &offset) != 0) {
		if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &pa) !=
		    0) {
			return (EINVAL);
		}
	} else if (nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0 ||
	    nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &serids,
	    &nserids) != 0) {
		return (EINVAL);
	} else {
		err = cpu_get_mem_addr(unum, serids[0], offset, &pa);
		if (err != 0) {
			if (err == ENOTSUP) {
				/* Fall back to physaddr */
				if (nvlist_lookup_uint64(nvl,
				    FM_FMRI_MEM_PHYSADDR, &pa) != 0)
					return (EINVAL);
			} else
				return (err);
		}
	}
#elif defined(__i386) || defined(__amd64)
	if (cmi_mc_unumtopa(NULL, nvl, &pa) == 0)
		return (EINVAL);
#else
#error "port me"
#endif	/* __sparc */

	*paddr = pa;
	return (0);
}