/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>

#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#ifdef __sparc
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#endif	/* __sparc */

/*
 * Turn a byte length into a pagecount.  The DDI btop takes a
 * 32-bit size on 32-bit machines; this handles 64-bit sizes for
 * large physical-memory 32-bit machines.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))

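/*
 * For example, with 4K pages (_pageshift == 12), a 6GB byte count of
 * 0x180000000 does not fit in a 32-bit size_t, yet
 * BTOP(0x180000000ULL) still yields the correct pagecount, 0x180000.
 */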

static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
static int mm_read_mem_page(intptr_t data, mem_page_t *mpage);
static int mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl);
static int mm_get_paddr(nvlist_t *nvl, uint64_t *paddr);

/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	struct mem_minor {
		char *name;
		minor_t minor;
		int privonly;
		const char *rdpriv;
		const char *wrpriv;
		mode_t priv_mode;
	} mm[] = {
		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
		{ "null",	M_NULL, PRIVONLY_DEV,	NULL,	NULL,	0666 },
		{ "zero",	M_ZERO, PRIVONLY_DEV,	NULL,	NULL,	0666 },
	};
	kstat_t *ksp;

	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
		    DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	mm_dip = devi;

	ksp = kstat_create("mm", 0, "phys_installed", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = mm_kstat_update;
		ksp->ks_snapshot = mm_kstat_snapshot;
		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
		kstat_install(ksp);
	}

	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "kmem_io_access", 0);

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)mm_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
	switch (getminor(*devp)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		/* standard devices */
		break;

	default:
		/* Unsupported or unknown type */
		return (EINVAL);
	}
	return (0);
}

struct pollhead	mm_pollhd;

/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	switch (getminor(dev)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
		/*
		 * A non-NULL pollhead pointer should be returned in case
		 * the user polls for 0 events.
		 */
		*phpp = !anyyet && !*reventsp ?
		    &mm_pollhd : (struct pollhead *)NULL;
		return (0);
	default:
		/* no other devices currently support polling */
		return (ENXIO);
	}
}

static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * Implement zero size to reduce overhead (avoid two failing
	 * property lookups per stat).
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}

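/*
 * Perform I/O to or from a single page of physical memory: page frame
 * 'pfn' is transiently mapped at mm_map (serialized by mm_lock) and the
 * data is copied through with uiomove().  Non-memory pages (device
 * space) are handed to ddi_peekpokeio() instead, and only if 'allowio'
 * says the caller may touch I/O space.
 */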
static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
{
	int error = 0;
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);

	mutex_enter(&mm_lock);
	hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
	    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

	if (!pf_is_memory(pfn)) {
		if (allowio) {
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else
		error = uiomove(&mm_map[pageoff], nbytes, rw, uio);

	hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	mutex_exit(&mm_lock);
	return (error);
}

#ifdef __sparc

static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int i;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, va);
	i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
	AS_LOCK_EXIT(as, &as->a_lock);

	return (i);
}

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */

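/*
 * Common read/write engine for /dev/mem, /dev/kmem, /dev/allkmem,
 * /dev/zero and /dev/null: walk the uio one iovec at a time and hand
 * each page-sized chunk to mmio(), zero-fill it, or discard it as the
 * minor device dictates.
 */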
/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0);
			break;

		case M_KMEM:
		case M_ALLKMEM:
		{
			page_t **ppp;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure.  Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context so we avoid it
			 * using the try_lock set check above.  Some day when
			 * the kernel page locking gets redesigned all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
		}

			break;

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;

		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}

/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
		return (EFAULT);
	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else if (mem_vtop.m_as == NULL) {
		return (EIO);
	} else {
		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);
	if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
		return (EFAULT);

	return (0);
}

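/*
 * A minimal user-space sketch of the MEM_VTOP ioctl above (illustrative
 * only; assumes the mem_vtop_t definition from <sys/mem.h> and an
 * address-space pointer 'as' obtained elsewhere, e.g. through libkvm):
 *
 *	mem_vtop_t vtop;
 *	int fd = open("/dev/kmem", O_RDONLY);
 *
 *	vtop.m_as = as;
 *	vtop.m_va = va;
 *	if (fd != -1 && ioctl(fd, MEM_VTOP, &vtop) == 0)
 *		pfn = vtop.m_pfn;
 *
 * The resulting pfn, combined with va's offset within the page, gives
 * the physical address.
 */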
/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
	extern int page_retire_test(void);
	uint64_t pa;

	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
		return (EFAULT);
	}

	switch (cmd) {
	case MEM_PAGE_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_UNRETIRE:
		return (page_unretire(pa));

	case MEM_PAGE_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_RETIRE_MCE:
		return (page_retire(pa, PR_MCE));

	case MEM_PAGE_RETIRE_UE:
		return (page_retire(pa, PR_UE));

	case MEM_PAGE_GETERRORS:
		{
			uint64_t page_errors;
			int rc = page_retire_check(pa, &page_errors);
			if (copyout(&page_errors, (void *)data,
			    sizeof (uint64_t))) {
				return (EFAULT);
			}
			return (rc);
		}

	case MEM_PAGE_RETIRE_TEST:
		return (page_retire_test());

	}

	return (EINVAL);
}

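/*
 * A minimal sketch of how a privileged consumer (an FMA agent, say)
 * might drive the retire ioctls above against /dev/mem (illustrative
 * only; 'pa' is the physical address of the target page):
 *
 *	uint64_t pa;
 *	int fd = open("/dev/mem", O_RDONLY);
 *
 *	if (fd != -1 && ioctl(fd, MEM_PAGE_RETIRE, &pa) == 0)
 *		rc = ioctl(fd, MEM_PAGE_ISRETIRED, &pa);
 */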

/*
 * Given a mem-scheme FMRI for a page, execute the given page retire
 * command on it.
 */
static int
mmioctl_page_fmri_retire(int cmd, intptr_t data)
{
	mem_page_t mpage;
	uint64_t pa;
	nvlist_t *nvl;
	int err;

	if ((err = mm_read_mem_page(data, &mpage)) < 0)
		return (err);

	if ((err = mm_get_mem_fmri(&mpage, &nvl)) < 0)
		return (err);

	if ((err = mm_get_paddr(nvl, &pa)) < 0) {
		nvlist_free(nvl);
		return (err);
	}

	nvlist_free(nvl);

	switch (cmd) {
	case MEM_PAGE_FMRI_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_FMRI_RETIRE:
		return (page_retire(pa, PR_FMA));
	}

	return (EINVAL);
}

#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address, return the
 * associated memory name in the provided data buffer.
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		return (err);
	}

	if (len >= mem_name.m_namelen) {
		kmem_free(buf, bufsize);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_name,
	    mem_name.m_namelen, NULL) != 0) {
		kmem_free(buf, bufsize);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	return (0);
}

/*
 * Given a syndrome and address, return information about the
 * associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
	mem_info_t mem_info;
	int err;

	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
		return (EFAULT);

	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
		return (err);

	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Given a memory name, return its associated serial id.
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	void *name;
	size_t name_len;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	if (mem_name.m_namelen > 1024)
		mem_name.m_namelen = 1024; /* cap at 1024 bytes */

	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
	    mem_name.m_namelen, &name_len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	if (len > mem_name.m_sidlen) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_sid,
	    mem_name.m_sidlen, NULL) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	kmem_free(name, mem_name.m_namelen);
	return (0);
}
#endif	/* __sparc */

/*
 * Private ioctls for:
 * libkvm, to support kvm_physaddr();
 * FMA, to support page_retire() and memory attribute information.
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
		return (ENXIO);

	switch (cmd) {
	case MEM_VTOP:
		return (mmioctl_vtop(data));

	case MEM_PAGE_RETIRE:
	case MEM_PAGE_ISRETIRED:
	case MEM_PAGE_UNRETIRE:
	case MEM_PAGE_RETIRE_MCE:
	case MEM_PAGE_RETIRE_UE:
	case MEM_PAGE_GETERRORS:
	case MEM_PAGE_RETIRE_TEST:
		return (mmioctl_page_retire(cmd, data));

	case MEM_PAGE_FMRI_RETIRE:
	case MEM_PAGE_FMRI_ISRETIRED:
		return (mmioctl_page_fmri_retire(cmd, data));

#ifdef __sparc
	case MEM_NAME:
		return (mmioctl_get_mem_name(data));

	case MEM_INFO:
		return (mmioctl_get_mem_info(data));

	case MEM_SID:
		return (mmioctl_get_mem_sid(data));
#else
	case MEM_NAME:
	case MEM_INFO:
	case MEM_SID:
		return (ENOTSUP);
#endif	/* __sparc */
	}
	return (ENXIO);
}

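/*
 * mmap entry point: translate an offset on a memory special file into
 * a page frame cookie.  Only /dev/mem offsets that fall within installed
 * physical memory succeed; everything else returns -1.
 */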
/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
	pfn_t pf;
	struct memlist *pmem;
	minor_t minor = getminor(dev);

	switch (minor) {
	case M_MEM:
		pf = btop(off);
		memlist_read_lock();
		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
			if (pf >= BTOP(pmem->address) &&
			    pf < BTOP(pmem->address + pmem->size)) {
				memlist_read_unlock();
				return (impl_obmem_pfnum(pf));
			}
		}
		memlist_read_unlock();
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* no longer supported with KPR */
		return (-1);

	case M_ZERO:
		/*
		 * We shouldn't be mmap'ing to /dev/zero here as
		 * mmsegmap() should have already converted
		 * a mapping request for this device to a mapping
		 * using seg_vn for anonymous memory.
		 */
		break;

	}
	return (-1);
}

/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct segdev_crargs dev_a;
	int error;
	minor_t minor;
	off_t i;

	minor = getminor(dev);

	as_rangelock(as);
	if ((flags & MAP_FIXED) == 0) {
		/*
		 * No need to worry about vac alignment on /dev/zero
		 * since this is a "clone" object that doesn't yet exist.
		 */
		map_addr(addrp, len, (offset_t)off,
		    (minor == M_MEM) || (minor == M_KMEM), flags);

		if (*addrp == NULL) {
			as_rangeunlock(as);
			return (ENOMEM);
		}
	} else {
		/*
		 * User specified address:
		 * blow away any previous mappings.
		 */
		(void) as_unmap(as, *addrp, len);
	}

	switch (minor) {
	case M_MEM:
		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
		if ((flags & MAP_TYPE) != MAP_SHARED) {
			as_rangeunlock(as);
			return (EINVAL);
		}

		/*
		 * Check to ensure that the entire range is
		 * legal and we are not trying to map in
		 * more than the device will let us.
		 */
		for (i = 0; i < len; i += PAGESIZE) {
			if (mmmmap(dev, off + i, maxprot) == -1) {
				as_rangeunlock(as);
				return (ENXIO);
			}
		}

		/*
		 * Use seg_dev segment driver for /dev/mem mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = (flags & MAP_TYPE);
		dev_a.prot = (uchar_t)prot;
		dev_a.maxprot = (uchar_t)maxprot;
		dev_a.hat_attr = 0;

		/*
		 * Make /dev/mem mappings non-consistent since we can't
		 * alias pages that don't have page structs behind them,
		 * such as kernel stack pages.  If someone mmap()s a kernel
		 * stack page and if we give them a tte with cv, a line from
		 * that page can get into both pages of the spitfire d$.
		 * But snoop from another processor will only invalidate
		 * the first page.  This later caused the kernel
		 * (xc_attention) to go into an infinite loop at pil 13
		 * and no interrupts could come in.  See 1203630.
		 */
		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
		dev_a.devmap_data = NULL;

		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	case M_ZERO:
		/*
		 * Use seg_vn segment driver for /dev/zero mapping.
		 * Passing in a NULL amp gives us the "cloning" effect.
		 */
		vn_a.vp = NULL;
		vn_a.offset = 0;
		vn_a.type = (flags & MAP_TYPE);
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.cred = cred;
		vn_a.amp = NULL;
		vn_a.szc = 0;
		vn_a.lgrp_mem_policy_flags = 0;
		error = as_map(as, *addrp, len, segvn_create, &vn_a);
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* No longer supported with KPR. */
		error = ENXIO;
		break;

	case M_NULL:
		/*
		 * Use seg_dev segment driver for /dev/null mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
		dev_a.hat_attr = 0;
		dev_a.hat_flags = 0;
		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	default:
		error = ENXIO;
	}

	as_rangeunlock(as);
	return (error);
}

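/*
 * The M_ZERO path above is what makes the classic anonymous-memory
 * idiom work.  From user space (a sketch):
 *
 *	int fd = open("/dev/zero", O_RDWR);
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE, fd, 0);
 *
 * The segvn "clone" mapping set up in mmsegmap() gives each such
 * mapping its own zero-filled anonymous pages on first touch.
 */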

static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0	/* bus operations */
};

static struct modldrv modldrv = {
	&mod_driverops, "memory driver %I%", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

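/*
 * The "mm:phys_installed" kstat served by the two routines below
 * presents phys_install as a raw array of { address, size } pairs, one
 * per memlist entry.  A consumer might read it through libkstat roughly
 * as follows (a sketch, assuming the usual <kstat.h> interfaces):
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "mm", 0, "phys_installed");
 *
 *	if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1) {
 *		uint64_t *pairs = ksp->ks_data;
 *		... ks_ndata (address, size) pairs follow ...
 *	}
 */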
static int
mm_kstat_update(kstat_t *ksp, int rw)
{
	struct memlist *pmem;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
		count++;
	}
	memlist_read_unlock();

	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct memlist *pmem;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;
		kspmem->address = pmem->address;
		kspmem->size = pmem->size;
	}
	memlist_read_unlock();

	return (0);
}

/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32
	else {
		mem_name32_t mem_name32;

		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
			return (EFAULT);
		mem_name->m_addr = mem_name32.m_addr;
		mem_name->m_synd = mem_name32.m_synd;
		mem_name->m_type[0] = mem_name32.m_type[0];
		mem_name->m_type[1] = mem_name32.m_type[1];
		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
	}
#endif	/* _SYSCALL32 */

	return (0);
}

/*
 * Read a mem_page_t from user-space and store it in the mem_page_t
 * pointed to by the mpage argument.
 */
static int
mm_read_mem_page(intptr_t data, mem_page_t *mpage)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mpage, sizeof (mem_page_t)) != 0)
			return (EFAULT);
	}
#ifdef	_SYSCALL32
	else {
		mem_page32_t mpage32;

		if (copyin((void *)data, &mpage32, sizeof (mem_page32_t)) != 0)
			return (EFAULT);

		mpage->m_fmri = (caddr_t)(uintptr_t)mpage32.m_fmri;
		mpage->m_fmrisz = mpage32.m_fmrisz;
	}
#endif	/* _SYSCALL32 */

	return (0);
}

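/*
 * For reference, the unpacked mem-scheme FMRI consumed by mm_get_paddr()
 * below typically carries members along these lines (member names per
 * <sys/fm/protocol.h>; the values are purely illustrative):
 *
 *	scheme = "mem"
 *	version = 0
 *	unum = "<platform-specific unum string>"
 *	serial = [ "<DIMM serial id>" ]
 *	offset = 0x1000		(or, alternatively, physaddr = 0x...)
 */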

/*
 * Expand an FMRI from a mem_page_t.
 */
static int
mm_get_mem_fmri(mem_page_t *mpage, nvlist_t **nvl)
{
	char *buf;
	int err;

	if (mpage->m_fmri == NULL || mpage->m_fmrisz > MEM_FMRI_MAX_BUFSIZE)
		return (EINVAL);

	buf = kmem_alloc(mpage->m_fmrisz, KM_SLEEP);
	if (copyin(mpage->m_fmri, buf, mpage->m_fmrisz) != 0) {
		kmem_free(buf, mpage->m_fmrisz);
		return (EFAULT);
	}

	err = nvlist_unpack(buf, mpage->m_fmrisz, nvl, KM_SLEEP);
	kmem_free(buf, mpage->m_fmrisz);

	return (err);
}

static int
mm_get_paddr(nvlist_t *nvl, uint64_t *paddr)
{
	uint8_t version;
	uint64_t pa;
	char *scheme;
#ifdef __sparc
	uint64_t offset;
	char *unum;
	char **serids;
	uint_t nserids;
	int err;
#endif

	/* Verify FMRI scheme name and version number */
	if ((nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme) != 0) ||
	    (strcmp(scheme, FM_FMRI_SCHEME_MEM) != 0) ||
	    (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0) ||
	    version > FM_MEM_SCHEME_VERSION) {
		return (EINVAL);
	}

	/*
	 * There are two ways a physical address can be obtained from a mem
	 * scheme FMRI.  One way is to use the "offset" and "serial"
	 * members, if they are present, together with the "unum" member to
	 * calculate a physical address.  This is the preferred way since
	 * it is independent of possible changes to the programming of
	 * underlying hardware registers that may change the physical address.
	 * If the "offset" member is not present, then the address is
	 * retrieved from the "physaddr" member.
	 */
#ifdef __sparc
	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &offset) != 0) {
		if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &pa) !=
		    0) {
			return (EINVAL);
		}
	} else if (nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0 ||
	    nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &serids,
	    &nserids) != 0) {
		return (EINVAL);
	} else {
		if ((err = cpu_get_mem_addr(unum, serids[0], offset, &pa)) != 0)
			return (err);
	}
#else	/* __i386, __amd64 */
	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &pa) != 0)
		return (EINVAL);
#endif	/* __sparc */

	*paddr = pa;
	return (0);
}