/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Memory special file
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>
#include <sys/vmem.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>

#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>

#include <sys/conf.h>
#include <sys/mem.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/memlist.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/fm/protocol.h>

#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif	/* __sparc */

/*
 * Turn a byte length into a pagecount.  The DDI btop takes a
 * 32-bit size on 32-bit machines, this handles 64-bit sizes for
 * large physical-memory 32-bit machines.
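 *
 * For example (illustration only, assuming 4K pages so _pageshift == 12):
 * the byte offset 0x180000000 (6 GB) becomes page number 0x180000 under
 * BTOP(), while the 64-bit byte value itself would not fit in the 32-bit
 * size that a 32-bit DDI btop() accepts.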
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))

static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

static int mm_kmem_io_access;

static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);

/*ARGSUSED1*/
static int
mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	struct mem_minor {
		char *name;
		minor_t minor;
		int privonly;
		const char *rdpriv;
		const char *wrpriv;
		mode_t priv_mode;
	} mm[] = {
		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
		{ "null",	M_NULL,		PRIVONLY_DEV,	NULL,	NULL,	0666 },
		{ "zero",	M_ZERO,		PRIVONLY_DEV,	NULL,	NULL,	0666 },
	};
	kstat_t *ksp;

	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
		    DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	mm_dip = devi;

	ksp = kstat_create("mm", 0, "phys_installed", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = mm_kstat_update;
		ksp->ks_snapshot = mm_kstat_snapshot;
		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
		kstat_install(ksp);
	}

	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "kmem_io_access", 0);

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	register int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)mm_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*ARGSUSED1*/
static int
mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
{
	switch (getminor(*devp)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		/* standard devices */
		break;

	default:
		/* Unsupported or unknown type */
		return (EINVAL);
	}
	/* must be character device */
	if (typ != OTYP_CHR)
		return (EINVAL);
	return (0);
}

struct pollhead	mm_pollhd;

/*ARGSUSED*/
static int
mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	switch (getminor(dev)) {
	case M_NULL:
	case M_ZERO:
	case M_MEM:
	case M_KMEM:
	case M_ALLKMEM:
		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
		/*
		 * A non NULL pollhead pointer should be returned in case
		 * user polls for 0 events.
		 */
		*phpp = !anyyet && !*reventsp ?
		    &mm_pollhd : (struct pollhead *)NULL;
		return (0);
	default:
		/* no other devices currently support polling */
		return (ENXIO);
	}
}

static int
mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	/*
	 * implement zero size to reduce overhead (avoid two failing
	 * property lookups per stat).
	 */
	return (ddi_prop_op_size(dev, dip, prop_op,
	    flags, name, valuep, lengthp, 0));
}

extern void mach_sync_icache_pa(caddr_t, size_t);
#pragma weak mach_sync_icache_pa

static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
    page_t *pp)
{
	int error = 0;
	int devload = 0;
	int is_memory = pf_is_memory(pfn);
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);
	caddr_t va = NULL;

	mutex_enter(&mm_lock);

	if (is_memory && kpm_enable) {
		if (pp)
			va = hat_kpm_mapin(pp, NULL);
		else
			va = hat_kpm_mapin_pfn(pfn);
	}

	if (va == NULL) {
		hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
		    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
		    HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
		va = mm_map;
		devload = 1;
	}

	if (!is_memory) {
		if (allowio) {
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else {
		error = uiomove(va + pageoff, nbytes, rw, uio);

		/*
		 * In case this has changed executable code,
		 * non-coherent I-caches must be flushed.
		 */
		if (rw != UIO_READ && &mach_sync_icache_pa != NULL) {
			mach_sync_icache_pa((caddr_t)ptob(pfn), PAGESIZE);
		}
	}

	if (devload)
		hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	else if (pp)
		hat_kpm_mapout(pp, NULL, va);
	else
		hat_kpm_mapout_pfn(pfn);

	mutex_exit(&mm_lock);
	return (error);
}

static int
mmpagelock(struct as *as, caddr_t va)
{
	struct seg *seg;
	int i;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, va);
	i = (seg != NULL) ?
	    SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
	AS_LOCK_EXIT(as, &as->a_lock);

	return (i);
}

#ifdef __sparc

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */

/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0, NULL);
			break;

		case M_KMEM:
		case M_ALLKMEM:
		{
			page_t **ppp = NULL;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
				break;

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure. Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context so we avoid it
			 * using the try_lock set check above. Some day when
			 * the kernel page locking gets redesigned all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access,
			    (locked && ppp) ? *ppp : NULL);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
		}

		break;

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;

		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}

static int
mmread(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_READ, cred));
}

static int
mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
{
	return (mmrw(dev, uio, UIO_WRITE, cred));
}

/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
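 *
 * As an illustrative sketch only (the descriptor kmem_fd and the address
 * va below are hypothetical, not part of this interface), a caller such
 * as libkvm fills in a mem_vtop_t and issues the ioctl against the
 * /dev/kmem minor, roughly:
 *
 *	mem_vtop_t vtop;
 *
 *	vtop.m_as = NULL;		(NULL: use the caller's own as)
 *	vtop.m_va = (void *)va;
 *	if (ioctl(kmem_fd, MEM_VTOP, &vtop) == 0)
 *		pa = ptob(vtop.m_pfn) | (va & PAGEOFFSET);
 *
 * A translation that fails is reported as EIO; passing the kernel's kas
 * address in m_as translates kernel virtual addresses instead.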
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}

/*
 * Given a PA, execute the given page retire command on it.
 */
static int
mmioctl_page_retire(int cmd, intptr_t data)
{
	extern int page_retire_test(void);
	uint64_t pa;

	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
		return (EFAULT);
	}

	switch (cmd) {
	case MEM_PAGE_ISRETIRED:
		return (page_retire_check(pa, NULL));

	case MEM_PAGE_UNRETIRE:
		return (page_unretire(pa));

	case MEM_PAGE_RETIRE:
		return (page_retire(pa, PR_FMA));

	case MEM_PAGE_RETIRE_MCE:
		return (page_retire(pa, PR_MCE));

	case MEM_PAGE_RETIRE_UE:
		return (page_retire(pa, PR_UE));

	case MEM_PAGE_GETERRORS:
	{
		uint64_t page_errors;
		int rc = page_retire_check(pa, &page_errors);
		if (copyout(&page_errors, (void *)data,
		    sizeof (uint64_t))) {
			return (EFAULT);
		}
		return (rc);
	}

	case MEM_PAGE_RETIRE_TEST:
		return (page_retire_test());

	}

	return (EINVAL);
}

#ifdef __sparc
/*
 * Given a syndrome, syndrome type, and address return the
 * associated memory name in the provided data buffer.
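 *
 * The caller-supplied mem_name_t (copied in by mm_read_mem_name() below)
 * provides m_synd, m_type and m_addr for the lookup plus a user buffer
 * m_name of m_namelen bytes; the resolved name is copied out with
 * copyoutstr(), and ENOSPC is returned if it would not fit.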
 */
static int
mmioctl_get_mem_name(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		return (err);
	}

	if (len >= mem_name.m_namelen) {
		kmem_free(buf, bufsize);
		return (ENOSPC);
	}

	if (copyoutstr(buf, (char *)mem_name.m_name,
	    mem_name.m_namelen, NULL) != 0) {
		kmem_free(buf, bufsize);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	return (0);
}

/*
 * Given a syndrome and address return information about the associated memory.
 */
static int
mmioctl_get_mem_info(intptr_t data)
{
	mem_info_t mem_info;
	int err;

	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
		return (EFAULT);

	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
		return (err);

	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Given a memory name, return its associated serial id
 */
static int
mmioctl_get_mem_sid(intptr_t data)
{
	mem_name_t mem_name;
	void *buf;
	void *name;
	size_t name_len;
	size_t bufsize;
	int len, err;

	if ((bufsize = cpu_get_name_bufsize()) == 0)
		return (ENOTSUP);

	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
		return (err);

	buf = kmem_alloc(bufsize, KM_SLEEP);

	if (mem_name.m_namelen > 1024)
		mem_name.m_namelen = 1024; /* cap at 1024 bytes */

	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);

	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
	    mem_name.m_namelen, &name_len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	/*
	 * Call into cpu specific code to do the lookup.
	 */
	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (err);
	}

	if (len > mem_name.m_sidlen) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (ENAMETOOLONG);
	}

	if (copyoutstr(buf, (char *)mem_name.m_sid,
	    mem_name.m_sidlen, NULL) != 0) {
		kmem_free(buf, bufsize);
		kmem_free(name, mem_name.m_namelen);
		return (EFAULT);
	}

	kmem_free(buf, bufsize);
	kmem_free(name, mem_name.m_namelen);
	return (0);
}
#endif	/* __sparc */

/*
 * Private ioctls for
 *	libkvm to support kvm_physaddr().
 *	FMA support for page_retire() and memory attribute information.
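 *
 * Note that MEM_VTOP is only honored on the /dev/kmem minor, while the
 * page retire and memory attribute commands are only honored on /dev/mem;
 * any other combination fails with ENXIO (see the minor check below).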
 */
/*ARGSUSED*/
static int
mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
{
	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
		return (ENXIO);

	switch (cmd) {
	case MEM_VTOP:
		return (mmioctl_vtop(data));

	case MEM_PAGE_RETIRE:
	case MEM_PAGE_ISRETIRED:
	case MEM_PAGE_UNRETIRE:
	case MEM_PAGE_RETIRE_MCE:
	case MEM_PAGE_RETIRE_UE:
	case MEM_PAGE_GETERRORS:
	case MEM_PAGE_RETIRE_TEST:
		return (mmioctl_page_retire(cmd, data));

#ifdef __sparc
	case MEM_NAME:
		return (mmioctl_get_mem_name(data));

	case MEM_INFO:
		return (mmioctl_get_mem_info(data));

	case MEM_SID:
		return (mmioctl_get_mem_sid(data));
#else
	case MEM_NAME:
	case MEM_INFO:
	case MEM_SID:
		return (ENOTSUP);
#endif	/* __sparc */
	}
	return (ENXIO);
}

/*ARGSUSED2*/
static int
mmmmap(dev_t dev, off_t off, int prot)
{
	pfn_t pf;
	struct memlist *pmem;
	minor_t minor = getminor(dev);

	switch (minor) {
	case M_MEM:
		pf = btop(off);
		memlist_read_lock();
		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
			if (pf >= BTOP(pmem->address) &&
			    pf < BTOP(pmem->address + pmem->size)) {
				memlist_read_unlock();
				return (impl_obmem_pfnum(pf));
			}
		}
		memlist_read_unlock();
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* no longer supported with KPR */
		return (-1);

	case M_ZERO:
		/*
		 * We shouldn't be mmap'ing to /dev/zero here as
		 * mmsegmap() should have already converted
		 * a mapping request for this device to a mapping
		 * using seg_vn for anonymous memory.
		 */
		break;

	}
	return (-1);
}

/*
 * This function is called when a memory device is mmap'ed.
 * Set up the mapping to the correct device driver.
 */
static int
mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct segdev_crargs dev_a;
	int error;
	minor_t minor;
	off_t i;

	minor = getminor(dev);

	as_rangelock(as);
	/*
	 * No need to worry about vac alignment on /dev/zero
	 * since this is a "clone" object that doesn't yet exist.
	 */
	error = choose_addr(as, addrp, len, off,
	    (minor == M_MEM) || (minor == M_KMEM), flags);
	if (error != 0) {
		as_rangeunlock(as);
		return (error);
	}

	switch (minor) {
	case M_MEM:
		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
		if ((flags & MAP_TYPE) != MAP_SHARED) {
			as_rangeunlock(as);
			return (EINVAL);
		}

		/*
		 * Check to ensure that the entire range is
		 * legal and we are not trying to map in
		 * more than the device will let us.
		 */
		for (i = 0; i < len; i += PAGESIZE) {
			if (mmmmap(dev, off + i, maxprot) == -1) {
				as_rangeunlock(as);
				return (ENXIO);
			}
		}

		/*
		 * Use seg_dev segment driver for /dev/mem mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = (flags & MAP_TYPE);
		dev_a.prot = (uchar_t)prot;
		dev_a.maxprot = (uchar_t)maxprot;
		dev_a.hat_attr = 0;

		/*
		 * Make /dev/mem mappings non-consistent since we can't
		 * alias pages that don't have page structs behind them,
		 * such as kernel stack pages.
		 * If someone mmap()s a kernel stack page and if we give
		 * him a tte with cv, a line from that page can get into
		 * both pages of the spitfire d$.
		 * But snoop from another processor will only invalidate
		 * the first page. This later caused kernel (xc_attention)
		 * to go into an infinite loop at pil 13 and no interrupts
		 * could come in. See 1203630.
		 *
		 */
		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
		dev_a.devmap_data = NULL;

		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	case M_ZERO:
		/*
		 * Use seg_vn segment driver for /dev/zero mapping.
		 * Passing in a NULL amp gives us the "cloning" effect.
		 */
		vn_a.vp = NULL;
		vn_a.offset = 0;
		vn_a.type = (flags & MAP_TYPE);
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.cred = cred;
		vn_a.amp = NULL;
		vn_a.szc = 0;
		vn_a.lgrp_mem_policy_flags = 0;
		error = as_map(as, *addrp, len, segvn_create, &vn_a);
		break;

	case M_KMEM:
	case M_ALLKMEM:
		/* No longer supported with KPR. */
		error = ENXIO;
		break;

	case M_NULL:
		/*
		 * Use seg_dev segment driver for /dev/null mapping.
		 */
		dev_a.mapfunc = mmmmap;
		dev_a.dev = dev;
		dev_a.offset = off;
		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
		dev_a.hat_attr = 0;
		dev_a.hat_flags = 0;
		error = as_map(as, *addrp, len, segdev_create, &dev_a);
		break;

	default:
		error = ENXIO;
	}

	as_rangeunlock(as);
	return (error);
}

static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};

static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops, "memory driver", &mm_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

static int
mm_kstat_update(kstat_t *ksp, int rw)
{
	struct memlist *pmem;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
		count++;
	}
	memlist_read_unlock();

	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

static int
mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct memlist *pmem;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	memlist_read_lock();
	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;
		kspmem->address = pmem->address;
		kspmem->size = pmem->size;
	}
	memlist_read_unlock();

	return (0);
}

/*
 * Read a mem_name_t from user-space and store it in the mem_name_t
 * pointed to by the mem_name argument.
 */
static int
mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32
	else {
		mem_name32_t mem_name32;

		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
			return (EFAULT);
		mem_name->m_addr = mem_name32.m_addr;
		mem_name->m_synd = mem_name32.m_synd;
		mem_name->m_type[0] = mem_name32.m_type[0];
		mem_name->m_type[1] = mem_name32.m_type[1];
		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
	}
#endif	/* _SYSCALL32 */

	return (0);
}