/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/types.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/errno.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/page.h>
#include <sys/fs/swapnode.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>
#include <sys/vmsystm.h>
#include <sys/physmem.h>
#include <sys/vfs_opreg.h>

static dev_info_t *physmem_dip = NULL;

/*
 * Linked list element hanging off physmem_proc_hash below, which holds all
 * the information for a given segment which has been setup for this process.
 * This is a simple linked list as we are assuming that for a given process
 * the setup ioctl will only be called a handful of times.  If this assumption
 * changes in the future, a quicker to traverse data structure should be used.
 */
struct physmem_hash {
	struct physmem_hash *ph_next;
	uint64_t ph_base_pa;
	caddr_t ph_base_va;
	size_t ph_seg_len;
	struct vnode *ph_vnode;
};

/*
 * Hash of all of the processes which have setup mappings with the driver with
 * pointers to per process data.
 */
struct physmem_proc_hash {
	struct proc *pph_proc;
	struct physmem_hash *pph_hash;
	struct physmem_proc_hash *pph_next;
};


/* Needs to be a power of two for simple hash algorithm */
#define	PPH_SIZE	8
struct physmem_proc_hash *pph[PPH_SIZE];

/*
 * Lock which protects the pph hash above.  To add an element (either a new
 * process or a new segment) the WRITE lock must be held.  To traverse the
 * list, only a READ lock is needed.
 */
krwlock_t pph_rwlock;

#define	PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1)))

/*
 * Need to keep a reference count of how many processes have the driver
 * open to prevent it from disappearing.
 */
uint64_t physmem_vnodecnt;
kmutex_t physmem_mutex;		/* protects physmem_vnodecnt */
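/*
 * The structures above form a small two-level lookup: pph[] is indexed by
 * PHYSMEM_HASH(procp), each bucket holds a chain of physmem_proc_hash
 * entries (one per process), and each of those heads a chain of
 * physmem_hash entries (one per segment that process has set up):
 *
 *	pph[PHYSMEM_HASH(procp)] -> physmem_proc_hash -> physmem_hash -> ...
 *				    (one per process)    (one per segment)
 */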
static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr, caller_context_t *ct);

static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct);

static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct);

static void physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct);

const fs_operation_def_t physmem_vnodeops_template[] = {
	VOPNAME_GETPAGE,	{ .vop_getpage = physmem_getpage },
	VOPNAME_ADDMAP,		{ .vop_addmap = physmem_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = physmem_delmap },
	VOPNAME_INACTIVE,	{ .vop_inactive = physmem_inactive },
	NULL,			NULL
};

vnodeops_t *physmem_vnodeops = NULL;

/*
 * Removes the current process from the hash if the process has no more
 * physmem segments active.
 */
void
physmem_remove_hash_proc()
{
	int index;
	struct physmem_proc_hash **walker;
	struct physmem_proc_hash *victim = NULL;

	index = PHYSMEM_HASH(curproc);
	rw_enter(&pph_rwlock, RW_WRITER);
	walker = &pph[index];
	while (*walker != NULL) {
		if ((*walker)->pph_proc == curproc &&
		    (*walker)->pph_hash == NULL) {
			victim = *walker;
			*walker = victim->pph_next;
			break;
		}
		walker = &((*walker)->pph_next);
	}
	rw_exit(&pph_rwlock);
	if (victim != NULL)
		kmem_free(victim, sizeof (struct physmem_proc_hash));
}
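/*
 * For reference, the overlap check in physmem_add_hash() below rejects a
 * new range whenever either range's base falls within the other.  For
 * example, with an existing segment at [0x1000, 0x3000), a request for
 * [0x2000, 0x4000) fails with ERANGE since its base lies inside the
 * existing segment, while [0x3000, 0x4000) succeeds because the intervals
 * are half-open.
 */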
/*
 * Add a new entry to the hash for the given process to cache the
 * address ranges that it is working on.  If this is the first hash
 * item to be added for this process, we will create the head pointer
 * for this process.
 * Returns 0 on success, ERANGE when the physical address is already in the
 * hash.
 */
int
physmem_add_hash(struct physmem_hash *php)
{
	int index;
	struct physmem_proc_hash *iterator;
	struct physmem_proc_hash *newp = NULL;
	struct physmem_hash *temp;
	int ret = 0;

	index = PHYSMEM_HASH(curproc);

insert:
	rw_enter(&pph_rwlock, RW_WRITER);
	iterator = pph[index];
	while (iterator != NULL) {
		if (iterator->pph_proc == curproc) {
			/*
			 * check to make sure a single process does not try to
			 * map the same region twice.
			 */
			for (temp = iterator->pph_hash; temp != NULL;
			    temp = temp->ph_next) {
				if ((php->ph_base_pa >= temp->ph_base_pa &&
				    php->ph_base_pa < temp->ph_base_pa +
				    temp->ph_seg_len) ||
				    (temp->ph_base_pa >= php->ph_base_pa &&
				    temp->ph_base_pa < php->ph_base_pa +
				    php->ph_seg_len)) {
					ret = ERANGE;
					break;
				}
			}
			if (ret == 0) {
				php->ph_next = iterator->pph_hash;
				iterator->pph_hash = php;
			}
			rw_exit(&pph_rwlock);
			/* Need to check for two threads in sync */
			if (newp != NULL)
				kmem_free(newp, sizeof (*newp));
			return (ret);
		}
		iterator = iterator->pph_next;
	}

	if (newp != NULL) {
		newp->pph_proc = curproc;
		newp->pph_next = pph[index];
		newp->pph_hash = php;
		php->ph_next = NULL;
		pph[index] = newp;
		rw_exit(&pph_rwlock);
		return (0);
	}

	rw_exit(&pph_rwlock);
	/* Dropped the lock so we could use KM_SLEEP */
	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
	goto insert;
}

/*
 * Will return the pointer to the physmem_hash struct if the setup routine
 * has previously been called for this memory.
 * Returns NULL on failure.
 */
struct physmem_hash *
physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(procp);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == procp) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((req_paddr >= php->ph_base_pa) &&
				    (req_paddr + len <=
				    php->ph_base_pa + php->ph_seg_len)) {
					return (php);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (NULL);
}

int
physmem_validate_cookie(uint64_t p_cookie)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(curproc);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((uint64_t)(uintptr_t)php == p_cookie) {
					return (1);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (0);
}
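/*
 * Note that the cookie handed out by the setup ioctl is simply the kernel
 * address of a physmem_hash entry cast to uint64_t, so it is never
 * dereferenced until physmem_validate_cookie() has found it by walking the
 * calling process's chain; a forged or stale cookie simply fails the walk.
 */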
/*
 * Remove the given vnode from the pph hash.  If it exists in the hash the
 * process still has to be around as the vnode is obviously still around and
 * since it's a physmem vnode, it must be in the hash.
 * If it is not in the hash that must mean that the setup ioctl failed.
 * Return 0 in this instance, 1 if it is in the hash.
 */
int
physmem_remove_vnode_hash(vnode_t *vp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash **phpp;
	struct physmem_hash *victim;

	index = PHYSMEM_HASH(curproc);
	/* synchronize with the map routine */
	rw_enter(&pph_rwlock, RW_WRITER);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			phpp = &proc_hp->pph_hash;
			while (*phpp != NULL) {
				if ((*phpp)->ph_vnode == vp) {
					victim = *phpp;
					*phpp = victim->ph_next;

					rw_exit(&pph_rwlock);
					kmem_free(victim, sizeof (*victim));
					return (1);
				}
				phpp = &(*phpp)->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	rw_exit(&pph_rwlock);

	/* not found */
	return (0);
}

int
physmem_setup_vnops()
{
	int error;
	char *name = "physmem";

	if (physmem_vnodeops != NULL)
		cmn_err(CE_PANIC, "physmem vnodeops already set\n");

	error = vn_make_ops(name, physmem_vnodeops_template,
	    &physmem_vnodeops);
	if (error != 0) {
		cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template");
	}
	return (error);
}
/*
 * The guts of the PHYSMEM_SETUP ioctl.
 * Create a segment in the address space with the specified parameters.
 * If pspp->user_va is NULL, as_gap will be used to find an appropriate VA.
 * We do not do bounds checking on the requested physical addresses, if they
 * do not exist in the system, they will not be mappable.
 * Returns 0 on success with the following error codes on failure:
 *	ENOMEM - The VA range requested was already mapped if pspp->user_va is
 *		non-NULL or the system was unable to find enough VA space for
 *		the desired length if user_va was NULL.
 *	EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
 */
int
physmem_setup_addrs(struct physmem_setup_param *pspp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs vn_a;
	int ret = 0;
	uint64_t base_pa;
	size_t len;
	caddr_t uvaddr;
	struct vnode *vp;
	struct physmem_hash *php;

	ASSERT(pspp != NULL);
	base_pa = pspp->req_paddr;
	len = pspp->len;
	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;

	/* Sanity checking */
	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
		return (EINVAL);
	if (!IS_P2ALIGNED(len, PAGESIZE))
		return (EINVAL);
	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
		return (EINVAL);

	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);

	/* Need to bump vnode count so that the driver can not be unloaded */
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt++;
	mutex_exit(&physmem_mutex);

	vp = vn_alloc(KM_SLEEP);
	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
	vn_setops(vp, physmem_vnodeops);

	php->ph_vnode = vp;

	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)base_pa;
	vn_a.type = MAP_SHARED;
	vn_a.prot = PROT_ALL;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = 0;
	vn_a.cred = NULL;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	as_rangelock(as);
	if (uvaddr != NULL) {
		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
			ret = ENOMEM;
fail:
			as_rangeunlock(as);
			vn_free(vp);
			kmem_free(php, sizeof (*php));
			mutex_enter(&physmem_mutex);
			physmem_vnodecnt--;
			mutex_exit(&physmem_mutex);
			return (ret);
		}
	} else {
		/* We pick the address for the user */
		map_addr(&uvaddr, len, 0, 1, 0);
		if (uvaddr == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}
	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);

	if (ret == 0) {
		as_rangeunlock(as);
		php->ph_base_pa = base_pa;
		php->ph_base_va = uvaddr;
		php->ph_seg_len = len;
		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
		pspp->cookie = (uint64_t)(uintptr_t)php;
		ret = physmem_add_hash(php);
		if (ret == 0)
			return (0);

		/* Note that the call to as_unmap will free the vnode */
		(void) as_unmap(as, uvaddr, len);
		kmem_free(php, sizeof (*php));
		return (ret);
	}

	goto fail;
	/*NOTREACHED*/
}
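/*
 * Because vn_a.offset above is set to the base physical address, the
 * segment created by as_map() maps vnode offsets 1:1 to physical
 * addresses: a fault at ph_base_va + delta reaches physmem_getpage() with
 * off == ph_base_pa + delta.  This is why map_page_proc() below hashes
 * each captured page in at its physical address.
 */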
/*
 * The guts of the PHYSMEM_MAP ioctl.
 * Map the given PA to the appropriate VA if PHYSMEM_SETUP ioctl has already
 * been called for this PA range.
 * Returns 0 on success with the following error codes on failure:
 *	EPERM - The requested page is long term locked, and thus repeated
 *		requests to allocate this page will likely fail.
 *	EAGAIN - The requested page could not be allocated, but it is believed
 *		that future attempts could succeed.
 *	ENOMEM - There was not enough free memory in the system to safely
 *		map the requested page.
 *	EINVAL - The requested paddr was not PAGESIZE aligned or the
 *		PHYSMEM_SETUP ioctl was not called for this page.
 *	ENOENT - The requested page was inside the kernel cage, and the
 *		PHYSMEM_CAGE flag was not set.
 *	EBUSY - The requested page is retired and the PHYSMEM_RETIRED flag
 *		was not set.
 */
static int
physmem_map_addrs(struct physmem_map_param *pmpp)
{
	caddr_t uvaddr;
	page_t *pp;
	uint64_t req_paddr;
	struct vnode *vp;
	int ret = 0;
	struct physmem_hash *php;
	uint_t flags = 0;

	ASSERT(pmpp != NULL);
	req_paddr = pmpp->req_paddr;

	if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
		return (EINVAL);
	/* Find the vnode for this map request */
	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}
	vp = php->ph_vnode;
	uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
	rw_exit(&pph_rwlock);

	pp = page_numtopp_nolock(btop((size_t)req_paddr));
	if (pp == NULL) {
		pmpp->ret_va = NULL;
		return (EPERM);
	}

	/*
	 * Check to see if page already mapped correctly.  This can happen
	 * when we failed to capture a page previously and it was captured
	 * asynchronously for us.  Return success in this case.
	 */
	if (pp->p_vnode == vp) {
		ASSERT(pp->p_offset == (u_offset_t)req_paddr);
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}

	/*
	 * physmem should be responsible for checking for cage
	 * and prom pages.
	 */
	if (pmpp->flags & PHYSMEM_CAGE)
		flags = CAPTURE_GET_CAGE;
	if (pmpp->flags & PHYSMEM_RETIRED)
		flags |= CAPTURE_GET_RETIRED;

	ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);

	if (ret != 0) {
		pmpp->ret_va = NULL;
		return (ret);
	} else {
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}
}
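/*
 * map_page_proc() below is not called directly from the ioctl path; it is
 * the page capture callback registered for PC_PHYSMEM in physmem_attach(),
 * invoked by the capture framework once page_trycapture() (or a later
 * asynchronous capture attempt) has claimed the requested page.
 */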
/*
 * Map the given page into the process's address space if possible.
 * We actually only hash the page in on the correct vnode as the page
 * will be mapped via segvn_pagefault.
 * returns 0 on success
 * returns 1 if there is no need to map this page anymore (process exited)
 * returns -1 if we failed to map the page.
 */
int
map_page_proc(page_t *pp, void *arg, uint_t flags)
{
	struct vnode *vp;
	proc_t *procp = (proc_t *)arg;
	int ret;
	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
	struct physmem_hash *php;

	ASSERT(pp != NULL);

	/*
	 * Check against availrmem to make sure that we're not low on memory.
	 * We check again here as ASYNC requests do not do this check
	 * elsewhere.  We return 1 as we don't want the page to have the
	 * PR_CAPTURE bit set or be on the page capture hash.
	 */
	if (swapfs_minfree > availrmem + 1) {
		page_free(pp, 1);
		return (1);
	}

	/*
	 * If this is an asynchronous request for the current process,
	 * we can not map the page as it's possible that we are also in the
	 * process of unmapping the page which could result in a deadlock
	 * with the as lock.
	 */
	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
		page_free(pp, 1);
		return (-1);
	}

	/* only return zeroed out pages */
	pagezero(pp, 0, PAGESIZE);

	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(paddr, PAGESIZE, procp);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		/*
		 * Free the page as there is no longer a valid outstanding
		 * request for this page.
		 */
		page_free(pp, 1);
		return (1);
	}

	vp = php->ph_vnode;

	/*
	 * We need to protect against a possible deadlock here where we own
	 * the vnode page hash mutex and want to acquire it again as there
	 * are locations in the code, where we unlock a page while holding
	 * the mutex which can lead to the page being captured and eventually
	 * end up here.
	 */
	if (mutex_owned(page_vnode_mutex(vp))) {
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (-1);
	}

	ret = page_hashin(pp, vp, paddr, NULL);
	rw_exit(&pph_rwlock);
	if (ret == 0) {
		page_free(pp, 1);
		return (-1);
	}

	page_downgrade(pp);

	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	return (0);
}

/*
 * The guts of the PHYSMEM_DESTROY ioctl.
 * The cookie passed in will provide all of the information needed to
 * free up the address space and physical memory associated with the
 * corresponding PHYSMEM_SETUP ioctl.
 * Returns 0 on success with the following error codes on failure:
 *	EINVAL - The cookie supplied is not valid.
 */
int
physmem_destroy_addrs(uint64_t p_cookie)
{
	struct as *as = curproc->p_as;
	size_t len;
	caddr_t uvaddr;

	rw_enter(&pph_rwlock, RW_READER);
	if (physmem_validate_cookie(p_cookie) == 0) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}

	len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
	uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
	rw_exit(&pph_rwlock);

	(void) as_unmap(as, uvaddr, len);

	return (0);
}

/*
 * If the page has been hashed into the physmem vnode, then just look it up
 * and return it via pl, otherwise return ENOMEM as the map ioctl has not
 * succeeded on the given page.
 */
/*ARGSUSED*/
static int
physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
    struct cred *cr, caller_context_t *ct)
{
	page_t *pp;

	ASSERT(len == PAGESIZE);
	ASSERT(AS_READ_HELD(seg->s_as));

	/*
	 * If the page is in the hash, then we successfully claimed this
	 * page earlier, so return it to the caller.
	 */
	pp = page_lookup(vp, off, SE_SHARED);
	if (pp != NULL) {
		pl[0] = pp;
		pl[1] = NULL;
		*protp = PROT_ALL;
		return (0);
	}
	return (ENOMEM);
}

/*
 * We can not allow a process mapping /dev/physmem pages to fork as there can
 * only be a single mapping to a /dev/physmem page at a given time.  Thus, the
 * return of EINVAL when we are not working on our own address space.
 * Otherwise we return zero as this function is required for normal operation.
 */
/*ARGSUSED*/
static int
physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct)
{
	if (curproc->p_as != as) {
		return (EINVAL);
	}
	return (0);
}
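/*
 * Teardown works through the VM system rather than directly: the destroy
 * ioctl calls as_unmap(), segvn then calls VOP_DELMAP() (physmem_delmap
 * below), and the VN_RELE() there drops the last hold on the vnode so that
 * physmem_inactive() runs to destroy the remaining pages and free the
 * vnode itself.
 */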
/* Will always get called for removing a whole segment. */
/*ARGSUSED*/
static int
physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct)
{
	/*
	 * Release our hold on the vnode so that the final VN_RELE will
	 * call physmem_inactive to clean things up.
	 */
	VN_RELE(vp);

	return (0);
}

/*
 * Clean up all the pages belonging to this vnode and then free it.
 */
/*ARGSUSED*/
static void
physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct)
{
	page_t *pp;

	/*
	 * Remove the vnode from the hash now, to prevent asynchronous
	 * attempts to map into this vnode.  This avoids a deadlock
	 * where two threads try to get into this logic at the same
	 * time and try to map the pages they are destroying into the
	 * other's address space.
	 * If it's not in the hash, just free it.
	 */
	if (physmem_remove_vnode_hash(vp) == 0) {
		ASSERT(vp->v_pages == NULL);
		vn_free(vp);
		physmem_remove_hash_proc();
		mutex_enter(&physmem_mutex);
		physmem_vnodecnt--;
		mutex_exit(&physmem_mutex);
		return;
	}

	/*
	 * At this point in time, no other logic can be adding or removing
	 * pages from the vnode, otherwise the v_pages list could be
	 * inaccurate.
	 */

	while ((pp = vp->v_pages) != NULL) {
		page_t *rpp;
		if (page_tryupgrade(pp)) {
			/*
			 * set lckcnt for page_destroy to do availrmem
			 * accounting
			 */
			pp->p_lckcnt = 1;
			page_destroy(pp, 0);
		} else {
			/* failure to lock should be transient */
			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
			if (rpp != pp) {
				page_unlock(rpp);
				continue;
			}
			page_unlock(pp);
		}
	}
	vn_free(vp);
	physmem_remove_hash_proc();
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
}

/*ARGSUSED*/
static int
physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int ret;

	switch (cmd) {
	case PHYSMEM_SETUP:
		{
			struct physmem_setup_param psp;
			if (ddi_copyin((void *)arg, &psp,
			    sizeof (struct physmem_setup_param), 0))
				return (EFAULT);
			ret = physmem_setup_addrs(&psp);
			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_MAP:
		{
			struct physmem_map_param pmp;
			if (ddi_copyin((void *)arg, &pmp,
			    sizeof (struct physmem_map_param), 0))
				return (EFAULT);
			ret = physmem_map_addrs(&pmp);
			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_DESTROY:
		{
			uint64_t cookie;
			if (ddi_copyin((void *)arg, &cookie,
			    sizeof (uint64_t), 0))
				return (EFAULT);
			ret = physmem_destroy_addrs(cookie);
		}
		break;
	default:
		return (ENOTSUP);
	}
	return (ret);
}
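/*
 * A minimal sketch of how a privileged userland consumer would drive the
 * ioctls above (illustrative only; it assumes the physmem_setup_param and
 * physmem_map_param definitions from <sys/physmem.h> and a page-aligned
 * physical range the caller is entitled to claim):
 *
 *	int fd = open("/dev/physmem", O_RDWR);
 *
 *	struct physmem_setup_param psp = { 0 };
 *	psp.req_paddr = base_pa;	// PAGESIZE aligned
 *	psp.len = seg_len;		// PAGESIZE aligned
 *	psp.user_va = 0;		// let the driver pick the VA
 *	if (ioctl(fd, PHYSMEM_SETUP, &psp) != 0)
 *		return;			// EINVAL or ENOMEM, see above
 *
 *	struct physmem_map_param pmp = { 0 };
 *	pmp.req_paddr = base_pa;	// one page per PHYSMEM_MAP call
 *	pmp.flags = 0;			// or PHYSMEM_CAGE, PHYSMEM_RETIRED
 *	if (ioctl(fd, PHYSMEM_MAP, &pmp) == 0)
 *		*(volatile char *)(uintptr_t)pmp.ret_va;  // fault page in
 *
 *	(void) ioctl(fd, PHYSMEM_DESTROY, &psp.cookie);
 */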
/*ARGSUSED*/
static int
physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int ret;
	static int msg_printed = 0;

	if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
		return (EINVAL);
	}

	/* need to make sure we have the right privileges */
	if ((ret = secpolicy_resource(credp)) != 0)
		return (ret);
	if ((ret = secpolicy_lock_memory(credp)) != 0)
		return (ret);

	if (msg_printed == 0) {
		cmn_err(CE_NOTE, "!driver has been opened. This driver may "
		    "take out long term locks on pages which may impact "
		    "dynamic reconfiguration events");
		msg_printed = 1;
	}

	return (0);
}

/*ARGSUSED*/
static int
physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	return (0);
}

/*ARGSUSED*/
static int
physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
    void *arg, void **resultp)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*resultp = physmem_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(ulong_t)getminor((dev_t)arg);
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

static int
physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
		return (DDI_FAILURE);

	physmem_dip = dip;

	/* Initialize driver specific data */
	if (physmem_setup_vnops()) {
		ddi_remove_minor_node(dip, ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	for (i = 0; i < PPH_SIZE; i++)
		pph[i] = NULL;

	page_capture_register_callback(PC_PHYSMEM, 10000,
	    map_page_proc);

	return (DDI_SUCCESS);
}

static int
physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int ret = DDI_SUCCESS;

	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(physmem_dip == dip);

	mutex_enter(&physmem_mutex);
	if (physmem_vnodecnt == 0) {
		if (physmem_vnodeops != NULL) {
			vn_freevnodeops(physmem_vnodeops);
			physmem_vnodeops = NULL;
			page_capture_unregister_callback(PC_PHYSMEM);
		}
	} else {
		ret = EBUSY;
	}
	mutex_exit(&physmem_mutex);
	if (ret == DDI_SUCCESS)
		ddi_remove_minor_node(dip, ddi_get_name(dip));
	return (ret);
}

static struct cb_ops physmem_cb_ops = {
	physmem_open,	/* open */
	physmem_close,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	physmem_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* chpoll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* cb_str */
	D_NEW | D_MP | D_DEVMAP,
	CB_REV,
	NULL,
	NULL
};

static struct dev_ops physmem_ops = {
	DEVO_REV,
	0,
	physmem_getinfo,
	nulldev,
	nulldev,
	physmem_attach,
	physmem_detach,
	nodev,
	&physmem_cb_ops,
	NULL,
	NULL,
	ddi_quiesce_not_needed,		/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"physmem driver",
	&physmem_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}