1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/modctl.h> 30 #include <sys/conf.h> 31 #include <sys/ddi.h> 32 #include <sys/sunddi.h> 33 #include <sys/devops.h> 34 #include <sys/stat.h> 35 #include <sys/file.h> 36 #include <sys/cred.h> 37 #include <sys/policy.h> 38 #include <sys/errno.h> 39 #include <vm/seg_dev.h> 40 #include <vm/seg_vn.h> 41 #include <vm/page.h> 42 #include <sys/fs/swapnode.h> 43 #include <sys/sysmacros.h> 44 #include <sys/fcntl.h> 45 #include <sys/vmsystm.h> 46 #include <sys/physmem.h> 47 #include <sys/vfs_opreg.h> 48 49 static dev_info_t *physmem_dip = NULL; 50 51 /* 52 * Linked list element hanging off physmem_proc_hash below, which holds all 53 * the information for a given segment which has been setup for this process. 54 * This is a simple linked list as we are assuming that for a given process 55 * the setup ioctl will only be called a handful of times. If this assumption 56 * changes in the future, a quicker to traverse data structure should be used. 
 */
struct physmem_hash {
	struct physmem_hash *ph_next;	/* next segment for this process */
	uint64_t ph_base_pa;		/* physical base of the segment */
	caddr_t ph_base_va;		/* user virtual base of the segment */
	size_t ph_seg_len;		/* length of the segment in bytes */
	struct vnode *ph_vnode;		/* vnode backing this segment */
};

/*
 * Hash of all of the processes which have setup mappings with the driver with
 * pointers to per process data.
 */
struct physmem_proc_hash {
	struct proc *pph_proc;		/* process owning the list below */
	struct physmem_hash *pph_hash;	/* this process's active segments */
	struct physmem_proc_hash *pph_next;	/* next entry in this bucket */
};


/* Needs to be a power of two for simple hash algorithm */
#define	PPH_SIZE	8
struct physmem_proc_hash *pph[PPH_SIZE];

/*
 * Lock which protects the pph hash above. To add an element (either a new
 * process or a new segment) the WRITE lock must be held. To traverse the
 * list, only a READ lock is needed.
 */
krwlock_t pph_rwlock;

/* Hash a proc pointer into a pph[] bucket; PPH_SIZE must be a power of two. */
#define	PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1)))

/*
 * Need to keep a reference count of how many processes have the driver
 * open to prevent it from disappearing.
 */
uint64_t physmem_vnodecnt;
kmutex_t physmem_mutex;	/* protects physmem_vnodecnt */

static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred);

static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred);

static void physmem_inactive(vnode_t *vp, cred_t *crp);

/* vnode operations installed on every vnode handed out by PHYSMEM_SETUP */
const fs_operation_def_t physmem_vnodeops_template[] = {
	VOPNAME_GETPAGE, { .vop_getpage = physmem_getpage },
	VOPNAME_ADDMAP, { .vop_addmap = physmem_addmap },
	VOPNAME_DELMAP, { .vop_delmap = physmem_delmap },
	VOPNAME_INACTIVE, { .vop_inactive = physmem_inactive },
	NULL, NULL
};

vnodeops_t *physmem_vnodeops = NULL;

/*
 * Removes the current process from the hash if the process has no more
 * physmem segments active.
 */
void
physmem_remove_hash_proc()
{
	int index;
	struct physmem_proc_hash **walker;
	struct physmem_proc_hash *victim = NULL;

	index = PHYSMEM_HASH(curproc);
	rw_enter(&pph_rwlock, RW_WRITER);
	walker = &pph[index];
	/*
	 * Unlink the entry for curproc only if its segment list is empty;
	 * an entry with live segments must stay in the hash.
	 */
	while (*walker != NULL) {
		if ((*walker)->pph_proc == curproc &&
		    (*walker)->pph_hash == NULL) {
			victim = *walker;
			*walker = victim->pph_next;
			break;
		}
		walker = &((*walker)->pph_next);
	}
	rw_exit(&pph_rwlock);
	/* Free outside the lock; nothing can reach victim any more. */
	if (victim != NULL)
		kmem_free(victim, sizeof (struct physmem_proc_hash));
}

/*
 * Add a new entry to the hash for the given process to cache the
 * address ranges that it is working on. If this is the first hash
 * item to be added for this process, we will create the head pointer
 * for this process.
 * Returns 0 on success, ERANGE when the physical address is already in the
 * hash.
 */
int
physmem_add_hash(struct physmem_hash *php)
{
	int index;
	struct physmem_proc_hash *iterator;
	struct physmem_proc_hash *newp = NULL;
	struct physmem_hash *temp;
	int ret = 0;

	index = PHYSMEM_HASH(curproc);

insert:
	rw_enter(&pph_rwlock, RW_WRITER);
	iterator = pph[index];
	while (iterator != NULL) {
		if (iterator->pph_proc == curproc) {
			/*
			 * check to make sure a single process does not try to
			 * map the same region twice.
			 */
			for (temp = iterator->pph_hash; temp != NULL;
			    temp = temp->ph_next) {
				/* reject any overlap of the two PA ranges */
				if ((php->ph_base_pa >= temp->ph_base_pa &&
				    php->ph_base_pa < temp->ph_base_pa +
				    temp->ph_seg_len) ||
				    (temp->ph_base_pa >= php->ph_base_pa &&
				    temp->ph_base_pa < php->ph_base_pa +
				    php->ph_seg_len)) {
					ret = ERANGE;
					break;
				}
			}
			if (ret == 0) {
				php->ph_next = iterator->pph_hash;
				iterator->pph_hash = php;
			}
			rw_exit(&pph_rwlock);
			/*
			 * Need to check for two threads in sync: another
			 * thread may have created the per-process entry while
			 * we had dropped the lock to allocate newp below.
			 */
			if (newp != NULL)
				kmem_free(newp, sizeof (*newp));
			return (ret);
		}
		iterator = iterator->pph_next;
	}

	/* No entry for curproc yet; insert the one we allocated, if any. */
	if (newp != NULL) {
		newp->pph_proc = curproc;
		newp->pph_next = pph[index];
		newp->pph_hash = php;
		php->ph_next = NULL;
		pph[index] = newp;
		rw_exit(&pph_rwlock);
		return (0);
	}

	rw_exit(&pph_rwlock);
	/* Dropped the lock so we could use KM_SLEEP */
	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
	goto insert;
}

/*
 * Will return the pointer to the physmem_hash struct if the setup routine
 * has previously been called for this memory.
 * Returns NULL on failure.
222 */ 223 struct physmem_hash * 224 physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp) 225 { 226 int index; 227 struct physmem_proc_hash *proc_hp; 228 struct physmem_hash *php; 229 230 ASSERT(rw_lock_held(&pph_rwlock)); 231 232 index = PHYSMEM_HASH(procp); 233 proc_hp = pph[index]; 234 while (proc_hp != NULL) { 235 if (proc_hp->pph_proc == procp) { 236 php = proc_hp->pph_hash; 237 while (php != NULL) { 238 if ((req_paddr >= php->ph_base_pa) && 239 (req_paddr + len <= 240 php->ph_base_pa + php->ph_seg_len)) { 241 return (php); 242 } 243 php = php->ph_next; 244 } 245 } 246 proc_hp = proc_hp->pph_next; 247 } 248 return (NULL); 249 } 250 251 int 252 physmem_validate_cookie(uint64_t p_cookie) 253 { 254 int index; 255 struct physmem_proc_hash *proc_hp; 256 struct physmem_hash *php; 257 258 ASSERT(rw_lock_held(&pph_rwlock)); 259 260 index = PHYSMEM_HASH(curproc); 261 proc_hp = pph[index]; 262 while (proc_hp != NULL) { 263 if (proc_hp->pph_proc == curproc) { 264 php = proc_hp->pph_hash; 265 while (php != NULL) { 266 if ((uint64_t)(uintptr_t)php == p_cookie) { 267 return (1); 268 } 269 php = php->ph_next; 270 } 271 } 272 proc_hp = proc_hp->pph_next; 273 } 274 return (0); 275 } 276 277 /* 278 * Remove the given vnode from the pph hash. If it exists in the hash the 279 * process still has to be around as the vnode is obviously still around and 280 * since it's a physmem vnode, it must be in the hash. 281 * If it is not in the hash that must mean that the setup ioctl failed. 282 * Return 0 in this instance, 1 if it is in the hash. 
283 */ 284 int 285 physmem_remove_vnode_hash(vnode_t *vp) 286 { 287 int index; 288 struct physmem_proc_hash *proc_hp; 289 struct physmem_hash **phpp; 290 struct physmem_hash *victim; 291 292 index = PHYSMEM_HASH(curproc); 293 /* synchronize with the map routine */ 294 rw_enter(&pph_rwlock, RW_WRITER); 295 proc_hp = pph[index]; 296 while (proc_hp != NULL) { 297 if (proc_hp->pph_proc == curproc) { 298 phpp = &proc_hp->pph_hash; 299 while (*phpp != NULL) { 300 if ((*phpp)->ph_vnode == vp) { 301 victim = *phpp; 302 *phpp = victim->ph_next; 303 304 rw_exit(&pph_rwlock); 305 kmem_free(victim, sizeof (*victim)); 306 return (1); 307 } 308 phpp = &(*phpp)->ph_next; 309 } 310 } 311 proc_hp = proc_hp->pph_next; 312 } 313 rw_exit(&pph_rwlock); 314 315 /* not found */ 316 return (0); 317 } 318 319 int 320 physmem_setup_vnops() 321 { 322 int error; 323 char *name = "physmem"; 324 if (physmem_vnodeops != NULL) 325 cmn_err(CE_PANIC, "physmem vnodeops already set\n"); 326 error = vn_make_ops(name, physmem_vnodeops_template, &physmem_vnodeops); 327 if (error != 0) { 328 cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template"); 329 } 330 return (error); 331 } 332 333 /* 334 * The guts of the PHYSMEM_SETUP ioctl. 335 * Create a segment in the address space with the specified parameters. 336 * If pspp->user_va is NULL, as_gap will be used to find an appropriate VA. 337 * We do not do bounds checking on the requested phsycial addresses, if they 338 * do not exist in the system, they will not be mappable. 339 * Returns 0 on success with the following error codes on failure: 340 * ENOMEM - The VA range requested was already mapped if pspp->user_va is 341 * non-NULL or the system was unable to find enough VA space for 342 * the desired length if user_va was NULL> 343 * EINVAL - The requested PA, VA, or length was not PAGESIZE aligned. 
 */
int
physmem_setup_addrs(struct physmem_setup_param *pspp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs vn_a;
	int ret = 0;
	uint64_t base_pa;
	size_t len;
	caddr_t uvaddr;
	struct vnode *vp;
	struct physmem_hash *php;

	ASSERT(pspp != NULL);
	base_pa = pspp->req_paddr;
	len = pspp->len;
	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;

	/* Sanity checking */
	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
		return (EINVAL);
	if (!IS_P2ALIGNED(len, PAGESIZE))
		return (EINVAL);
	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
		return (EINVAL);

	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);

	/* Need to bump vnode count so that the driver can not be unloaded */
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt++;
	mutex_exit(&physmem_mutex);

	vp = vn_alloc(KM_SLEEP);
	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
	vn_setops(vp, physmem_vnodeops);

	php->ph_vnode = vp;

	/* segvn creation arguments: shared mapping backed by our vnode */
	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)base_pa;
	vn_a.type = MAP_SHARED;
	vn_a.prot = PROT_ALL;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = 0;
	vn_a.cred = NULL;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	as_rangelock(as);
	if (uvaddr != NULL) {
		/* caller chose the VA; verify the range is unmapped */
		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
			ret = ENOMEM;
			/*
			 * Common failure path: undo the allocations and the
			 * vnode count bump above.  The label sits inside this
			 * branch but is reached by goto from the paths below.
			 */
fail:
			as_rangeunlock(as);
			vn_free(vp);
			kmem_free(php, sizeof (*php));
			mutex_enter(&physmem_mutex);
			physmem_vnodecnt--;
			mutex_exit(&physmem_mutex);
			return (ret);
		}
	} else {
		/* We pick the address for the user */
		map_addr(&uvaddr, len, 0, 1, 0);
		if (uvaddr == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}
	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);

	if (ret == 0) {
		as_rangeunlock(as);
		/* record the segment and hand the cookie back to the caller */
		php->ph_base_pa = base_pa;
		php->ph_base_va = uvaddr;
		php->ph_seg_len = len;
		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
		pspp->cookie = (uint64_t)(uintptr_t)php;
		ret = physmem_add_hash(php);
		if (ret == 0)
			return (0);

		/* Note that the call to as_unmap will free the vnode */
		(void) as_unmap(as, uvaddr, len);
		kmem_free(php, sizeof (*php));
		return (ret);
	}

	/* as_map failed; fall into the common cleanup path */
	goto fail;
	/*NOTREACHED*/
}

/*
 * The guts of the PHYSMEM_MAP ioctl.
 * Map the given PA to the appropriate VA if PHYSMEM_SETUP ioctl has already
 * been called for this PA range.
 * Returns 0 on success with the following error codes on failure:
 *	EPERM - The requested page is long term locked, and thus repeated
 *		requests to allocate this page will likely fail.
 *	EAGAIN - The requested page could not be allocated, but it is believed
 *		that future attempts could succeed.
 *	ENOMEM - There was not enough free memory in the system to safely
 *		map the requested page.
 *	EINVAL - The requested paddr was not PAGESIZE aligned or the
 *		PHYSMEM_SETUP ioctl was not called for this page.
 *	ENOENT - The requested page was inside the kernel cage, and the
 *		PHYSMEM_CAGE flag was not set.
 *	EBUSY - The requested page is retired and the PHYSMEM_RETIRED flag
 *		was not set.
 */
static int
physmem_map_addrs(struct physmem_map_param *pmpp)
{
	caddr_t uvaddr;
	page_t *pp;
	uint64_t req_paddr;
	struct vnode *vp;
	int ret = 0;
	struct physmem_hash *php;
	uint_t flags = 0;

	ASSERT(pmpp != NULL);
	req_paddr = pmpp->req_paddr;

	if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
		return (EINVAL);
	/* Find the vnode for this map request */
	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}
	vp = php->ph_vnode;
	/* VA of the requested page = segment base + offset into the PA range */
	uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
	rw_exit(&pph_rwlock);

	pp = page_numtopp_nolock(btop((size_t)req_paddr));
	if (pp == NULL) {
		pmpp->ret_va = NULL;
		return (EPERM);
	}

	/*
	 * Check to see if page already mapped correctly.  This can happen
	 * when we failed to capture a page previously and it was captured
	 * asynchronously for us.  Return success in this case.
	 */
	if (pp->p_vnode == vp) {
		ASSERT(pp->p_offset == (u_offset_t)req_paddr);
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}

	/*
	 * physmem should be responsible for checking for cage
	 * and prom pages.
	 */
	if (pmpp->flags & PHYSMEM_CAGE)
		flags = CAPTURE_GET_CAGE;
	if (pmpp->flags & PHYSMEM_RETIRED)
		flags |= CAPTURE_GET_RETIRED;

	/* on success the capture framework calls map_page_proc for us */
	ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);

	if (ret != 0) {
		pmpp->ret_va = NULL;
		return (ret);
	} else {
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}
}

/*
 * Map the given page into the process's address space if possible.
 * We actually only hash the page in on the correct vnode as the page
 * will be mapped via segvn_pagefault.
 * returns 0 on success
 * returns 1 if there is no need to map this page anymore (process exited)
 * returns -1 if we failed to map the page.
 */
int
map_page_proc(page_t *pp, void *arg, uint_t flags)
{
	struct vnode *vp;
	proc_t *procp = (proc_t *)arg;
	int ret;
	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
	struct physmem_hash *php;

	ASSERT(pp != NULL);

	/*
	 * Check against availrmem to make sure that we're not low on memory.
	 * We check again here as ASYNC requests do not do this check elsewhere.
	 * We return 1 as we don't want the page to have the PR_CAPTURE bit
	 * set or be on the page capture hash.
	 */
	if (swapfs_minfree > availrmem + 1) {
		page_free(pp, 1);
		return (1);
	}

	/*
	 * If this is an asynchronous request for the current process,
	 * we can not map the page as it's possible that we are also in the
	 * process of unmapping the page which could result in a deadlock
	 * with the as lock.
	 */
	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
		page_free(pp, 1);
		return (-1);
	}

	/* only return zeroed out pages */
	pagezero(pp, 0, PAGESIZE);

	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(paddr, PAGESIZE, procp);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		/*
		 * Free the page as there is no longer a valid outstanding
		 * request for this page.
		 */
		page_free(pp, 1);
		return (1);
	}

	vp = php->ph_vnode;

	/*
	 * We need to protect against a possible deadlock here where we own
	 * the vnode page hash mutex and want to acquire it again as there
	 * are locations in the code, where we unlock a page while holding
	 * the mutex which can lead to the page being captured and eventually
	 * end up here.
	 */
	if (mutex_owned(page_vnode_mutex(vp))) {
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (-1);
	}

	ret = page_hashin(pp, vp, paddr, NULL);
	rw_exit(&pph_rwlock);
	if (ret == 0) {
		/* page_hashin failed; give the page back */
		page_free(pp, 1);
		return (-1);
	}

	/* downgrade to a shared lock for segvn_pagefault to find the page */
	page_downgrade(pp);

	/* account for the page now held long-term by this driver */
	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	return (0);
}

/*
 * The guts of the PHYSMEM_DESTROY ioctl.
 * The cookie passed in will provide all of the information needed to
 * free up the address space and physical memory associated with the
 * corresponding PHYSMEM_SETUP ioctl.
 * Returns 0 on success with the following error codes on failure:
 *	EINVAL - The cookie supplied is not valid.
 */
int
physmem_destroy_addrs(uint64_t p_cookie)
{
	struct as *as = curproc->p_as;
	size_t len;
	caddr_t uvaddr;

	rw_enter(&pph_rwlock, RW_READER);
	if (physmem_validate_cookie(p_cookie) == 0) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}

	/* cookie is the address of the physmem_hash entry itself */
	len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
	uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
	rw_exit(&pph_rwlock);

	/* teardown of the hash entry happens via delmap/inactive */
	(void) as_unmap(as, uvaddr, len);

	return (0);
}

/*
 * If the page has been hashed into the physmem vnode, then just look it up
 * and return it via pl, otherwise return ENOMEM as the map ioctl has not
 * succeeded on the given page.
 */
/*ARGSUSED*/
static int
physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
    struct cred *cr)
{
	page_t *pp;

	ASSERT(len == PAGESIZE);
	ASSERT(AS_READ_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * If the page is in the hash, then we successfully claimed this
	 * page earlier, so return it to the caller.
	 */
	pp = page_lookup(vp, off, SE_SHARED);
	if (pp != NULL) {
		pl[0] = pp;
		pl[1] = NULL;	/* single-page list terminator */
		*protp = PROT_ALL;
		return (0);
	}
	return (ENOMEM);
}

/*
 * We can not allow a process mapping /dev/physmem pages to fork as there can
 * only be a single mapping to a /dev/physmem page at a given time. Thus, the
 * return of EINVAL when we are not working on our own address space.
 * Otherwise we return zero as this function is required for normal operation.
 */
/*ARGSUSED*/
static int
physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred)
{
	if (curproc->p_as != as) {
		return (EINVAL);
	}
	return (0);
}

/* Will always get called for removing a whole segment. */
/*ARGSUSED*/
static int
physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred)
{
	/*
	 * Release our hold on the vnode so that the final VN_RELE will
	 * call physmem_inactive to clean things up.
	 */
	VN_RELE(vp);

	return (0);
}

/*
 * Clean up all the pages belonging to this vnode and then free it.
 */
/*ARGSUSED*/
static void
physmem_inactive(vnode_t *vp, cred_t *crp)
{
	page_t *pp;

	/*
	 * Remove the vnode from the hash now, to prevent asynchronous
	 * attempts to map into this vnode. This avoids a deadlock
	 * where two threads try to get into this logic at the same
	 * time and try to map the pages they are destroying into the
	 * other's address space.
	 * If it's not in the hash, just free it.
	 */
	if (physmem_remove_vnode_hash(vp) == 0) {
		ASSERT(vp->v_pages == NULL);
		vn_free(vp);
		physmem_remove_hash_proc();
		mutex_enter(&physmem_mutex);
		physmem_vnodecnt--;
		mutex_exit(&physmem_mutex);
		return;
	}

	/*
	 * At this point in time, no other logic can be adding or removing
	 * pages from the vnode, otherwise the v_pages list could be inaccurate.
	 */

	/* drain and destroy every page still hashed on this vnode */
	while ((pp = vp->v_pages) != NULL) {
		page_t *rpp;
		if (page_tryupgrade(pp)) {
			/*
			 * set lckcnt for page_destroy to do availrmem
			 * accounting
			 */
			pp->p_lckcnt = 1;
			page_destroy(pp, 0);
		} else {
			/* failure to lock should be transient */
			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
			if (rpp != pp) {
				page_unlock(rpp);
				continue;
			}
			page_unlock(pp);
		}
	}
	vn_free(vp);
	physmem_remove_hash_proc();
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
}

/*
 * ioctl entry point: dispatch PHYSMEM_SETUP/MAP/DESTROY to the helpers
 * above.  Parameters are copied in from and (for SETUP/MAP) back out to
 * user space; note the copyout happens even if the helper failed, so the
 * caller sees any partial output while ret carries the error.
 */
/*ARGSUSED*/
static int
physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int ret;

	switch (cmd) {
	case PHYSMEM_SETUP:
		{
			struct physmem_setup_param psp;
			if (ddi_copyin((void *)arg, &psp,
			    sizeof (struct physmem_setup_param), 0))
				return (EFAULT);
			ret = physmem_setup_addrs(&psp);
			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_MAP:
		{
			struct physmem_map_param pmp;
			if (ddi_copyin((void *)arg, &pmp,
			    sizeof (struct physmem_map_param), 0))
				return (EFAULT);
			ret = physmem_map_addrs(&pmp);
			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_DESTROY:
		{
			uint64_t cookie;
			if (ddi_copyin((void *)arg, &cookie,
			    sizeof (uint64_t), 0))
				return (EFAULT);
			ret = physmem_destroy_addrs(cookie);
		}
		break;
	default:
		return (ENOTSUP);
	}
	return (ret);
}

/*
 * open(9E): require read/write access and both the resource-management and
 * lock-memory privileges.  Warns once (per module load) that this driver
 * can take long term page locks.
 */
/*ARGSUSED*/
static int
physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int ret;
	static int msg_printed = 0;

	if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
		return (EINVAL);
	}

	/* need to make sure we have the right privileges */
	if ((ret = secpolicy_resource(credp)) != 0)
		return (ret);
	if ((ret = secpolicy_lock_memory(credp)) != 0)
		return (ret);

	if (msg_printed == 0) {
		cmn_err(CE_NOTE, "!driver has been opened. This driver may "
		    "take out long term locks on pages which may impact "
		    "dynamic reconfiguration events");
		msg_printed = 1;
	}

	return (0);
}

/* close(9E): nothing to release; teardown happens via segment unmap. */
/*ARGSUSED*/
static int
physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	return (0);
}

/* getinfo(9E): standard devinfo/instance lookups for this single instance. */
/*ARGSUSED*/
static int
physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
    void *arg, void **resultp)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*resultp = physmem_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(ulong_t)getminor((dev_t)arg);
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * attach(9E): create the minor node, build the vnode ops vector, reset the
 * per-process hash and register with the page capture framework.
 */
static int
physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
		return (DDI_FAILURE);

	physmem_dip = dip;

	/* Initialize driver specific data */
	if (physmem_setup_vnops()) {
		ddi_remove_minor_node(dip, ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	for (i = 0; i < PPH_SIZE; i++)
		pph[i] = NULL;

	page_capture_register_callback(PC_PHYSMEM, 10000,
	    map_page_proc);

	return (DDI_SUCCESS);
}

/*
 * detach(9E): refuse to detach while any physmem vnodes are outstanding;
 * otherwise tear down the vnode ops and capture callback.
 */
static int
physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int ret = DDI_SUCCESS;

	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(physmem_dip == dip);

	mutex_enter(&physmem_mutex);
	if (physmem_vnodecnt == 0) {
		if (physmem_vnodeops != NULL) {
			vn_freevnodeops(physmem_vnodeops);
			physmem_vnodeops = NULL;
			page_capture_unregister_callback(PC_PHYSMEM);
		}
	} else {
		/*
		 * NOTE(review): EBUSY is returned here rather than
		 * DDI_FAILURE; any value other than DDI_SUCCESS is treated
		 * as a failed detach by the framework — confirm intended.
		 */
		ret = EBUSY;
	}
	mutex_exit(&physmem_mutex);
	if (ret == DDI_SUCCESS)
		ddi_remove_minor_node(dip, ddi_get_name(dip));
	return (ret);
}

static struct cb_ops physmem_cb_ops = {
	physmem_open,	/* open */
	physmem_close,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	physmem_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* chpoll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* cb_str */
	D_NEW | D_MP | D_DEVMAP,
	CB_REV,
	NULL,
	NULL
};

static struct dev_ops physmem_ops = {
	DEVO_REV,
	0,
	physmem_getinfo,
	nulldev,
	nulldev,
	physmem_attach,
	physmem_detach,
	nodev,
	&physmem_cb_ops,
	NULL,
	NULL
};

static struct modldrv modldrv = {
	&mod_driverops,
	"physmem driver %I%",
	&physmem_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/* Loadable module entry points: plain mod_install/mod_info/mod_remove. */
int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}