/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/errno.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/page.h>
#include <sys/fs/swapnode.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>
#include <sys/vmsystm.h>
#include <sys/physmem.h>

static dev_info_t *physmem_dip = NULL;

/*
 * Linked list element hanging off physmem_proc_hash below, which holds all
 * the information for a given segment which has been set up for this process.
 * This is a simple linked list as we are assuming that for a given process
 * the setup ioctl will only be called a handful of times.  If this assumption
 * changes in the future, a quicker-to-traverse data structure should be used.
 */
struct physmem_hash {
	struct physmem_hash *ph_next;
	uint64_t ph_base_pa;
	caddr_t ph_base_va;
	size_t ph_seg_len;
	struct vnode *ph_vnode;
};

/*
 * Hash of all of the processes which have set up mappings with the driver,
 * with pointers to per-process data.
 */
struct physmem_proc_hash {
	struct proc *pph_proc;
	struct physmem_hash *pph_hash;
	struct physmem_proc_hash *pph_next;
};


/* Needs to be a power of two for simple hash algorithm */
#define	PPH_SIZE	8
struct physmem_proc_hash *pph[PPH_SIZE];

/*
 * Lock which protects the pph hash above.  To add an element (either a new
 * process or a new segment) the WRITE lock must be held.  To traverse the
 * list, only a READ lock is needed.
 */
krwlock_t pph_rwlock;

#define	PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1)))
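
/*
 * Worked example (illustrative, with a made-up pointer value): the hash
 * discards the low 8 bits of the proc pointer, which carry little
 * information given the size and alignment of proc_t allocations, and
 * masks with PPH_SIZE - 1 to select one of the 8 buckets, e.g.:
 *
 *	PHYSMEM_HASH(0x3000adb00) == ((0x3000adb00 >> 8) & 7)
 *				  == (0x3000adb & 7) == 3
 */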

/*
 * Need to keep a reference count of how many processes have the driver
 * open to prevent it from disappearing.
 */
uint64_t physmem_vnodecnt;
kmutex_t physmem_mutex;		/* protects physmem_vnodecnt */

static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred);

static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred);

static void physmem_inactive(vnode_t *vp, cred_t *crp);

const fs_operation_def_t physmem_vnodeops_template[] = {
	VOPNAME_GETPAGE, physmem_getpage,
	VOPNAME_ADDMAP, (fs_generic_func_p) physmem_addmap,
	VOPNAME_DELMAP, physmem_delmap,
	VOPNAME_INACTIVE, (fs_generic_func_p) physmem_inactive,
	NULL, NULL
};

vnodeops_t *physmem_vnodeops = NULL;

/*
 * Removes the current process from the hash if the process has no more
 * physmem segments active.
 */
void
physmem_remove_hash_proc()
{
	int index;
	struct physmem_proc_hash **walker;
	struct physmem_proc_hash *victim = NULL;

	index = PHYSMEM_HASH(curproc);
	rw_enter(&pph_rwlock, RW_WRITER);
	walker = &pph[index];
	while (*walker != NULL) {
		if ((*walker)->pph_proc == curproc &&
		    (*walker)->pph_hash == NULL) {
			victim = *walker;
			*walker = victim->pph_next;
			break;
		}
		walker = &((*walker)->pph_next);
	}
	rw_exit(&pph_rwlock);
	if (victim != NULL)
		kmem_free(victim, sizeof (struct physmem_proc_hash));
}

/*
 * Add a new entry to the hash for the given process to cache the
 * address ranges that it is working on.  If this is the first hash
 * item to be added for this process, we will create the head pointer
 * for this process.
 * Returns 0 on success, ERANGE when the physical address is already in the
 * hash.  Note that we add it to the hash as we have already called as_map
 * and thus the as_unmap call will try to free the vnode, which needs
 * to be found in the hash.
 */
int
physmem_add_hash(struct physmem_hash *php)
{
	int index;
	struct physmem_proc_hash *iterator;
	struct physmem_proc_hash *newp = NULL;
	struct physmem_hash *temp;
	int ret = 0;

	index = PHYSMEM_HASH(curproc);

insert:
	rw_enter(&pph_rwlock, RW_WRITER);
	iterator = pph[index];
	while (iterator != NULL) {
		if (iterator->pph_proc == curproc) {
			/*
			 * Check to make sure a single process does not try to
			 * map the same region twice.
			 */
			for (temp = iterator->pph_hash; temp != NULL;
			    temp = temp->ph_next) {
				if ((php->ph_base_pa >= temp->ph_base_pa &&
				    php->ph_base_pa < temp->ph_base_pa +
				    temp->ph_seg_len) ||
				    (temp->ph_base_pa >= php->ph_base_pa &&
				    temp->ph_base_pa < php->ph_base_pa +
				    php->ph_seg_len)) {
					ret = ERANGE;
					break;
				}
			}
			if (ret == 0) {
				php->ph_next = iterator->pph_hash;
				iterator->pph_hash = php;
			}
			rw_exit(&pph_rwlock);
			/*
			 * Another thread may have inserted this process
			 * while the lock was dropped; free the unused
			 * allocation.
			 */
			if (newp != NULL)
				kmem_free(newp, sizeof (*newp));
			return (ret);
		}
		iterator = iterator->pph_next;
	}

	if (newp != NULL) {
		newp->pph_proc = curproc;
		newp->pph_next = pph[index];
		newp->pph_hash = php;
		php->ph_next = NULL;
		pph[index] = newp;
		rw_exit(&pph_rwlock);
		return (0);
	}

	rw_exit(&pph_rwlock);
	/* Dropped the lock so we could use KM_SLEEP */
	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
	goto insert;
}

/*
 * Will return the pointer to the physmem_hash struct if the setup routine
 * has previously been called for this memory.
 * Returns NULL on failure.
 */
struct physmem_hash *
physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(procp);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == procp) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((req_paddr >= php->ph_base_pa) &&
				    (req_paddr + len <=
				    php->ph_base_pa + php->ph_seg_len)) {
					return (php);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (NULL);
}

int
physmem_validate_cookie(uint64_t p_cookie)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(curproc);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((uint64_t)(uintptr_t)php == p_cookie) {
					return (1);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (0);
}

/*
 * Remove the given vnode from the pph hash.  If it exists in the hash the
 * process still has to be around as the vnode is obviously still around and
 * since it's a physmem vnode, it must be in the hash.
 * If it is not in the hash, that must mean that the setup ioctl failed.
 * Return 0 in this instance, 1 if it is in the hash.
 */
int
physmem_remove_vnode_hash(vnode_t *vp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash **phpp;
	struct physmem_hash *victim;

	index = PHYSMEM_HASH(curproc);
	/* synchronize with the map routine */
	rw_enter(&pph_rwlock, RW_WRITER);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			phpp = &proc_hp->pph_hash;
			while (*phpp != NULL) {
				if ((*phpp)->ph_vnode == vp) {
					victim = *phpp;
					*phpp = victim->ph_next;

					rw_exit(&pph_rwlock);
					kmem_free(victim, sizeof (*victim));
					return (1);
				}
				phpp = &(*phpp)->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	rw_exit(&pph_rwlock);

	/* not found */
	return (0);
}

int
physmem_setup_vnops()
{
	int error;
	char *name = "physmem";

	if (physmem_vnodeops != NULL)
		cmn_err(CE_PANIC, "physmem vnodeops already set\n");
	error = vn_make_ops(name, physmem_vnodeops_template,
	    &physmem_vnodeops);
	if (error != 0) {
		cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template");
	}
	return (error);
}

/*
 * The guts of the PHYSMEM_SETUP ioctl.
 * Create a segment in the address space with the specified parameters.
 * If pspp->user_va is NULL, as_gap will be used to find an appropriate VA.
 * We do not do bounds checking on the requested physical addresses; if they
 * do not exist in the system, they will not be mappable.
 * Returns 0 on success with the following error codes on failure:
 *	ENOMEM - The VA range requested was already mapped if pspp->user_va is
 *		non-NULL or the system was unable to find enough VA space for
 *		the desired length if user_va was NULL.
 *	EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
 */
int
physmem_setup_addrs(struct physmem_setup_param *pspp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs vn_a;
	int ret = 0;
	uint64_t base_pa;
	size_t len;
	caddr_t uvaddr;
	struct vnode *vp;
	struct physmem_hash *php;

	ASSERT(pspp != NULL);
	base_pa = pspp->req_paddr;
	len = pspp->len;
	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;

	/* Sanity checking */
	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
		return (EINVAL);
	if (!IS_P2ALIGNED(len, PAGESIZE))
		return (EINVAL);
	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
		return (EINVAL);

	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);

	/* Need to bump vnode count so that the driver cannot be unloaded */
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt++;
	mutex_exit(&physmem_mutex);

	vp = vn_alloc(KM_SLEEP);
	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
	vn_setops(vp, physmem_vnodeops);

	php->ph_vnode = vp;

	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)base_pa;
	vn_a.type = MAP_SHARED;
	vn_a.prot = PROT_ALL;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = 0;
	vn_a.cred = NULL;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	as_rangelock(as);
	if (uvaddr != NULL) {
		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
			ret = ENOMEM;
fail:
			as_rangeunlock(as);
			vn_free(vp);
			kmem_free(php, sizeof (*php));
			mutex_enter(&physmem_mutex);
			physmem_vnodecnt--;
			mutex_exit(&physmem_mutex);
			return (ret);
		}
	} else {
		/* We pick the address for the user */
		map_addr(&uvaddr, len, 0, 1, 0);
		if (uvaddr == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}
	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);

	as_rangeunlock(as);
	if (ret == 0) {
		php->ph_base_pa = base_pa;
		php->ph_base_va = uvaddr;
		php->ph_seg_len = len;
		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
		pspp->cookie = (uint64_t)(uintptr_t)php;
		ret = physmem_add_hash(php);
		if (ret == 0)
			return (0);

		/* Note that the call to as_unmap will free the vnode */
		(void) as_unmap(as, uvaddr, len);
		return (ret);
	}

	goto fail;
	/*NOTREACHED*/
}
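
/*
 * Illustrative user-space sketch (an assumption, not part of this driver):
 * exercising the PHYSMEM_SETUP ioctl implemented by physmem_setup_addrs()
 * above.  Field and ioctl names are the ones used in this file; "pa" and
 * "len" stand for hypothetical PAGESIZE-aligned values.
 *
 *	int fd = open("/dev/physmem", O_RDWR);
 *	struct physmem_setup_param psp;
 *
 *	psp.req_paddr = pa;
 *	psp.len = len;
 *	psp.user_va = 0;		... let the driver pick the VA
 *	if (ioctl(fd, PHYSMEM_SETUP, &psp) == 0) {
 *		... psp.user_va now holds the chosen VA and psp.cookie
 *		... the handle to pass to PHYSMEM_DESTROY later
 *	}
 */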

/*
 * The guts of the PHYSMEM_MAP ioctl.
 * Map the given PA to the appropriate VA if the PHYSMEM_SETUP ioctl has
 * already been called for this PA range.
 * Returns 0 on success with the following error codes on failure:
 *	EPERM - The requested page is long term locked, and thus repeated
 *		requests to allocate this page will likely fail.
 *	EAGAIN - The requested page could not be allocated, but it is believed
 *		that future attempts could succeed.
 *	ENOMEM - There was not enough free memory in the system to safely
 *		map the requested page.
 *	EINVAL - The requested paddr was not PAGESIZE aligned or the
 *		PHYSMEM_SETUP ioctl was not called for this page.
 *	ENOENT - The requested page was inside the kernel cage, and the
 *		PHYSMEM_CAGE flag was not set.
 *	EBUSY - The requested page is retired and the PHYSMEM_RETIRED flag
 *		was not set.
 */
static int
physmem_map_addrs(struct physmem_map_param *pmpp)
{
	caddr_t uvaddr;
	page_t *pp;
	uint64_t req_paddr;
	struct vnode *vp;
	int ret = 0;
	struct physmem_hash *php;
	uint_t flags = 0;

	ASSERT(pmpp != NULL);
	req_paddr = pmpp->req_paddr;

	if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
		return (EINVAL);
	/* Find the vnode for this map request */
	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}
	vp = php->ph_vnode;
	uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
	rw_exit(&pph_rwlock);

	pp = page_numtopp_nolock(btop((size_t)req_paddr));
	if (pp == NULL) {
		pmpp->ret_va = NULL;
		return (EPERM);
	}

	/*
	 * Check to see if page already mapped correctly.  This can happen
	 * when we failed to capture a page previously and it was captured
	 * asynchronously for us.  Return success in this case.
	 */
	if (pp->p_vnode == vp) {
		ASSERT(pp->p_offset == (u_offset_t)req_paddr);
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}

	/*
	 * physmem should be responsible for checking for cage
	 * and prom pages.
	 */
	if (pmpp->flags & PHYSMEM_CAGE)
		flags = CAPTURE_GET_CAGE;
	if (pmpp->flags & PHYSMEM_RETIRED)
		flags |= CAPTURE_GET_RETIRED;

	ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);

	if (ret != 0) {
		pmpp->ret_va = NULL;
		return (ret);
	} else {
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}
}
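
/*
 * Illustrative user-space sketch (an assumption, not part of this driver):
 * a PHYSMEM_MAP request against a range previously set up, retried while
 * the page cannot be captured yet but a future attempt may succeed
 * (EAGAIN, per the error list above physmem_map_addrs()).
 *
 *	struct physmem_map_param pmp;
 *
 *	pmp.req_paddr = pa;		... a page within the setup range
 *	pmp.flags = 0;			... or PHYSMEM_CAGE / PHYSMEM_RETIRED
 *	while (ioctl(fd, PHYSMEM_MAP, &pmp) == -1 && errno == EAGAIN)
 *		;
 *	... on success, pmp.ret_va is the VA backed by the captured page
 */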

/*
 * Map the given page into the process's address space if possible.
 * We actually only hash the page in on the correct vnode as the page
 * will be mapped via segvn_pagefault.
 * returns 0 on success
 * returns 1 if there is no need to map this page anymore (process exited)
 * returns -1 if we failed to map the page.
 */
int
map_page_proc(page_t *pp, void *arg, uint_t flags)
{
	struct vnode *vp;
	proc_t *procp = (proc_t *)arg;
	int ret;
	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
	struct physmem_hash *php;

	ASSERT(pp != NULL);

	/*
	 * Check against availrmem to make sure that we're not low on memory.
	 * We check again here as ASYNC requests do not do this check
	 * elsewhere.  We return 1 as we don't want the page to have the
	 * PR_CAPTURE bit set or be on the page capture hash.
	 */
	if (swapfs_minfree > availrmem + 1) {
		page_free(pp, 1);
		return (1);
	}

	/*
	 * If this is an asynchronous request for the current process,
	 * we can not map the page as it's possible that we are also in the
	 * process of unmapping the page which could result in a deadlock
	 * with the as lock.
	 */
	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
		page_free(pp, 1);
		return (-1);
	}

	/* only return zeroed out pages */
	pagezero(pp, 0, PAGESIZE);

	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(paddr, PAGESIZE, procp);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		/*
		 * Free the page as there is no longer a valid outstanding
		 * request for this page.
		 */
		page_free(pp, 1);
		return (1);
	}

	vp = php->ph_vnode;

	/*
	 * We need to protect against a possible deadlock here where we own
	 * the vnode page hash mutex and want to acquire it again as there
	 * are locations in the code, where we unlock a page while holding
	 * the mutex which can lead to the page being captured and eventually
	 * end up here.
	 */
	if (mutex_owned(page_vnode_mutex(vp))) {
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (-1);
	}

	ret = page_hashin(pp, vp, paddr, NULL);
	rw_exit(&pph_rwlock);
	if (ret == 0) {
		page_free(pp, 1);
		return (-1);
	}

	page_downgrade(pp);

	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	return (0);
}

/*
 * The guts of the PHYSMEM_DESTROY ioctl.
 * The cookie passed in will provide all of the information needed to
 * free up the address space and physical memory associated with the
 * corresponding PHYSMEM_SETUP ioctl.
 * Returns 0 on success with the following error codes on failure:
 *	EINVAL - The cookie supplied is not valid.
 */
int
physmem_destroy_addrs(uint64_t p_cookie)
{
	struct as *as = curproc->p_as;
	size_t len;
	caddr_t uvaddr;

	rw_enter(&pph_rwlock, RW_READER);
	if (physmem_validate_cookie(p_cookie) == 0) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}

	len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
	uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
	rw_exit(&pph_rwlock);

	(void) as_unmap(as, uvaddr, len);

	return (0);
}

/*
 * If the page has been hashed into the physmem vnode, then just look it up
 * and return it via pl, otherwise return ENOMEM as the map ioctl has not
 * succeeded on the given page.
 */
/*ARGSUSED*/
static int
physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
    struct cred *cr)
{
	page_t *pp;

	ASSERT(len == PAGESIZE);
	ASSERT(AS_READ_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * If the page is in the hash, then we successfully claimed this
	 * page earlier, so return it to the caller.
	 */
	pp = page_lookup(vp, off, SE_SHARED);
	if (pp != NULL) {
		pl[0] = pp;
		pl[1] = NULL;
		*protp = PROT_ALL;
		return (0);
	}
	return (ENOMEM);
}

/*
 * We can not allow a process mapping /dev/physmem pages to fork as there can
 * only be a single mapping to a /dev/physmem page at a given time.  Thus, the
 * return of EINVAL when we are not working on our own address space.
 * Otherwise we return zero as this function is required for normal operation.
 */
/*ARGSUSED*/
static int
physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred)
{
	if (curproc->p_as != as) {
		return (EINVAL);
	}
	return (0);
}

/* Will always get called for removing a whole segment. */
/*ARGSUSED*/
static int
physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred)
{
	/*
	 * Release our hold on the vnode so that the final VN_RELE will
	 * call physmem_inactive to clean things up.
	 */
	VN_RELE(vp);

	return (0);
}

/*
 * Clean up all the pages belonging to this vnode and then free it.
 */
/*ARGSUSED*/
static void
physmem_inactive(vnode_t *vp, cred_t *crp)
{
	page_t *pp;

	/*
	 * Remove the vnode from the hash now, to prevent asynchronous
	 * attempts to map into this vnode.  This avoids a deadlock
	 * where two threads try to get into this logic at the same
	 * time and try to map the pages they are destroying into the
	 * other's address space.
	 * If it's not in the hash, just free it.
	 */
	if (physmem_remove_vnode_hash(vp) == 0) {
		ASSERT(vp->v_pages == NULL);
		vn_free(vp);
		physmem_remove_hash_proc();
		mutex_enter(&physmem_mutex);
		physmem_vnodecnt--;
		mutex_exit(&physmem_mutex);
		return;
	}

	/*
	 * At this point in time, no other logic can be adding or removing
	 * pages from the vnode, otherwise the v_pages list could be
	 * inaccurate.
	 */

	while ((pp = vp->v_pages) != NULL) {
		page_t *rpp;

		if (page_tryupgrade(pp)) {
			/*
			 * set lckcnt for page_destroy to do availrmem
			 * accounting
			 */
			pp->p_lckcnt = 1;
			page_destroy(pp, 0);
		} else {
			/* failure to lock should be transient */
			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
			if (rpp != pp) {
				page_unlock(rpp);
				continue;
			}
			page_unlock(pp);
		}
	}
	vn_free(vp);
	physmem_remove_hash_proc();
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
}

/*ARGSUSED*/
static int
physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int ret;

	switch (cmd) {
	case PHYSMEM_SETUP:
		{
			struct physmem_setup_param psp;

			if (ddi_copyin((void *)arg, &psp,
			    sizeof (struct physmem_setup_param), 0))
				return (EFAULT);
			ret = physmem_setup_addrs(&psp);
			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_MAP:
		{
			struct physmem_map_param pmp;

			if (ddi_copyin((void *)arg, &pmp,
			    sizeof (struct physmem_map_param), 0))
				return (EFAULT);
			ret = physmem_map_addrs(&pmp);
			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_DESTROY:
		{
			uint64_t cookie;

			if (ddi_copyin((void *)arg, &cookie,
			    sizeof (uint64_t), 0))
				return (EFAULT);
			ret = physmem_destroy_addrs(cookie);
		}
		break;
	default:
		return (ENOTSUP);
	}
	return (ret);
}
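
/*
 * Illustrative user-space sketch (an assumption, not part of this driver):
 * tearing down a segment created by PHYSMEM_SETUP.  As the dispatch above
 * shows, PHYSMEM_DESTROY copies in a bare uint64_t cookie rather than a
 * parameter structure.
 *
 *	uint64_t cookie = psp.cookie;	... saved from PHYSMEM_SETUP
 *
 *	(void) ioctl(fd, PHYSMEM_DESTROY, &cookie);
 */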
This driver may " 821 "take out long term locks on pages which may impact " 822 "dynamic reconfiguration events"); 823 msg_printed = 1; 824 } 825 826 return (0); 827 } 828 829 /*ARGSUSED*/ 830 static int 831 physmem_close(dev_t dev, int flag, int otyp, cred_t *credp) 832 { 833 return (0); 834 } 835 836 /*ARGSUSED*/ 837 static int 838 physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, 839 void *arg, void **resultp) 840 { 841 switch (infocmd) { 842 case DDI_INFO_DEVT2DEVINFO: 843 *resultp = physmem_dip; 844 return (DDI_SUCCESS); 845 846 case DDI_INFO_DEVT2INSTANCE: 847 *resultp = (void *)(ulong_t)getminor((dev_t)arg); 848 return (DDI_SUCCESS); 849 850 default: 851 return (DDI_FAILURE); 852 } 853 } 854 855 static int 856 physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 857 { 858 int i; 859 860 if (cmd == DDI_RESUME) { 861 return (DDI_SUCCESS); 862 } 863 864 if (cmd != DDI_ATTACH) 865 return (DDI_FAILURE); 866 867 if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR, 868 ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS) 869 return (DDI_FAILURE); 870 871 physmem_dip = dip; 872 873 /* Initialize driver specific data */ 874 if (physmem_setup_vnops()) { 875 ddi_remove_minor_node(dip, ddi_get_name(dip)); 876 return (DDI_FAILURE); 877 } 878 879 for (i = 0; i < PPH_SIZE; i++) 880 pph[i] = NULL; 881 882 page_capture_register_callback(PC_PHYSMEM, 10000, 883 map_page_proc); 884 885 return (DDI_SUCCESS); 886 } 887 888 static int 889 physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 890 { 891 int ret = DDI_SUCCESS; 892 893 if (cmd == DDI_SUSPEND) { 894 return (DDI_SUCCESS); 895 } 896 897 if (cmd != DDI_DETACH) 898 return (DDI_FAILURE); 899 900 ASSERT(physmem_dip == dip); 901 902 mutex_enter(&physmem_mutex); 903 if (physmem_vnodecnt == 0) { 904 if (physmem_vnodeops != NULL) { 905 vn_freevnodeops(physmem_vnodeops); 906 physmem_vnodeops = NULL; 907 page_capture_unregister_callback(PC_PHYSMEM); 908 } 909 } else { 910 ret = EBUSY; 911 } 912 mutex_exit(&physmem_mutex); 913 if (ret == DDI_SUCCESS) 914 ddi_remove_minor_node(dip, ddi_get_name(dip)); 915 return (ret); 916 } 917 918 static struct cb_ops physmem_cb_ops = { 919 physmem_open, /* open */ 920 physmem_close, /* close */ 921 nodev, /* strategy */ 922 nodev, /* print */ 923 nodev, /* dump */ 924 nodev, /* read */ 925 nodev, /* write */ 926 physmem_ioctl, /* ioctl */ 927 nodev, /* devmap */ 928 nodev, /* mmap */ 929 nodev, /* segmap */ 930 nochpoll, /* chpoll */ 931 ddi_prop_op, /* prop_op */ 932 NULL, /* cb_str */ 933 D_NEW | D_MP | D_DEVMAP, 934 CB_REV, 935 NULL, 936 NULL 937 }; 938 939 static struct dev_ops physmem_ops = { 940 DEVO_REV, 941 0, 942 physmem_getinfo, 943 nulldev, 944 nulldev, 945 physmem_attach, 946 physmem_detach, 947 nodev, 948 &physmem_cb_ops, 949 NULL, 950 NULL 951 }; 952 953 static struct modldrv modldrv = { 954 &mod_driverops, 955 "physmem driver %I%", 956 &physmem_ops 957 }; 958 959 static struct modlinkage modlinkage = { 960 MODREV_1, 961 &modldrv, 962 NULL 963 }; 964 965 int 966 _init(void) 967 { 968 return (mod_install(&modlinkage)); 969 } 970 971 int 972 _info(struct modinfo *modinfop) 973 { 974 return (mod_info(&modlinkage, modinfop)); 975 } 976 977 int 978 _fini(void) 979 { 980 return (mod_remove(&modlinkage)); 981 } 982