/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * This is the lock device driver.
 *
 * The lock driver provides a variation of inter-process mutexes with the
 * following twist in semantics:
 *	A waiter for a lock can, after a set timeout, "break" the lock and
 *	grab it from the current owner (without informing the owner).
 *
 * These semantics mean that, temporarily, multiple processes may believe
 * they own the lock.  This usually does not make sense for cases where locks
 * are used to protect a critical region and it is important to serialize
 * access to data structures, since breaking the lock also loses the
 * serialization and results in corrupt data structures.
 *
 * The usage of the winlock driver is primarily driven by the graphics system
 * when doing DGA (direct graphics access) graphics.  The locks are used to
 * protect access to the frame buffer (which presumably reflects back to the
 * screen) between competing processes that write directly to the screen, as
 * opposed to going through the window server etc.
 * In this case, breaking the lock at worst causes the screen
 * image to be distorted, which is easily fixed by doing a "refresh".
 *
 * In well-behaved applications, the lock is held for a very short time and
 * the breaking semantics do not come into play.  Without this feature,
 * normal inter-process mutexes would allow a misbehaved application to grab
 * the screen-writing capability from the window manager and
 * effectively make the system look like it is hung (the mouse pointer does
 * not move).
 *
 * A secondary aspect of the winlock driver is that it allows for extremely
 * fast lock acquire/release in cases where there is low contention.  A memory
 * write is all that is needed (not even a function call).  Usually the window
 * manager is the only DGA writer, and this case is optimized for.
 * Occasionally other processes might do DGA graphics and cause kernel faults
 * to handle the contention/locking (and that has got to be slow!).
 *
 * The following IOCTLs are supported:
 *
 *   GRABPAGEALLOC:
 *	Compatibility with old cgsix device driver lockpage ioctls.
 *	Lockpages created this way must be an entire page for compatibility
 *	with older software.  This ioctl allocates a lock context with its
 *	own private lock page.  The unique "ident" that identifies this
 *	lock is returned.
 *
 *   GRABPAGEFREE:
 *	Compatibility with cgsix device driver lockpage ioctls.
 *	This ioctl releases the lock context allocated by GRABPAGEALLOC.
 *
 *   GRABLOCKINFO:
 *	Returns a one-word flag.  '1' means that multiple clients may
 *	access this lock page.  Older device drivers returned '0',
 *	meaning that only two clients could access a lock page.
 *
 *   GRABATTACH:
 *	Not supported.  This ioctl would have grabbed all lock pages
 *	on behalf of the calling program.
 *
 *   WINLOCKALLOC:
 *	Allocate a lock context.  This ioctl accepts a key value as
 *	its argument.  If the key is zero, a new lock context is
 *	created, and its "ident" is returned.  If the key is nonzero,
 *	all existing contexts are checked to see if they match the
 *	key.  If a match is found, its reference count is incremented
 *	and its ident is returned; otherwise a new context is created
 *	and its ident is returned.
 *
 *   WINLOCKFREE:
 *	Free a lock context.  This ioctl accepts the ident of a lock
 *	context and decrements its reference count.  Once the reference
 *	count reaches zero *and* all mappings are released, the lock
 *	context is freed.  When all the lock contexts in a lock page are
 *	freed, the lock page is freed as well.
 *
 *   WINLOCKSETTIMEOUT:
 *	Set the lock timeout for a context.  This ioctl accepts the ident
 *	of a lock context and a timeout value in milliseconds.
 *	Whenever lock contention occurs, the timer is started, and the lock
 *	is broken after the timeout expires.  If the timeout value is zero,
 *	the lock does not time out.  The value is rounded to the nearest
 *	clock tick, so don't try to use it for real-time control.
 *
 *   WINLOCKGETTIMEOUT:
 *	Get the lock timeout of a context.
 *
 *   WINLOCKDUMP:
 *	Dump the state of this device.
 *
 *
 * How /dev/winlock works:
 *
 *	Every lock context consists of two mappings for the client to the
 *	lock page.  These mappings are known as the "lock page" and "unlock
 *	page" to the client.  The first mmap to the lock context (identified
 *	by the sy_ident field returned during alloc) allocates a mapping to
 *	the lock page; the second mmap allocates a mapping to the unlock page.
 *	The mappings don't have to be ordered in virtual address space, but
 *	they do need to be ordered in time.  Mapping and unmapping of these
 *	lock and unlock pages should happen in pairs.  Doing them one at a
 *	time, or unmapping one and leaving the other mapped, causes undefined
 *	behavior.
 *	The mappings are always of length PAGESIZE, and type MAP_SHARED.
 *
 *	The first ioctl is to ALLOC a lock, either based on a key (if trying
 *	to grab a preexisting lock) or 0 (which gets a default new one).
 *	This ioctl returns a value in sy_ident which is needed to do the
 *	later mmaps and FREE/other ioctls.
 *
 *	The "page number" portion of the sy_ident needs to be passed as the
 *	file offset when doing an mmap for both the lock page and the unlock
 *	page.
 *
 *	The value returned by mmap (a user virtual address) needs to be
 *	incremented by the "page offset" portion of sy_ident to obtain the
 *	pointer to the actual lock.  (Skipping this step does not cause any
 *	visible error, but the process will be using the wrong lock!)
 *
 *	On a fork(), the child process inherits the mappings for free, but
 *	does not inherit the parent's lock ownership, if any.  The child
 *	should NOT do an explicit FREE on the lock context unless it did an
 *	explicit ALLOC.
 *
 *	Only one process at a time is allowed to have a valid hat
 *	mapping to a lock page.  This is enforced by this driver.
 *	A client acquires a lock by writing a '1' to the lock page.
 *	Note that it is not necessary to read and verify that the lock is
 *	'0' prior to writing a '1' into it.
 *	If the client does not already have a valid mapping to that page,
 *	the driver takes a fault (devmap_access), loads the client mapping,
 *	and allows the client to continue.  The client releases the lock by
 *	writing a '0' to the unlock page.  Again, if it does not have a
 *	valid mapping to the unlock page, the segment driver takes a fault,
 *	loads the mapping, and lets the client continue.  From this point
 *	forward, the client can do as many locks and unlocks as it
 *	wants, without any more faults into the kernel.
 *
 *	If a different process wants to acquire a lock, it takes a page
 *	fault when it writes the '1' to the lock page.  If the segment
 *	driver sees that the lock page contained a zero, then it invalidates
 *	the owner's mappings and gives the mappings to this process.
 *
 *	If there is already a '1' in the lock page when the second client
 *	tries to access the lock page, then a lock exists.  The segment
 *	driver sleeps the second client and, if applicable, starts the
 *	timeout on the lock.  The owner's mapping to the unlock page
 *	is invalidated so that the driver will be woken again when the owner
 *	releases the lock.
 *
 *	When the locking client finally writes a '0' to the unlock page, the
 *	segment driver takes another fault.  The client is given a valid
 *	mapping, not to the unlock page, but to the "trash page", and allowed
 *	to continue.  Meanwhile, the sleeping client is given a valid mapping
 *	to the lock/unlock pages and allowed to continue as well.
 *
 * RFE: There is a leak if a process exits before freeing its allocated
 * locks.  We currently do not track which locks were allocated by which
 * process, and we do not have a clean entry point into the driver to do
 * garbage collection.  If the interface used a file descriptor for each
 * lock it allocs, then the driver could free things up in the _close
 * routine.
 */
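
/*
 * For illustration only, a minimal user-level sketch of the protocol
 * described above (not part of the driver; error handling and headers are
 * elided, and the use of sysconf(_SC_PAGESIZE) for the page size is an
 * assumption):
 *
 *	struct winlockalloc wla;
 *	long pgsz = sysconf(_SC_PAGESIZE);
 *	int fd = open("/dev/winlock", O_RDWR);
 *
 *	wla.sy_key = 0;			// 0 => create a new lock context
 *	ioctl(fd, WINLOCKALLOC, &wla);	// returns sy_ident
 *
 *	off_t pgnum = wla.sy_ident & ~(pgsz - 1);  // "page number" portion
 *	off_t pgoff = wla.sy_ident & (pgsz - 1);   // "page offset" portion
 *
 *	// The first mmap gets the lock page, the second the unlock page
 *	caddr_t lockva = mmap(NULL, pgsz, PROT_READ|PROT_WRITE,
 *	    MAP_SHARED, fd, pgnum);
 *	caddr_t unlockva = mmap(NULL, pgsz, PROT_READ|PROT_WRITE,
 *	    MAP_SHARED, fd, pgnum);
 *
 *	volatile int *lock = (volatile int *)(lockva + pgoff);
 *	volatile int *unlock = (volatile int *)(unlockva + pgoff);
 *
 *	*lock = 1;	// acquire; may fault into seglock_lockfault()
 *	// ... critical section, e.g., draw to the frame buffer ...
 *	*unlock = 0;	// release; may fault and wake up any sleepers
 */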

#include <sys/types.h>		/* various type defn's */
#include <sys/debug.h>
#include <sys/param.h>		/* various kernel limits */
#include <sys/time.h>
#include <sys/errno.h>
#include <sys/kmem.h>		/* defines kmem_alloc() */
#include <sys/conf.h>		/* defines cdevsw */
#include <sys/file.h>		/* various file modes, etc. */
#include <sys/uio.h>		/* UIO stuff */
#include <sys/ioctl.h>
#include <sys/cred.h>		/* defines cred struct */
#include <sys/mman.h>		/* defines mmap(2) parameters */
#include <sys/stat.h>		/* defines S_IFCHR */
#include <sys/cmn_err.h>	/* use cmn_err */
#include <sys/ddi.h>		/* ddi stuff */
#include <sys/sunddi.h>		/* ddi stuff */
#include <sys/ddi_impldefs.h>	/* ddi stuff */
#include <sys/winlockio.h>	/* defines ioctls, flags, data structs */

static int	winlock_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int	winlock_devmap(dev_t, devmap_cookie_t, offset_t, size_t,
			size_t *, uint_t);
static int	winlocksegmap(dev_t, off_t, struct as *, caddr_t *, off_t,
			uint_t, uint_t, uint_t, cred_t *);

static struct cb_ops	winlock_cb_ops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	winlock_ioctl,		/* ioctl */
	winlock_devmap,		/* devmap */
	nodev,			/* mmap */
	winlocksegmap,		/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,			/* rev */
	nodev,			/* aread */
	nodev			/* awrite */
};

static int winlock_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int winlock_attach(dev_info_t *, ddi_attach_cmd_t);
static int winlock_detach(dev_info_t *, ddi_detach_cmd_t);

static struct dev_ops	winlock_ops = {
	DEVO_REV,
	0,			/* refcount */
	winlock_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	winlock_attach,		/* attach */
	winlock_detach,		/* detach */
	nodev,			/* reset */
	&winlock_cb_ops,	/* driver ops */
	NULL,			/* bus ops */
	NULL			/* power */
};

static int winlockmap_map(devmap_cookie_t, dev_t, uint_t, offset_t, size_t,
		void **);
static void winlockmap_unmap(devmap_cookie_t, void *, offset_t, size_t,
		devmap_cookie_t, void **, devmap_cookie_t, void **);
static int winlockmap_dup(devmap_cookie_t, void *,
		devmap_cookie_t, void **);
static int winlockmap_access(devmap_cookie_t, void *, offset_t, size_t,
		uint_t, uint_t);

static
struct devmap_callback_ctl winlockmap_ops = {
	DEVMAP_OPS_REV,
	winlockmap_map,
	winlockmap_access,
	winlockmap_dup,
	winlockmap_unmap,
};

#if DEBUG
static	int	lock_debug = 0;
#define	DEBUGF(level, args)	{ if (lock_debug >= (level)) cmn_err args; }
#else
#define	DEBUGF(level, args)
#endif

/* Driver supports two styles of locks */
enum winlock_style { NEWSTYLE_LOCK, OLDSTYLE_LOCK };

/*
 * These structures describe a lock context.  We permit multiple
 * clients (not just two) to access a lock page.
 *
 * The "cookie" identifies the lock context.  It is the page number portion
 * of the sy_ident returned on lock allocation.  The cookie is used in later
 * ioctls.
 * "cookie" is lockid * PAGESIZE
 * "lockptr" is the kernel virtual address of the lock itself
 * The page offset portion of lockptr is the page offset portion of sy_ident
 */
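
/*
 * For example (an illustrative sketch only, assuming an 8K PAGESIZE and
 * 4-byte ints): a newstyle lock occupying the third int slot of the shared
 * lockpage would have
 *
 *	cookie   == 3 * PAGESIZE == 0x6000		(page number portion)
 *	lockptr  == (int *)lockpage + 2			(third int in page)
 *	sy_ident == cookie + ((uintptr_t)lockptr & PAGEOFFSET) == 0x6008
 *
 * seglock_findlock() below recovers the cookie as (sy_ident & PAGEMASK),
 * and the client adds (sy_ident & PAGEOFFSET) to the address returned by
 * mmap() to locate the lock word within its mapping.
 */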

/*
 * per-process information about locks.  This is the private field of
 * a devmap mapping.  Note that usually *two* mappings point to this.
 */

/*
 * Each process using winlock is associated with a segproc structure.
 * In various driver entry points, we need to search to find the right
 * segproc structure (if we were using file handles for each lock, this
 * would not have been necessary).
 * It would have been simple to use the process pid (via ddi_get_pid).
 * However, during fork, devmap_dup is called in the parent process context,
 * and using the pid complicates the code by introducing orphans.
 * Instead we use the as pointer of the process as a cookie,
 * which requires delving into various non-DDI-kosher structs.
 */
typedef struct segproc {
	struct segproc	*next;		/* next client of this lock */
	struct seglock	*lp;		/* associated lock context */
	devmap_cookie_t	lockseg;	/* lock mapping, if any */
	devmap_cookie_t	unlockseg;	/* unlock mapping, if any */
	void		*tag;		/* process as pointer as tag */
	uint_t		flag;		/* see "flag bits" in winlockio.h */
} SegProc;

#define	ID(sdp)		((sdp)->tag)
#define	CURPROC_ID	(void *)(curproc->p_as)

/* per lock context information */

typedef struct seglock {
	struct seglock	*next;		/* next lock */
	uint_t		sleepers;	/* nthreads sleeping on this lock */
	uint_t		alloccount;	/* how many times created? */
	uint_t		cookie;		/* mmap() offset (page #) into device */
	uint_t		key;		/* key, if any */
	enum winlock_style	style;	/* style of lock - OLDSTYLE, NEWSTYLE */
	clock_t		timeout;	/* sleep time in ticks */
	ddi_umem_cookie_t umem_cookie;	/* cookie for umem allocated memory */
	int		*lockptr;	/* kernel virtual addr of lock */
	struct segproc	*clients;	/* list of clients of this lock */
	struct segproc	*owner;		/* current owner of lock */
	kmutex_t	mutex;		/* mutex for lock */
	kcondvar_t	locksleep;	/* for sleeping on lock */
} SegLock;

#define	LOCK(lp)	(*((lp)->lockptr))

/*
 * Number of locks that can fit in a page.  The driver can support only that
 * many.  For oldstyle locks, it is relatively easy to increase the limit, as
 * each is in a separate page (MAX_LOCKS mostly serves to prevent runaway
 * allocation).  For newstyle locks, this is trickier, as the code needs to
 * allow for mapping into the second or third page of the cookie for some
 * locks.
 */
#define	MAX_LOCKS	(PAGESIZE/sizeof (int))

#define	LOCKTIME	3	/* Default lock timeout in seconds */


/* Protection settings for winlock user mappings */
#define	WINLOCK_PROT	(PROT_READ|PROT_WRITE|PROT_USER)

/*
 * The trash page is where unwanted writes go
 * when a process is releasing a lock.
 */
static	ddi_umem_cookie_t	trashpage_cookie = NULL;

/* For newstyle allocations a common page of locks is used */
static	caddr_t	lockpage = NULL;
static	ddi_umem_cookie_t	lockpage_cookie = NULL;

static	dev_info_t	*winlock_dip = NULL;
static	kmutex_t	winlock_mutex;

/*
 * winlock_mutex protects
 *	lock_list
 *	lock_free_list
 *	"next" field in SegLock
 *	next_lock
 *	trashpage_cookie
 *	lockpage & lockpage_cookie
 *
 * The SegLock mutex protects
 *	the rest of the fields in SegLock
 *	all fields in the list of SegProc (lp->clients)
 *
 * Lock ordering is winlock_mutex -> SegLock mutex.
 * During devmap/seg operations the SegLock mutex is acquired without
 * winlock_mutex.
 *
 * During devmap callbacks, the pointer to SegProc is stored as the private
 * data in the devmap handle.  This pointer will not go stale (i.e., the
 * SegProc getting deleted), as the SegProc is not deleted until both the
 * lockseg and unlockseg have been unmapped and the pointers stored in
 * the devmap handles have been NULL'ed.
 * But before this pointer is used to access any fields (other than the 'lp')
 * lp->mutex must be held.
 */
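
/*
 * A sketch of the canonical ordering (illustrative only; this is the
 * pattern used by, e.g., seglock_findlock() below):
 *
 *	mutex_enter(&winlock_mutex);	// protects the lock lists
 *	mutex_enter(&lp->mutex);	// per-lock mutex taken inside
 *	mutex_exit(&winlock_mutex);	// may be dropped; lp->mutex held
 *	...				// operate on *lp
 *	mutex_exit(&lp->mutex);
 */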
This pointer will not go stale (i.e., the 383 * SegProc getting deleted) as the SegProc is not deleted until both the 384 * lockseg and unlockseg have been unmapped and the pointers stored in 385 * the devmap handles have been NULL'ed. 386 * But before this pointer is used to access any fields (other than the 'lp') 387 * lp->mutex must be held. 388 */ 389 390 /* 391 * The allocation code tries to allocate from lock_free_list 392 * first, otherwise it uses kmem_zalloc. When lock list is idle, all 393 * locks in lock_free_list are kmem_freed 394 */ 395 static SegLock *lock_list = NULL; /* in-use locks */ 396 static SegLock *lock_free_list = NULL; /* free locks */ 397 static int next_lock = 0; /* next lock cookie */ 398 399 /* Routines to find a lock in lock_list based on offset or key */ 400 static SegLock *seglock_findlock(uint_t); 401 static SegLock *seglock_findkey(uint_t); 402 403 /* Routines to find and allocate SegProc structures */ 404 static SegProc *seglock_find_specific(SegLock *, void *); 405 static SegProc *seglock_alloc_specific(SegLock *, void *); 406 #define seglock_findclient(lp) seglock_find_specific((lp), CURPROC_ID) 407 #define seglock_allocclient(lp) seglock_alloc_specific((lp), CURPROC_ID) 408 409 /* Delete client from lock's client list */ 410 static void seglock_deleteclient(SegLock *, SegProc *); 411 static void garbage_collect_lock(SegLock *, SegProc *); 412 413 /* Create a new lock */ 414 static SegLock *seglock_createlock(enum winlock_style); 415 /* Destroy lock */ 416 static void seglock_destroylock(SegLock *); 417 static void lock_destroyall(void); 418 419 /* Helper functions in winlockmap_access */ 420 static int give_mapping(SegLock *, SegProc *, uint_t); 421 static int lock_giveup(SegLock *, int); 422 static int seglock_lockfault(devmap_cookie_t, SegProc *, SegLock *, uint_t); 423 424 /* routines called from ioctl */ 425 static int seglock_graballoc(intptr_t, enum winlock_style, int); 426 static int seglock_grabinfo(intptr_t, int); 427 static int seglock_grabfree(intptr_t, int); 428 static int seglock_gettimeout(intptr_t, int); 429 static int seglock_settimeout(intptr_t, int); 430 static void seglock_dump_all(void); 431 432 static int 433 winlock_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 434 { 435 DEBUGF(1, (CE_CONT, "winlock_attach, devi=%p, cmd=%d\n", 436 (void *)devi, (int)cmd)); 437 if (cmd != DDI_ATTACH) 438 return (DDI_FAILURE); 439 if (ddi_create_minor_node(devi, "winlock", S_IFCHR, 0, DDI_PSEUDO, 0) 440 == DDI_FAILURE) { 441 return (DDI_FAILURE); 442 } 443 winlock_dip = devi; 444 ddi_report_dev(devi); 445 return (DDI_SUCCESS); 446 } 447 448 /*ARGSUSED*/ 449 static int 450 winlock_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 451 { 452 DEBUGF(1, (CE_CONT, "winlock_detach, devi=%p, cmd=%d\n", 453 (void *)devi, (int)cmd)); 454 if (cmd != DDI_DETACH) 455 return (DDI_FAILURE); 456 457 mutex_enter(&winlock_mutex); 458 if (lock_list != NULL) { 459 mutex_exit(&winlock_mutex); 460 return (DDI_FAILURE); 461 } 462 ASSERT(lock_free_list == NULL); 463 464 DEBUGF(1, (CE_CONT, "detach freeing trashpage and lockpage\n")); 465 /* destroy any common stuff created */ 466 if (trashpage_cookie != NULL) { 467 ddi_umem_free(trashpage_cookie); 468 trashpage_cookie = NULL; 469 } 470 if (lockpage != NULL) { 471 ddi_umem_free(lockpage_cookie); 472 lockpage = NULL; 473 lockpage_cookie = NULL; 474 } 475 winlock_dip = NULL; 476 mutex_exit(&winlock_mutex); 477 return (DDI_SUCCESS); 478 } 479 480 /*ARGSUSED*/ 481 static int 482 winlock_info(dev_info_t *dip, ddi_info_cmd_t 

/*ARGSUSED*/
static int
winlock_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	register int error;

	/* initialize result */
	*result = NULL;

	/* only valid instance (i.e., getminor) is 0 */
	if (getminor((dev_t)arg) >= 1)
		return (DDI_FAILURE);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (winlock_dip == NULL)
			error = DDI_FAILURE;
		else {
			*result = (void *)winlock_dip;
			error = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}


/*ARGSUSED*/
int
winlock_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
	cred_t *cred, int *rval)
{
	DEBUGF(1, (CE_CONT, "winlockioctl: cmd=%d, arg=0x%p\n",
	    cmd, (void *)arg));

	switch (cmd) {
	/*
	 * ioctls that used to be handled by framebuffers (defined in fbio.h)
	 * RFE: No code really calls the GRAB* ioctls now.  Should EOL.
	 */

	case GRABPAGEALLOC:
		return (seglock_graballoc(arg, OLDSTYLE_LOCK, mode));
	case GRABPAGEFREE:
		return (seglock_grabfree(arg, mode));
	case GRABLOCKINFO:
		return (seglock_grabinfo(arg, mode));
	case GRABATTACH:
		return (EINVAL); /* GRABATTACH is not supported (never was) */

	case WINLOCKALLOC:
		return (seglock_graballoc(arg, NEWSTYLE_LOCK, mode));
	case WINLOCKFREE:
		return (seglock_grabfree(arg, mode));
	case WINLOCKSETTIMEOUT:
		return (seglock_settimeout(arg, mode));
	case WINLOCKGETTIMEOUT:
		return (seglock_gettimeout(arg, mode));
	case WINLOCKDUMP:
		seglock_dump_all();
		return (0);

#ifdef DEBUG
	case (WIOC|255):
		lock_debug = arg;
		return (0);
#endif

	default:
		return (ENOTTY);	/* Why is this not EINVAL */
	}
}

int
winlocksegmap(
	dev_t	dev,		/* major:minor */
	off_t	off,		/* device offset from mmap(2) */
	struct as *as,		/* user's address space. */
	caddr_t	*addr,		/* address from mmap(2) */
	off_t	len,		/* length from mmap(2) */
	uint_t	prot,		/* user wants this access */
	uint_t	maxprot,	/* this is the maximum the user can have */
	uint_t	flags,		/* flags from mmap(2) */
	cred_t	*cred)
{
	DEBUGF(1, (CE_CONT, "winlock_segmap off=%lx, len=0x%lx\n", off, len));

	/* Only MAP_SHARED mappings are supported */
	if ((flags & MAP_TYPE) == MAP_PRIVATE) {
		return (EINVAL);
	}

	/* Use devmap_setup to setup the mapping */
	return (devmap_setup(dev, (offset_t)off, as, addr, (size_t)len, prot,
	    maxprot, flags, cred));
}

/*ARGSUSED*/
int
winlock_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
	size_t *maplen, uint_t model)
{
	SegLock *lp;
	int err;

	DEBUGF(1, (CE_CONT, "winlock devmap: off=%llx, len=%lx, dhp=%p\n",
	    off, len, (void *)dhp));

	*maplen = 0;

	/* Check if the lock exists, i.e., has been created by alloc */
	/* off is the sy_ident returned in the alloc ioctl */
	if ((lp = seglock_findlock((uint_t)off)) == NULL) {
		return (ENXIO);
	}

	/*
	 * The offset bits in the mmap(2) offset have to be the same as in
	 * lockptr, OR the offset should be 0 (i.e., masked off).
	 */
	if (((off & PAGEOFFSET) != 0) &&
	    ((off ^ (uintptr_t)(lp->lockptr)) & (offset_t)PAGEOFFSET) != 0) {
		DEBUGF(2, (CE_CONT,
		    "mmap offset %llx mismatch with lockptr %p\n",
		    off, (void *)lp->lockptr));
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (EINVAL);
	}

	/* Only supports PAGESIZE length mappings */
	if (len != PAGESIZE) {
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (EINVAL);
	}

	/*
	 * Set up devmap to point at the page associated with the lock.
	 * RFE: At this point we don't know if this is a lockpage or an
	 * unlockpage; a lockpage would not need the DEVMAP_ALLOW_REMAP
	 * setting.
	 * We could have kept track of the mapping order here,
	 * but the devmap framework does not support storing any state in
	 * this devmap callback, as it does not call back for error cleanup
	 * if some other error happens in the framework.
	 * RFE: We should modify the winlock mmap interface so that the
	 * user process marks, in the offset passed in, whether this is for a
	 * lock or unlock mapping, instead of guessing based on the order of
	 * maps.  This would clean up other things (such as in fork).
	 */
	if ((err = devmap_umem_setup(dhp, winlock_dip, &winlockmap_ops,
	    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT,
	    DEVMAP_ALLOW_REMAP, 0)) < 0) {
		mutex_exit(&lp->mutex);	/* held by seglock_findlock */
		return (err);
	}
	/*
	 * No mappings are loaded to those segments yet.  The correctness
	 * of the winlock semantics depends on the devmap framework/seg_dev
	 * NOT loading the translations without calling the _access callback.
	 */

	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
	*maplen = PAGESIZE;
	return (0);
}

/*
 * This routine is called by the devmap framework after the devmap entry point
 * above, once the mapping has been set up in seg_dev.
 * We store the pointer to the per-process context in the devmap private data.
 */
/*ARGSUSED*/
static int
winlockmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
	size_t len, void **pvtp)
{
	SegLock *lp = seglock_findlock((uint_t)off); /* returns w/ mutex held */
	SegProc *sdp;

	ASSERT(len == PAGESIZE);

	/* Find the per-process context for this lock, alloc one if not found */
	sdp = seglock_allocclient(lp);

	/*
	 * RFE: Determining which is a lock vs unlock seg is based on the
	 * order of mmaps; we should change that to be derivable from off
	 */
	if (sdp->lockseg == NULL) {
		sdp->lockseg = dhp;
	} else if (sdp->unlockseg == NULL) {
		sdp->unlockseg = dhp;
	} else {
		/* attempting to map lock more than twice */
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (ENOMEM);
	}

	*pvtp = sdp;
	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
	return (DDI_SUCCESS);
}

/*
 * duplicate a segment, as in fork()
 * On fork, the child inherits the mappings to the lock.
 * lp->alloccount is NOT incremented, so the child should not do a free().
 * Semantics are the same as if the child had done an alloc(), map(), map().
 * This way it would work fine if doing an exec() variant later.
 * The child does not inherit any UFLAGS set in the parent.
 * The lock and unlock pages start off unmapped, i.e., the child does not
 * own the lock.
 * The code assumes that the child process has a valid pid at this point.
 * RFE: These semantics depend on fork not duplicating the hat mappings
 * (which is the current implementation).  To enforce it, we would need to
 * call devmap_unload from here - not clear if that is allowed.
 */

static int
winlockmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
	void **newpvt)
{
	SegProc *sdp = (SegProc *)oldpvt;
	SegProc *ndp;
	SegLock *lp = sdp->lp;

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));

	/*
	 * Note: At this point, the child process does have a pid, but
	 * the arguments passed to as_dup, and hence to devmap_dup, don't
	 * pass it down.  So we cannot use the normal seglock_findclient -
	 * which finds the parent sdp itself!
	 * Instead we allocate the child's SegProc by using the child's as
	 * pointer.
	 * RFE: we are using the as structure, which means peeking into the
	 * devmap_cookie.  This is not DDI-compliant.  We need a compliant
	 * way of getting at either the as or, better, a way to get the
	 * child's new pid.
	 */
	ndp = seglock_alloc_specific(lp,
	    (void *)((devmap_handle_t *)new_dhp)->dh_seg->s_as);
	ASSERT(ndp != sdp);

	if (sdp->lockseg == dhp) {
		ASSERT(ndp->lockseg == NULL);
		ndp->lockseg = new_dhp;
	} else {
		ASSERT(sdp->unlockseg == dhp);
		ASSERT(ndp->unlockseg == NULL);
		ndp->unlockseg = new_dhp;
		if (sdp->flag & TRASHPAGE) {
			ndp->flag |= TRASHPAGE;
		}
	}
	mutex_exit(&lp->mutex);
	*newpvt = (void *)ndp;
	return (0);
}


/*ARGSUSED*/
static void
winlockmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
	devmap_cookie_t new_dhp1, void **newpvtp1,
	devmap_cookie_t new_dhp2, void **newpvtp2)
{
	SegProc *sdp = (SegProc *)pvtp;
	SegLock *lp = sdp->lp;

	/*
	 * We always create PAGESIZE length mappings, so there should never
	 * be a partial unmapping case
	 */
	ASSERT((new_dhp1 == NULL) && (new_dhp2 == NULL));

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
	/* make sure this process doesn't own the lock */
	if (sdp == lp->owner) {
		/*
		 * Not handling errors - i.e., errors in unloading the
		 * mapping - as the hat/seg structures get torn down as
		 * part of the unmapping anyway.
		 */
		(void) lock_giveup(lp, 0);
	}

	ASSERT(sdp != lp->owner);
	if (sdp->lockseg == dhp) {
		sdp->lockseg = NULL;
	} else {
		ASSERT(sdp->unlockseg == dhp);
		sdp->unlockseg = NULL;
		sdp->flag &= ~TRASHPAGE;	/* clear flag if set */
	}

	garbage_collect_lock(lp, sdp);
}

/*ARGSUSED*/
static int
winlockmap_access(devmap_cookie_t dhp, void *pvt, offset_t off, size_t len,
	uint_t type, uint_t rw)
{
	SegProc *sdp = (SegProc *)pvt;
	SegLock *lp = sdp->lp;
	int err;

	/* Driver handles only DEVMAP_ACCESS type of faults */
	if (type != DEVMAP_ACCESS)
		return (-1);

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));

	/* should be using a SegProc that corresponds to current process */
	ASSERT(ID(sdp) == CURPROC_ID);

	/*
	 * If the process is faulting but does not have both segments mapped,
	 * return an error (should cause a segv).
	 * RFE: could give it a permanent trashpage
	 */
	if ((sdp->lockseg == NULL) || (sdp->unlockseg == NULL)) {
		err = -1;
	} else {
		err = seglock_lockfault(dhp, sdp, lp, rw);
	}
	mutex_exit(&lp->mutex);
	return (err);
}

	/* INTERNAL ROUTINES START HERE */



/*
 * search the lock_list list for the specified cookie
 * The cookie is the sy_ident field returned by the ALLOC ioctl.
 * This has two parts:
 * the pageoffset bits contain the offset into the lock page.
 * the pagenumber bits contain the lock id.
 * The user code is supposed to pass in only the pagenumber portion
 *	(i.e., mask off the pageoffset bits).  However, the code below
 *	does the mask in case the users are not diligent.
 * if found, returns with the mutex for the SegLock structure held
 */
static SegLock *
seglock_findlock(uint_t cookie)
{
	SegLock	*lp;

	cookie &= (uint_t)PAGEMASK; /* remove pageoffset bits to get cookie */
	mutex_enter(&winlock_mutex);
	for (lp = lock_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		if (cookie == lp->cookie) {
			break;	/* return with lp->mutex held */
		}
		mutex_exit(&lp->mutex);
	}
	mutex_exit(&winlock_mutex);
	return (lp);
}

/*
 * search the lock_list list for the specified non-zero key
 * if found, returns with the lock for the SegLock structure held
 */
static SegLock *
seglock_findkey(uint_t key)
{
	SegLock	*lp;

	ASSERT(MUTEX_HELD(&winlock_mutex));
	/* The driver allows multiple locks with key 0; don't search */
	if (key == 0)
		return (NULL);
	for (lp = lock_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		if (key == lp->key)
			break;
		mutex_exit(&lp->mutex);
	}
	return (lp);
}

/*
 * Create a new lock context.
 * Returns with SegLock mutex held
 */

static SegLock *
seglock_createlock(enum winlock_style style)
{
	SegLock	*lp;

	DEBUGF(3, (CE_CONT, "seglock_createlock: free_list=%p, next_lock %d\n",
	    (void *)lock_free_list, next_lock));

	ASSERT(MUTEX_HELD(&winlock_mutex));
	if (lock_free_list != NULL) {
		lp = lock_free_list;
		lock_free_list = lp->next;
	} else if (next_lock >= MAX_LOCKS) {
		return (NULL);
	} else {
		lp = kmem_zalloc(sizeof (SegLock), KM_SLEEP);
		lp->cookie = (next_lock + 1) * (uint_t)PAGESIZE;
		mutex_init(&lp->mutex, NULL, MUTEX_DEFAULT, NULL);
		cv_init(&lp->locksleep, NULL, CV_DEFAULT, NULL);
		++next_lock;
	}

	mutex_enter(&lp->mutex);
	ASSERT((lp->cookie/PAGESIZE) <= next_lock);

	if (style == OLDSTYLE_LOCK) {
		lp->lockptr = (int *)ddi_umem_alloc(PAGESIZE,
		    DDI_UMEM_SLEEP, &(lp->umem_cookie));
	} else {
		lp->lockptr = ((int *)lockpage) + ((lp->cookie/PAGESIZE) - 1);
		lp->umem_cookie = lockpage_cookie;
	}

	ASSERT(lp->lockptr != NULL);
	lp->style = style;
	lp->sleepers = 0;
	lp->alloccount = 1;
	lp->timeout = LOCKTIME*hz;
	lp->clients = NULL;
	lp->owner = NULL;
	LOCK(lp) = 0;
	lp->next = lock_list;
	lock_list = lp;
	return (lp);
}

/*
 * Routine to destroy a lock structure.
 * This routine is called while holding the lp->mutex but not the
 * winlock_mutex.
 */

static void
seglock_destroylock(SegLock *lp)
{
	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(!MUTEX_HELD(&winlock_mutex));

	DEBUGF(3, (CE_CONT, "destroying lock cookie %d key %d\n",
	    lp->cookie, lp->key));

	ASSERT(lp->alloccount == 0);
	ASSERT(lp->clients == NULL);
	ASSERT(lp->owner == NULL);
	ASSERT(lp->sleepers == 0);

	/* clean up/release fields in lp */
	if (lp->style == OLDSTYLE_LOCK) {
		ddi_umem_free(lp->umem_cookie);
	}
	lp->umem_cookie = NULL;
	lp->lockptr = NULL;
	lp->key = 0;

	/*
	 * Reduce the cookie by 1, which makes it non-page-aligned and
	 * invalid.  This prevents any valid lookup from finding this lock,
	 * so when we drop the lock and regrab it, it will still be there,
	 * and nobody else will have attached to it.
	 */
	lp->cookie--;

	/* Drop and reacquire mutexes in right order */
	mutex_exit(&lp->mutex);
	mutex_enter(&winlock_mutex);
	mutex_enter(&lp->mutex);

	/* reincrement the cookie to get the original valid cookie */
	lp->cookie++;
	ASSERT((lp->cookie & PAGEOFFSET) == 0);
	ASSERT(lp->alloccount == 0);
	ASSERT(lp->clients == NULL);
	ASSERT(lp->owner == NULL);
	ASSERT(lp->sleepers == 0);

	/* Remove lp from lock_list */
	if (lock_list == lp) {
		lock_list = lp->next;
	} else {
		SegLock *tmp = lock_list;
		while (tmp->next != lp) {
			tmp = tmp->next;
			ASSERT(tmp != NULL);
		}
		tmp->next = lp->next;
	}

	/* Add to lock_free_list */
	lp->next = lock_free_list;
	lock_free_list = lp;
	mutex_exit(&lp->mutex);

	/* Check if all locks deleted and cleanup */
	if (lock_list == NULL) {
		lock_destroyall();
	}

	mutex_exit(&winlock_mutex);
}

/* Routine to find a SegProc corresponding to the tag */

static SegProc *
seglock_find_specific(SegLock *lp, void *tag)
{
	SegProc *sdp;

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(tag != NULL);
	for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
		if (ID(sdp) == tag)
			break;
	}
	return (sdp);
}

/* Routine to find (and if needed allocate) a SegProc corresponding to tag */

static SegProc *
seglock_alloc_specific(SegLock *lp, void *tag)
{
	SegProc *sdp;

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(tag != NULL);

	/* Search and return if existing one found */
	sdp = seglock_find_specific(lp, tag);
	if (sdp != NULL)
		return (sdp);

	DEBUGF(3, (CE_CONT, "Allocating segproc structure for tag %p lock %d\n",
	    tag, lp->cookie));

	/* Allocate a new SegProc */
	sdp = kmem_zalloc(sizeof (SegProc), KM_SLEEP);
	sdp->next = lp->clients;
	lp->clients = sdp;
	sdp->lp = lp;
	ID(sdp) = tag;
	return (sdp);
}

/*
 * search a context's client list for the given client and delete
 */

static void
seglock_deleteclient(SegLock *lp, SegProc *sdp)
{
	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(lp->owner != sdp);	/* Not current owner of lock */
	ASSERT(sdp->lockseg == NULL);	/* Mappings torn down */
	ASSERT(sdp->unlockseg == NULL);

	DEBUGF(3, (CE_CONT, "Deleting segproc structure for pid %d lock %d\n",
	    ddi_get_pid(), lp->cookie));
	if (lp->clients == sdp) {
		lp->clients = sdp->next;
	} else {
		SegProc *tmp = lp->clients;
		while (tmp->next != sdp) {
			tmp = tmp->next;
			ASSERT(tmp != NULL);
		}
		tmp->next = sdp->next;
	}
	kmem_free(sdp, sizeof (SegProc));
}

/*
 * Routine to verify whether a SegProc and a SegLock
 * structure are empty/idle.
 * Destroys the structures if they are ready.
 * Can be called with sdp == NULL to verify only the lock state.
 * The caller should hold lp->mutex,
 * and this routine drops the mutex.
 */
static void
garbage_collect_lock(SegLock *lp, SegProc *sdp)
{
	ASSERT(MUTEX_HELD(&lp->mutex));
	/* see if both segments unmapped from client structure */
	if ((sdp != NULL) && (sdp->lockseg == NULL) && (sdp->unlockseg == NULL))
		seglock_deleteclient(lp, sdp);

	/* see if this is last client in the entire lock context */
	if ((lp->clients == NULL) && (lp->alloccount == 0)) {
		seglock_destroylock(lp);
	} else {
		mutex_exit(&lp->mutex);
	}
}


/* IOCTLS START HERE */

static int
seglock_grabinfo(intptr_t arg, int mode)
{
	int i = 1;

	/* multiple clients per lock supported - see comments up top */
	if (ddi_copyout((caddr_t)&i, (caddr_t)arg, sizeof (int), mode) != 0)
		return (EFAULT);
	return (0);
}

static int
seglock_graballoc(intptr_t arg, enum winlock_style style, int mode) /* IOCTL */
{
	struct seglock	*lp;
	uint_t		key;
	struct		winlockalloc wla;
	int		err;

	if (style == OLDSTYLE_LOCK) {
		key = 0;
	} else {
		if (ddi_copyin((caddr_t)arg, (caddr_t)&wla, sizeof (wla),
		    mode)) {
			return (EFAULT);
		}
		key = wla.sy_key;
	}

	DEBUGF(3, (CE_CONT,
	    "seglock_graballoc: key=%u, style=%d\n", key, style));

	mutex_enter(&winlock_mutex);
	/* Allocate lockpage on first new style alloc */
	if ((lockpage == NULL) && (style == NEWSTYLE_LOCK)) {
		lockpage = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
		    &lockpage_cookie);
	}

	/* Allocate trashpage on first alloc (any style) */
	if (trashpage_cookie == NULL) {
		(void) ddi_umem_alloc(PAGESIZE, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
		    &trashpage_cookie);
	}

	if ((lp = seglock_findkey(key)) != NULL) {
		DEBUGF(2, (CE_CONT, "alloc: found lock key %d cookie %d\n",
		    key, lp->cookie));
		++lp->alloccount;
	} else if ((lp = seglock_createlock(style)) != NULL) {
		DEBUGF(2, (CE_CONT, "alloc: created lock key %d cookie %d\n",
		    key, lp->cookie));
		lp->key = key;
	} else {
		DEBUGF(2, (CE_CONT, "alloc: cannot create lock key %d\n", key));
		mutex_exit(&winlock_mutex);
		return (ENOMEM);
	}
	ASSERT((lp != NULL) && MUTEX_HELD(&lp->mutex));

	mutex_exit(&winlock_mutex);

	if (style == OLDSTYLE_LOCK) {
		err = ddi_copyout((caddr_t)&lp->cookie, (caddr_t)arg,
		    sizeof (lp->cookie), mode);
	} else {
		wla.sy_ident = lp->cookie +
		    (uint_t)((uintptr_t)(lp->lockptr) & PAGEOFFSET);
		err = ddi_copyout((caddr_t)&wla, (caddr_t)arg,
		    sizeof (wla), mode);
	}

	if (err) {
		/* On error, should undo allocation */
		lp->alloccount--;

		/* Verify and delete if lock is unused now */
		garbage_collect_lock(lp, NULL);
		return (EFAULT);
	}

	mutex_exit(&lp->mutex);
	return (0);
}

static int
seglock_grabfree(intptr_t arg, int mode)	/* IOCTL */
{
	struct seglock	*lp;
	uint_t	offset;

	if (ddi_copyin((caddr_t)arg, &offset, sizeof (offset), mode)
	    != 0) {
		return (EFAULT);
	}
	DEBUGF(2, (CE_CONT, "seglock_grabfree: offset=%u", offset));

	if ((lp = seglock_findlock(offset)) == NULL) {
		DEBUGF(2, (CE_CONT, "did not find lock\n"));
		return (EINVAL);
	}
	DEBUGF(3, (CE_CONT, " lock key %d, cookie %d, alloccount %d\n",
	    lp->key, lp->cookie, lp->alloccount));

	if (lp->alloccount > 0)
		lp->alloccount--;

	/* Verify and delete if lock is unused now */
	garbage_collect_lock(lp, NULL);
	return (0);
}


/*
 * Sets the timeout in the lock and the UFLAGS in the client.
 * The UFLAGS are stored in the client structure and persist only
 * until the lock pages are unmapped.  If the process sets UFLAGS,
 * then maps the lock/unlock pages and unmaps them, the client
 * structure will get deleted and the UFLAGS will be lost.  The process
 * will need to set up the flags again.
 */
static int
seglock_settimeout(intptr_t arg, int mode)	/* IOCTL */
{
	SegLock		*lp;
	SegProc		*sdp;
	struct	winlocktimeout	wlt;

	if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) {
		return (EFAULT);
	}

	if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
		return (EINVAL);

	lp->timeout = MSEC_TO_TICK_ROUNDUP(wlt.sy_timeout);
	/* if timeout modified, wake up any sleepers */
	if (lp->sleepers > 0) {
		cv_broadcast(&lp->locksleep);
	}

	/*
	 * If the process is trying to set UFLAGS,
	 *	find the client segproc, allocating one if needed,
	 *	and set the flags, preserving the kernel flags.
	 * If the process is clearing UFLAGS,
	 *	find the client segproc, but don't allocate one if it does
	 *	not exist.
	 */
	if (wlt.sy_flags & UFLAGS) {
		sdp = seglock_allocclient(lp);
		sdp->flag = sdp->flag & KFLAGS | wlt.sy_flags & UFLAGS;
	} else if ((sdp = seglock_findclient(lp)) != NULL) {
		sdp->flag = sdp->flag & KFLAGS;
		/* If clearing UFLAGS leaves the segment or lock idle, delete */
		garbage_collect_lock(lp, sdp);
		return (0);
	}
	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
	return (0);
}

static int
seglock_gettimeout(intptr_t arg, int mode)
{
	SegLock		*lp;
	SegProc		*sdp;
	struct	winlocktimeout	wlt;

	if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0)
		return (EFAULT);

	if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
		return (EINVAL);

	wlt.sy_timeout = TICK_TO_MSEC(lp->timeout);
	/*
	 * If this process has an active allocated lock, return those flags.
	 * Don't allocate a client structure on gettimeout.
	 * If not, return 0.
	 */
	if ((sdp = seglock_findclient(lp)) != NULL) {
		wlt.sy_flags = sdp->flag & UFLAGS;
	} else {
		wlt.sy_flags = 0;
	}
	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */

	if (ddi_copyout(&wlt, (caddr_t)arg, sizeof (wlt), mode) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Handle lock segment faults here...
 *
 * This is where the magic happens.
 */

/* ARGSUSED */
static int
seglock_lockfault(devmap_cookie_t dhp, SegProc *sdp, SegLock *lp, uint_t rw)
{
	SegProc *owner = lp->owner;
	int err;

	ASSERT(MUTEX_HELD(&lp->mutex));
	DEBUGF(3, (CE_CONT,
	    "seglock_lockfault: hdl=%p, sdp=%p, lp=%p owner=%p\n",
	    (void *)dhp, (void *)sdp, (void *)lp, (void *)owner));

	/* lockfault is always called with sdp in current process context */
	ASSERT(ID(sdp) == CURPROC_ID);

	/* If the lock has no current owner, give the mapping to new owner */
	if (owner == NULL) {
		DEBUGF(4, (CE_CONT, " lock has no current owner\n"));
		return (give_mapping(lp, sdp, rw));
	}

	if (owner == sdp) {
		/*
		 * Current owner is faulting on owned lock segment, OR
		 * current owner is faulting on the unlock page and has no
		 * waiters.  Then we can give the mapping to the current
		 * owner.
		 */
		if ((sdp->lockseg == dhp) || (lp->sleepers == 0)) {
			DEBUGF(4, (CE_CONT, "lock owner faulting\n"));
			return (give_mapping(lp, sdp, rw));
		} else {
			/*
			 * Owner must be writing to the unlock page and there
			 * are waiters; other cases have been checked earlier.
			 * Release the lock, the owner, and the owner's
			 * mappings.  As the owner is trying to write to the
			 * unlock page, leave it with a trashpage mapping and
			 * wake up the sleepers.
			 */
			ASSERT((dhp == sdp->unlockseg) && (lp->sleepers != 0));
			DEBUGF(4, (CE_CONT,
			    " owner fault on unlock seg w/ sleeper\n"));
			return (lock_giveup(lp, 1));
		}
	}

	ASSERT(owner != sdp);

	/*
	 * If the old owner is faulting on a trash unlock mapping,
	 * load hat mappings to the trash page.
	 * RFE: non-owners should NOT be faulting on the unlock mapping, as
	 * they are first supposed to fault on the lock seg.  We could give
	 * them a trash page or return an error.
	 */
	if ((sdp->unlockseg == dhp) && (sdp->flag & TRASHPAGE)) {
		DEBUGF(4, (CE_CONT, " old owner reloads trash mapping\n"));
		return (devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
		    DEVMAP_ACCESS, rw));
	}

	/*
	 * Non-owner faulting.  Need to check the current LOCK state.
	 *
	 * Before reading the lock value in LOCK(lp), we must make sure that
	 * the owner cannot change its value before we change mappings,
	 * or else we could end up either with a hung process
	 * or more than one process thinking they have the lock.
	 * We do that by unloading the owner's mappings.
	 */
	DEBUGF(4, (CE_CONT, " owner loses mappings to check lock state\n"));
	err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
	err |= devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
	if (err != 0)
		return (err);	/* unable to remove owner mapping */

	/*
	 * If the lock is not held, then the current owner's mappings were
	 * unloaded above and we can give the lock to the new owner.
	 */
	if (LOCK(lp) == 0) {
		DEBUGF(4, (CE_CONT,
		    "Free lock (%p): Giving mapping to new owner %d\n",
		    (void *)lp, ddi_get_pid()));
		return (give_mapping(lp, sdp, rw));
	}

	DEBUGF(4, (CE_CONT, " lock held, sleeping\n"));

	/*
	 * A non-owning process tried to write (presumably to the lockpage,
	 * but it doesn't matter), but the lock is held; we need to sleep for
	 * the lock while there is an owner.
	 */

	lp->sleepers++;
	while ((owner = lp->owner) != NULL) {
		int rval;

		if ((lp->timeout == 0) || (owner->flag & SY_NOTIMEOUT)) {
			/*
			 * No timeout has been specified for this lock;
			 * we'll simply sleep on the condition variable.
			 */
			rval = cv_wait_sig(&lp->locksleep, &lp->mutex);
		} else {
			/*
			 * A timeout _has_ been specified for this lock.  We
			 * need to wake up and possibly steal this lock if
			 * the owner does not let it go.  Note that all
			 * sleepers on a lock with a timeout wait; the
			 * sleeper with the earliest timeout will wake up,
			 * and potentially steal the lock.  Stealing the lock
			 * will cause a broadcast on the locksleep cv, and
			 * thus kick the other timed waiters and cause
			 * everyone to restart in a new timedwait.
			 */
			rval = cv_timedwait_sig(&lp->locksleep,
			    &lp->mutex, ddi_get_lbolt() + lp->timeout);
		}

		/*
		 * Timeout and still the old owner - steal the lock:
		 * force-release the lock and give the old owner a trashpage
		 * mapping.
		 */
		if ((rval == -1) && (lp->owner == owner)) {
			/*
			 * If there are any errors in lock_giveup, go back
			 * and sleep/retry.  If successful, we will break
			 * out of the loop.
			 */
			cmn_err(CE_NOTE, "Process %d timed out on lock %d\n",
			    ddi_get_pid(), lp->cookie);
			(void) lock_giveup(lp, 1);
		} else if (rval == 0) {	/* signal pending */
			cmn_err(CE_NOTE,
			    "Process %d signalled while waiting on lock %d\n",
			    ddi_get_pid(), lp->cookie);
			lp->sleepers--;
			return (FC_MAKE_ERR(EINTR));
		}
	}

	lp->sleepers--;
	/*
	 * Give the mapping to this process and save a fault later
	 */
	return (give_mapping(lp, sdp, rw));
}

/*
 * Utility: give a valid mapping to lock and unlock pages to current process.
 * Caller responsible for unloading old owner's mappings
 */

static int
give_mapping(SegLock *lp, SegProc *sdp, uint_t rw)
{
	int err = 0;

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(!((lp->owner == NULL) && (LOCK(lp) != 0)));
	/* give_mapping is always called with sdp in current process context */
	ASSERT(ID(sdp) == CURPROC_ID);

	/* remap any old trash mappings */
	if (sdp->flag & TRASHPAGE) {
		/* current owner should not have a trash mapping */
		ASSERT(sdp != lp->owner);

		DEBUGF(4, (CE_CONT,
		    "new owner %d remapping old trash mapping\n",
		    ddi_get_pid()));
		if ((err = devmap_umem_remap(sdp->unlockseg, winlock_dip,
		    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
			/*
			 * unable to remap old trash page,
			 * abort before changing owner
			 */
			DEBUGF(4, (CE_CONT,
			    "aborting: error in umem_remap %d\n", err));
			return (err);
		}
		sdp->flag &= ~TRASHPAGE;
	}

	/* we have a new owner now */
	lp->owner = sdp;

	if ((err = devmap_load(sdp->lockseg, lp->cookie, PAGESIZE,
	    DEVMAP_ACCESS, rw)) != 0) {
		return (err);
	}
	DEBUGF(4, (CE_CONT, "new owner %d gets lock mapping", ddi_get_pid()));

	if (lp->sleepers) {
		/* Force unload unlock mapping if there are waiters */
		DEBUGF(4, (CE_CONT,
		    " lock has %d sleepers => remove unlock mapping\n",
		    lp->sleepers));
		err = devmap_unload(sdp->unlockseg, lp->cookie, PAGESIZE);
	} else {
		/*
		 * while here, give the new owner a valid mapping to unlock
		 * page so we don't get called again.
		 */
		DEBUGF(4, (CE_CONT, " and unlock mapping\n"));
		err = devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
		    DEVMAP_ACCESS, PROT_WRITE);
	}
	return (err);
}

/*
 * Unload the owner's mappings, release the lock, and wake up any sleepers.
 * If trash, then the old owner is given a trash mapping
 *	=> the old owner held the lock too long and caused a timeout
 */
static int
lock_giveup(SegLock *lp, int trash)
{
	SegProc *owner = lp->owner;

	DEBUGF(4, (CE_CONT, "winlock_giveup: lp=%p, owner=%p, trash %d\n",
	    (void *)lp, (void *)ID(lp->owner), trash));

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(owner != NULL);

	/*
	 * owner loses lockpage/unlockpage mappings and gains a
	 * trashpage mapping, if needed.
	 */
	if (!trash) {
		/*
		 * We do not handle errors in devmap_unload in the !trash case,
		 * as the process is attempting to unmap/exit or otherwise
		 * release the lock.  Errors in unloading the mapping are not
		 * going to affect that (unmap does not take error return).
		 */
		(void) devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
		(void) devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
	} else {
		int err;

		if (err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE)) {
			/* error unloading lockseg mapping.  abort giveup */
			return (err);
		}

		/*
		 * The old owner gets a mapping to the trash page so it can
		 * continue.  devmap_umem_remap does a hat_unload (and does
		 * it holding the right locks), so there is no need to
		 * devmap_unload on unlockseg.
		 */
		if ((err = devmap_umem_remap(owner->unlockseg, winlock_dip,
		    trashpage_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
			/* error remapping to trash page, abort giveup */
			return (err);
		}
		owner->flag |= TRASHPAGE;
		/*
		 * Preload the mapping to the trash page by calling
		 * devmap_load.  However, devmap_load can only be called in
		 * the faulting process context, not in the owner's process
		 * context, so we preload only if we happen to be in the
		 * owner's process context.  Other processes will fault on
		 * the unlock mapping and be given a trash mapping at that
		 * time.
		 */
		if (ID(owner) == CURPROC_ID) {
			(void) devmap_load(owner->unlockseg, lp->cookie,
			    PAGESIZE, DEVMAP_ACCESS, PROT_WRITE);
		}
	}

	lp->owner = NULL;

	/* Clear the lock value in underlying page so new owner can grab it */
	LOCK(lp) = 0;

	if (lp->sleepers) {
		DEBUGF(4, (CE_CONT, " waking up, lp=%p\n", (void *)lp));
		cv_broadcast(&lp->locksleep);
	}
	return (0);
}

/*
 * destroy all allocated memory.
 */

static void
lock_destroyall(void)
{
	SegLock	*lp, *lpnext;

	ASSERT(MUTEX_HELD(&winlock_mutex));
	ASSERT(lock_list == NULL);

	DEBUGF(1, (CE_CONT, "Lock list empty.  Releasing free list\n"));
	for (lp = lock_free_list; lp != NULL; lp = lpnext) {
		mutex_enter(&lp->mutex);
		lpnext = lp->next;
		ASSERT(lp->clients == NULL);
		ASSERT(lp->owner == NULL);
		ASSERT(lp->alloccount == 0);
		mutex_destroy(&lp->mutex);
		cv_destroy(&lp->locksleep);
		kmem_free(lp, sizeof (SegLock));
	}
	lock_free_list = NULL;
	next_lock = 0;
}


/* RFE: create mdb walkers instead of dump routines? */
static void
seglock_dump_all(void)
{
	SegLock	*lp;

	mutex_enter(&winlock_mutex);
	cmn_err(CE_CONT, "ID\tKEY\tNALLOC\tATTCH\tOWNED\tLOCK\tWAITER\n");

	cmn_err(CE_CONT, "Lock List:\n");
	for (lp = lock_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
		    lp->cookie, lp->key, lp->alloccount,
		    lp->clients ? 'Y' : 'N',
		    lp->owner ? 'Y' : 'N',
		    lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
		    lp->sleepers);
		mutex_exit(&lp->mutex);
	}
	cmn_err(CE_CONT, "Free Lock List:\n");
	for (lp = lock_free_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
		    lp->cookie, lp->key, lp->alloccount,
		    lp->clients ? 'Y' : 'N',
		    lp->owner ? 'Y' : 'N',
		    lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
		    lp->sleepers);
		mutex_exit(&lp->mutex);
	}

#ifdef DEBUG
	if (lock_debug < 3) {
		mutex_exit(&winlock_mutex);
		return;
	}

	for (lp = lock_list; lp != NULL; lp = lp->next) {
		SegProc	*sdp;

		mutex_enter(&lp->mutex);
		cmn_err(CE_CONT,
		    "lock %p, key=%d, cookie=%d, nalloc=%u, lock=%d, wait=%d\n",
		    (void *)lp, lp->key, lp->cookie, lp->alloccount,
		    lp->lockptr != 0 ? LOCK(lp) : -1, lp->sleepers);

		cmn_err(CE_CONT,
		    "style=%d, lockptr=%p, timeout=%ld, clients=%p, owner=%p\n",
		    lp->style, (void *)lp->lockptr, lp->timeout,
		    (void *)lp->clients, (void *)lp->owner);


		for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
			cmn_err(CE_CONT, "  client %p%s, lp=%p, flag=%x, "
			    "process tag=%p, lockseg=%p, unlockseg=%p\n",
			    (void *)sdp, sdp == lp->owner ? " (owner)" : "",
			    (void *)sdp->lp, sdp->flag, (void *)ID(sdp),
			    (void *)sdp->lockseg, (void *)sdp->unlockseg);
		}
		mutex_exit(&lp->mutex);
	}
#endif
	mutex_exit(&winlock_mutex);
}

#include <sys/modctl.h>

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"Winlock Driver v%I%",	/* Name of the module */
	&winlock_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	0,
	0,
	0
};

int
_init(void)
{
	int e;

	mutex_init(&winlock_mutex, NULL, MUTEX_DEFAULT, NULL);
	e = mod_install(&modlinkage);
	if (e) {
		mutex_destroy(&winlock_mutex);
	}
	return (e);
}


int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int e;

	e = mod_remove(&modlinkage);
	if (e == 0) {
		mutex_destroy(&winlock_mutex);
	}
	return (e);
}