1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27 /*
28 * This is the lock device driver.
29 *
30 * The lock driver provides a variation of inter-process mutexes with the
31 * following twist in semantics:
32 * A waiter for a lock after a set timeout can "break" the lock and
33 * grab it from the current owner (without informing the owner).
34 *
35 * These semantics result in temporarily multiple processes thinking they
36 * own the lock. This usually does not make sense for cases where locks are
37 * used to protect a critical region and it is important to serialize access
38 * to data structures. As breaking the lock will also lose the serialization
39 * and result in corrupt data structures.
40 *
41 * The usage for winlock driver is primarily driven by the graphics system
42 * when doing DGA (direct graphics access) graphics. The locks are used to
43 * protect access to the frame buffer (presumably reflects back to the screen)
44 * between competing processes that directly write to the screen as opposed
45 * to going through the window server etc.
46 * In this case, the result of breaking the lock at worst causes the screen
47 * image to be distorted and is easily fixed by doing a "refresh"
48 *
49 * In well-behaved applications, the lock is held for a very short time and
50 * the breaking semantics do not come into play. Not having this feature and
51 * using normal inter-process mutexes will result in a misbehaved application
52 * from grabbing the screen writing capability from the window manager and
53 * effectively make the system look like it is hung (mouse pointer does not
54 * move).
55 *
 * A secondary aspect of the winlock driver is that it allows for extremely
 * fast lock acquire/release in cases where there is low contention. A memory
 * write is all that is needed (not even a function call). The window
 * manager is usually the only DGA writer, and this is the case that is
 * optimized for. Occasionally some other processes might do DGA graphics and
 * cause kernel faults to handle the contention/locking (and that has got to
 * be slow!).
62 *
63 * The following IOCTLs are supported:
64 *
65 * GRABPAGEALLOC:
66 * Compatibility with old cgsix device driver lockpage ioctls.
67 * Lockpages created this way must be an entire page for compatibility with
68 * older software. This ioctl allocates a lock context with its own
69 * private lock page. The unique "ident" that identifies this lock is
70 * returned.
71 *
72 * GRABPAGEFREE:
73 * Compatibility with cgsix device driver lockpage ioctls. This
74 * ioctl releases the lock context allocated by GRABPAGEALLOC.
75 *
76 * GRABLOCKINFO:
77 * Returns a one-word flag. '1' means that multiple clients may
78 * access this lock page. Older device drivers returned '0',
79 * meaning that only two clients could access a lock page.
80 *
81 * GRABATTACH:
82 * Not supported. This ioctl would have grabbed all lock pages
83 * on behalf of the calling program.
84 *
85 * WINLOCKALLOC:
 * Allocate a lock context. This ioctl accepts a key value as
 * its argument. If the key is zero, a new lock context is
 * created, and its "ident" is returned. If the key is nonzero,
 * all existing contexts are checked to see if they match the
 * key. If a match is found, its reference count is incremented
 * and its ident is returned, otherwise a new context is created
 * and its ident is returned.
93 *
94 * WINLOCKFREE:
95 * Free a lock context. This ioctl accepts the ident of a lock
96 * context and decrements its reference count. Once the reference
97 * count reaches zero *and* all mappings are released, the lock
98 * context is freed. When all the lock context in the lock page are
99 * freed, the lock page is freed as well.
100 *
101 * WINLOCKSETTIMEOUT:
102 * Set lock timeout for a context. This ioctl accepts the ident
103 * of a lock context and a timeout value in milliseconds.
104 * Whenever lock contention occurs, the timer is started and the lock is
105 * broken after the timeout expires. If timeout value is zero, lock does
106 * not timeout. This value will be rounded to the nearest clock
107 * tick, so don't try to use it for real-time control or something.
108 *
109 * WINLOCKGETTIMEOUT:
110 * Get lock timeout from a context.
111 *
112 * WINLOCKDUMP:
113 * Dump state of this device.
114 *
115 *
116 * How /dev/winlock works:
117 *
118 * Every lock context consists of two mappings for the client to the lock
119 * page. These mappings are known as the "lock page" and "unlock page"
 * to the client. The first mmap to the lock context (identified by the
 * sy_ident field returned during alloc) allocates a mapping to the lock page,
 * the second mmap allocates a mapping to the unlock page.
 * The mappings don't have to be ordered in virtual address space, but do
 * need to be ordered in time. Mapping and unmapping of these lock and unlock
 * pages should happen in pairs. Doing them one at a time, or unmapping one
 * and leaving the other mapped, causes undefined behavior.
127 * The mappings are always of length PAGESIZE, and type MAP_SHARED.
128 *
129 * The first ioctl is to ALLOC a lock, either based on a key (if trying to
130 * grab a preexisting lock) or 0 (gets a default new one)
131 * This ioctl returns a value in sy_ident which is needed to do the
132 * later mmaps and FREE/other ioctls.
133 *
134 * The "page number" portion of the sy_ident needs to be passed as the
135 * file offset when doing an mmap for both the lock page and unlock page
136 *
137 * The value returned by mmap ( a user virtual address) needs to be
138 * incremented by the "page offset" portion of sy_ident to obtain the
139 * pointer to the actual lock. (Skipping this step, does not cause any
140 * visible error, but the process will be using the wrong lock!)
141 *
142 * On a fork(), the child process will inherit the mappings for free, but
143 * will not inherit the parent's lock ownership if any. The child should NOT
144 * do an explicit FREE on the lock context unless it did an explicit ALLOC.
145 * Only one process at a time is allowed to have a valid hat
146 * mapping to a lock page. This is enforced by this driver.
147 * A client acquires a lock by writing a '1' to the lock page.
 * Note that it is not necessary to read and verify that the lock is '0'
 * prior to writing a '1' in it.
150 * If it does not already have a valid mapping to that page, the driver
151 * takes a fault (devmap_access), loads the client mapping
152 * and allows the client to continue. The client releases the lock by
153 * writing a '0' to the unlock page. Again, if it does not have a valid
154 * mapping to the unlock page, the segment driver takes a fault,
155 * loads the mapping, and lets the client continue. From this point
156 * forward, the client can make as many locks and unlocks as it
157 * wants, without any more faults into the kernel.
158 *
159 * If a different process wants to acquire a lock, it takes a page fault
160 * when it writes the '1' to the lock page. If the segment driver sees
161 * that the lock page contained a zero, then it invalidates the owner's
162 * mappings and gives the mappings to this process.
163 *
164 * If there is already a '1' in the lock page when the second client
165 * tries to access the lock page, then a lock exists. The segment
166 * driver sleeps the second client and, if applicable, starts the
167 * timeout on the lock. The owner's mapping to the unlock page
168 * is invalidated so that the driver will be woken again when the owner
169 * releases the lock.
170 *
171 * When the locking client finally writes a '0' to the unlock page, the
172 * segment driver takes another fault. The client is given a valid
173 * mapping, not to the unlock page, but to the "trash page", and allowed
174 * to continue. Meanwhile, the sleeping client is given a valid mapping
175 * to the lock/unlock pages and allowed to continue as well.
176 *
177 * RFE: There is a leak if process exits before freeing allocated locks
178 * But currently not tracking which locks were allocated by which
179 * process and we do not have a clean entry point into the driver
180 * to do garbage collection. If the interface used a file descriptor for each
181 * lock it allocs, then the driver can free up stuff in the _close routine
182 */
183
184 #include <sys/types.h> /* various type defn's */
185 #include <sys/debug.h>
186 #include <sys/param.h> /* various kernel limits */
187 #include <sys/time.h>
188 #include <sys/errno.h>
189 #include <sys/kmem.h> /* defines kmem_alloc() */
190 #include <sys/conf.h> /* defines cdevsw */
191 #include <sys/file.h> /* various file modes, etc. */
192 #include <sys/uio.h> /* UIO stuff */
193 #include <sys/ioctl.h>
194 #include <sys/cred.h> /* defines cred struct */
195 #include <sys/mman.h> /* defines mmap(2) parameters */
196 #include <sys/stat.h> /* defines S_IFCHR */
197 #include <sys/cmn_err.h> /* use cmn_err */
198 #include <sys/ddi.h> /* ddi stuff */
199 #include <sys/sunddi.h> /* ddi stuff */
200 #include <sys/ddi_impldefs.h> /* ddi stuff */
201 #include <sys/winlockio.h> /* defines ioctls, flags, data structs */
202
203 static int winlock_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
204 static int winlock_devmap(dev_t, devmap_cookie_t, offset_t, size_t,
205 size_t *, uint_t);
206 static int winlocksegmap(dev_t, off_t, struct as *, caddr_t *, off_t,
207 uint_t, uint_t, uint_t, cred_t *);
208
/*
 * Character device entry points for the winlock pseudo device.
 * The driver itself only supplies ioctl, devmap and segmap; open and close
 * are nulldev, and every other entry point is nodev/nochpoll.
 * The initializers are positional, so their order must not be changed.
 */
static struct cb_ops winlock_cb_ops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	winlock_ioctl,		/* ioctl */
	winlock_devmap,		/* devmap */
	nodev,			/* mmap */
	winlocksegmap,		/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,			/* rev */
	nodev,			/* aread */
	nodev			/* awrite */
};
229
230 static int winlock_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
231 static int winlock_attach(dev_info_t *, ddi_attach_cmd_t);
232 static int winlock_detach(dev_info_t *, ddi_detach_cmd_t);
233
/*
 * Device operations vector for the winlock driver.
 * Ties the autoconfiguration entry points (info/attach/detach) to the
 * character entry points in winlock_cb_ops. There are no bus or power
 * operations. The initializers are positional, so order matters.
 */
static struct dev_ops winlock_ops = {
	DEVO_REV,
	0,			/* refcount */
	winlock_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	winlock_attach,		/* attach */
	winlock_detach,		/* detach */
	nodev,			/* reset */
	&winlock_cb_ops,	/* driver ops */
	NULL,			/* bus ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
248
249 static int winlockmap_map(devmap_cookie_t, dev_t, uint_t, offset_t, size_t,
250 void **);
251 static void winlockmap_unmap(devmap_cookie_t, void *, offset_t, size_t,
252 devmap_cookie_t, void **, devmap_cookie_t, void **);
253 static int winlockmap_dup(devmap_cookie_t, void *,
254 devmap_cookie_t, void **);
255 static int winlockmap_access(devmap_cookie_t, void *, offset_t, size_t,
256 uint_t, uint_t);
257
/*
 * devmap callbacks for lock/unlock page mappings. This table is handed to
 * devmap_umem_setup() by winlock_devmap(). The initializers are positional:
 * note that the access callback comes before dup and unmap.
 */
static
struct devmap_callback_ctl winlockmap_ops = {
	DEVMAP_OPS_REV,		/* callback structure revision */
	winlockmap_map,		/* map: attach SegProc as private data */
	winlockmap_access,	/* access: fault on lock/unlock page */
	winlockmap_dup,		/* dup: mapping duplicated, as in fork() */
	winlockmap_unmap,	/* unmap: mapping torn down */
};
266
/*
 * Debug tracing. In DEBUG builds, DEBUGF(level, (cmn_err-args)) calls
 * cmn_err with the parenthesized argument list when lock_debug >= level.
 * lock_debug can be changed at run time through the (WIOC|255) ioctl.
 * In non-DEBUG builds DEBUGF expands to nothing.
 *
 * NOTE(review): the DEBUG expansion is a bare brace block rather than
 * do { } while (0), so "DEBUGF(...);" must not be used as the unbraced body
 * of an if that is followed by an else.
 */
#if DEBUG
static int lock_debug = 0;
#define	DEBUGF(level, args)	{ if (lock_debug >= (level)) cmn_err args; }
#else
#define	DEBUGF(level, args)
#endif
273
/*
 * Driver supports two styles of locks:
 *   NEWSTYLE_LOCK - created by the WINLOCKALLOC ioctl; the lock word lives
 *                   in the common lock page shared by all newstyle locks.
 *   OLDSTYLE_LOCK - created by the GRABPAGEALLOC ioctl; the lock gets a
 *                   umem-allocated page of its own.
 */
enum winlock_style { NEWSTYLE_LOCK, OLDSTYLE_LOCK };
276
277 /*
278 * These structures describe a lock context. We permit multiple
279 * clients (not just two) to access a lock page
280 *
281 * The "cookie" identifies the lock context. It is the page number portion
282 * sy_ident returned on lock allocation. Cookie is used in later ioctls.
283 * "cookie" is lockid * PAGESIZE
284 * "lockptr" is the kernel virtual address to the lock itself
285 * The page offset portion of lockptr is the page offset portion of sy_ident
286 */
287
288 /*
289 * per-process information about locks. This is the private field of
290 * a devmap mapping. Note that usually *two* mappings point to this.
291 */
292
293 /*
294 * Each process using winlock is associated with a segproc structure
295 * In various driver entry points, we need to search to find the right
296 * segproc structure (If we were using file handles for each lock this
297 * would not have been necessary).
298 * It would have been simple to use the process pid (and ddi_get_pid)
299 * However, during fork devmap_dup is called in the parent process context
300 * and using the pid complicates the code by introducing orphans.
301 * Instead we use the as pointer for the process as a cookie
302 * which requires delving into various non-DDI kosher structs
303 */
/*
 * One SegProc exists per (lock, address space) pair and is linked on the
 * owning lock's "clients" list. All fields are protected by lp->mutex.
 */
typedef struct segproc {
	struct segproc	*next;		/* next client of this lock */
	struct seglock	*lp;		/* associated lock context */
	devmap_cookie_t	lockseg;	/* lock mapping, if any */
	devmap_cookie_t	unlockseg;	/* unlock mapping, if any */
	void		*tag;		/* process as pointer as tag */
	uint_t		flag;		/* see "flag bits" in winlockio.h */
} SegProc;

/* Tag identifying the address space a SegProc belongs to */
#define	ID(sdp)		((sdp)->tag)
/* Tag of the calling process: its address space pointer */
#define	CURPROC_ID	(void *)(curproc->p_as)
315
/*
 * per lock context information
 *
 * The "next" field is protected by winlock_mutex; every other field, and
 * the SegProc list hanging off "clients", is protected by "mutex".
 * Lock ordering is winlock_mutex before SegLock mutex.
 */

typedef struct seglock {
	struct seglock	*next;		/* next lock */
	uint_t		sleepers;	/* nthreads sleeping on this lock */
	uint_t		alloccount;	/* how many times created? */
	uint_t		cookie;		/* mmap() offset (page #) into device */
	uint_t		key;		/* key, if any */
	enum winlock_style	style;	/* style of lock - OLDSTYLE, NEWSTYLE */
	clock_t		timeout;	/* sleep time in ticks */
	ddi_umem_cookie_t umem_cookie;	/* cookie for umem allocated memory */
	int		*lockptr;	/* kernel virtual addr of lock */
	struct segproc	*clients;	/* list of clients of this lock */
	struct segproc	*owner;		/* current owner of lock */
	kmutex_t	mutex;		/* mutex for lock */
	kcondvar_t	locksleep;	/* for sleeping on lock */
} SegLock;

/* The lock word itself, accessed through the kernel mapping */
#define	LOCK(lp)	(*((lp)->lockptr))
335
/*
 * Number of locks that can fit in a page. Driver can support only that many.
 * For oldstyle locks, it is relatively easy to increase the limit as each
 * is in a separate page (MAX_LOCKS mostly serves to prevent runaway
 * allocation). For newstyle locks, this is trickier as the code needs to
 * allow for mapping into the second or third page of the cookie for some
 * locks.
 */
/* One int-sized lock word per lock; seglock_createlock() stops at this count */
#define	MAX_LOCKS	(PAGESIZE/sizeof (int))

#define	LOCKTIME	3	/* Default lock timeout in seconds */


/* Protections setting for winlock user mappings */
#define	WINLOCK_PROT	(PROT_READ|PROT_WRITE|PROT_USER)
350
/*
 * The trash page is where unwanted writes go
 * when a process is releasing a lock.
 * Freed (and reset to NULL) by winlock_detach().
 */
static ddi_umem_cookie_t trashpage_cookie = NULL;

/*
 * For newstyle allocations a common page of locks is used.
 * lockpage is the kernel address of that page and lockpage_cookie its umem
 * cookie; both are released together in winlock_detach().
 */
static caddr_t lockpage = NULL;
static ddi_umem_cookie_t lockpage_cookie = NULL;

/* The single winlock instance; set in attach, cleared in detach */
static dev_info_t *winlock_dip = NULL;
/* Global driver mutex; see the locking notes that follow for what it covers */
static kmutex_t winlock_mutex;
363
364 /*
365 * winlock_mutex protects
366 * lock_list
367 * lock_free_list
368 * "next" field in SegLock
369 * next_lock
370 * trashpage_cookie
371 * lockpage & lockpage_cookie
372 *
373 * SegLock_mutex protects
374 * rest of fields in SegLock
375 * All fields in list of SegProc (lp->clients)
376 *
377 * Lock ordering is winlock_mutex->SegLock_mutex
378 * During devmap/seg operations SegLock_mutex acquired without winlock_mutex
379 *
380 * During devmap callbacks, the pointer to SegProc is stored as the private
381 * data in the devmap handle. This pointer will not go stale (i.e., the
382 * SegProc getting deleted) as the SegProc is not deleted until both the
383 * lockseg and unlockseg have been unmapped and the pointers stored in
384 * the devmap handles have been NULL'ed.
385 * But before this pointer is used to access any fields (other than the 'lp')
386 * lp->mutex must be held.
387 */
388
389 /*
390 * The allocation code tries to allocate from lock_free_list
391 * first, otherwise it uses kmem_zalloc. When lock list is idle, all
392 * locks in lock_free_list are kmem_freed
393 */
/* All three are protected by winlock_mutex */
static SegLock *lock_list = NULL;	/* in-use locks */
static SegLock *lock_free_list = NULL;	/* free locks */
/*
 * Number of SegLock structures allocated so far. A freshly allocated lock
 * gets cookie (next_lock + 1) * PAGESIZE before this is incremented.
 */
static int next_lock = 0;		/* next lock cookie */
397
398 /* Routines to find a lock in lock_list based on offset or key */
399 static SegLock *seglock_findlock(uint_t);
400 static SegLock *seglock_findkey(uint_t);
401
402 /* Routines to find and allocate SegProc structures */
403 static SegProc *seglock_find_specific(SegLock *, void *);
404 static SegProc *seglock_alloc_specific(SegLock *, void *);
405 #define seglock_findclient(lp) seglock_find_specific((lp), CURPROC_ID)
406 #define seglock_allocclient(lp) seglock_alloc_specific((lp), CURPROC_ID)
407
408 /* Delete client from lock's client list */
409 static void seglock_deleteclient(SegLock *, SegProc *);
410 static void garbage_collect_lock(SegLock *, SegProc *);
411
412 /* Create a new lock */
413 static SegLock *seglock_createlock(enum winlock_style);
414 /* Destroy lock */
415 static void seglock_destroylock(SegLock *);
416 static void lock_destroyall(void);
417
418 /* Helper functions in winlockmap_access */
419 static int give_mapping(SegLock *, SegProc *, uint_t);
420 static int lock_giveup(SegLock *, int);
421 static int seglock_lockfault(devmap_cookie_t, SegProc *, SegLock *, uint_t);
422
423 /* routines called from ioctl */
424 static int seglock_graballoc(intptr_t, enum winlock_style, int);
425 static int seglock_grabinfo(intptr_t, int);
426 static int seglock_grabfree(intptr_t, int);
427 static int seglock_gettimeout(intptr_t, int);
428 static int seglock_settimeout(intptr_t, int);
429 static void seglock_dump_all(void);
430
431 static int
winlock_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)432 winlock_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
433 {
434 DEBUGF(1, (CE_CONT, "winlock_attach, devi=%p, cmd=%d\n",
435 (void *)devi, (int)cmd));
436 if (cmd != DDI_ATTACH)
437 return (DDI_FAILURE);
438 if (ddi_create_minor_node(devi, "winlock", S_IFCHR, 0, DDI_PSEUDO, 0)
439 == DDI_FAILURE) {
440 return (DDI_FAILURE);
441 }
442 winlock_dip = devi;
443 ddi_report_dev(devi);
444 return (DDI_SUCCESS);
445 }
446
447 /*ARGSUSED*/
448 static int
winlock_detach(dev_info_t * devi,ddi_detach_cmd_t cmd)449 winlock_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
450 {
451 DEBUGF(1, (CE_CONT, "winlock_detach, devi=%p, cmd=%d\n",
452 (void *)devi, (int)cmd));
453 if (cmd != DDI_DETACH)
454 return (DDI_FAILURE);
455
456 mutex_enter(&winlock_mutex);
457 if (lock_list != NULL) {
458 mutex_exit(&winlock_mutex);
459 return (DDI_FAILURE);
460 }
461 ASSERT(lock_free_list == NULL);
462
463 DEBUGF(1, (CE_CONT, "detach freeing trashpage and lockpage\n"));
464 /* destroy any common stuff created */
465 if (trashpage_cookie != NULL) {
466 ddi_umem_free(trashpage_cookie);
467 trashpage_cookie = NULL;
468 }
469 if (lockpage != NULL) {
470 ddi_umem_free(lockpage_cookie);
471 lockpage = NULL;
472 lockpage_cookie = NULL;
473 }
474 winlock_dip = NULL;
475 mutex_exit(&winlock_mutex);
476 return (DDI_SUCCESS);
477 }
478
479 /*ARGSUSED*/
480 static int
winlock_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)481 winlock_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
482 {
483 register int error;
484
485 /* initialize result */
486 *result = NULL;
487
488 /* only valid instance (i.e., getminor) is 0 */
489 if (getminor((dev_t)arg) >= 1)
490 return (DDI_FAILURE);
491
492 switch (infocmd) {
493 case DDI_INFO_DEVT2DEVINFO:
494 if (winlock_dip == NULL)
495 error = DDI_FAILURE;
496 else {
497 *result = (void *)winlock_dip;
498 error = DDI_SUCCESS;
499 }
500 break;
501 case DDI_INFO_DEVT2INSTANCE:
502 *result = (void *)0;
503 error = DDI_SUCCESS;
504 break;
505 default:
506 error = DDI_FAILURE;
507 }
508 return (error);
509 }
510
511
512 /*ARGSUSED*/
513 int
winlock_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * cred,int * rval)514 winlock_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
515 cred_t *cred, int *rval)
516 {
517 DEBUGF(1, (CE_CONT, "winlockioctl: cmd=%d, arg=0x%p\n",
518 cmd, (void *)arg));
519
520 switch (cmd) {
521 /*
522 * ioctls that used to be handled by framebuffers (defined in fbio.h)
523 * RFE: No code really calls the GRAB* ioctls now. Should EOL.
524 */
525
526 case GRABPAGEALLOC:
527 return (seglock_graballoc(arg, OLDSTYLE_LOCK, mode));
528 case GRABPAGEFREE:
529 return (seglock_grabfree(arg, mode));
530 case GRABLOCKINFO:
531 return (seglock_grabinfo(arg, mode));
532 case GRABATTACH:
533 return (EINVAL); /* GRABATTACH is not supported (never was) */
534
535 case WINLOCKALLOC:
536 return (seglock_graballoc(arg, NEWSTYLE_LOCK, mode));
537 case WINLOCKFREE:
538 return (seglock_grabfree(arg, mode));
539 case WINLOCKSETTIMEOUT:
540 return (seglock_settimeout(arg, mode));
541 case WINLOCKGETTIMEOUT:
542 return (seglock_gettimeout(arg, mode));
543 case WINLOCKDUMP:
544 seglock_dump_all();
545 return (0);
546
547 #ifdef DEBUG
548 case (WIOC|255):
549 lock_debug = arg;
550 return (0);
551 #endif
552
553 default:
554 return (ENOTTY); /* Why is this not EINVAL */
555 }
556 }
557
558 int
winlocksegmap(dev_t dev,off_t off,struct as * as,caddr_t * addr,off_t len,uint_t prot,uint_t maxprot,uint_t flags,cred_t * cred)559 winlocksegmap(
560 dev_t dev, /* major:minor */
561 off_t off, /* device offset from mmap(2) */
562 struct as *as, /* user's address space. */
563 caddr_t *addr, /* address from mmap(2) */
564 off_t len, /* length from mmap(2) */
565 uint_t prot, /* user wants this access */
566 uint_t maxprot, /* this is the maximum the user can have */
567 uint_t flags, /* flags from mmap(2) */
568 cred_t *cred)
569 {
570 DEBUGF(1, (CE_CONT, "winlock_segmap off=%lx, len=0x%lx\n", off, len));
571
572 /* Only MAP_SHARED mappings are supported */
573 if ((flags & MAP_TYPE) == MAP_PRIVATE) {
574 return (EINVAL);
575 }
576
577 /* Use devmap_setup to setup the mapping */
578 return (devmap_setup(dev, (offset_t)off, as, addr, (size_t)len, prot,
579 maxprot, flags, cred));
580 }
581
582 /*ARGSUSED*/
583 int
winlock_devmap(dev_t dev,devmap_cookie_t dhp,offset_t off,size_t len,size_t * maplen,uint_t model)584 winlock_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
585 size_t *maplen, uint_t model)
586 {
587 SegLock *lp;
588 int err;
589
590 DEBUGF(1, (CE_CONT, "winlock devmap: off=%llx, len=%lx, dhp=%p\n",
591 off, len, (void *)dhp));
592
593 *maplen = 0;
594
595 /* Check if the lock exists, i.e., has been created by alloc */
596 /* off is the sy_ident returned in the alloc ioctl */
597 if ((lp = seglock_findlock((uint_t)off)) == NULL) {
598 return (ENXIO);
599 }
600
601 /*
602 * The offset bits in mmap(2) offset has to be same as in lockptr
603 * OR the offset should be 0 (i.e. masked off)
604 */
605 if (((off & PAGEOFFSET) != 0) &&
606 ((off ^ (uintptr_t)(lp->lockptr)) & (offset_t)PAGEOFFSET) != 0) {
607 DEBUGF(2, (CE_CONT,
608 "mmap offset %llx mismatch with lockptr %p\n",
609 off, (void *)lp->lockptr));
610 mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
611 return (EINVAL);
612 }
613
614 /* Only supports PAGESIZE length mappings */
615 if (len != PAGESIZE) {
616 mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
617 return (EINVAL);
618 }
619
620 /*
621 * Set up devmap to point at page associated with lock
622 * RFE: At this point we dont know if this is a lockpage or unlockpage
623 * a lockpage would not need DEVMAP_ALLOW_REMAP setting
624 * We could have kept track of the mapping order here,
625 * but devmap framework does not support storing any state in this
626 * devmap callback as it does not callback for error cleanup if some
627 * other error happens in the framework.
628 * RFE: We should modify the winlock mmap interface so that the
629 * user process marks in the offset passed in whether this is for a
630 * lock or unlock mapping instead of guessing based on order of maps
631 * This would cleanup other things (such as in fork)
632 */
633 if ((err = devmap_umem_setup(dhp, winlock_dip, &winlockmap_ops,
634 lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT,
635 DEVMAP_ALLOW_REMAP, 0)) < 0) {
636 mutex_exit(&lp->mutex); /* held by seglock_findlock */
637 return (err);
638 }
639 /*
640 * No mappings are loaded to those segments yet. The correctness
641 * of the winlock semantics depends on the devmap framework/seg_dev NOT
642 * loading the translations without calling _access callback.
643 */
644
645 mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
646 *maplen = PAGESIZE;
647 return (0);
648 }
649
650 /*
651 * This routine is called by the devmap framework after the devmap entry point
652 * above and the mapping is setup in seg_dev.
653 * We store the pointer to the per-process context in the devmap private data.
654 */
655 /*ARGSUSED*/
656 static int
winlockmap_map(devmap_cookie_t dhp,dev_t dev,uint_t flags,offset_t off,size_t len,void ** pvtp)657 winlockmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
658 size_t len, void **pvtp)
659 {
660 SegLock *lp = seglock_findlock((uint_t)off); /* returns w/ mutex held */
661 SegProc *sdp;
662
663 ASSERT(len == PAGESIZE);
664
665 /* Find the per-process context for this lock, alloc one if not found */
666 sdp = seglock_allocclient(lp);
667
668 /*
669 * RFE: Determining which is a lock vs unlock seg is based on order
670 * of mmaps, we should change that to be derivable from off
671 */
672 if (sdp->lockseg == NULL) {
673 sdp->lockseg = dhp;
674 } else if (sdp->unlockseg == NULL) {
675 sdp->unlockseg = dhp;
676 } else {
677 /* attempting to map lock more than twice */
678 mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
679 return (ENOMEM);
680 }
681
682 *pvtp = sdp;
683 mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
684 return (DDI_SUCCESS);
685 }
686
687 /*
688 * duplicate a segment, as in fork()
689 * On fork, the child inherits the mappings to the lock
690 * lp->alloccount is NOT incremented, so child should not do a free().
691 * Semantics same as if done an alloc(), map(), map().
692 * This way it would work fine if doing an exec() variant later
693 * Child does not inherit any UFLAGS set in parent
694 * The lock and unlock pages are started off unmapped, i.e., child does not
695 * own the lock.
696 * The code assumes that the child process has a valid pid at this point
697 * RFE: This semantics depends on fork not duplicating the hat mappings
698 * (which is the current implementation). To enforce it would need to
699 * call devmap_unload from here - not clear if that is allowed.
700 */
701
702 static int
winlockmap_dup(devmap_cookie_t dhp,void * oldpvt,devmap_cookie_t new_dhp,void ** newpvt)703 winlockmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
704 void **newpvt)
705 {
706 SegProc *sdp = (SegProc *)oldpvt;
707 SegProc *ndp;
708 SegLock *lp = sdp->lp;
709
710 mutex_enter(&lp->mutex);
711 ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
712
713 /*
714 * Note: At this point, the child process does have a pid, but
715 * the arguments passed to as_dup and hence to devmap_dup dont pass it
716 * down. So we cannot use normal seglock_findclient - which finds the
717 * parent sdp itself!
718 * Instead we allocate the child's SegProc by using the child as pointer
719 * RFE: we are using the as stucture which means peeking into the
720 * devmap_cookie. This is not DDI-compliant. Need a compliant way of
721 * getting at either the as or, better, a way to get the child's new pid
722 */
723 ndp = seglock_alloc_specific(lp,
724 (void *)((devmap_handle_t *)new_dhp)->dh_seg->s_as);
725 ASSERT(ndp != sdp);
726
727 if (sdp->lockseg == dhp) {
728 ASSERT(ndp->lockseg == NULL);
729 ndp->lockseg = new_dhp;
730 } else {
731 ASSERT(sdp->unlockseg == dhp);
732 ASSERT(ndp->unlockseg == NULL);
733 ndp->unlockseg = new_dhp;
734 if (sdp->flag & TRASHPAGE) {
735 ndp->flag |= TRASHPAGE;
736 }
737 }
738 mutex_exit(&lp->mutex);
739 *newpvt = (void *)ndp;
740 return (0);
741 }
742
743
744 /*ARGSUSED*/
745 static void
winlockmap_unmap(devmap_cookie_t dhp,void * pvtp,offset_t off,size_t len,devmap_cookie_t new_dhp1,void ** newpvtp1,devmap_cookie_t new_dhp2,void ** newpvtp2)746 winlockmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
747 devmap_cookie_t new_dhp1, void **newpvtp1,
748 devmap_cookie_t new_dhp2, void **newpvtp2)
749 {
750 SegProc *sdp = (SegProc *)pvtp;
751 SegLock *lp = sdp->lp;
752
753 /*
754 * We always create PAGESIZE length mappings, so there should never
755 * be a partial unmapping case
756 */
757 ASSERT((new_dhp1 == NULL) && (new_dhp2 == NULL));
758
759 mutex_enter(&lp->mutex);
760 ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
761 /* make sure this process doesn't own the lock */
762 if (sdp == lp->owner) {
763 /*
764 * Not handling errors - i.e., errors in unloading mapping
765 * As part of unmapping hat/seg structure get torn down anyway
766 */
767 (void) lock_giveup(lp, 0);
768 }
769
770 ASSERT(sdp != lp->owner);
771 if (sdp->lockseg == dhp) {
772 sdp->lockseg = NULL;
773 } else {
774 ASSERT(sdp->unlockseg == dhp);
775 sdp->unlockseg = NULL;
776 sdp->flag &= ~TRASHPAGE; /* clear flag if set */
777 }
778
779 garbage_collect_lock(lp, sdp);
780 }
781
782 /*ARGSUSED*/
783 static int
winlockmap_access(devmap_cookie_t dhp,void * pvt,offset_t off,size_t len,uint_t type,uint_t rw)784 winlockmap_access(devmap_cookie_t dhp, void *pvt, offset_t off, size_t len,
785 uint_t type, uint_t rw)
786 {
787 SegProc *sdp = (SegProc *)pvt;
788 SegLock *lp = sdp->lp;
789 int err;
790
791 /* Driver handles only DEVMAP_ACCESS type of faults */
792 if (type != DEVMAP_ACCESS)
793 return (-1);
794
795 mutex_enter(&lp->mutex);
796 ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
797
798 /* should be using a SegProc that corresponds to current process */
799 ASSERT(ID(sdp) == CURPROC_ID);
800
801 /*
802 * If process is faulting but does not have both segments mapped
803 * return error (should cause a segv).
804 * RFE: could give it a permanent trashpage
805 */
806 if ((sdp->lockseg == NULL) || (sdp->unlockseg == NULL)) {
807 err = -1;
808 } else {
809 err = seglock_lockfault(dhp, sdp, lp, rw);
810 }
811 mutex_exit(&lp->mutex);
812 return (err);
813 }
814
815 /* INTERNAL ROUTINES START HERE */
816
817
818
819 /*
820 * search the lock_list list for the specified cookie
821 * The cookie is the sy_ident field returns by ALLOC ioctl.
822 * This has two parts:
823 * the pageoffset bits contain offset into the lock page.
824 * the pagenumber bits contain the lock id.
825 * The user code is supposed to pass in only the pagenumber portion
826 * (i.e. mask off the pageoffset bits). However the code below
827 * does the mask in case the users are not diligent
828 * if found, returns with mutex for SegLock structure held
829 */
830 static SegLock *
seglock_findlock(uint_t cookie)831 seglock_findlock(uint_t cookie)
832 {
833 SegLock *lp;
834
835 cookie &= (uint_t)PAGEMASK; /* remove pageoffset bits to get cookie */
836 mutex_enter(&winlock_mutex);
837 for (lp = lock_list; lp != NULL; lp = lp->next) {
838 mutex_enter(&lp->mutex);
839 if (cookie == lp->cookie) {
840 break; /* return with lp->mutex held */
841 }
842 mutex_exit(&lp->mutex);
843 }
844 mutex_exit(&winlock_mutex);
845 return (lp);
846 }
847
848 /*
849 * search the lock_list list for the specified non-zero key
850 * if found, returns with lock for SegLock structure held
851 */
852 static SegLock *
seglock_findkey(uint_t key)853 seglock_findkey(uint_t key)
854 {
855 SegLock *lp;
856
857 ASSERT(MUTEX_HELD(&winlock_mutex));
858 /* The driver allows multiple locks with key 0, dont search */
859 if (key == 0)
860 return (NULL);
861 for (lp = lock_list; lp != NULL; lp = lp->next) {
862 mutex_enter(&lp->mutex);
863 if (key == lp->key)
864 break;
865 mutex_exit(&lp->mutex);
866 }
867 return (lp);
868 }
869
870 /*
871 * Create a new lock context.
872 * Returns with SegLock mutex held
873 */
874
875 static SegLock *
seglock_createlock(enum winlock_style style)876 seglock_createlock(enum winlock_style style)
877 {
878 SegLock *lp;
879
880 DEBUGF(3, (CE_CONT, "seglock_createlock: free_list=%p, next_lock %d\n",
881 (void *)lock_free_list, next_lock));
882
883 ASSERT(MUTEX_HELD(&winlock_mutex));
884 if (lock_free_list != NULL) {
885 lp = lock_free_list;
886 lock_free_list = lp->next;
887 } else if (next_lock >= MAX_LOCKS) {
888 return (NULL);
889 } else {
890 lp = kmem_zalloc(sizeof (SegLock), KM_SLEEP);
891 lp->cookie = (next_lock + 1) * (uint_t)PAGESIZE;
892 mutex_init(&lp->mutex, NULL, MUTEX_DEFAULT, NULL);
893 cv_init(&lp->locksleep, NULL, CV_DEFAULT, NULL);
894 ++next_lock;
895 }
896
897 mutex_enter(&lp->mutex);
898 ASSERT((lp->cookie/PAGESIZE) <= next_lock);
899
900 if (style == OLDSTYLE_LOCK) {
901 lp->lockptr = (int *)ddi_umem_alloc(PAGESIZE,
902 DDI_UMEM_SLEEP, &(lp->umem_cookie));
903 } else {
904 lp->lockptr = ((int *)lockpage) + ((lp->cookie/PAGESIZE) - 1);
905 lp->umem_cookie = lockpage_cookie;
906 }
907
908 ASSERT(lp->lockptr != NULL);
909 lp->style = style;
910 lp->sleepers = 0;
911 lp->alloccount = 1;
912 lp->timeout = LOCKTIME*hz;
913 lp->clients = NULL;
914 lp->owner = NULL;
915 LOCK(lp) = 0;
916 lp->next = lock_list;
917 lock_list = lp;
918 return (lp);
919 }
920
921 /*
922 * Routine to destory a lock structure.
923 * This routine is called while holding the lp->mutex but not the
924 * winlock_mutex.
925 */
926
927 static void
seglock_destroylock(SegLock * lp)928 seglock_destroylock(SegLock *lp)
929 {
930 ASSERT(MUTEX_HELD(&lp->mutex));
931 ASSERT(!MUTEX_HELD(&winlock_mutex));
932
933 DEBUGF(3, (CE_CONT, "destroying lock cookie %d key %d\n",
934 lp->cookie, lp->key));
935
936 ASSERT(lp->alloccount == 0);
937 ASSERT(lp->clients == NULL);
938 ASSERT(lp->owner == NULL);
939 ASSERT(lp->sleepers == 0);
940
941 /* clean up/release fields in lp */
942 if (lp->style == OLDSTYLE_LOCK) {
943 ddi_umem_free(lp->umem_cookie);
944 }
945 lp->umem_cookie = NULL;
946 lp->lockptr = NULL;
947 lp->key = 0;
948
949 /*
950 * Reduce cookie by 1, makes it non page-aligned and invalid
951 * This prevents any valid lookup from finding this lock
952 * so when we drop the lock and regrab it it will still
953 * be there and nobody else would have attached to it
954 */
955 lp->cookie--;
956
957 /* Drop and reacquire mutexes in right order */
958 mutex_exit(&lp->mutex);
959 mutex_enter(&winlock_mutex);
960 mutex_enter(&lp->mutex);
961
962 /* reincrement the cookie to get the original valid cookie */
963 lp->cookie++;
964 ASSERT((lp->cookie & PAGEOFFSET) == 0);
965 ASSERT(lp->alloccount == 0);
966 ASSERT(lp->clients == NULL);
967 ASSERT(lp->owner == NULL);
968 ASSERT(lp->sleepers == 0);
969
970 /* Remove lp from lock_list */
971 if (lock_list == lp) {
972 lock_list = lp->next;
973 } else {
974 SegLock *tmp = lock_list;
975 while (tmp->next != lp) {
976 tmp = tmp->next;
977 ASSERT(tmp != NULL);
978 }
979 tmp->next = lp->next;
980 }
981
982 /* Add to lock_free_list */
983 lp->next = lock_free_list;
984 lock_free_list = lp;
985 mutex_exit(&lp->mutex);
986
987 /* Check if all locks deleted and cleanup */
988 if (lock_list == NULL) {
989 lock_destroyall();
990 }
991
992 mutex_exit(&winlock_mutex);
993 }
994
995 /* Routine to find a SegProc corresponding to the tag */
996
997 static SegProc *
seglock_find_specific(SegLock * lp,void * tag)998 seglock_find_specific(SegLock *lp, void *tag)
999 {
1000 SegProc *sdp;
1001
1002 ASSERT(MUTEX_HELD(&lp->mutex));
1003 ASSERT(tag != NULL);
1004 for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
1005 if (ID(sdp) == tag)
1006 break;
1007 }
1008 return (sdp);
1009 }
1010
1011 /* Routine to find (and if needed allocate) a SegProc corresponding to tag */
1012
1013 static SegProc *
seglock_alloc_specific(SegLock * lp,void * tag)1014 seglock_alloc_specific(SegLock *lp, void *tag)
1015 {
1016 SegProc *sdp;
1017
1018 ASSERT(MUTEX_HELD(&lp->mutex));
1019 ASSERT(tag != NULL);
1020
1021 /* Search and return if existing one found */
1022 sdp = seglock_find_specific(lp, tag);
1023 if (sdp != NULL)
1024 return (sdp);
1025
1026 DEBUGF(3, (CE_CONT, "Allocating segproc structure for tag %p lock %d\n",
1027 tag, lp->cookie));
1028
1029 /* Allocate a new SegProc */
1030 sdp = kmem_zalloc(sizeof (SegProc), KM_SLEEP);
1031 sdp->next = lp->clients;
1032 lp->clients = sdp;
1033 sdp->lp = lp;
1034 ID(sdp) = tag;
1035 return (sdp);
1036 }
1037
1038 /*
1039 * search a context's client list for the given client and delete
1040 */
1041
1042 static void
seglock_deleteclient(SegLock * lp,SegProc * sdp)1043 seglock_deleteclient(SegLock *lp, SegProc *sdp)
1044 {
1045 ASSERT(MUTEX_HELD(&lp->mutex));
1046 ASSERT(lp->owner != sdp); /* Not current owner of lock */
1047 ASSERT(sdp->lockseg == NULL); /* Mappings torn down */
1048 ASSERT(sdp->unlockseg == NULL);
1049
1050 DEBUGF(3, (CE_CONT, "Deleting segproc structure for pid %d lock %d\n",
1051 ddi_get_pid(), lp->cookie));
1052 if (lp->clients == sdp) {
1053 lp->clients = sdp->next;
1054 } else {
1055 SegProc *tmp = lp->clients;
1056 while (tmp->next != sdp) {
1057 tmp = tmp->next;
1058 ASSERT(tmp != NULL);
1059 }
1060 tmp->next = sdp->next;
1061 }
1062 kmem_free(sdp, sizeof (SegProc));
1063 }
1064
1065 /*
1066 * Routine to verify if a SegProc and SegLock
1067 * structures are empty/idle.
1068 * Destroys the structures if they are ready
1069 * Can be called with sdp == NULL if want to verify only the lock state
1070 * caller should hold the lp->mutex
1071 * and this routine drops the mutex
1072 */
1073 static void
garbage_collect_lock(SegLock * lp,SegProc * sdp)1074 garbage_collect_lock(SegLock *lp, SegProc *sdp)
1075 {
1076 ASSERT(MUTEX_HELD(&lp->mutex));
1077 /* see if both segments unmapped from client structure */
1078 if ((sdp != NULL) && (sdp->lockseg == NULL) && (sdp->unlockseg == NULL))
1079 seglock_deleteclient(lp, sdp);
1080
1081 /* see if this is last client in the entire lock context */
1082 if ((lp->clients == NULL) && (lp->alloccount == 0)) {
1083 seglock_destroylock(lp);
1084 } else {
1085 mutex_exit(&lp->mutex);
1086 }
1087 }
1088
1089
1090 /* IOCTLS START HERE */
1091
1092 static int
seglock_grabinfo(intptr_t arg,int mode)1093 seglock_grabinfo(intptr_t arg, int mode)
1094 {
1095 int i = 1;
1096
1097 /* multiple clients per lock supported - see comments up top */
1098 if (ddi_copyout((caddr_t)&i, (caddr_t)arg, sizeof (int), mode) != 0)
1099 return (EFAULT);
1100 return (0);
1101 }
1102
1103 static int
seglock_graballoc(intptr_t arg,enum winlock_style style,int mode)1104 seglock_graballoc(intptr_t arg, enum winlock_style style, int mode) /* IOCTL */
1105 {
1106 struct seglock *lp;
1107 uint_t key;
1108 struct winlockalloc wla;
1109 int err;
1110
1111 if (style == OLDSTYLE_LOCK) {
1112 key = 0;
1113 } else {
1114 if (ddi_copyin((caddr_t)arg, (caddr_t)&wla, sizeof (wla),
1115 mode)) {
1116 return (EFAULT);
1117 }
1118 key = wla.sy_key;
1119 }
1120
1121 DEBUGF(3, (CE_CONT,
1122 "seglock_graballoc: key=%u, style=%d\n", key, style));
1123
1124 mutex_enter(&winlock_mutex);
1125 /* Allocate lockpage on first new style alloc */
1126 if ((lockpage == NULL) && (style == NEWSTYLE_LOCK)) {
1127 lockpage = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
1128 &lockpage_cookie);
1129 }
1130
1131 /* Allocate trashpage on first alloc (any style) */
1132 if (trashpage_cookie == NULL) {
1133 (void) ddi_umem_alloc(PAGESIZE, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
1134 &trashpage_cookie);
1135 }
1136
1137 if ((lp = seglock_findkey(key)) != NULL) {
1138 DEBUGF(2, (CE_CONT, "alloc: found lock key %d cookie %d\n",
1139 key, lp->cookie));
1140 ++lp->alloccount;
1141 } else if ((lp = seglock_createlock(style)) != NULL) {
1142 DEBUGF(2, (CE_CONT, "alloc: created lock key %d cookie %d\n",
1143 key, lp->cookie));
1144 lp->key = key;
1145 } else {
1146 DEBUGF(2, (CE_CONT, "alloc: cannot create lock key %d\n", key));
1147 mutex_exit(&winlock_mutex);
1148 return (ENOMEM);
1149 }
1150 ASSERT((lp != NULL) && MUTEX_HELD(&lp->mutex));
1151
1152 mutex_exit(&winlock_mutex);
1153
1154 if (style == OLDSTYLE_LOCK) {
1155 err = ddi_copyout((caddr_t)&lp->cookie, (caddr_t)arg,
1156 sizeof (lp->cookie), mode);
1157 } else {
1158 wla.sy_ident = lp->cookie +
1159 (uint_t)((uintptr_t)(lp->lockptr) & PAGEOFFSET);
1160 err = ddi_copyout((caddr_t)&wla, (caddr_t)arg,
1161 sizeof (wla), mode);
1162 }
1163
1164 if (err) {
1165 /* On error, should undo allocation */
1166 lp->alloccount--;
1167
1168 /* Verify and delete if lock is unused now */
1169 garbage_collect_lock(lp, NULL);
1170 return (EFAULT);
1171 }
1172
1173 mutex_exit(&lp->mutex);
1174 return (0);
1175 }
1176
1177 static int
seglock_grabfree(intptr_t arg,int mode)1178 seglock_grabfree(intptr_t arg, int mode) /* IOCTL */
1179 {
1180 struct seglock *lp;
1181 uint_t offset;
1182
1183 if (ddi_copyin((caddr_t)arg, &offset, sizeof (offset), mode)
1184 != 0) {
1185 return (EFAULT);
1186 }
1187 DEBUGF(2, (CE_CONT, "seglock_grabfree: offset=%u", offset));
1188
1189 if ((lp = seglock_findlock(offset)) == NULL) {
1190 DEBUGF(2, (CE_CONT, "did not find lock\n"));
1191 return (EINVAL);
1192 }
1193 DEBUGF(3, (CE_CONT, " lock key %d, cookie %d, alloccount %d\n",
1194 lp->key, lp->cookie, lp->alloccount));
1195
1196 if (lp->alloccount > 0)
1197 lp->alloccount--;
1198
1199 /* Verify and delete if lock is unused now */
1200 garbage_collect_lock(lp, NULL);
1201 return (0);
1202 }
1203
1204
1205 /*
1206 * Sets timeout in lock and UFLAGS in client
1207 * the UFLAGS are stored in the client structure and persistent only
1208 * till the unmap of the lock pages. If the process sets UFLAGS
1209 * does a map of the lock/unlock pages and unmaps them, the client
1210 * structure will get deleted and the UFLAGS will be lost. The process
1211 * will need to resetup the flags.
1212 */
1213 static int
seglock_settimeout(intptr_t arg,int mode)1214 seglock_settimeout(intptr_t arg, int mode) /* IOCTL */
1215 {
1216 SegLock *lp;
1217 SegProc *sdp;
1218 struct winlocktimeout wlt;
1219
1220 if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) {
1221 return (EFAULT);
1222 }
1223
1224 if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
1225 return (EINVAL);
1226
1227 lp->timeout = MSEC_TO_TICK_ROUNDUP(wlt.sy_timeout);
1228 /* if timeout modified, wake up any sleepers */
1229 if (lp->sleepers > 0) {
1230 cv_broadcast(&lp->locksleep);
1231 }
1232
1233 /*
1234 * If the process is trying to set UFLAGS,
1235 * Find the client segproc and allocate one if needed
1236 * Set the flags preserving the kernel flags
1237 * If the process is clearing UFLAGS
1238 * Find the client segproc but dont allocate one if does not exist
1239 */
1240 if (wlt.sy_flags & UFLAGS) {
1241 sdp = seglock_allocclient(lp);
1242 sdp->flag = sdp->flag & KFLAGS | wlt.sy_flags & UFLAGS;
1243 } else if ((sdp = seglock_findclient(lp)) != NULL) {
1244 sdp->flag = sdp->flag & KFLAGS;
1245 /* If clearing UFLAGS leaves the segment or lock idle, delete */
1246 garbage_collect_lock(lp, sdp);
1247 return (0);
1248 }
1249 mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
1250 return (0);
1251 }
1252
1253 static int
seglock_gettimeout(intptr_t arg,int mode)1254 seglock_gettimeout(intptr_t arg, int mode)
1255 {
1256 SegLock *lp;
1257 SegProc *sdp;
1258 struct winlocktimeout wlt;
1259
1260 if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0)
1261 return (EFAULT);
1262
1263 if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
1264 return (EINVAL);
1265
1266 wlt.sy_timeout = TICK_TO_MSEC(lp->timeout);
1267 /*
1268 * If this process has an active allocated lock return those flags
1269 * Dont allocate a client structure on gettimeout
1270 * If not, return 0.
1271 */
1272 if ((sdp = seglock_findclient(lp)) != NULL) {
1273 wlt.sy_flags = sdp->flag & UFLAGS;
1274 } else {
1275 wlt.sy_flags = 0;
1276 }
1277 mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
1278
1279 if (ddi_copyout(&wlt, (caddr_t)arg, sizeof (wlt), mode) != 0)
1280 return (EFAULT);
1281
1282 return (0);
1283 }
1284
1285 /*
1286 * Handle lock segment faults here...
1287 *
1288 * This is where the magic happens.
1289 */
1290
1291 /* ARGSUSED */
1292 static int
seglock_lockfault(devmap_cookie_t dhp,SegProc * sdp,SegLock * lp,uint_t rw)1293 seglock_lockfault(devmap_cookie_t dhp, SegProc *sdp, SegLock *lp, uint_t rw)
1294 {
1295 SegProc *owner = lp->owner;
1296 int err;
1297
1298 ASSERT(MUTEX_HELD(&lp->mutex));
1299 DEBUGF(3, (CE_CONT,
1300 "seglock_lockfault: hdl=%p, sdp=%p, lp=%p owner=%p\n",
1301 (void *)dhp, (void *)sdp, (void *)lp, (void *)owner));
1302
1303 /* lockfault is always called with sdp in current process context */
1304 ASSERT(ID(sdp) == CURPROC_ID);
1305
1306 /* If Lock has no current owner, give the mapping to new owner */
1307 if (owner == NULL) {
1308 DEBUGF(4, (CE_CONT, " lock has no current owner\n"));
1309 return (give_mapping(lp, sdp, rw));
1310 }
1311
1312 if (owner == sdp) {
1313 /*
1314 * Current owner is faulting on owned lock segment OR
1315 * Current owner is faulting on unlock page and has no waiters
1316 * Then can give the mapping to current owner
1317 */
1318 if ((sdp->lockseg == dhp) || (lp->sleepers == 0)) {
1319 DEBUGF(4, (CE_CONT, "lock owner faulting\n"));
1320 return (give_mapping(lp, sdp, rw));
1321 } else {
1322 /*
1323 * Owner must be writing to unlock page and there are waiters.
1324 * other cases have been checked earlier.
1325 * Release the lock, owner, and owners mappings
1326 * As the owner is trying to write to the unlock page, leave
1327 * it with a trashpage mapping and wake up the sleepers
1328 */
1329 ASSERT((dhp == sdp->unlockseg) && (lp->sleepers != 0));
1330 DEBUGF(4, (CE_CONT, " owner fault on unlock seg w/ sleeper\n"));
1331 return (lock_giveup(lp, 1));
1332 }
1333 }
1334
1335 ASSERT(owner != sdp);
1336
1337 /*
1338 * If old owner faulting on trash unlock mapping,
1339 * load hat mappings to trash page
1340 * RFE: non-owners should NOT be faulting on unlock mapping as they
1341 * as first supposed to fault on the lock seg. We could give them
1342 * a trash page or return error.
1343 */
1344 if ((sdp->unlockseg == dhp) && (sdp->flag & TRASHPAGE)) {
1345 DEBUGF(4, (CE_CONT, " old owner reloads trash mapping\n"));
1346 return (devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
1347 DEVMAP_ACCESS, rw));
1348 }
1349
1350 /*
1351 * Non-owner faulting. Need to check current LOCK state.
1352 *
1353 * Before reading lock value in LOCK(lp), we must make sure that
1354 * the owner cannot change its value before we change mappings
1355 * or else we could end up either with a hung process
1356 * or more than one process thinking they have the lock.
1357 * We do that by unloading the owner's mappings
1358 */
1359 DEBUGF(4, (CE_CONT, " owner loses mappings to check lock state\n"));
1360 err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
1361 err |= devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
1362 if (err != 0)
1363 return (err); /* unable to remove owner mapping */
1364
1365 /*
1366 * If lock is not held, then current owner mappings were
1367 * unloaded above and we can give the lock to the new owner
1368 */
1369 if (LOCK(lp) == 0) {
1370 DEBUGF(4, (CE_CONT,
1371 "Free lock (%p): Giving mapping to new owner %d\n",
1372 (void *)lp, ddi_get_pid()));
1373 return (give_mapping(lp, sdp, rw));
1374 }
1375
1376 DEBUGF(4, (CE_CONT, " lock held, sleeping\n"));
1377
1378 /*
1379 * A non-owning process tried to write (presumably to the lockpage,
1380 * but it doesn't matter) but the lock is held; we need to sleep for
1381 * the lock while there is an owner.
1382 */
1383
1384 lp->sleepers++;
1385 while ((owner = lp->owner) != NULL) {
1386 int rval;
1387
1388 if ((lp->timeout == 0) || (owner->flag & SY_NOTIMEOUT)) {
1389 /*
1390 * No timeout has been specified for this lock;
1391 * we'll simply sleep on the condition variable.
1392 */
1393 rval = cv_wait_sig(&lp->locksleep, &lp->mutex);
1394 } else {
1395 /*
1396 * A timeout _has_ been specified for this lock. We need
1397 * to wake up and possibly steal this lock if the owner
1398 * does not let it go. Note that all sleepers on a lock
1399 * with a timeout wait; the sleeper with the earliest
1400 * timeout will wakeup, and potentially steal the lock
1401 * Stealing the lock will cause a broadcast on the
1402 * locksleep cv and thus kick the other timed waiters
1403 * and cause everyone to restart in a new timedwait
1404 */
1405 rval = cv_reltimedwait_sig(&lp->locksleep,
1406 &lp->mutex, lp->timeout, TR_CLOCK_TICK);
1407 }
1408
1409 /*
1410 * Timeout and still old owner - steal lock
1411 * Force-Release lock and give old owner a trashpage mapping
1412 */
1413 if ((rval == -1) && (lp->owner == owner)) {
1414 /*
1415 * if any errors in lock_giveup, go back and sleep/retry
1416 * If successful, will break out of loop
1417 */
1418 cmn_err(CE_NOTE, "Process %d timed out on lock %d\n",
1419 ddi_get_pid(), lp->cookie);
1420 (void) lock_giveup(lp, 1);
1421 } else if (rval == 0) { /* signal pending */
1422 cmn_err(CE_NOTE,
1423 "Process %d signalled while waiting on lock %d\n",
1424 ddi_get_pid(), lp->cookie);
1425 lp->sleepers--;
1426 return (FC_MAKE_ERR(EINTR));
1427 }
1428 }
1429
1430 lp->sleepers--;
1431 /*
1432 * Give mapping to this process and save a fault later
1433 */
1434 return (give_mapping(lp, sdp, rw));
1435 }
1436
1437 /*
1438 * Utility: give a valid mapping to lock and unlock pages to current process.
1439 * Caller responsible for unloading old owner's mappings
1440 */
1441
1442 static int
give_mapping(SegLock * lp,SegProc * sdp,uint_t rw)1443 give_mapping(SegLock *lp, SegProc *sdp, uint_t rw)
1444 {
1445 int err = 0;
1446
1447 ASSERT(MUTEX_HELD(&lp->mutex));
1448 ASSERT(!((lp->owner == NULL) && (LOCK(lp) != 0)));
1449 /* give_mapping is always called with sdp in current process context */
1450 ASSERT(ID(sdp) == CURPROC_ID);
1451
1452 /* remap any old trash mappings */
1453 if (sdp->flag & TRASHPAGE) {
1454 /* current owner should not have a trash mapping */
1455 ASSERT(sdp != lp->owner);
1456
1457 DEBUGF(4, (CE_CONT,
1458 "new owner %d remapping old trash mapping\n",
1459 ddi_get_pid()));
1460 if ((err = devmap_umem_remap(sdp->unlockseg, winlock_dip,
1461 lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
1462 /*
1463 * unable to remap old trash page,
1464 * abort before changing owner
1465 */
1466 DEBUGF(4, (CE_CONT,
1467 "aborting: error in umem_remap %d\n", err));
1468 return (err);
1469 }
1470 sdp->flag &= ~TRASHPAGE;
1471 }
1472
1473 /* we have a new owner now */
1474 lp->owner = sdp;
1475
1476 if ((err = devmap_load(sdp->lockseg, lp->cookie, PAGESIZE,
1477 DEVMAP_ACCESS, rw)) != 0) {
1478 return (err);
1479 }
1480 DEBUGF(4, (CE_CONT, "new owner %d gets lock mapping", ddi_get_pid()));
1481
1482 if (lp->sleepers) {
1483 /* Force unload unlock mapping if there are waiters */
1484 DEBUGF(4, (CE_CONT,
1485 " lock has %d sleepers => remove unlock mapping\n",
1486 lp->sleepers));
1487 err = devmap_unload(sdp->unlockseg, lp->cookie, PAGESIZE);
1488 } else {
1489 /*
1490 * while here, give new owner a valid mapping to unlock
1491 * page so we don't get called again.
1492 */
1493 DEBUGF(4, (CE_CONT, " and unlock mapping\n"));
1494 err = devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
1495 DEVMAP_ACCESS, PROT_WRITE);
1496 }
1497 return (err);
1498 }
1499
1500 /*
1501 * Unload owner's mappings, release the lock and wakeup any sleepers
1502 * If trash, then the old owner is given a trash mapping
1503 * => old owner held lock too long and caused a timeout
1504 */
1505 static int
lock_giveup(SegLock * lp,int trash)1506 lock_giveup(SegLock *lp, int trash)
1507 {
1508 SegProc *owner = lp->owner;
1509
1510 DEBUGF(4, (CE_CONT, "winlock_giveup: lp=%p, owner=%p, trash %d\n",
1511 (void *)lp, (void *)ID(lp->owner), trash));
1512
1513 ASSERT(MUTEX_HELD(&lp->mutex));
1514 ASSERT(owner != NULL);
1515
1516 /*
1517 * owner loses lockpage/unlockpage mappings and gains a
1518 * trashpage mapping, if needed.
1519 */
1520 if (!trash) {
1521 /*
1522 * We do not handle errors in devmap_unload in the !trash case,
1523 * as the process is attempting to unmap/exit or otherwise
1524 * release the lock. Errors in unloading the mapping are not
1525 * going to affect that (unmap does not take error return).
1526 */
1527 (void) devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
1528 (void) devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
1529 } else {
1530 int err;
1531
1532 if (err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE)) {
1533 /* error unloading lockseg mapping. abort giveup */
1534 return (err);
1535 }
1536
1537 /*
1538 * old owner gets mapping to trash page so it can continue
1539 * devmap_umem_remap does a hat_unload (and does it holding
1540 * the right locks), so no need to devmap_unload on unlockseg
1541 */
1542 if ((err = devmap_umem_remap(owner->unlockseg, winlock_dip,
1543 trashpage_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
1544 /* error remapping to trash page, abort giveup */
1545 return (err);
1546 }
1547 owner->flag |= TRASHPAGE;
1548 /*
1549 * Preload mapping to trash page by calling devmap_load
1550 * However, devmap_load can only be called on the faulting
1551 * process context and not on the owner's process context
1552 * we preload only if we happen to be in owner process context
1553 * Other processes will fault on the unlock mapping
1554 * and be given a trash mapping at that time.
1555 */
1556 if (ID(owner) == CURPROC_ID) {
1557 (void) devmap_load(owner->unlockseg, lp->cookie,
1558 PAGESIZE, DEVMAP_ACCESS, PROT_WRITE);
1559 }
1560 }
1561
1562 lp->owner = NULL;
1563
1564 /* Clear the lock value in underlying page so new owner can grab it */
1565 LOCK(lp) = 0;
1566
1567 if (lp->sleepers) {
1568 DEBUGF(4, (CE_CONT, " waking up, lp=%p\n", (void *)lp));
1569 cv_broadcast(&lp->locksleep);
1570 }
1571 return (0);
1572 }
1573
1574 /*
1575 * destroy all allocated memory.
1576 */
1577
1578 static void
lock_destroyall(void)1579 lock_destroyall(void)
1580 {
1581 SegLock *lp, *lpnext;
1582
1583 ASSERT(MUTEX_HELD(&winlock_mutex));
1584 ASSERT(lock_list == NULL);
1585
1586 DEBUGF(1, (CE_CONT, "Lock list empty. Releasing free list\n"));
1587 for (lp = lock_free_list; lp != NULL; lp = lpnext) {
1588 mutex_enter(&lp->mutex);
1589 lpnext = lp->next;
1590 ASSERT(lp->clients == NULL);
1591 ASSERT(lp->owner == NULL);
1592 ASSERT(lp->alloccount == 0);
1593 mutex_destroy(&lp->mutex);
1594 cv_destroy(&lp->locksleep);
1595 kmem_free(lp, sizeof (SegLock));
1596 }
1597 lock_free_list = NULL;
1598 next_lock = 0;
1599 }
1600
1601
1602 /* RFE: create mdb walkers instead of dump routines? */
1603 static void
seglock_dump_all(void)1604 seglock_dump_all(void)
1605 {
1606 SegLock *lp;
1607
1608 mutex_enter(&winlock_mutex);
1609 cmn_err(CE_CONT, "ID\tKEY\tNALLOC\tATTCH\tOWNED\tLOCK\tWAITER\n");
1610
1611 cmn_err(CE_CONT, "Lock List:\n");
1612 for (lp = lock_list; lp != NULL; lp = lp->next) {
1613 mutex_enter(&lp->mutex);
1614 cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
1615 lp->cookie, lp->key, lp->alloccount,
1616 lp->clients ? 'Y' : 'N',
1617 lp->owner ? 'Y' : 'N',
1618 lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
1619 lp->sleepers);
1620 mutex_exit(&lp->mutex);
1621 }
1622 cmn_err(CE_CONT, "Free Lock List:\n");
1623 for (lp = lock_free_list; lp != NULL; lp = lp->next) {
1624 mutex_enter(&lp->mutex);
1625 cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
1626 lp->cookie, lp->key, lp->alloccount,
1627 lp->clients ? 'Y' : 'N',
1628 lp->owner ? 'Y' : 'N',
1629 lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
1630 lp->sleepers);
1631 mutex_exit(&lp->mutex);
1632 }
1633
1634 #ifdef DEBUG
1635 if (lock_debug < 3) {
1636 mutex_exit(&winlock_mutex);
1637 return;
1638 }
1639
1640 for (lp = lock_list; lp != NULL; lp = lp->next) {
1641 SegProc *sdp;
1642
1643 mutex_enter(&lp->mutex);
1644 cmn_err(CE_CONT,
1645 "lock %p, key=%d, cookie=%d, nalloc=%u, lock=%d, wait=%d\n",
1646 (void *)lp, lp->key, lp->cookie, lp->alloccount,
1647 lp->lockptr != 0 ? LOCK(lp) : -1, lp->sleepers);
1648
1649 cmn_err(CE_CONT,
1650 "style=%d, lockptr=%p, timeout=%ld, clients=%p, owner=%p\n",
1651 lp->style, (void *)lp->lockptr, lp->timeout,
1652 (void *)lp->clients, (void *)lp->owner);
1653
1654
1655 for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
1656 cmn_err(CE_CONT, " client %p%s, lp=%p, flag=%x, "
1657 "process tag=%p, lockseg=%p, unlockseg=%p\n",
1658 (void *)sdp, sdp == lp->owner ? " (owner)" : "",
1659 (void *)sdp->lp, sdp->flag, (void *)ID(sdp),
1660 (void *)sdp->lockseg, (void *)sdp->unlockseg);
1661 }
1662 mutex_exit(&lp->mutex);
1663 }
1664 #endif
1665 mutex_exit(&winlock_mutex);
1666 }
1667
1668 #include <sys/modctl.h>
1669
1670 static struct modldrv modldrv = {
1671 &mod_driverops, /* Type of module. This one is a driver */
1672 "Winlock Driver", /* Name of the module */
1673 &winlock_ops, /* driver ops */
1674 };
1675
1676 static struct modlinkage modlinkage = {
1677 MODREV_1,
1678 (void *)&modldrv,
1679 0,
1680 0,
1681 0
1682 };
1683
1684 int
_init(void)1685 _init(void)
1686 {
1687 int e;
1688
1689 mutex_init(&winlock_mutex, NULL, MUTEX_DEFAULT, NULL);
1690 e = mod_install(&modlinkage);
1691 if (e) {
1692 mutex_destroy(&winlock_mutex);
1693 }
1694 return (e);
1695 }
1696
1697
1698 int
_info(struct modinfo * modinfop)1699 _info(struct modinfo *modinfop)
1700 {
1701 return (mod_info(&modlinkage, modinfop));
1702 }
1703
1704 int
_fini(void)1705 _fini(void)
1706 {
1707 int e;
1708
1709 e = mod_remove(&modlinkage);
1710 if (e == 0) {
1711 mutex_destroy(&winlock_mutex);
1712 }
1713 return (e);
1714 }
1715