xref: /titanic_50/usr/src/uts/common/io/winlockio.c (revision 392e836b07e8da771953e4d64233b2abe4393efe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * This is the lock device driver.
29  *
30  * The lock driver provides a variation of inter-process mutexes with the
31  * following twist in semantics:
32  *	A waiter for a lock after a set timeout can "break" the lock and
33  *	grab it from the current owner (without informing the owner).
34  *
 * These semantics result in multiple processes temporarily thinking they
 * own the lock. This usually does not make sense for cases where locks are
 * used to protect a critical region and it is important to serialize access
 * to data structures, as breaking the lock will also lose the serialization
 * and result in corrupt data structures.
40  *
41  * The usage for winlock driver is primarily driven by the graphics system
42  * when doing DGA (direct graphics access) graphics. The locks are used to
43  * protect access to the frame buffer (presumably reflects back to the screen)
44  * between competing processes that directly write to the screen as opposed
45  * to going through the window server etc.
46  * In this case, the result of breaking the lock at worst causes the screen
47  * image to be distorted and is easily fixed by doing a "refresh"
48  *
49  * In well-behaved applications, the lock is held for a very short time and
50  * the breaking semantics do not come into play. Not having this feature and
51  * using normal inter-process mutexes will result in a misbehaved application
52  * from grabbing the screen writing capability from the window manager and
53  * effectively make the system look like it is hung (mouse pointer does not
54  * move).
55  *
56  * A secondary aspect of the winlock driver is that it allows for extremely
57  * fast lock acquire/release in cases where there is low contention. A memory
 * write is all that is needed (not even a function call). The window
 * manager is usually the only DGA writer, and this case is optimized for.
 * Occasionally some processes might do DGA graphics and cause kernel faults
 * to handle the contention/locking (and that has got to be slow!).
62  *
63  * The following IOCTLs are supported:
64  *
65  *   GRABPAGEALLOC:
66  *	Compatibility with old cgsix device driver lockpage ioctls.
67  *	Lockpages created this way must be an entire page for compatibility with
68  *	older software.	 This ioctl allocates a lock context with its own
69  *	private lock page.  The unique "ident" that identifies this lock is
70  *	returned.
71  *
72  *   GRABPAGEFREE:
73  *	Compatibility with cgsix device driver lockpage ioctls.	 This
74  *	ioctl releases the lock context allocated by GRABPAGEALLOC.
75  *
76  *   GRABLOCKINFO:
77  *	Returns a one-word flag.  '1' means that multiple clients may
78  *	access this lock page.	Older device drivers returned '0',
79  *	meaning that only two clients could access a lock page.
80  *
81  *   GRABATTACH:
82  *	Not supported.	This ioctl would have grabbed all lock pages
83  *	on behalf of the calling program.
84  *
85  *   WINLOCKALLOC:
 *	Allocate a lock context.  This ioctl accepts a key value as
 *	its argument.  If the key is zero, a new lock context is
 *	created, and its "ident" is returned.	If the key is nonzero,
 *	all existing contexts are checked to see if they match the
 *	key.  If a match is found, its reference count is incremented
 *	and its ident is returned, otherwise a new context is created
 *	and its ident is returned.
93  *
94  *   WINLOCKFREE:
95  *	Free a lock context.  This ioctl accepts the ident of a lock
96  *	context and decrements its reference count.  Once the reference
97  *	count reaches zero *and* all mappings are released, the lock
98  *	context is freed.  When all the lock context in the lock page are
99  *	freed, the lock page is freed as well.
100  *
101  *   WINLOCKSETTIMEOUT:
102  *	Set lock timeout for a context.	 This ioctl accepts the ident
103  *	of a lock context and a timeout value in milliseconds.
104  *	Whenever lock contention occurs, the timer is started and the lock is
105  *	broken after the timeout expires. If timeout value is zero, lock does
106  *	not timeout.  This value will be rounded to the nearest clock
107  *	tick, so don't try to use it for real-time control or something.
108  *
109  *   WINLOCKGETTIMEOUT:
110  *	Get lock timeout from a context.
111  *
112  *   WINLOCKDUMP:
113  *	Dump state of this device.
114  *
115  *
116  * How /dev/winlock works:
117  *
118  *   Every lock context consists of two mappings for the client to the lock
119  *   page.  These mappings are known as the "lock page" and "unlock page"
 *   to the client. The first mmap to the lock context (identified by the
 *   sy_ident field returned during alloc) allocates mapping to the lock page,
 *   the second mmap allocates a mapping to the unlock page.
 *	The mappings don't have to be ordered in virtual address space, but do
 *   need to be ordered in time. Mapping and unmapping of these lock and unlock
 *   pages should happen in pairs. Doing them one at a time, or unmapping one
 *   and leaving one mapped, etc., causes undefined behavior.
127  *	The mappings are always of length PAGESIZE, and type MAP_SHARED.
128  *
129  *   The first ioctl is to ALLOC a lock, either based on a key (if trying to
130  *	grab a preexisting lock) or 0 (gets a default new one)
131  *	This ioctl returns a value in sy_ident which is needed to do the
132  *	later mmaps and FREE/other ioctls.
133  *
134  *   The "page number" portion of the sy_ident needs to be passed as the
135  *	file offset when doing an mmap for both the lock page and unlock page
136  *
137  *   The value returned by mmap ( a user virtual address) needs to be
138  *	incremented by the "page offset" portion of sy_ident to obtain the
139  *	pointer to the actual lock. (Skipping this step, does not cause any
140  *	visible error, but the process will be using the wrong lock!)
141  *
142  *	On a fork(), the child process will inherit the mappings for free, but
143  *   will not inherit the parent's lock ownership if any. The child should NOT
144  *   do an explicit FREE on the lock context unless it did an explicit ALLOC.
145  *	Only one process at a time is allowed to have a valid hat
146  *   mapping to a lock page. This is enforced by this driver.
 *   A client acquires a lock by writing a '1' to the lock page.
 *   Note that it is not necessary to read and verify that the lock is '0'
 *	prior to writing a '1' in it.
150  *   If it does not already have a valid mapping to that page, the driver
151  *   takes a fault (devmap_access), loads the client mapping
152  *   and allows the client to continue.	 The client releases the lock by
153  *   writing a '0' to the unlock page.	Again, if it does not have a valid
154  *   mapping to the unlock page, the segment driver takes a fault,
155  *   loads the mapping, and lets the client continue.  From this point
156  *   forward, the client can make as many locks and unlocks as it
157  *   wants, without any more faults into the kernel.
158  *
159  *   If a different process wants to acquire a lock, it takes a page fault
160  *   when it writes the '1' to the lock page.  If the segment driver sees
161  *   that the lock page contained a zero, then it invalidates the owner's
162  *   mappings and gives the mappings to this process.
163  *
164  *   If there is already a '1' in the lock page when the second client
165  *   tries to access the lock page, then a lock exists.	 The segment
166  *   driver sleeps the second client and, if applicable, starts the
167  *   timeout on the lock.  The owner's mapping to the unlock page
168  *   is invalidated so that the driver will be woken again when the owner
169  *   releases the lock.
170  *
171  *   When the locking client finally writes a '0' to the unlock page, the
172  *   segment driver takes another fault.  The client is given a valid
173  *   mapping, not to the unlock page, but to the "trash page", and allowed
174  *   to continue.  Meanwhile, the sleeping client is given a valid mapping
175  *   to the lock/unlock pages and allowed to continue as well.
176  *
177  * RFE: There is a leak if process exits before freeing allocated locks
178  * But currently not tracking which locks were allocated by which
179  * process and we do not have a clean entry point into the driver
180  * to do garbage collection. If the interface used a file descriptor for each
181  * lock it allocs, then the driver can free up stuff in the _close routine
182  */
183 
184 #include <sys/types.h>		/* various type defn's */
185 #include <sys/debug.h>
186 #include <sys/param.h>		/* various kernel limits */
187 #include <sys/time.h>
188 #include <sys/errno.h>
189 #include <sys/kmem.h>		/* defines kmem_alloc() */
190 #include <sys/conf.h>		/* defines cdevsw */
191 #include <sys/file.h>		/* various file modes, etc. */
192 #include <sys/uio.h>		/* UIO stuff */
193 #include <sys/ioctl.h>
194 #include <sys/cred.h>		/* defines cred struct */
195 #include <sys/mman.h>		/* defines mmap(2) parameters */
196 #include <sys/stat.h>		/* defines S_IFCHR */
197 #include <sys/cmn_err.h>	/* use cmn_err */
198 #include <sys/ddi.h>		/* ddi stuff */
199 #include <sys/sunddi.h>		/* ddi stuff */
200 #include <sys/ddi_impldefs.h>	/* ddi stuff */
201 #include <sys/winlockio.h>	/* defines ioctls, flags, data structs */
202 
203 static int	winlock_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
204 static int	winlock_devmap(dev_t, devmap_cookie_t, offset_t, size_t,
205 			size_t *, uint_t);
206 static int	winlocksegmap(dev_t, off_t, struct as *, caddr_t *, off_t,
207 			uint_t, uint_t, uint_t, cred_t *);
208 
209 static struct cb_ops	winlock_cb_ops = {
210 	nulldev,		/* open */
211 	nulldev,		/* close */
212 	nodev,			/* strategy */
213 	nodev,			/* print */
214 	nodev,			/* dump */
215 	nodev,			/* read */
216 	nodev,			/* write */
217 	winlock_ioctl,		/* ioctl */
218 	winlock_devmap,		/* devmap */
219 	nodev,			/* mmap */
220 	winlocksegmap,		/* segmap */
221 	nochpoll,		/* poll */
222 	ddi_prop_op,		/* prop_op */
223 	NULL,			/* streamtab */
224 	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
225 	0,			/* rev */
226 	nodev,			/* aread */
227 	nodev			/* awrite */
228 };
229 
230 static int winlock_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
231 static int winlock_attach(dev_info_t *, ddi_attach_cmd_t);
232 static int winlock_detach(dev_info_t *, ddi_detach_cmd_t);
233 
234 static struct dev_ops	winlock_ops = {
235 	DEVO_REV,
236 	0,			/* refcount */
237 	winlock_info,		/* info */
238 	nulldev,		/* identify */
239 	nulldev,		/* probe */
240 	winlock_attach,		/* attach */
241 	winlock_detach,		/* detach */
242 	nodev,			/* reset */
243 	&winlock_cb_ops,	/* driver ops */
244 	NULL,			/* bus ops */
245 	NULL,			/* power */
246 	ddi_quiesce_not_needed,		/* quiesce */
247 };
248 
249 static int winlockmap_map(devmap_cookie_t, dev_t, uint_t, offset_t, size_t,
250 		void **);
251 static void winlockmap_unmap(devmap_cookie_t, void *, offset_t, size_t,
252 		devmap_cookie_t, void **, devmap_cookie_t, void **);
253 static int winlockmap_dup(devmap_cookie_t, void *,
254 		devmap_cookie_t, void **);
255 static int winlockmap_access(devmap_cookie_t, void *, offset_t, size_t,
256 		uint_t, uint_t);
257 
258 static
259 struct devmap_callback_ctl winlockmap_ops = {
260 	DEVMAP_OPS_REV,
261 	winlockmap_map,
262 	winlockmap_access,
263 	winlockmap_dup,
264 	winlockmap_unmap,
265 };
266 
/*
 * Debug tracing.
 *
 * DEBUGF(level, (cmn_err-argument-list)) calls cmn_err() with the
 * parenthesized argument list when lock_debug is at least "level".  In
 * non-DEBUG builds it expands to a no-op expression.
 *
 * The guard is #ifdef DEBUG so that lock_debug exists in exactly the builds
 * where the debug ioctl in winlock_ioctl() (also guarded by #ifdef DEBUG)
 * assigns to it.
 *
 * Both forms expand to a single statement that needs a trailing semicolon,
 * so the macro is safe as the body of an unbraced if/else.
 */
#ifdef DEBUG
static	int	lock_debug = 0;
#define	DEBUGF(level, args)	\
	do { if (lock_debug >= (level)) cmn_err args; } while (0)
#else
#define	DEBUGF(level, args)	((void)0)
#endif
273 
/*
 * The two styles of lock the driver hands out.  The style is chosen by the
 * allocating ioctl: WINLOCKALLOC asks for NEWSTYLE_LOCK and GRABPAGEALLOC
 * asks for OLDSTYLE_LOCK.
 */
enum winlock_style {
	NEWSTYLE_LOCK,
	OLDSTYLE_LOCK
};
276 
277 /*
278  * These structures describe a lock context.  We permit multiple
279  * clients (not just two) to access a lock page
280  *
281  * The "cookie" identifies the lock context. It is the page number portion
282  * sy_ident returned on lock allocation. Cookie is used in later ioctls.
283  * "cookie" is lockid * PAGESIZE
284  * "lockptr" is the kernel virtual address to the lock itself
285  * The page offset portion of lockptr is the page offset portion of sy_ident
286  */
287 
288 /*
289  * per-process information about locks.  This is the private field of
290  * a devmap mapping.  Note that usually *two* mappings point to this.
291  */
292 
293 /*
294  * Each process using winlock is associated with a segproc structure
295  * In various driver entry points, we need to search to find the right
296  * segproc structure (If we were using file handles for each lock this
297  * would not have been necessary).
298  * It would have been simple to use the process pid (and ddi_get_pid)
299  * However, during fork devmap_dup is called in the parent process context
300  * and using the pid complicates the code by introducing orphans.
301  * Instead we use the as pointer for the process as a cookie
302  * which requires delving into various non-DDI kosher structs
303  */
304 typedef struct segproc {
305 	struct segproc	*next;		/* next client of this lock */
306 	struct seglock	*lp;		/* associated lock context */
307 	devmap_cookie_t	lockseg;	/* lock mapping, if any */
308 	devmap_cookie_t unlockseg;	/* unlock mapping, if any */
309 	void		*tag;		/* process as pointer as tag */
310 	uint_t		flag;		/* see "flag bits" in winlockio.h */
311 } SegProc;
312 
313 #define	ID(sdp)		((sdp)->tag)
314 #define	CURPROC_ID	(void *)(curproc->p_as)
315 
316 /* per lock context information */
317 
318 typedef struct seglock {
319 	struct seglock	*next;		/* next lock */
320 	uint_t		sleepers;	/* nthreads sleeping on this lock */
321 	uint_t		alloccount;	/* how many times created? */
322 	uint_t		cookie;		/* mmap() offset (page #) into device */
323 	uint_t		key;		/* key, if any */
324 	enum winlock_style	style;	/* style of lock - OLDSTYLE, NEWSTYLE */
325 	clock_t		timeout;	/* sleep time in ticks */
326 	ddi_umem_cookie_t umem_cookie;	/* cookie for umem allocated memory */
327 	int		*lockptr;	/* kernel virtual addr of lock */
328 	struct segproc	*clients;	/* list of clients of this lock */
329 	struct segproc	*owner;		/* current owner of lock */
330 	kmutex_t	mutex;		/* mutex for lock */
331 	kcondvar_t	locksleep;	/* for sleeping on lock */
332 } SegLock;
333 
334 #define	LOCK(lp)	(*((lp)->lockptr))
335 
336 /*
337  * Number of locks that can fit in a page. Driver can support only that many.
338  * For oldsytle locks, it is relatively easy to increase the limit as each
339  * is in a separate page (MAX_LOCKS mostly serves to prevent runaway allocation
340  * For newstyle locks, this is trickier as the code needs to allow for mapping
341  * into the second or third page of the cookie for some locks.
342  */
343 #define	MAX_LOCKS	(PAGESIZE/sizeof (int))
344 
345 #define	LOCKTIME	3	/* Default lock timeout in seconds */
346 
347 
348 /* Protections setting for winlock user mappings */
349 #define	WINLOCK_PROT	(PROT_READ|PROT_WRITE|PROT_USER)
350 
351 /*
352  * The trash page is where unwanted writes go
353  * when a process is releasing a lock.
354  */
355 static	ddi_umem_cookie_t trashpage_cookie = NULL;
356 
357 /* For newstyle allocations a common page of locks is used */
358 static	caddr_t	lockpage = NULL;
359 static	ddi_umem_cookie_t lockpage_cookie = NULL;
360 
361 static	dev_info_t	*winlock_dip = NULL;
362 static	kmutex_t	winlock_mutex;
363 
364 /*
365  * winlock_mutex protects
366  *	lock_list
367  *	lock_free_list
368  *	"next" field in SegLock
369  *	next_lock
370  *	trashpage_cookie
371  *	lockpage & lockpage_cookie
372  *
373  * SegLock_mutex protects
374  *	rest of fields in SegLock
375  *	All fields in list of SegProc (lp->clients)
376  *
377  * Lock ordering is winlock_mutex->SegLock_mutex
378  * During devmap/seg operations SegLock_mutex acquired without winlock_mutex
379  *
380  * During devmap callbacks, the pointer to SegProc is stored as the private
381  * data in the devmap handle. This pointer will not go stale (i.e., the
382  * SegProc getting deleted) as the SegProc is not deleted until both the
383  * lockseg and unlockseg have been unmapped and the pointers stored in
384  * the devmap handles have been NULL'ed.
385  * But before this pointer is used to access any fields (other than the 'lp')
386  * lp->mutex must be held.
387  */
388 
389 /*
390  * The allocation code tries to allocate from lock_free_list
391  * first, otherwise it uses kmem_zalloc.  When lock list is idle, all
392  * locks in lock_free_list are kmem_freed
393  */
394 static	SegLock	*lock_list = NULL;		/* in-use locks */
395 static	SegLock	*lock_free_list = NULL;		/* free locks */
396 static	int	next_lock = 0;			/* next lock cookie */
397 
398 /* Routines to find a lock in lock_list based on offset or key */
399 static SegLock *seglock_findlock(uint_t);
400 static SegLock *seglock_findkey(uint_t);
401 
402 /* Routines to find and allocate SegProc structures */
403 static SegProc *seglock_find_specific(SegLock *, void *);
404 static SegProc *seglock_alloc_specific(SegLock *, void *);
405 #define	seglock_findclient(lp)	seglock_find_specific((lp), CURPROC_ID)
406 #define	seglock_allocclient(lp)	seglock_alloc_specific((lp), CURPROC_ID)
407 
408 /* Delete client from lock's client list */
409 static void seglock_deleteclient(SegLock *, SegProc *);
410 static void garbage_collect_lock(SegLock *, SegProc *);
411 
412 /* Create a new lock */
413 static SegLock *seglock_createlock(enum winlock_style);
414 /* Destroy lock */
415 static void seglock_destroylock(SegLock *);
416 static void lock_destroyall(void);
417 
418 /* Helper functions in winlockmap_access */
419 static int give_mapping(SegLock *, SegProc *, uint_t);
420 static int lock_giveup(SegLock *, int);
421 static int seglock_lockfault(devmap_cookie_t, SegProc *, SegLock *, uint_t);
422 
423 /* routines called from ioctl */
424 static int seglock_graballoc(intptr_t, enum winlock_style, int);
425 static int seglock_grabinfo(intptr_t, int);
426 static int seglock_grabfree(intptr_t, int);
427 static int seglock_gettimeout(intptr_t, int);
428 static int seglock_settimeout(intptr_t, int);
429 static void seglock_dump_all(void);
430 
431 static	int
432 winlock_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
433 {
434 	DEBUGF(1, (CE_CONT, "winlock_attach, devi=%p, cmd=%d\n",
435 	    (void *)devi, (int)cmd));
436 	if (cmd != DDI_ATTACH)
437 		return (DDI_FAILURE);
438 	if (ddi_create_minor_node(devi, "winlock", S_IFCHR, 0, DDI_PSEUDO, 0)
439 	    == DDI_FAILURE) {
440 		return (DDI_FAILURE);
441 	}
442 	winlock_dip = devi;
443 	ddi_report_dev(devi);
444 	return (DDI_SUCCESS);
445 }
446 
447 /*ARGSUSED*/
448 static	int
449 winlock_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
450 {
451 	DEBUGF(1, (CE_CONT, "winlock_detach, devi=%p, cmd=%d\n",
452 	    (void *)devi, (int)cmd));
453 	if (cmd != DDI_DETACH)
454 		return (DDI_FAILURE);
455 
456 	mutex_enter(&winlock_mutex);
457 	if (lock_list != NULL) {
458 		mutex_exit(&winlock_mutex);
459 		return (DDI_FAILURE);
460 	}
461 	ASSERT(lock_free_list == NULL);
462 
463 	DEBUGF(1, (CE_CONT, "detach freeing trashpage and lockpage\n"));
464 	/* destroy any common stuff created */
465 	if (trashpage_cookie != NULL) {
466 		ddi_umem_free(trashpage_cookie);
467 		trashpage_cookie = NULL;
468 	}
469 	if (lockpage != NULL) {
470 		ddi_umem_free(lockpage_cookie);
471 		lockpage = NULL;
472 		lockpage_cookie = NULL;
473 	}
474 	winlock_dip = NULL;
475 	mutex_exit(&winlock_mutex);
476 	return (DDI_SUCCESS);
477 }
478 
479 /*ARGSUSED*/
480 static	int
481 winlock_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
482 {
483 	register int error;
484 
485 	/* initialize result */
486 	*result = NULL;
487 
488 	/* only valid instance (i.e., getminor) is 0 */
489 	if (getminor((dev_t)arg) >= 1)
490 		return (DDI_FAILURE);
491 
492 	switch (infocmd) {
493 	case DDI_INFO_DEVT2DEVINFO:
494 		if (winlock_dip == NULL)
495 			error = DDI_FAILURE;
496 		else {
497 			*result = (void *)winlock_dip;
498 			error = DDI_SUCCESS;
499 		}
500 		break;
501 	case DDI_INFO_DEVT2INSTANCE:
502 		*result = (void *)0;
503 		error = DDI_SUCCESS;
504 		break;
505 	default:
506 		error = DDI_FAILURE;
507 	}
508 	return (error);
509 }
510 
511 
512 /*ARGSUSED*/
513 int
514 winlock_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
515 	cred_t *cred, int *rval)
516 {
517 	DEBUGF(1, (CE_CONT, "winlockioctl: cmd=%d, arg=0x%p\n",
518 	    cmd, (void *)arg));
519 
520 	switch (cmd) {
521 	/*
522 	 * ioctls that used to be handled by framebuffers (defined in fbio.h)
523 	 * RFE: No code really calls the GRAB* ioctls now. Should EOL.
524 	 */
525 
526 	case GRABPAGEALLOC:
527 		return (seglock_graballoc(arg, OLDSTYLE_LOCK, mode));
528 	case GRABPAGEFREE:
529 		return (seglock_grabfree(arg, mode));
530 	case GRABLOCKINFO:
531 		return (seglock_grabinfo(arg, mode));
532 	case GRABATTACH:
533 		return (EINVAL); /* GRABATTACH is not supported (never was) */
534 
535 	case WINLOCKALLOC:
536 		return (seglock_graballoc(arg, NEWSTYLE_LOCK, mode));
537 	case WINLOCKFREE:
538 		return (seglock_grabfree(arg, mode));
539 	case WINLOCKSETTIMEOUT:
540 		return (seglock_settimeout(arg, mode));
541 	case WINLOCKGETTIMEOUT:
542 		return (seglock_gettimeout(arg, mode));
543 	case WINLOCKDUMP:
544 		seglock_dump_all();
545 		return (0);
546 
547 #ifdef DEBUG
548 	case (WIOC|255):
549 		lock_debug = arg;
550 		return (0);
551 #endif
552 
553 	default:
554 		return (ENOTTY);		/* Why is this not EINVAL */
555 	}
556 }
557 
558 int
559 winlocksegmap(
560 	dev_t	dev,		/* major:minor */
561 	off_t	off,		/* device offset from mmap(2) */
562 	struct as *as,		/* user's address space. */
563 	caddr_t	*addr,		/* address from mmap(2) */
564 	off_t	len,		/* length from mmap(2) */
565 	uint_t	prot,		/* user wants this access */
566 	uint_t	maxprot,	/* this is the maximum the user can have */
567 	uint_t	flags,		/* flags from mmap(2) */
568 	cred_t	*cred)
569 {
570 	DEBUGF(1, (CE_CONT, "winlock_segmap off=%lx, len=0x%lx\n", off, len));
571 
572 	/* Only MAP_SHARED mappings are supported */
573 	if ((flags & MAP_TYPE) == MAP_PRIVATE) {
574 		return (EINVAL);
575 	}
576 
577 	/* Use devmap_setup to setup the mapping */
578 	return (devmap_setup(dev, (offset_t)off, as, addr, (size_t)len, prot,
579 	    maxprot, flags, cred));
580 }
581 
582 /*ARGSUSED*/
583 int
584 winlock_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
585     size_t *maplen, uint_t model)
586 {
587 	SegLock *lp;
588 	int err;
589 
590 	DEBUGF(1, (CE_CONT, "winlock devmap: off=%llx, len=%lx, dhp=%p\n",
591 	    off, len, (void *)dhp));
592 
593 	*maplen = 0;
594 
595 	/* Check if the lock exists, i.e., has been created by alloc */
596 	/* off is the sy_ident returned in the alloc ioctl */
597 	if ((lp = seglock_findlock((uint_t)off)) == NULL) {
598 		return (ENXIO);
599 	}
600 
601 	/*
602 	 * The offset bits in mmap(2) offset has to be same as in lockptr
603 	 * OR the offset should be 0 (i.e. masked off)
604 	 */
605 	if (((off & PAGEOFFSET) != 0) &&
606 	    ((off ^ (uintptr_t)(lp->lockptr)) & (offset_t)PAGEOFFSET) != 0) {
607 		DEBUGF(2, (CE_CONT,
608 		    "mmap offset %llx mismatch with lockptr %p\n",
609 		    off, (void *)lp->lockptr));
610 		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
611 		return (EINVAL);
612 	}
613 
614 	/* Only supports PAGESIZE length mappings */
615 	if (len != PAGESIZE) {
616 		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
617 		return (EINVAL);
618 	}
619 
620 	/*
621 	 * Set up devmap to point at page associated with lock
622 	 * RFE: At this point we dont know if this is a lockpage or unlockpage
623 	 * a lockpage would not need DEVMAP_ALLOW_REMAP setting
624 	 * We could have kept track of the mapping order here,
625 	 * but devmap framework does not support storing any state in this
626 	 * devmap callback as it does not callback for error cleanup if some
627 	 * other error happens in the framework.
628 	 * RFE: We should modify the winlock mmap interface so that the
629 	 * user process marks in the offset passed in whether this is for a
630 	 * lock or unlock mapping instead of guessing based on order of maps
631 	 * This would cleanup other things (such as in fork)
632 	 */
633 	if ((err = devmap_umem_setup(dhp, winlock_dip, &winlockmap_ops,
634 	    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT,
635 	    DEVMAP_ALLOW_REMAP, 0)) < 0) {
636 		mutex_exit(&lp->mutex);	/* held by seglock_findlock */
637 		return (err);
638 	}
639 	/*
640 	 * No mappings are loaded to those segments yet. The correctness
641 	 * of the winlock semantics depends on the devmap framework/seg_dev NOT
642 	 * loading the translations without calling _access callback.
643 	 */
644 
645 	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
646 	*maplen = PAGESIZE;
647 	return (0);
648 }
649 
650 /*
651  * This routine is called by the devmap framework after the devmap entry point
652  * above and the mapping is setup in seg_dev.
653  * We store the pointer to the per-process context in the devmap private data.
654  */
655 /*ARGSUSED*/
656 static int
657 winlockmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
658 	size_t len, void **pvtp)
659 {
660 	SegLock *lp = seglock_findlock((uint_t)off); /* returns w/ mutex held */
661 	SegProc *sdp;
662 
663 	ASSERT(len == PAGESIZE);
664 
665 	/* Find the per-process context for this lock, alloc one if not found */
666 	sdp = seglock_allocclient(lp);
667 
668 	/*
669 	 * RFE: Determining which is a lock vs unlock seg is based on order
670 	 * of mmaps, we should change that to be derivable from off
671 	 */
672 	if (sdp->lockseg == NULL) {
673 		sdp->lockseg = dhp;
674 	} else if (sdp->unlockseg == NULL) {
675 		sdp->unlockseg = dhp;
676 	} else {
677 		/* attempting to map lock more than twice */
678 		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
679 		return (ENOMEM);
680 	}
681 
682 	*pvtp = sdp;
683 	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
684 	return (DDI_SUCCESS);
685 }
686 
687 /*
688  * duplicate a segment, as in fork()
689  * On fork, the child inherits the mappings to the lock
690  *	lp->alloccount is NOT incremented, so child should not do a free().
691  *	Semantics same as if done an alloc(), map(), map().
692  *	This way it would work fine if doing an exec() variant later
693  *	Child does not inherit any UFLAGS set in parent
694  * The lock and unlock pages are started off unmapped, i.e., child does not
695  *	own the lock.
696  * The code assumes that the child process has a valid pid at this point
697  * RFE: This semantics depends on fork not duplicating the hat mappings
698  *	(which is the current implementation). To enforce it would need to
699  *	call devmap_unload from here - not clear if that is allowed.
700  */
701 
702 static int
703 winlockmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
704 	void **newpvt)
705 {
706 	SegProc *sdp = (SegProc *)oldpvt;
707 	SegProc *ndp;
708 	SegLock *lp = sdp->lp;
709 
710 	mutex_enter(&lp->mutex);
711 	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
712 
713 	/*
714 	 * Note: At this point, the child process does have a pid, but
715 	 * the arguments passed to as_dup and hence to devmap_dup dont pass it
716 	 * down. So we cannot use normal seglock_findclient - which finds the
717 	 * parent sdp itself!
718 	 * Instead we allocate the child's SegProc by using the child as pointer
719 	 * RFE: we are using the as stucture which means peeking into the
720 	 * devmap_cookie. This is not DDI-compliant. Need a compliant way of
721 	 * getting at either the as or, better, a way to get the child's new pid
722 	 */
723 	ndp = seglock_alloc_specific(lp,
724 	    (void *)((devmap_handle_t *)new_dhp)->dh_seg->s_as);
725 	ASSERT(ndp != sdp);
726 
727 	if (sdp->lockseg == dhp) {
728 		ASSERT(ndp->lockseg == NULL);
729 		ndp->lockseg = new_dhp;
730 	} else {
731 		ASSERT(sdp->unlockseg == dhp);
732 		ASSERT(ndp->unlockseg == NULL);
733 		ndp->unlockseg = new_dhp;
734 		if (sdp->flag & TRASHPAGE) {
735 			ndp->flag |= TRASHPAGE;
736 		}
737 	}
738 	mutex_exit(&lp->mutex);
739 	*newpvt = (void *)ndp;
740 	return (0);
741 }
742 
743 
744 /*ARGSUSED*/
745 static void
746 winlockmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
747 	devmap_cookie_t new_dhp1, void **newpvtp1,
748 	devmap_cookie_t new_dhp2, void **newpvtp2)
749 {
750 	SegProc	*sdp = (SegProc *)pvtp;
751 	SegLock	*lp = sdp->lp;
752 
753 	/*
754 	 * We always create PAGESIZE length mappings, so there should never
755 	 * be a partial unmapping case
756 	 */
757 	ASSERT((new_dhp1 == NULL) && (new_dhp2 == NULL));
758 
759 	mutex_enter(&lp->mutex);
760 	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
761 	/* make sure this process doesn't own the lock */
762 	if (sdp == lp->owner) {
763 		/*
764 		 * Not handling errors - i.e., errors in unloading mapping
765 		 * As part of unmapping hat/seg structure get torn down anyway
766 		 */
767 		(void) lock_giveup(lp, 0);
768 	}
769 
770 	ASSERT(sdp != lp->owner);
771 	if (sdp->lockseg == dhp) {
772 		sdp->lockseg = NULL;
773 	} else {
774 		ASSERT(sdp->unlockseg == dhp);
775 		sdp->unlockseg = NULL;
776 		sdp->flag &= ~TRASHPAGE;	/* clear flag if set */
777 	}
778 
779 	garbage_collect_lock(lp, sdp);
780 }
781 
782 /*ARGSUSED*/
783 static int
784 winlockmap_access(devmap_cookie_t dhp, void *pvt, offset_t off, size_t len,
785 	uint_t type, uint_t rw)
786 {
787 	SegProc *sdp = (SegProc *)pvt;
788 	SegLock *lp = sdp->lp;
789 	int err;
790 
791 	/* Driver handles only DEVMAP_ACCESS type of faults */
792 	if (type != DEVMAP_ACCESS)
793 		return (-1);
794 
795 	mutex_enter(&lp->mutex);
796 	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
797 
798 	/* should be using a SegProc that corresponds to current process */
799 	ASSERT(ID(sdp) == CURPROC_ID);
800 
801 	/*
802 	 * If process is faulting but does not have both segments mapped
803 	 * return error (should cause a segv).
804 	 * RFE: could give it a permanent trashpage
805 	 */
806 	if ((sdp->lockseg == NULL) || (sdp->unlockseg == NULL)) {
807 		err = -1;
808 	} else {
809 		err = seglock_lockfault(dhp, sdp, lp, rw);
810 	}
811 	mutex_exit(&lp->mutex);
812 	return (err);
813 }
814 
815 	/* INTERNAL ROUTINES START HERE */
816 
817 
818 
819 /*
820  * search the lock_list list for the specified cookie
821  * The cookie is the sy_ident field returns by ALLOC ioctl.
822  * This has two parts:
823  * the pageoffset bits contain offset into the lock page.
824  * the pagenumber bits contain the lock id.
825  * The user code is supposed to pass in only the pagenumber portion
826  *	(i.e. mask off the pageoffset bits). However the code below
827  *	does the mask in case the users are not diligent
828  * if found, returns with mutex for SegLock structure held
829  */
830 static SegLock *
831 seglock_findlock(uint_t cookie)
832 {
833 	SegLock	*lp;
834 
835 	cookie &= (uint_t)PAGEMASK;   /* remove pageoffset bits to get cookie */
836 	mutex_enter(&winlock_mutex);
837 	for (lp = lock_list; lp != NULL; lp = lp->next) {
838 		mutex_enter(&lp->mutex);
839 		if (cookie == lp->cookie) {
840 			break;	/* return with lp->mutex held */
841 		}
842 		mutex_exit(&lp->mutex);
843 	}
844 	mutex_exit(&winlock_mutex);
845 	return (lp);
846 }
847 
848 /*
849  * search the lock_list list for the specified non-zero key
850  * if found, returns with lock for SegLock structure held
851  */
852 static SegLock *
853 seglock_findkey(uint_t key)
854 {
855 	SegLock	*lp;
856 
857 	ASSERT(MUTEX_HELD(&winlock_mutex));
858 	/* The driver allows multiple locks with key 0, dont search */
859 	if (key == 0)
860 		return (NULL);
861 	for (lp = lock_list; lp != NULL; lp = lp->next) {
862 		mutex_enter(&lp->mutex);
863 		if (key == lp->key)
864 			break;
865 		mutex_exit(&lp->mutex);
866 	}
867 	return (lp);
868 }
869 
870 /*
871  * Create a new lock context.
872  * Returns with SegLock mutex held
873  */
874 
875 static SegLock *
876 seglock_createlock(enum winlock_style style)
877 {
878 	SegLock	*lp;
879 
880 	DEBUGF(3, (CE_CONT, "seglock_createlock: free_list=%p, next_lock %d\n",
881 	    (void *)lock_free_list, next_lock));
882 
883 	ASSERT(MUTEX_HELD(&winlock_mutex));
884 	if (lock_free_list != NULL) {
885 		lp = lock_free_list;
886 		lock_free_list = lp->next;
887 	} else if (next_lock >= MAX_LOCKS) {
888 		return (NULL);
889 	} else {
890 		lp = kmem_zalloc(sizeof (SegLock), KM_SLEEP);
891 		lp->cookie = (next_lock + 1) * (uint_t)PAGESIZE;
892 		mutex_init(&lp->mutex, NULL, MUTEX_DEFAULT, NULL);
893 		cv_init(&lp->locksleep, NULL, CV_DEFAULT, NULL);
894 		++next_lock;
895 	}
896 
897 	mutex_enter(&lp->mutex);
898 	ASSERT((lp->cookie/PAGESIZE) <= next_lock);
899 
900 	if (style == OLDSTYLE_LOCK) {
901 		lp->lockptr = (int *)ddi_umem_alloc(PAGESIZE,
902 		    DDI_UMEM_SLEEP, &(lp->umem_cookie));
903 	} else {
904 		lp->lockptr = ((int *)lockpage) + ((lp->cookie/PAGESIZE) - 1);
905 		lp->umem_cookie = lockpage_cookie;
906 	}
907 
908 	ASSERT(lp->lockptr != NULL);
909 	lp->style = style;
910 	lp->sleepers = 0;
911 	lp->alloccount = 1;
912 	lp->timeout = LOCKTIME*hz;
913 	lp->clients = NULL;
914 	lp->owner = NULL;
915 	LOCK(lp) = 0;
916 	lp->next = lock_list;
917 	lock_list = lp;
918 	return (lp);
919 }
920 
921 /*
922  * Routine to destory a lock structure.
923  * This routine is called while holding the lp->mutex but not the
924  * winlock_mutex.
925  */
926 
927 static void
928 seglock_destroylock(SegLock *lp)
929 {
930 	ASSERT(MUTEX_HELD(&lp->mutex));
931 	ASSERT(!MUTEX_HELD(&winlock_mutex));
932 
933 	DEBUGF(3, (CE_CONT, "destroying lock cookie %d key %d\n",
934 	    lp->cookie, lp->key));
935 
936 	ASSERT(lp->alloccount == 0);
937 	ASSERT(lp->clients == NULL);
938 	ASSERT(lp->owner == NULL);
939 	ASSERT(lp->sleepers == 0);
940 
941 	/* clean up/release fields in lp */
942 	if (lp->style == OLDSTYLE_LOCK) {
943 		ddi_umem_free(lp->umem_cookie);
944 	}
945 	lp->umem_cookie = NULL;
946 	lp->lockptr = NULL;
947 	lp->key = 0;
948 
949 	/*
950 	 * Reduce cookie by 1, makes it non page-aligned and invalid
951 	 * This prevents any valid lookup from finding this lock
952 	 * so when we drop the lock and regrab it it will still
953 	 * be there and nobody else would have attached to it
954 	 */
955 	lp->cookie--;
956 
957 	/* Drop and reacquire mutexes in right order */
958 	mutex_exit(&lp->mutex);
959 	mutex_enter(&winlock_mutex);
960 	mutex_enter(&lp->mutex);
961 
962 	/* reincrement the cookie to get the original valid cookie */
963 	lp->cookie++;
964 	ASSERT((lp->cookie & PAGEOFFSET) == 0);
965 	ASSERT(lp->alloccount == 0);
966 	ASSERT(lp->clients == NULL);
967 	ASSERT(lp->owner == NULL);
968 	ASSERT(lp->sleepers == 0);
969 
970 	/* Remove lp from lock_list */
971 	if (lock_list == lp) {
972 		lock_list = lp->next;
973 	} else {
974 		SegLock *tmp = lock_list;
975 		while (tmp->next != lp) {
976 			tmp = tmp->next;
977 			ASSERT(tmp != NULL);
978 		}
979 		tmp->next = lp->next;
980 	}
981 
982 	/* Add to lock_free_list */
983 	lp->next = lock_free_list;
984 	lock_free_list = lp;
985 	mutex_exit(&lp->mutex);
986 
987 	/* Check if all locks deleted and cleanup */
988 	if (lock_list == NULL) {
989 		lock_destroyall();
990 	}
991 
992 	mutex_exit(&winlock_mutex);
993 }
994 
995 /* Routine to find a SegProc corresponding to the tag */
996 
997 static SegProc *
998 seglock_find_specific(SegLock *lp, void *tag)
999 {
1000 	SegProc *sdp;
1001 
1002 	ASSERT(MUTEX_HELD(&lp->mutex));
1003 	ASSERT(tag != NULL);
1004 	for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
1005 		if (ID(sdp) == tag)
1006 			break;
1007 	}
1008 	return (sdp);
1009 }
1010 
1011 /* Routine to find (and if needed allocate) a SegProc corresponding to tag */
1012 
1013 static SegProc *
1014 seglock_alloc_specific(SegLock *lp, void *tag)
1015 {
1016 	SegProc *sdp;
1017 
1018 	ASSERT(MUTEX_HELD(&lp->mutex));
1019 	ASSERT(tag != NULL);
1020 
1021 	/* Search and return if existing one found */
1022 	sdp = seglock_find_specific(lp, tag);
1023 	if (sdp != NULL)
1024 		return (sdp);
1025 
1026 	DEBUGF(3, (CE_CONT, "Allocating segproc structure for tag %p lock %d\n",
1027 	    tag, lp->cookie));
1028 
1029 	/* Allocate a new SegProc */
1030 	sdp = kmem_zalloc(sizeof (SegProc), KM_SLEEP);
1031 	sdp->next = lp->clients;
1032 	lp->clients = sdp;
1033 	sdp->lp = lp;
1034 	ID(sdp) = tag;
1035 	return (sdp);
1036 }
1037 
1038 /*
1039  * search a context's client list for the given client and delete
1040  */
1041 
1042 static void
1043 seglock_deleteclient(SegLock *lp, SegProc *sdp)
1044 {
1045 	ASSERT(MUTEX_HELD(&lp->mutex));
1046 	ASSERT(lp->owner != sdp);	/* Not current owner of lock */
1047 	ASSERT(sdp->lockseg == NULL);	/* Mappings torn down */
1048 	ASSERT(sdp->unlockseg == NULL);
1049 
1050 	DEBUGF(3, (CE_CONT, "Deleting segproc structure for pid %d lock %d\n",
1051 	    ddi_get_pid(), lp->cookie));
1052 	if (lp->clients == sdp) {
1053 		lp->clients = sdp->next;
1054 	} else {
1055 		SegProc *tmp = lp->clients;
1056 		while (tmp->next != sdp) {
1057 			tmp = tmp->next;
1058 			ASSERT(tmp != NULL);
1059 		}
1060 		tmp->next = sdp->next;
1061 	}
1062 	kmem_free(sdp, sizeof (SegProc));
1063 }
1064 
1065 /*
1066  * Routine to verify if a SegProc and SegLock
1067  * structures are empty/idle.
1068  * Destroys the structures if they are ready
1069  * Can be called with sdp == NULL if want to verify only the lock state
1070  * caller should hold the lp->mutex
1071  * and this routine drops the mutex
1072  */
1073 static void
1074 garbage_collect_lock(SegLock *lp, SegProc *sdp)
1075 {
1076 	ASSERT(MUTEX_HELD(&lp->mutex));
1077 	/* see if both segments unmapped from client structure */
1078 	if ((sdp != NULL) && (sdp->lockseg == NULL) && (sdp->unlockseg == NULL))
1079 		seglock_deleteclient(lp, sdp);
1080 
1081 	/* see if this is last client in the entire lock context */
1082 	if ((lp->clients == NULL) && (lp->alloccount == 0)) {
1083 		seglock_destroylock(lp);
1084 	} else {
1085 		mutex_exit(&lp->mutex);
1086 	}
1087 }
1088 
1089 
1090 /* IOCTLS START HERE */
1091 
1092 static int
1093 seglock_grabinfo(intptr_t arg, int mode)
1094 {
1095 	int i = 1;
1096 
1097 	/* multiple clients per lock supported - see comments up top */
1098 	if (ddi_copyout((caddr_t)&i, (caddr_t)arg, sizeof (int), mode) != 0)
1099 		return (EFAULT);
1100 	return (0);
1101 }
1102 
1103 static int
1104 seglock_graballoc(intptr_t arg, enum winlock_style style, int mode) /* IOCTL */
1105 {
1106 	struct seglock	*lp;
1107 	uint_t		key;
1108 	struct		winlockalloc wla;
1109 	int		err;
1110 
1111 	if (style == OLDSTYLE_LOCK) {
1112 		key = 0;
1113 	} else {
1114 		if (ddi_copyin((caddr_t)arg, (caddr_t)&wla, sizeof (wla),
1115 		    mode)) {
1116 			return (EFAULT);
1117 		}
1118 		key = wla.sy_key;
1119 	}
1120 
1121 	DEBUGF(3, (CE_CONT,
1122 	    "seglock_graballoc: key=%u, style=%d\n", key, style));
1123 
1124 	mutex_enter(&winlock_mutex);
1125 	/* Allocate lockpage on first new style alloc */
1126 	if ((lockpage == NULL) && (style == NEWSTYLE_LOCK)) {
1127 		lockpage = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
1128 		    &lockpage_cookie);
1129 	}
1130 
1131 	/* Allocate trashpage on first alloc (any style) */
1132 	if (trashpage_cookie == NULL) {
1133 		(void) ddi_umem_alloc(PAGESIZE, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
1134 		    &trashpage_cookie);
1135 	}
1136 
1137 	if ((lp = seglock_findkey(key)) != NULL) {
1138 		DEBUGF(2, (CE_CONT, "alloc: found lock key %d cookie %d\n",
1139 		    key, lp->cookie));
1140 		++lp->alloccount;
1141 	} else if ((lp = seglock_createlock(style)) != NULL) {
1142 		DEBUGF(2, (CE_CONT, "alloc: created lock key %d cookie %d\n",
1143 		    key, lp->cookie));
1144 		lp->key = key;
1145 	} else {
1146 		DEBUGF(2, (CE_CONT, "alloc: cannot create lock key %d\n", key));
1147 		mutex_exit(&winlock_mutex);
1148 		return (ENOMEM);
1149 	}
1150 	ASSERT((lp != NULL) && MUTEX_HELD(&lp->mutex));
1151 
1152 	mutex_exit(&winlock_mutex);
1153 
1154 	if (style == OLDSTYLE_LOCK) {
1155 		err = ddi_copyout((caddr_t)&lp->cookie, (caddr_t)arg,
1156 		    sizeof (lp->cookie), mode);
1157 	} else {
1158 		wla.sy_ident = lp->cookie +
1159 		    (uint_t)((uintptr_t)(lp->lockptr) & PAGEOFFSET);
1160 		err = ddi_copyout((caddr_t)&wla, (caddr_t)arg,
1161 		    sizeof (wla), mode);
1162 	}
1163 
1164 	if (err) {
1165 		/* On error, should undo allocation */
1166 		lp->alloccount--;
1167 
1168 		/* Verify and delete if lock is unused now */
1169 		garbage_collect_lock(lp, NULL);
1170 		return (EFAULT);
1171 	}
1172 
1173 	mutex_exit(&lp->mutex);
1174 	return (0);
1175 }
1176 
1177 static int
1178 seglock_grabfree(intptr_t arg, int mode)	/* IOCTL */
1179 {
1180 	struct seglock	*lp;
1181 	uint_t	offset;
1182 
1183 	if (ddi_copyin((caddr_t)arg, &offset, sizeof (offset), mode)
1184 	    != 0) {
1185 		return (EFAULT);
1186 	}
1187 	DEBUGF(2, (CE_CONT, "seglock_grabfree: offset=%u", offset));
1188 
1189 	if ((lp = seglock_findlock(offset)) == NULL) {
1190 		DEBUGF(2, (CE_CONT, "did not find lock\n"));
1191 		return (EINVAL);
1192 	}
1193 	DEBUGF(3, (CE_CONT, " lock key %d, cookie %d, alloccount %d\n",
1194 	    lp->key, lp->cookie, lp->alloccount));
1195 
1196 	if (lp->alloccount > 0)
1197 		lp->alloccount--;
1198 
1199 	/* Verify and delete if lock is unused now */
1200 	garbage_collect_lock(lp, NULL);
1201 	return (0);
1202 }
1203 
1204 
1205 /*
1206  * Sets timeout in lock and UFLAGS in client
1207  *	the UFLAGS are stored in the client structure and persistent only
1208  *	till the unmap of the lock pages. If the process sets UFLAGS
1209  *	does a map of the lock/unlock pages and unmaps them, the client
1210  *	structure will get deleted and the UFLAGS will be lost. The process
1211  *	will need to resetup the flags.
1212  */
1213 static int
1214 seglock_settimeout(intptr_t arg, int mode)	/* IOCTL */
1215 {
1216 	SegLock		*lp;
1217 	SegProc		*sdp;
1218 	struct winlocktimeout		wlt;
1219 
1220 	if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) {
1221 		return (EFAULT);
1222 	}
1223 
1224 	if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
1225 		return (EINVAL);
1226 
1227 	lp->timeout = MSEC_TO_TICK_ROUNDUP(wlt.sy_timeout);
1228 	/* if timeout modified, wake up any sleepers */
1229 	if (lp->sleepers > 0) {
1230 		cv_broadcast(&lp->locksleep);
1231 	}
1232 
1233 	/*
1234 	 * If the process is trying to set UFLAGS,
1235 	 *	Find the client segproc and allocate one if needed
1236 	 *	Set the flags preserving the kernel flags
1237 	 * If the process is clearing UFLAGS
1238 	 *	Find the client segproc but dont allocate one if does not exist
1239 	 */
1240 	if (wlt.sy_flags & UFLAGS) {
1241 		sdp = seglock_allocclient(lp);
1242 		sdp->flag = sdp->flag & KFLAGS | wlt.sy_flags & UFLAGS;
1243 	} else if ((sdp = seglock_findclient(lp)) != NULL) {
1244 		sdp->flag = sdp->flag & KFLAGS;
1245 		/* If clearing UFLAGS leaves the segment or lock idle, delete */
1246 		garbage_collect_lock(lp, sdp);
1247 		return (0);
1248 	}
1249 	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
1250 	return (0);
1251 }
1252 
1253 static int
1254 seglock_gettimeout(intptr_t arg, int mode)
1255 {
1256 	SegLock		*lp;
1257 	SegProc		*sdp;
1258 	struct winlocktimeout		wlt;
1259 
1260 	if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0)
1261 		return (EFAULT);
1262 
1263 	if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
1264 		return (EINVAL);
1265 
1266 	wlt.sy_timeout = TICK_TO_MSEC(lp->timeout);
1267 	/*
1268 	 * If this process has an active allocated lock return those flags
1269 	 *	Dont allocate a client structure on gettimeout
1270 	 * If not, return 0.
1271 	 */
1272 	if ((sdp = seglock_findclient(lp)) != NULL) {
1273 		wlt.sy_flags = sdp->flag & UFLAGS;
1274 	} else {
1275 		wlt.sy_flags = 0;
1276 	}
1277 	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
1278 
1279 	if (ddi_copyout(&wlt, (caddr_t)arg, sizeof (wlt), mode) != 0)
1280 		return (EFAULT);
1281 
1282 	return (0);
1283 }
1284 
1285 /*
1286  * Handle lock segment faults here...
1287  *
1288  * This is where the magic happens.
1289  */
1290 
1291 /* ARGSUSED */
1292 static	int
1293 seglock_lockfault(devmap_cookie_t dhp, SegProc *sdp, SegLock *lp, uint_t rw)
1294 {
1295 	SegProc *owner = lp->owner;
1296 	int err;
1297 
1298 	ASSERT(MUTEX_HELD(&lp->mutex));
1299 	DEBUGF(3, (CE_CONT,
1300 	    "seglock_lockfault: hdl=%p, sdp=%p, lp=%p owner=%p\n",
1301 	    (void *)dhp, (void *)sdp, (void *)lp, (void *)owner));
1302 
1303 	/* lockfault is always called with sdp in current process context */
1304 	ASSERT(ID(sdp) == CURPROC_ID);
1305 
1306 	/* If Lock has no current owner, give the mapping to new owner */
1307 	if (owner == NULL) {
1308 		DEBUGF(4, (CE_CONT, " lock has no current owner\n"));
1309 		return (give_mapping(lp, sdp, rw));
1310 	}
1311 
1312 	if (owner == sdp) {
1313 		/*
1314 		 * Current owner is faulting on owned lock segment OR
1315 		 * Current owner is faulting on unlock page and has no waiters
1316 		 * Then can give the mapping to current owner
1317 		 */
1318 		if ((sdp->lockseg == dhp) || (lp->sleepers == 0)) {
1319 		DEBUGF(4, (CE_CONT, "lock owner faulting\n"));
1320 		return (give_mapping(lp, sdp, rw));
1321 		} else {
1322 		/*
1323 		 * Owner must be writing to unlock page and there are waiters.
1324 		 * other cases have been checked earlier.
1325 		 * Release the lock, owner, and owners mappings
1326 		 * As the owner is trying to write to the unlock page, leave
1327 		 * it with a trashpage mapping and wake up the sleepers
1328 		 */
1329 		ASSERT((dhp == sdp->unlockseg) && (lp->sleepers != 0));
1330 		DEBUGF(4, (CE_CONT, " owner fault on unlock seg w/ sleeper\n"));
1331 		return (lock_giveup(lp, 1));
1332 		}
1333 	}
1334 
1335 	ASSERT(owner != sdp);
1336 
1337 	/*
1338 	 * If old owner faulting on trash unlock mapping,
1339 	 * load hat mappings to trash page
1340 	 * RFE: non-owners should NOT be faulting on unlock mapping as they
1341 	 * as first supposed to fault on the lock seg. We could give them
1342 	 * a trash page or return error.
1343 	 */
1344 	if ((sdp->unlockseg == dhp) && (sdp->flag & TRASHPAGE)) {
1345 		DEBUGF(4, (CE_CONT, " old owner reloads trash mapping\n"));
1346 		return (devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
1347 		    DEVMAP_ACCESS, rw));
1348 	}
1349 
1350 	/*
1351 	 * Non-owner faulting. Need to check current LOCK state.
1352 	 *
1353 	 * Before reading lock value in LOCK(lp), we must make sure that
1354 	 * the owner cannot change its value before we change mappings
1355 	 * or else we could end up either with a hung process
1356 	 * or more than one process thinking they have the lock.
1357 	 * We do that by unloading the owner's mappings
1358 	 */
1359 	DEBUGF(4, (CE_CONT, " owner loses mappings to check lock state\n"));
1360 	err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
1361 	err |= devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
1362 	if (err != 0)
1363 		return (err);	/* unable to remove owner mapping */
1364 
1365 	/*
1366 	 * If lock is not held, then current owner mappings were
1367 	 * unloaded above and we can give the lock to the new owner
1368 	 */
1369 	if (LOCK(lp) == 0) {
1370 		DEBUGF(4, (CE_CONT,
1371 		    "Free lock (%p): Giving mapping to new owner %d\n",
1372 		    (void *)lp, ddi_get_pid()));
1373 		return (give_mapping(lp, sdp, rw));
1374 	}
1375 
1376 	DEBUGF(4, (CE_CONT, "  lock held, sleeping\n"));
1377 
1378 	/*
1379 	 * A non-owning process tried to write (presumably to the lockpage,
1380 	 * but it doesn't matter) but the lock is held; we need to sleep for
1381 	 * the lock while there is an owner.
1382 	 */
1383 
1384 	lp->sleepers++;
1385 	while ((owner = lp->owner) != NULL) {
1386 		int rval;
1387 
1388 		if ((lp->timeout == 0) || (owner->flag & SY_NOTIMEOUT)) {
1389 			/*
1390 			 * No timeout has been specified for this lock;
1391 			 * we'll simply sleep on the condition variable.
1392 			 */
1393 			rval = cv_wait_sig(&lp->locksleep, &lp->mutex);
1394 		} else {
1395 			/*
1396 			 * A timeout _has_ been specified for this lock. We need
1397 			 * to wake up and possibly steal this lock if the owner
1398 			 * does not let it go. Note that all sleepers on a lock
1399 			 * with a timeout wait; the sleeper with the earliest
1400 			 * timeout will wakeup, and potentially steal the lock
1401 			 * Stealing the lock will cause a broadcast on the
1402 			 * locksleep cv and thus kick the other timed waiters
1403 			 * and cause everyone to restart in a new timedwait
1404 			 */
1405 			rval = cv_reltimedwait_sig(&lp->locksleep,
1406 			    &lp->mutex, lp->timeout, TR_CLOCK_TICK);
1407 		}
1408 
1409 		/*
1410 		 * Timeout and still old owner - steal lock
1411 		 * Force-Release lock and give old owner a trashpage mapping
1412 		 */
1413 		if ((rval == -1) && (lp->owner == owner)) {
1414 			/*
1415 			 * if any errors in lock_giveup, go back and sleep/retry
1416 			 * If successful, will break out of loop
1417 			 */
1418 			cmn_err(CE_NOTE, "Process %d timed out on lock %d\n",
1419 			    ddi_get_pid(), lp->cookie);
1420 			(void) lock_giveup(lp, 1);
1421 		} else if (rval == 0) { /* signal pending */
1422 			cmn_err(CE_NOTE,
1423 			    "Process %d signalled while waiting on lock %d\n",
1424 			    ddi_get_pid(), lp->cookie);
1425 			lp->sleepers--;
1426 			return (FC_MAKE_ERR(EINTR));
1427 		}
1428 	}
1429 
1430 	lp->sleepers--;
1431 	/*
1432 	 * Give mapping to this process and save a fault later
1433 	 */
1434 	return (give_mapping(lp, sdp, rw));
1435 }
1436 
1437 /*
1438  * Utility: give a valid mapping to lock and unlock pages to current process.
1439  * Caller responsible for unloading old owner's mappings
1440  */
1441 
1442 static int
1443 give_mapping(SegLock *lp, SegProc *sdp, uint_t rw)
1444 {
1445 	int err = 0;
1446 
1447 	ASSERT(MUTEX_HELD(&lp->mutex));
1448 	ASSERT(!((lp->owner == NULL) && (LOCK(lp) != 0)));
1449 	/* give_mapping is always called with sdp in current process context */
1450 	ASSERT(ID(sdp) == CURPROC_ID);
1451 
1452 	/* remap any old trash mappings */
1453 	if (sdp->flag & TRASHPAGE) {
1454 		/* current owner should not have a trash mapping */
1455 		ASSERT(sdp != lp->owner);
1456 
1457 		DEBUGF(4, (CE_CONT,
1458 		    "new owner %d remapping old trash mapping\n",
1459 		    ddi_get_pid()));
1460 		if ((err = devmap_umem_remap(sdp->unlockseg, winlock_dip,
1461 		    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
1462 			/*
1463 			 * unable to remap old trash page,
1464 			 * abort before changing owner
1465 			 */
1466 			DEBUGF(4, (CE_CONT,
1467 			    "aborting: error in umem_remap %d\n", err));
1468 			return (err);
1469 		}
1470 		sdp->flag &= ~TRASHPAGE;
1471 	}
1472 
1473 	/* we have a new owner now */
1474 	lp->owner = sdp;
1475 
1476 	if ((err = devmap_load(sdp->lockseg, lp->cookie, PAGESIZE,
1477 	    DEVMAP_ACCESS, rw)) != 0) {
1478 		return (err);
1479 	}
1480 	DEBUGF(4, (CE_CONT, "new owner %d gets lock mapping", ddi_get_pid()));
1481 
1482 	if (lp->sleepers) {
1483 		/* Force unload unlock mapping if there are waiters */
1484 		DEBUGF(4, (CE_CONT,
1485 		    " lock has %d sleepers => remove unlock mapping\n",
1486 		    lp->sleepers));
1487 		err = devmap_unload(sdp->unlockseg, lp->cookie, PAGESIZE);
1488 	} else {
1489 		/*
1490 		 * while here, give new owner a valid mapping to unlock
1491 		 * page so we don't get called again.
1492 		 */
1493 		DEBUGF(4, (CE_CONT, " and unlock mapping\n"));
1494 		err = devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
1495 		    DEVMAP_ACCESS, PROT_WRITE);
1496 	}
1497 	return (err);
1498 }
1499 
1500 /*
1501  * Unload owner's mappings, release the lock and wakeup any sleepers
1502  * If trash, then the old owner is given a trash mapping
1503  *	=> old owner held lock too long and caused a timeout
1504  */
1505 static int
1506 lock_giveup(SegLock *lp, int trash)
1507 {
1508 	SegProc *owner = lp->owner;
1509 
1510 	DEBUGF(4, (CE_CONT, "winlock_giveup: lp=%p, owner=%p, trash %d\n",
1511 	    (void *)lp, (void *)ID(lp->owner), trash));
1512 
1513 	ASSERT(MUTEX_HELD(&lp->mutex));
1514 	ASSERT(owner != NULL);
1515 
1516 	/*
1517 	 * owner loses lockpage/unlockpage mappings and gains a
1518 	 * trashpage mapping, if needed.
1519 	 */
1520 	if (!trash) {
1521 		/*
1522 		 * We do not handle errors in devmap_unload in the !trash case,
1523 		 * as the process is attempting to unmap/exit or otherwise
1524 		 * release the lock. Errors in unloading the mapping are not
1525 		 * going to affect that (unmap does not take error return).
1526 		 */
1527 		(void) devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
1528 		(void) devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
1529 	} else {
1530 		int err;
1531 
1532 		if (err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE)) {
1533 			/* error unloading lockseg mapping. abort giveup */
1534 			return (err);
1535 		}
1536 
1537 		/*
1538 		 * old owner gets mapping to trash page so it can continue
1539 		 * devmap_umem_remap does a hat_unload (and does it holding
1540 		 * the right locks), so no need to devmap_unload on unlockseg
1541 		 */
1542 		if ((err = devmap_umem_remap(owner->unlockseg, winlock_dip,
1543 		    trashpage_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
1544 			/* error remapping to trash page, abort giveup */
1545 			return (err);
1546 		}
1547 		owner->flag |= TRASHPAGE;
1548 		/*
1549 		 * Preload mapping to trash page by calling devmap_load
1550 		 * However, devmap_load can only be called on the faulting
1551 		 * process context and not on the owner's process context
1552 		 * we preload only if we happen to be in owner process context
1553 		 * Other processes will fault on the unlock mapping
1554 		 * and be given a trash mapping at that time.
1555 		 */
1556 		if (ID(owner) == CURPROC_ID) {
1557 			(void) devmap_load(owner->unlockseg, lp->cookie,
1558 			    PAGESIZE, DEVMAP_ACCESS, PROT_WRITE);
1559 		}
1560 	}
1561 
1562 	lp->owner = NULL;
1563 
1564 	/* Clear the lock value in underlying page so new owner can grab it */
1565 	LOCK(lp) = 0;
1566 
1567 	if (lp->sleepers) {
1568 		DEBUGF(4, (CE_CONT, "  waking up, lp=%p\n", (void *)lp));
1569 		cv_broadcast(&lp->locksleep);
1570 	}
1571 	return (0);
1572 }
1573 
1574 /*
1575  * destroy all allocated memory.
1576  */
1577 
1578 static void
1579 lock_destroyall(void)
1580 {
1581 	SegLock	*lp, *lpnext;
1582 
1583 	ASSERT(MUTEX_HELD(&winlock_mutex));
1584 	ASSERT(lock_list == NULL);
1585 
1586 	DEBUGF(1, (CE_CONT, "Lock list empty. Releasing free list\n"));
1587 	for (lp = lock_free_list; lp != NULL; lp = lpnext) {
1588 		mutex_enter(&lp->mutex);
1589 		lpnext =  lp->next;
1590 		ASSERT(lp->clients == NULL);
1591 		ASSERT(lp->owner == NULL);
1592 		ASSERT(lp->alloccount == 0);
1593 		mutex_destroy(&lp->mutex);
1594 		cv_destroy(&lp->locksleep);
1595 		kmem_free(lp, sizeof (SegLock));
1596 	}
1597 	lock_free_list = NULL;
1598 	next_lock = 0;
1599 }
1600 
1601 
1602 /* RFE: create mdb walkers instead of dump routines? */
1603 static void
1604 seglock_dump_all(void)
1605 {
1606 	SegLock	*lp;
1607 
1608 	mutex_enter(&winlock_mutex);
1609 	cmn_err(CE_CONT, "ID\tKEY\tNALLOC\tATTCH\tOWNED\tLOCK\tWAITER\n");
1610 
1611 	cmn_err(CE_CONT, "Lock List:\n");
1612 	for (lp = lock_list; lp != NULL; lp = lp->next) {
1613 		mutex_enter(&lp->mutex);
1614 		cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
1615 		    lp->cookie, lp->key, lp->alloccount,
1616 		    lp->clients ? 'Y' : 'N',
1617 		    lp->owner ? 'Y' : 'N',
1618 		    lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
1619 		    lp->sleepers);
1620 		mutex_exit(&lp->mutex);
1621 	}
1622 	cmn_err(CE_CONT, "Free Lock List:\n");
1623 	for (lp = lock_free_list; lp != NULL; lp = lp->next) {
1624 		mutex_enter(&lp->mutex);
1625 		cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
1626 		    lp->cookie, lp->key, lp->alloccount,
1627 		    lp->clients ? 'Y' : 'N',
1628 		    lp->owner ? 'Y' : 'N',
1629 		    lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
1630 		    lp->sleepers);
1631 		mutex_exit(&lp->mutex);
1632 	}
1633 
1634 #ifdef DEBUG
1635 	if (lock_debug < 3) {
1636 		mutex_exit(&winlock_mutex);
1637 		return;
1638 	}
1639 
1640 	for (lp = lock_list; lp != NULL; lp = lp->next) {
1641 		SegProc	*sdp;
1642 
1643 		mutex_enter(&lp->mutex);
1644 		cmn_err(CE_CONT,
1645 		    "lock %p, key=%d, cookie=%d, nalloc=%u, lock=%d, wait=%d\n",
1646 		    (void *)lp, lp->key, lp->cookie, lp->alloccount,
1647 		    lp->lockptr != 0 ? LOCK(lp) : -1, lp->sleepers);
1648 
1649 		cmn_err(CE_CONT,
1650 		    "style=%d, lockptr=%p, timeout=%ld, clients=%p, owner=%p\n",
1651 		    lp->style, (void *)lp->lockptr, lp->timeout,
1652 		    (void *)lp->clients, (void *)lp->owner);
1653 
1654 
1655 		for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
1656 			cmn_err(CE_CONT, "  client %p%s, lp=%p, flag=%x, "
1657 			    "process tag=%p, lockseg=%p, unlockseg=%p\n",
1658 			    (void *)sdp, sdp == lp->owner ? " (owner)" : "",
1659 			    (void *)sdp->lp, sdp->flag, (void *)ID(sdp),
1660 			    (void *)sdp->lockseg, (void *)sdp->unlockseg);
1661 		}
1662 		mutex_exit(&lp->mutex);
1663 	}
1664 #endif
1665 	mutex_exit(&winlock_mutex);
1666 }
1667 
1668 #include <sys/modctl.h>
1669 
1670 static struct modldrv modldrv = {
1671 	&mod_driverops,		/* Type of module.  This one is a driver */
1672 	"Winlock Driver",	/* Name of the module */
1673 	&winlock_ops,		/* driver ops */
1674 };
1675 
1676 static struct modlinkage modlinkage = {
1677 	MODREV_1,
1678 	(void *)&modldrv,
1679 	0,
1680 	0,
1681 	0
1682 };
1683 
1684 int
1685 _init(void)
1686 {
1687 	int e;
1688 
1689 	mutex_init(&winlock_mutex, NULL, MUTEX_DEFAULT, NULL);
1690 	e = mod_install(&modlinkage);
1691 	if (e) {
1692 		mutex_destroy(&winlock_mutex);
1693 	}
1694 	return (e);
1695 }
1696 
1697 
1698 int
1699 _info(struct modinfo *modinfop)
1700 {
1701 	return (mod_info(&modlinkage, modinfop));
1702 }
1703 
1704 int
1705 _fini(void)
1706 {
1707 	int	e;
1708 
1709 	e = mod_remove(&modlinkage);
1710 	if (e == 0) {
1711 		mutex_destroy(&winlock_mutex);
1712 	}
1713 	return (e);
1714 }
1715