xref: /titanic_50/usr/src/uts/common/io/winlockio.c (revision b86efd96f8acd85ddaa930a2f0c1d664237e4aaf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This is the lock device driver.
31  *
32  * The lock driver provides a variation of inter-process mutexes with the
33  * following twist in semantics:
34  *	A waiter for a lock after a set timeout can "break" the lock and
35  *	grab it from the current owner (without informing the owner).
36  *
37  * These semantics result in temporarily multiple processes thinking they
38  * own the lock. This usually does not make sense for cases where locks are
39  * used to protect a critical region and it is important to serialize access
40  * to data structures, since breaking the lock also loses the serialization
41  * and can result in corrupt data structures.
42  *
43  * The usage for winlock driver is primarily driven by the graphics system
44  * when doing DGA (direct graphics access) graphics. The locks are used to
45  * protect access to the frame buffer (presumably reflects back to the screen)
46  * between competing processes that directly write to the screen as opposed
47  * to going through the window server etc.
48  * In this case, the result of breaking the lock at worst causes the screen
49  * image to be distorted and is easily fixed by doing a "refresh"
50  *
51  * In well-behaved applications, the lock is held for a very short time and
52  * the breaking semantics do not come into play. Not having this feature and
53  * using normal inter-process mutexes would let a misbehaved application
54  * grab the screen writing capability from the window manager and
55  * effectively make the system look like it is hung (mouse pointer does not
56  * move).
57  *
58  * A secondary aspect of the winlock driver is that it allows for extremely
59  * fast lock acquire/release in cases where there is low contention. A memory
60  * write is all that is needed (not even a function call). And the window
61  * manager is usually the only DGA writer and this is optimized for. Occasionally
62  * some processes might do DGA graphics and cause kernel faults to handle
63  * the contention/locking (and that has got to be slow!).
64  *
65  * The following IOCTLs are supported:
66  *
67  *   GRABPAGEALLOC:
68  *	Compatibility with old cgsix device driver lockpage ioctls.
69  *	Lockpages created this way must be an entire page for compatibility with
70  *	older software.	 This ioctl allocates a lock context with its own
71  *	private lock page.  The unique "ident" that identifies this lock is
72  *	returned.
73  *
74  *   GRABPAGEFREE:
75  *	Compatibility with cgsix device driver lockpage ioctls.	 This
76  *	ioctl releases the lock context allocated by GRABPAGEALLOC.
77  *
78  *   GRABLOCKINFO:
79  *	Returns a one-word flag.  '1' means that multiple clients may
80  *	access this lock page.	Older device drivers returned '0',
81  *	meaning that only two clients could access a lock page.
82  *
83  *   GRABATTACH:
84  *	Not supported.	This ioctl would have grabbed all lock pages
85  *	on behalf of the calling program.
86  *
87  *   WINLOCKALLOC:
88  *	Allocate a lock context.  This ioctl accepts a key value as
89  *	its argument.  If the key is zero, a new lock context is
90  *	created, and its "ident" is returned.	If the key is nonzero,
91  *	all existing contexts are checked to see if they match the
92  *	key.  If a match is found, its reference count is incremented
93  *	and its ident is returned, otherwise a new context is created
94  *	and its ident is returned.
95  *
96  *   WINLOCKFREE:
97  *	Free a lock context.  This ioctl accepts the ident of a lock
98  *	context and decrements its reference count.  Once the reference
99  *	count reaches zero *and* all mappings are released, the lock
100  *	context is freed.  When all the lock context in the lock page are
101  *	freed, the lock page is freed as well.
102  *
103  *   WINLOCKSETTIMEOUT:
104  *	Set lock timeout for a context.	 This ioctl accepts the ident
105  *	of a lock context and a timeout value in milliseconds.
106  *	Whenever lock contention occurs, the timer is started and the lock is
107  *	broken after the timeout expires. If timeout value is zero, lock does
108  *	not timeout.  This value will be rounded to the nearest clock
109  *	tick, so don't try to use it for real-time control or something.
110  *
111  *   WINLOCKGETTIMEOUT:
112  *	Get lock timeout from a context.
113  *
114  *   WINLOCKDUMP:
115  *	Dump state of this device.
116  *
117  *
118  * How /dev/winlock works:
119  *
120  *   Every lock context consists of two mappings for the client to the lock
121  *   page.  These mappings are known as the "lock page" and "unlock page"
122  *   to the client. The first mmap to the lock context (identified by the
123  *   sy_ident field returned during alloc) allocates a mapping to the lock page,
124  *   the second mmap allocates a mapping to the unlock page.
125  *	The mappings don't have to be ordered in virtual address space, but do
126  *   need to be ordered in time. Mapping and unmapping of these lock and unlock
127  *   pages should happen in pairs. Doing them one at a time or unmapping one
128  *   and leaving one mapped etc cause undefined behaviors.
129  *	The mappings are always of length PAGESIZE, and type MAP_SHARED.
130  *
131  *   The first ioctl is to ALLOC a lock, either based on a key (if trying to
132  *	grab a preexisting lock) or 0 (gets a default new one)
133  *	This ioctl returns a value in sy_ident which is needed to do the
134  *	later mmaps and FREE/other ioctls.
135  *
136  *   The "page number" portion of the sy_ident needs to be passed as the
137  *	file offset when doing an mmap for both the lock page and unlock page
138  *
139  *   The value returned by mmap ( a user virtual address) needs to be
140  *	incremented by the "page offset" portion of sy_ident to obtain the
141  *	pointer to the actual lock. (Skipping this step, does not cause any
142  *	visible error, but the process will be using the wrong lock!)
143  *
144  *	On a fork(), the child process will inherit the mappings for free, but
145  *   will not inherit the parent's lock ownership if any. The child should NOT
146  *   do an explicit FREE on the lock context unless it did an explicit ALLOC.
147  *	Only one process at a time is allowed to have a valid hat
148  *   mapping to a lock page. This is enforced by this driver.
149  *   A client acquires a lock by writing a '1' to the lock page.
150  *   Note that it is not necessary to read and verify that the lock is '0'
151  *	prior to writing a '1' in it.
152  *   If it does not already have a valid mapping to that page, the driver
153  *   takes a fault (devmap_access), loads the client mapping
154  *   and allows the client to continue.	 The client releases the lock by
155  *   writing a '0' to the unlock page.	Again, if it does not have a valid
156  *   mapping to the unlock page, the segment driver takes a fault,
157  *   loads the mapping, and lets the client continue.  From this point
158  *   forward, the client can make as many locks and unlocks as it
159  *   wants, without any more faults into the kernel.
160  *
161  *   If a different process wants to acquire a lock, it takes a page fault
162  *   when it writes the '1' to the lock page.  If the segment driver sees
163  *   that the lock page contained a zero, then it invalidates the owner's
164  *   mappings and gives the mappings to this process.
165  *
166  *   If there is already a '1' in the lock page when the second client
167  *   tries to access the lock page, then a lock exists.	 The segment
168  *   driver sleeps the second client and, if applicable, starts the
169  *   timeout on the lock.  The owner's mapping to the unlock page
170  *   is invalidated so that the driver will be woken again when the owner
171  *   releases the lock.
172  *
173  *   When the locking client finally writes a '0' to the unlock page, the
174  *   segment driver takes another fault.  The client is given a valid
175  *   mapping, not to the unlock page, but to the "trash page", and allowed
176  *   to continue.  Meanwhile, the sleeping client is given a valid mapping
177  *   to the lock/unlock pages and allowed to continue as well.
178  *
179  * RFE: There is a leak if process exits before freeing allocated locks
180  * But currently not tracking which locks were allocated by which
181  * process and we do not have a clean entry point into the driver
182  * to do garbage collection. If the interface used a file descriptor for each
183  * lock it allocs, then the driver can free up stuff in the _close routine
184  */
185 
186 #include <sys/types.h>		/* various type defn's */
187 #include <sys/debug.h>
188 #include <sys/param.h>		/* various kernel limits */
189 #include <sys/time.h>
190 #include <sys/errno.h>
191 #include <sys/kmem.h>		/* defines kmem_alloc() */
192 #include <sys/conf.h>		/* defines cdevsw */
193 #include <sys/file.h>		/* various file modes, etc. */
194 #include <sys/uio.h>		/* UIO stuff */
195 #include <sys/ioctl.h>
196 #include <sys/cred.h>		/* defines cred struct */
197 #include <sys/mman.h>		/* defines mmap(2) parameters */
198 #include <sys/stat.h>		/* defines S_IFCHR */
199 #include <sys/cmn_err.h>	/* use cmn_err */
200 #include <sys/ddi.h>		/* ddi stuff */
201 #include <sys/sunddi.h>		/* ddi stuff */
202 #include <sys/ddi_impldefs.h>	/* ddi stuff */
203 #include <sys/winlockio.h>	/* defines ioctls, flags, data structs */
204 
205 static int	winlock_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
206 static int	winlock_devmap(dev_t, devmap_cookie_t, offset_t, size_t,
207 			size_t *, uint_t);
208 static int	winlocksegmap(dev_t, off_t, struct as *, caddr_t *, off_t,
209 			uint_t, uint_t, uint_t, cred_t *);
210 
211 static struct cb_ops	winlock_cb_ops = {
212 	nulldev,		/* open */
213 	nulldev,		/* close */
214 	nodev,			/* strategy */
215 	nodev,			/* print */
216 	nodev,			/* dump */
217 	nodev,			/* read */
218 	nodev,			/* write */
219 	winlock_ioctl,		/* ioctl */
220 	winlock_devmap,		/* devmap */
221 	nodev,			/* mmap */
222 	winlocksegmap,		/* segmap */
223 	nochpoll,		/* poll */
224 	ddi_prop_op,		/* prop_op */
225 	NULL,			/* streamtab */
226 	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
227 	0,			/* rev */
228 	nodev,			/* aread */
229 	nodev			/* awrite */
230 };
231 
232 static int winlock_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
233 static int winlock_attach(dev_info_t *, ddi_attach_cmd_t);
234 static int winlock_detach(dev_info_t *, ddi_detach_cmd_t);
235 
236 static struct dev_ops	winlock_ops = {
237 	DEVO_REV,
238 	0,			/* refcount */
239 	winlock_info,		/* info */
240 	nulldev,		/* identify */
241 	nulldev,		/* probe */
242 	winlock_attach,		/* attach */
243 	winlock_detach,		/* detach */
244 	nodev,			/* reset */
245 	&winlock_cb_ops,	/* driver ops */
246 	NULL,			/* bus ops */
247 	NULL			/* power */
248 };
249 
250 static int winlockmap_map(devmap_cookie_t, dev_t, uint_t, offset_t, size_t,
251 		void **);
252 static void winlockmap_unmap(devmap_cookie_t, void *, offset_t, size_t,
253 		devmap_cookie_t, void **, devmap_cookie_t, void **);
254 static int winlockmap_dup(devmap_cookie_t, void *,
255 		devmap_cookie_t, void **);
256 static int winlockmap_access(devmap_cookie_t, void *, offset_t, size_t,
257 		uint_t, uint_t);
258 
259 static
260 struct devmap_callback_ctl winlockmap_ops = {
261 	DEVMAP_OPS_REV,
262 	winlockmap_map,
263 	winlockmap_access,
264 	winlockmap_dup,
265 	winlockmap_unmap,
266 };
267 
268 #if DEBUG
269 static	int	lock_debug = 0;
270 #define	DEBUGF(level, args)	{ if (lock_debug >= (level)) cmn_err args; }
271 #else
272 #define	DEBUGF(level, args)
273 #endif
274 
275 /* Driver supports two styles of locks */
276 enum winlock_style { NEWSTYLE_LOCK, OLDSTYLE_LOCK };
277 
278 /*
279  * These structures describe a lock context.  We permit multiple
280  * clients (not just two) to access a lock page
281  *
282  * The "cookie" identifies the lock context. It is the page number portion
283  * sy_ident returned on lock allocation. Cookie is used in later ioctls.
284  * "cookie" is lockid * PAGESIZE
285  * "lockptr" is the kernel virtual address to the lock itself
286  * The page offset portion of lockptr is the page offset portion of sy_ident
287  */
288 
289 /*
290  * per-process information about locks.  This is the private field of
291  * a devmap mapping.  Note that usually *two* mappings point to this.
292  */
293 
294 /*
295  * Each process using winlock is associated with a segproc structure
296  * In various driver entry points, we need to search to find the right
297  * segproc structure (If we were using file handles for each lock this
298  * would not have been necessary).
299  * It would have been simple to use the process pid (and ddi_get_pid)
300  * However, during fork devmap_dup is called in the parent process context
301  * and using the pid complicates the code by introducing orphans.
302  * Instead we use the as pointer for the process as a cookie
303  * which requires delving into various non-DDI kosher structs
304  */
305 typedef struct segproc {
306 	struct segproc	*next;		/* next client of this lock */
307 	struct seglock	*lp;		/* associated lock context */
308 	devmap_cookie_t	lockseg;	/* lock mapping, if any */
309 	devmap_cookie_t unlockseg;	/* unlock mapping, if any */
310 	void		*tag;		/* process as pointer as tag */
311 	uint_t		flag;		/* see "flag bits" in winlockio.h */
312 } SegProc;
313 
314 #define	ID(sdp)		((sdp)->tag)
315 #define	CURPROC_ID	(void *)(curproc->p_as)
316 
317 /* per lock context information */
318 
319 typedef struct seglock {
320 	struct seglock	*next;		/* next lock */
321 	uint_t		sleepers;	/* nthreads sleeping on this lock */
322 	uint_t		alloccount;	/* how many times created? */
323 	uint_t		cookie;		/* mmap() offset (page #) into device */
324 	uint_t		key;		/* key, if any */
325 	enum winlock_style	style;	/* style of lock - OLDSTYLE, NEWSTYLE */
326 	clock_t		timeout;	/* sleep time in ticks */
327 	ddi_umem_cookie_t umem_cookie;	/* cookie for umem allocated memory */
328 	int		*lockptr;	/* kernel virtual addr of lock */
329 	struct segproc	*clients;	/* list of clients of this lock */
330 	struct segproc	*owner;		/* current owner of lock */
331 	kmutex_t	mutex;		/* mutex for lock */
332 	kcondvar_t	locksleep;	/* for sleeping on lock */
333 } SegLock;
334 
335 #define	LOCK(lp)	(*((lp)->lockptr))
336 
337 /*
338  * Number of locks that can fit in a page. Driver can support only that many.
339  * For oldstyle locks, it is relatively easy to increase the limit as each
340  * is in a separate page (MAX_LOCKS mostly serves to prevent runaway allocation).
341  * For newstyle locks, this is trickier as the code needs to allow for mapping
342  * into the second or third page of the cookie for some locks.
343  */
344 #define	MAX_LOCKS	(PAGESIZE/sizeof (int))
345 
346 #define	LOCKTIME	3	/* Default lock timeout in seconds */
347 
348 
349 /* Protections setting for winlock user mappings */
350 #define	WINLOCK_PROT	(PROT_READ|PROT_WRITE|PROT_USER)
351 
352 /*
353  * The trash page is where unwanted writes go
354  * when a process is releasing a lock.
355  */
356 static	ddi_umem_cookie_t trashpage_cookie = NULL;
357 
358 /* For newstyle allocations a common page of locks is used */
359 static	caddr_t	lockpage = NULL;
360 static	ddi_umem_cookie_t lockpage_cookie = NULL;
361 
362 static	dev_info_t	*winlock_dip = NULL;
363 static	kmutex_t	winlock_mutex;
364 
365 /*
366  * winlock_mutex protects
367  *	lock_list
368  *	lock_free_list
369  *	"next" field in SegLock
370  *	next_lock
371  *	trashpage_cookie
372  *	lockpage & lockpage_cookie
373  *
374  * SegLock_mutex protects
375  *	rest of fields in SegLock
376  *	All fields in list of SegProc (lp->clients)
377  *
378  * Lock ordering is winlock_mutex->SegLock_mutex
379  * During devmap/seg operations SegLock_mutex acquired without winlock_mutex
380  *
381  * During devmap callbacks, the pointer to SegProc is stored as the private
382  * data in the devmap handle. This pointer will not go stale (i.e., the
383  * SegProc getting deleted) as the SegProc is not deleted until both the
384  * lockseg and unlockseg have been unmapped and the pointers stored in
385  * the devmap handles have been NULL'ed.
386  * But before this pointer is used to access any fields (other than the 'lp')
387  * lp->mutex must be held.
388  */
389 
390 /*
391  * The allocation code tries to allocate from lock_free_list
392  * first, otherwise it uses kmem_zalloc.  When lock list is idle, all
393  * locks in lock_free_list are kmem_freed
394  */
395 static	SegLock	*lock_list = NULL;		/* in-use locks */
396 static	SegLock	*lock_free_list = NULL;		/* free locks */
397 static	int	next_lock = 0;			/* next lock cookie */
398 
399 /* Routines to find a lock in lock_list based on offset or key */
400 static SegLock *seglock_findlock(uint_t);
401 static SegLock *seglock_findkey(uint_t);
402 
403 /* Routines to find and allocate SegProc structures */
404 static SegProc *seglock_find_specific(SegLock *, void *);
405 static SegProc *seglock_alloc_specific(SegLock *, void *);
406 #define	seglock_findclient(lp)	seglock_find_specific((lp), CURPROC_ID)
407 #define	seglock_allocclient(lp)	seglock_alloc_specific((lp), CURPROC_ID)
408 
409 /* Delete client from lock's client list */
410 static void seglock_deleteclient(SegLock *, SegProc *);
411 static void garbage_collect_lock(SegLock *, SegProc *);
412 
413 /* Create a new lock */
414 static SegLock *seglock_createlock(enum winlock_style);
415 /* Destroy lock */
416 static void seglock_destroylock(SegLock *);
417 static void lock_destroyall(void);
418 
419 /* Helper functions in winlockmap_access */
420 static int give_mapping(SegLock *, SegProc *, uint_t);
421 static int lock_giveup(SegLock *, int);
422 static int seglock_lockfault(devmap_cookie_t, SegProc *, SegLock *, uint_t);
423 
424 /* routines called from ioctl */
425 static int seglock_graballoc(intptr_t, enum winlock_style, int);
426 static int seglock_grabinfo(intptr_t, int);
427 static int seglock_grabfree(intptr_t, int);
428 static int seglock_gettimeout(intptr_t, int);
429 static int seglock_settimeout(intptr_t, int);
430 static void seglock_dump_all(void);
431 
432 static	int
433 winlock_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
434 {
435 	DEBUGF(1, (CE_CONT, "winlock_attach, devi=%p, cmd=%d\n",
436 		(void *)devi, (int)cmd));
437 	if (cmd != DDI_ATTACH)
438 		return (DDI_FAILURE);
439 	if (ddi_create_minor_node(devi, "winlock", S_IFCHR, 0, DDI_PSEUDO, 0)
440 	    == DDI_FAILURE) {
441 		return (DDI_FAILURE);
442 	}
443 	winlock_dip = devi;
444 	ddi_report_dev(devi);
445 	return (DDI_SUCCESS);
446 }
447 
448 /*ARGSUSED*/
449 static	int
450 winlock_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
451 {
452 	DEBUGF(1, (CE_CONT, "winlock_detach, devi=%p, cmd=%d\n",
453 		(void *)devi, (int)cmd));
454 	if (cmd != DDI_DETACH)
455 		return (DDI_FAILURE);
456 
457 	mutex_enter(&winlock_mutex);
458 	if (lock_list != NULL) {
459 		mutex_exit(&winlock_mutex);
460 		return (DDI_FAILURE);
461 	}
462 	ASSERT(lock_free_list == NULL);
463 
464 	DEBUGF(1, (CE_CONT, "detach freeing trashpage and lockpage\n"));
465 	/* destroy any common stuff created */
466 	if (trashpage_cookie != NULL) {
467 		ddi_umem_free(trashpage_cookie);
468 		trashpage_cookie = NULL;
469 	}
470 	if (lockpage != NULL) {
471 		ddi_umem_free(lockpage_cookie);
472 		lockpage = NULL;
473 		lockpage_cookie = NULL;
474 	}
475 	winlock_dip = NULL;
476 	mutex_exit(&winlock_mutex);
477 	return (DDI_SUCCESS);
478 }
479 
480 /*ARGSUSED*/
481 static	int
482 winlock_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
483 {
484 	register int error;
485 
486 	/* initialize result */
487 	*result = NULL;
488 
489 	/* only valid instance (i.e., getminor) is 0 */
490 	if (getminor((dev_t)arg) >= 1)
491 		return (DDI_FAILURE);
492 
493 	switch (infocmd) {
494 	case DDI_INFO_DEVT2DEVINFO:
495 		if (winlock_dip == NULL)
496 			error = DDI_FAILURE;
497 		else {
498 			*result = (void *)winlock_dip;
499 			error = DDI_SUCCESS;
500 		}
501 		break;
502 	case DDI_INFO_DEVT2INSTANCE:
503 		*result = (void *)0;
504 		error = DDI_SUCCESS;
505 		break;
506 	default:
507 		error = DDI_FAILURE;
508 	}
509 	return (error);
510 }
511 
512 
513 /*ARGSUSED*/
514 int
515 winlock_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
516 	cred_t *cred, int *rval)
517 {
518 	DEBUGF(1, (CE_CONT, "winlockioctl: cmd=%d, arg=0x%p\n",
519 		cmd, (void *)arg));
520 
521 	switch (cmd) {
522 	/*
523 	 * ioctls that used to be handled by framebuffers (defined in fbio.h)
524 	 * RFE: No code really calls the GRAB* ioctls now. Should EOL.
525 	 */
526 
527 	case GRABPAGEALLOC:
528 		return (seglock_graballoc(arg, OLDSTYLE_LOCK, mode));
529 	case GRABPAGEFREE:
530 		return (seglock_grabfree(arg, mode));
531 	case GRABLOCKINFO:
532 		return (seglock_grabinfo(arg, mode));
533 	case GRABATTACH:
534 		return (EINVAL); /* GRABATTACH is not supported (never was) */
535 
536 	case WINLOCKALLOC:
537 		return (seglock_graballoc(arg, NEWSTYLE_LOCK, mode));
538 	case WINLOCKFREE:
539 		return (seglock_grabfree(arg, mode));
540 	case WINLOCKSETTIMEOUT:
541 		return (seglock_settimeout(arg, mode));
542 	case WINLOCKGETTIMEOUT:
543 		return (seglock_gettimeout(arg, mode));
544 	case WINLOCKDUMP:
545 		seglock_dump_all();
546 		return (0);
547 
548 #ifdef DEBUG
549 	case (WIOC|255):
550 		lock_debug = arg;
551 		return (0);
552 #endif
553 
554 	default:
555 		return (ENOTTY);		/* Why is this not EINVAL */
556 	}
557 }
558 
559 int
560 winlocksegmap(
561 	dev_t	dev,		/* major:minor */
562 	off_t	off,		/* device offset from mmap(2) */
563 	struct as *as,		/* user's address space. */
564 	caddr_t	*addr,		/* address from mmap(2) */
565 	off_t	len,		/* length from mmap(2) */
566 	uint_t	prot,		/* user wants this access */
567 	uint_t	maxprot,	/* this is the maximum the user can have */
568 	uint_t	flags,		/* flags from mmap(2) */
569 	cred_t	*cred)
570 {
571 	DEBUGF(1, (CE_CONT, "winlock_segmap off=%lx, len=0x%lx\n", off, len));
572 
573 	/* Only MAP_SHARED mappings are supported */
574 	if ((flags & MAP_TYPE) == MAP_PRIVATE) {
575 		return (EINVAL);
576 	}
577 
578 	/* Use devmap_setup to setup the mapping */
579 	return (devmap_setup(dev, (offset_t)off, as, addr, (size_t)len, prot,
580 		maxprot, flags, cred));
581 }
582 
583 /*ARGSUSED*/
584 int
585 winlock_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
586     size_t *maplen, uint_t model)
587 {
588 	SegLock *lp;
589 	int err;
590 
591 	DEBUGF(1, (CE_CONT, "winlock devmap: off=%llx, len=%lx, dhp=%p\n",
592 		off, len, (void *)dhp));
593 
594 	*maplen = 0;
595 
596 	/* Check if the lock exists, i.e., has been created by alloc */
597 	/* off is the sy_ident returned in the alloc ioctl */
598 	if ((lp = seglock_findlock((uint_t)off)) == NULL) {
599 		return (ENXIO);
600 	}
601 
602 	/*
603 	 * The offset bits in mmap(2) offset has to be same as in lockptr
604 	 * OR the offset should be 0 (i.e. masked off)
605 	 */
606 	if (((off & PAGEOFFSET) != 0) &&
607 	    ((off ^ (uintptr_t)(lp->lockptr)) & (offset_t)PAGEOFFSET) != 0) {
608 		DEBUGF(2, (CE_CONT,
609 			"mmap offset %llx mismatch with lockptr %p\n",
610 			off, (void *)lp->lockptr));
611 		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
612 		return (EINVAL);
613 	}
614 
615 	/* Only supports PAGESIZE length mappings */
616 	if (len != PAGESIZE) {
617 		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
618 		return (EINVAL);
619 	}
620 
621 	/*
622 	 * Set up devmap to point at page associated with lock
623 	 * RFE: At this point we dont know if this is a lockpage or unlockpage
624 	 * a lockpage would not need DEVMAP_ALLOW_REMAP setting
625 	 * We could have kept track of the mapping order here,
626 	 * but devmap framework does not support storing any state in this
627 	 * devmap callback as it does not callback for error cleanup if some
628 	 * other error happens in the framework.
629 	 * RFE: We should modify the winlock mmap interface so that the
630 	 * user process marks in the offset passed in whether this is for a
631 	 * lock or unlock mapping instead of guessing based on order of maps
632 	 * This would cleanup other things (such as in fork)
633 	 */
634 	if ((err = devmap_umem_setup(dhp, winlock_dip, &winlockmap_ops,
635 	    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT,
636 	    DEVMAP_ALLOW_REMAP, 0)) < 0) {
637 		mutex_exit(&lp->mutex);	/* held by seglock_findlock */
638 		return (err);
639 	}
640 	/*
641 	 * No mappings are loaded to those segments yet. The correctness
642 	 * of the winlock semantics depends on the devmap framework/seg_dev NOT
643 	 * loading the translations without calling _access callback.
644 	 */
645 
646 	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
647 	*maplen = PAGESIZE;
648 	return (0);
649 }
650 
651 /*
652  * This routine is called by the devmap framework after the devmap entry point
653  * above and the mapping is setup in seg_dev.
654  * We store the pointer to the per-process context in the devmap private data.
655  */
656 /*ARGSUSED*/
657 static int
658 winlockmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
659 	size_t len, void **pvtp)
660 {
661 	SegLock *lp = seglock_findlock((uint_t)off); /* returns w/ mutex held */
662 	SegProc *sdp;
663 
664 	ASSERT(len == PAGESIZE);
665 
666 	/* Find the per-process context for this lock, alloc one if not found */
667 	sdp = seglock_allocclient(lp);
668 
669 	/*
670 	 * RFE: Determining which is a lock vs unlock seg is based on order
671 	 * of mmaps, we should change that to be derivable from off
672 	 */
673 	if (sdp->lockseg == NULL) {
674 		sdp->lockseg = dhp;
675 	} else if (sdp->unlockseg == NULL) {
676 		sdp->unlockseg = dhp;
677 	} else {
678 		/* attempting to map lock more than twice */
679 		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
680 		return (ENOMEM);
681 	}
682 
683 	*pvtp = sdp;
684 	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
685 	return (DDI_SUCCESS);
686 }
687 
688 /*
689  * duplicate a segment, as in fork()
690  * On fork, the child inherits the mappings to the lock
691  *	lp->alloccount is NOT incremented, so child should not do a free().
692  *	Semantics same as if done an alloc(), map(), map().
693  *	This way it would work fine if doing an exec() variant later
694  *	Child does not inherit any UFLAGS set in parent
695  * The lock and unlock pages are started off unmapped, i.e., child does not
696  *	own the lock.
697  * The code assumes that the child process has a valid pid at this point
698  * RFE: This semantics depends on fork not duplicating the hat mappings
699  *	(which is the current implementation). To enforce it would need to
700  *	call devmap_unload from here - not clear if that is allowed.
701  */
702 
703 static int
704 winlockmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
705 	void **newpvt)
706 {
707 	SegProc *sdp = (SegProc *)oldpvt;
708 	SegProc *ndp;
709 	SegLock *lp = sdp->lp;
710 
711 	mutex_enter(&lp->mutex);
712 	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
713 
714 	/*
715 	 * Note: At this point, the child process does have a pid, but
716 	 * the arguments passed to as_dup and hence to devmap_dup dont pass it
717 	 * down. So we cannot use normal seglock_findclient - which finds the
718 	 * parent sdp itself!
719 	 * Instead we allocate the child's SegProc by using the child as pointer
720 	 * RFE: we are using the as stucture which means peeking into the
721 	 * devmap_cookie. This is not DDI-compliant. Need a compliant way of
722 	 * getting at either the as or, better, a way to get the child's new pid
723 	 */
724 	ndp = seglock_alloc_specific(lp,
725 		(void *)((devmap_handle_t *)new_dhp)->dh_seg->s_as);
726 	ASSERT(ndp != sdp);
727 
728 	if (sdp->lockseg == dhp) {
729 		ASSERT(ndp->lockseg == NULL);
730 		ndp->lockseg = new_dhp;
731 	} else {
732 		ASSERT(sdp->unlockseg == dhp);
733 		ASSERT(ndp->unlockseg == NULL);
734 		ndp->unlockseg = new_dhp;
735 		if (sdp->flag & TRASHPAGE) {
736 			ndp->flag |= TRASHPAGE;
737 		}
738 	}
739 	mutex_exit(&lp->mutex);
740 	*newpvt = (void *)ndp;
741 	return (0);
742 }
743 
744 
745 /*ARGSUSED*/
746 static void
747 winlockmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
748 	devmap_cookie_t new_dhp1, void **newpvtp1,
749 	devmap_cookie_t new_dhp2, void **newpvtp2)
750 {
751 	SegProc	*sdp = (SegProc *)pvtp;
752 	SegLock	*lp = sdp->lp;
753 
754 	/*
755 	 * We always create PAGESIZE length mappings, so there should never
756 	 * be a partial unmapping case
757 	 */
758 	ASSERT((new_dhp1 == NULL) && (new_dhp2 == NULL));
759 
760 	mutex_enter(&lp->mutex);
761 	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
762 	/* make sure this process doesn't own the lock */
763 	if (sdp == lp->owner) {
764 		/*
765 		 * Not handling errors - i.e., errors in unloading mapping
766 		 * As part of unmapping hat/seg structure get torn down anyway
767 		 */
768 		(void) lock_giveup(lp, 0);
769 	}
770 
771 	ASSERT(sdp != lp->owner);
772 	if (sdp->lockseg == dhp) {
773 		sdp->lockseg = NULL;
774 	} else {
775 		ASSERT(sdp->unlockseg == dhp);
776 		sdp->unlockseg = NULL;
777 		sdp->flag &= ~TRASHPAGE;	/* clear flag if set */
778 	}
779 
780 	garbage_collect_lock(lp, sdp);
781 }
782 
783 /*ARGSUSED*/
784 static int
785 winlockmap_access(devmap_cookie_t dhp, void *pvt, offset_t off, size_t len,
786 	uint_t type, uint_t rw)
787 {
788 	SegProc *sdp = (SegProc *)pvt;
789 	SegLock *lp = sdp->lp;
790 	int err;
791 
792 	/* Driver handles only DEVMAP_ACCESS type of faults */
793 	if (type != DEVMAP_ACCESS)
794 		return (-1);
795 
796 	mutex_enter(&lp->mutex);
797 	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
798 
799 	/* should be using a SegProc that corresponds to current process */
800 	ASSERT(ID(sdp) == CURPROC_ID);
801 
802 	/*
803 	 * If process is faulting but does not have both segments mapped
804 	 * return error (should cause a segv).
805 	 * RFE: could give it a permanent trashpage
806 	 */
807 	if ((sdp->lockseg == NULL) || (sdp->unlockseg == NULL)) {
808 		err = -1;
809 	} else {
810 		err = seglock_lockfault(dhp, sdp, lp, rw);
811 	}
812 	mutex_exit(&lp->mutex);
813 	return (err);
814 }
815 
816 	/* INTERNAL ROUTINES START HERE */
817 
818 
819 
820 /*
821  * search the lock_list list for the specified cookie
822  * The cookie is the sy_ident field returns by ALLOC ioctl.
823  * This has two parts:
824  * the pageoffset bits contain offset into the lock page.
825  * the pagenumber bits contain the lock id.
826  * The user code is supposed to pass in only the pagenumber portion
827  *	(i.e. mask off the pageoffset bits). However the code below
828  *	does the mask in case the users are not diligent
829  * if found, returns with mutex for SegLock structure held
830  */
831 static SegLock *
832 seglock_findlock(uint_t cookie)
833 {
834 	SegLock	*lp;
835 
836 	cookie &= (uint_t)PAGEMASK;   /* remove pageoffset bits to get cookie */
837 	mutex_enter(&winlock_mutex);
838 	for (lp = lock_list; lp != NULL; lp = lp->next) {
839 		mutex_enter(&lp->mutex);
840 		if (cookie == lp->cookie) {
841 			break;	/* return with lp->mutex held */
842 		}
843 		mutex_exit(&lp->mutex);
844 	}
845 	mutex_exit(&winlock_mutex);
846 	return (lp);
847 }
848 
849 /*
850  * search the lock_list list for the specified non-zero key
851  * if found, returns with lock for SegLock structure held
852  */
853 static SegLock *
854 seglock_findkey(uint_t key)
855 {
856 	SegLock	*lp;
857 
858 	ASSERT(MUTEX_HELD(&winlock_mutex));
859 	/* The driver allows multiple locks with key 0, dont search */
860 	if (key == 0)
861 		return (NULL);
862 	for (lp = lock_list; lp != NULL; lp = lp->next) {
863 		mutex_enter(&lp->mutex);
864 		if (key == lp->key)
865 			break;
866 		mutex_exit(&lp->mutex);
867 	}
868 	return (lp);
869 }
870 
871 /*
872  * Create a new lock context.
873  * Returns with SegLock mutex held
874  */
875 
876 static SegLock *
877 seglock_createlock(enum winlock_style style)
878 {
879 	SegLock	*lp;
880 
881 	DEBUGF(3, (CE_CONT, "seglock_createlock: free_list=%p, next_lock %d\n",
882 		(void *)lock_free_list, next_lock));
883 
884 	ASSERT(MUTEX_HELD(&winlock_mutex));
885 	if (lock_free_list != NULL) {
886 		lp = lock_free_list;
887 		lock_free_list = lp->next;
888 	} else if (next_lock >= MAX_LOCKS) {
889 		return (NULL);
890 	} else {
891 		lp = kmem_zalloc(sizeof (SegLock), KM_SLEEP);
892 		lp->cookie = (next_lock + 1) * (uint_t)PAGESIZE;
893 		mutex_init(&lp->mutex, NULL, MUTEX_DEFAULT, NULL);
894 		cv_init(&lp->locksleep, NULL, CV_DEFAULT, NULL);
895 		++next_lock;
896 	}
897 
898 	mutex_enter(&lp->mutex);
899 	ASSERT((lp->cookie/PAGESIZE) <= next_lock);
900 
901 	if (style == OLDSTYLE_LOCK) {
902 		lp->lockptr = (int *)ddi_umem_alloc(PAGESIZE,
903 			DDI_UMEM_SLEEP, &(lp->umem_cookie));
904 	} else {
905 		lp->lockptr = ((int *)lockpage) + ((lp->cookie/PAGESIZE) - 1);
906 		lp->umem_cookie = lockpage_cookie;
907 	}
908 
909 	ASSERT(lp->lockptr != NULL);
910 	lp->style = style;
911 	lp->sleepers = 0;
912 	lp->alloccount = 1;
913 	lp->timeout = LOCKTIME*hz;
914 	lp->clients = NULL;
915 	lp->owner = NULL;
916 	LOCK(lp) = 0;
917 	lp->next = lock_list;
918 	lock_list = lp;
919 	return (lp);
920 }
921 
922 /*
923  * Routine to destory a lock structure.
924  * This routine is called while holding the lp->mutex but not the
925  * winlock_mutex.
926  */
927 
928 static void
929 seglock_destroylock(SegLock *lp)
930 {
931 	ASSERT(MUTEX_HELD(&lp->mutex));
932 	ASSERT(!MUTEX_HELD(&winlock_mutex));
933 
934 	DEBUGF(3, (CE_CONT, "destroying lock cookie %d key %d\n",
935 		lp->cookie, lp->key));
936 
937 	ASSERT(lp->alloccount == 0);
938 	ASSERT(lp->clients == NULL);
939 	ASSERT(lp->owner == NULL);
940 	ASSERT(lp->sleepers == 0);
941 
942 	/* clean up/release fields in lp */
943 	if (lp->style == OLDSTYLE_LOCK) {
944 		ddi_umem_free(lp->umem_cookie);
945 	}
946 	lp->umem_cookie = NULL;
947 	lp->lockptr = NULL;
948 	lp->key = 0;
949 
950 	/*
951 	 * Reduce cookie by 1, makes it non page-aligned and invalid
952 	 * This prevents any valid lookup from finding this lock
953 	 * so when we drop the lock and regrab it it will still
954 	 * be there and nobody else would have attached to it
955 	 */
956 	lp->cookie--;
957 
958 	/* Drop and reacquire mutexes in right order */
959 	mutex_exit(&lp->mutex);
960 	mutex_enter(&winlock_mutex);
961 	mutex_enter(&lp->mutex);
962 
963 	/* reincrement the cookie to get the original valid cookie */
964 	lp->cookie++;
965 	ASSERT((lp->cookie & PAGEOFFSET) == 0);
966 	ASSERT(lp->alloccount == 0);
967 	ASSERT(lp->clients == NULL);
968 	ASSERT(lp->owner == NULL);
969 	ASSERT(lp->sleepers == 0);
970 
971 	/* Remove lp from lock_list */
972 	if (lock_list == lp) {
973 		lock_list = lp->next;
974 	} else {
975 		SegLock *tmp = lock_list;
976 		while (tmp->next != lp) {
977 			tmp = tmp->next;
978 			ASSERT(tmp != NULL);
979 		}
980 		tmp->next = lp->next;
981 	}
982 
983 	/* Add to lock_free_list */
984 	lp->next = lock_free_list;
985 	lock_free_list = lp;
986 	mutex_exit(&lp->mutex);
987 
988 	/* Check if all locks deleted and cleanup */
989 	if (lock_list == NULL) {
990 		lock_destroyall();
991 	}
992 
993 	mutex_exit(&winlock_mutex);
994 }
995 
996 /* Routine to find a SegProc corresponding to the tag */
997 
998 static SegProc *
999 seglock_find_specific(SegLock *lp, void *tag)
1000 {
1001 	SegProc *sdp;
1002 
1003 	ASSERT(MUTEX_HELD(&lp->mutex));
1004 	ASSERT(tag != NULL);
1005 	for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
1006 		if (ID(sdp) == tag)
1007 			break;
1008 	}
1009 	return (sdp);
1010 }
1011 
1012 /* Routine to find (and if needed allocate) a SegProc corresponding to tag */
1013 
1014 static SegProc *
1015 seglock_alloc_specific(SegLock *lp, void *tag)
1016 {
1017 	SegProc *sdp;
1018 
1019 	ASSERT(MUTEX_HELD(&lp->mutex));
1020 	ASSERT(tag != NULL);
1021 
1022 	/* Search and return if existing one found */
1023 	sdp = seglock_find_specific(lp, tag);
1024 	if (sdp != NULL)
1025 		return (sdp);
1026 
1027 	DEBUGF(3, (CE_CONT, "Allocating segproc structure for tag %p lock %d\n",
1028 		    tag, lp->cookie));
1029 
1030 	/* Allocate a new SegProc */
1031 	sdp = kmem_zalloc(sizeof (SegProc), KM_SLEEP);
1032 	sdp->next = lp->clients;
1033 	lp->clients = sdp;
1034 	sdp->lp = lp;
1035 	ID(sdp) = tag;
1036 	return (sdp);
1037 }
1038 
1039 /*
1040  * search a context's client list for the given client and delete
1041  */
1042 
1043 static void
1044 seglock_deleteclient(SegLock *lp, SegProc *sdp)
1045 {
1046 	ASSERT(MUTEX_HELD(&lp->mutex));
1047 	ASSERT(lp->owner != sdp);	/* Not current owner of lock */
1048 	ASSERT(sdp->lockseg == NULL);	/* Mappings torn down */
1049 	ASSERT(sdp->unlockseg == NULL);
1050 
1051 	DEBUGF(3, (CE_CONT, "Deleting segproc structure for pid %d lock %d\n",
1052 		ddi_get_pid(), lp->cookie));
1053 	if (lp->clients == sdp) {
1054 		lp->clients = sdp->next;
1055 	} else {
1056 		SegProc *tmp = lp->clients;
1057 		while (tmp->next != sdp) {
1058 			tmp = tmp->next;
1059 			ASSERT(tmp != NULL);
1060 		}
1061 		tmp->next = sdp->next;
1062 	}
1063 	kmem_free(sdp, sizeof (SegProc));
1064 }
1065 
1066 /*
1067  * Routine to verify if a SegProc and SegLock
1068  * structures are empty/idle.
1069  * Destroys the structures if they are ready
1070  * Can be called with sdp == NULL if want to verify only the lock state
1071  * caller should hold the lp->mutex
1072  * and this routine drops the mutex
1073  */
1074 static void
1075 garbage_collect_lock(SegLock *lp, SegProc *sdp)
1076 {
1077 	ASSERT(MUTEX_HELD(&lp->mutex));
1078 	/* see if both segments unmapped from client structure */
1079 	if ((sdp != NULL) && (sdp->lockseg == NULL) && (sdp->unlockseg == NULL))
1080 		seglock_deleteclient(lp, sdp);
1081 
1082 	/* see if this is last client in the entire lock context */
1083 	if ((lp->clients == NULL) && (lp->alloccount == 0)) {
1084 		seglock_destroylock(lp);
1085 	} else {
1086 		mutex_exit(&lp->mutex);
1087 	}
1088 }
1089 
1090 
1091 /* IOCTLS START HERE */
1092 
1093 static int
1094 seglock_grabinfo(intptr_t arg, int mode)
1095 {
1096 	int i = 1;
1097 
1098 	/* multiple clients per lock supported - see comments up top */
1099 	if (ddi_copyout((caddr_t)&i, (caddr_t)arg, sizeof (int), mode) != 0)
1100 		return (EFAULT);
1101 	return (0);
1102 }
1103 
1104 static int
1105 seglock_graballoc(intptr_t arg, enum winlock_style style, int mode) /* IOCTL */
1106 {
1107 	struct seglock	*lp;
1108 	uint_t		key;
1109 	struct		winlockalloc wla;
1110 	int		err;
1111 
1112 	if (style == OLDSTYLE_LOCK) {
1113 		key = 0;
1114 	} else {
1115 		if (ddi_copyin((caddr_t)arg, (caddr_t)&wla, sizeof (wla),
1116 		    mode)) {
1117 			return (EFAULT);
1118 		}
1119 		key = wla.sy_key;
1120 	}
1121 
1122 	DEBUGF(3, (CE_CONT,
1123 		"seglock_graballoc: key=%u, style=%d\n", key, style));
1124 
1125 	mutex_enter(&winlock_mutex);
1126 	/* Allocate lockpage on first new style alloc */
1127 	if ((lockpage == NULL) && (style == NEWSTYLE_LOCK)) {
1128 		lockpage = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
1129 				&lockpage_cookie);
1130 	}
1131 
1132 	/* Allocate trashpage on first alloc (any style) */
1133 	if (trashpage_cookie == NULL) {
1134 		(void) ddi_umem_alloc(PAGESIZE, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
1135 					&trashpage_cookie);
1136 	}
1137 
1138 	if ((lp = seglock_findkey(key)) != NULL) {
1139 		DEBUGF(2, (CE_CONT, "alloc: found lock key %d cookie %d\n",
1140 			key, lp->cookie));
1141 		++lp->alloccount;
1142 	} else if ((lp = seglock_createlock(style)) != NULL) {
1143 		DEBUGF(2, (CE_CONT, "alloc: created lock key %d cookie %d\n",
1144 			key, lp->cookie));
1145 		lp->key = key;
1146 	} else {
1147 		DEBUGF(2, (CE_CONT, "alloc: cannot create lock key %d\n", key));
1148 		mutex_exit(&winlock_mutex);
1149 		return (ENOMEM);
1150 	}
1151 	ASSERT((lp != NULL) && MUTEX_HELD(&lp->mutex));
1152 
1153 	mutex_exit(&winlock_mutex);
1154 
1155 	if (style == OLDSTYLE_LOCK) {
1156 		err = ddi_copyout((caddr_t)&lp->cookie, (caddr_t)arg,
1157 			sizeof (lp->cookie), mode);
1158 	} else {
1159 		wla.sy_ident = lp->cookie +
1160 		    (uint_t)((uintptr_t)(lp->lockptr) & PAGEOFFSET);
1161 		err = ddi_copyout((caddr_t)&wla, (caddr_t)arg,
1162 		    sizeof (wla), mode);
1163 	}
1164 
1165 	if (err) {
1166 		/* On error, should undo allocation */
1167 		lp->alloccount--;
1168 
1169 		/* Verify and delete if lock is unused now */
1170 		garbage_collect_lock(lp, NULL);
1171 		return (EFAULT);
1172 	}
1173 
1174 	mutex_exit(&lp->mutex);
1175 	return (0);
1176 }
1177 
1178 static int
1179 seglock_grabfree(intptr_t arg, int mode)	/* IOCTL */
1180 {
1181 	struct seglock	*lp;
1182 	uint_t	offset;
1183 
1184 	if (ddi_copyin((caddr_t)arg, &offset, sizeof (offset), mode)
1185 	    != 0) {
1186 		return (EFAULT);
1187 	}
1188 	DEBUGF(2, (CE_CONT, "seglock_grabfree: offset=%u", offset));
1189 
1190 	if ((lp = seglock_findlock(offset)) == NULL) {
1191 		DEBUGF(2, (CE_CONT, "did not find lock\n"));
1192 		return (EINVAL);
1193 	}
1194 	DEBUGF(3, (CE_CONT, " lock key %d, cookie %d, alloccount %d\n",
1195 		lp->key, lp->cookie, lp->alloccount));
1196 
1197 	if (lp->alloccount > 0)
1198 		lp->alloccount--;
1199 
1200 	/* Verify and delete if lock is unused now */
1201 	garbage_collect_lock(lp, NULL);
1202 	return (0);
1203 }
1204 
1205 
1206 /*
1207  * Sets timeout in lock and UFLAGS in client
1208  *	the UFLAGS are stored in the client structure and persistent only
1209  *	till the unmap of the lock pages. If the process sets UFLAGS
1210  *	does a map of the lock/unlock pages and unmaps them, the client
1211  *	structure will get deleted and the UFLAGS will be lost. The process
1212  *	will need to resetup the flags.
1213  */
1214 static int
1215 seglock_settimeout(intptr_t arg, int mode)	/* IOCTL */
1216 {
1217 	SegLock		*lp;
1218 	SegProc		*sdp;
1219 	struct winlocktimeout		wlt;
1220 
1221 	if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) {
1222 		return (EFAULT);
1223 	}
1224 
1225 	if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
1226 		return (EINVAL);
1227 
1228 	lp->timeout = MSEC_TO_TICK_ROUNDUP(wlt.sy_timeout);
1229 	/* if timeout modified, wake up any sleepers */
1230 	if (lp->sleepers > 0) {
1231 		cv_broadcast(&lp->locksleep);
1232 	}
1233 
1234 	/*
1235 	 * If the process is trying to set UFLAGS,
1236 	 *	Find the client segproc and allocate one if needed
1237 	 *	Set the flags preserving the kernel flags
1238 	 * If the process is clearing UFLAGS
1239 	 *	Find the client segproc but dont allocate one if does not exist
1240 	 */
1241 	if (wlt.sy_flags & UFLAGS) {
1242 		sdp = seglock_allocclient(lp);
1243 		sdp->flag = sdp->flag & KFLAGS | wlt.sy_flags & UFLAGS;
1244 	} else if ((sdp = seglock_findclient(lp)) != NULL) {
1245 		sdp->flag = sdp->flag & KFLAGS;
1246 		/* If clearing UFLAGS leaves the segment or lock idle, delete */
1247 		garbage_collect_lock(lp, sdp);
1248 		return (0);
1249 	}
1250 	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
1251 	return (0);
1252 }
1253 
1254 static int
1255 seglock_gettimeout(intptr_t arg, int mode)
1256 {
1257 	SegLock		*lp;
1258 	SegProc		*sdp;
1259 	struct winlocktimeout		wlt;
1260 
1261 	if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0)
1262 		return (EFAULT);
1263 
1264 	if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
1265 		return (EINVAL);
1266 
1267 	wlt.sy_timeout = TICK_TO_MSEC(lp->timeout);
1268 	/*
1269 	 * If this process has an active allocated lock return those flags
1270 	 *	Dont allocate a client structure on gettimeout
1271 	 * If not, return 0.
1272 	 */
1273 	if ((sdp = seglock_findclient(lp)) != NULL) {
1274 		wlt.sy_flags = sdp->flag & UFLAGS;
1275 	} else {
1276 		wlt.sy_flags = 0;
1277 	}
1278 	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
1279 
1280 	if (ddi_copyout(&wlt, (caddr_t)arg, sizeof (wlt), mode) != 0)
1281 		return (EFAULT);
1282 
1283 	return (0);
1284 }
1285 
1286 /*
1287  * Handle lock segment faults here...
1288  *
1289  * This is where the magic happens.
1290  */
1291 
1292 /* ARGSUSED */
1293 static	int
1294 seglock_lockfault(devmap_cookie_t dhp, SegProc *sdp, SegLock *lp, uint_t rw)
1295 {
1296 	SegProc *owner = lp->owner;
1297 	int err;
1298 
1299 	ASSERT(MUTEX_HELD(&lp->mutex));
1300 	DEBUGF(3, (CE_CONT,
1301 		"seglock_lockfault: hdl=%p, sdp=%p, lp=%p owner=%p\n",
1302 		(void *)dhp, (void *)sdp, (void *)lp, (void *)owner));
1303 
1304 	/* lockfault is always called with sdp in current process context */
1305 	ASSERT(ID(sdp) == CURPROC_ID);
1306 
1307 	/* If Lock has no current owner, give the mapping to new owner */
1308 	if (owner == NULL) {
1309 		DEBUGF(4, (CE_CONT, " lock has no current owner\n"));
1310 		return (give_mapping(lp, sdp, rw));
1311 	}
1312 
1313 	if (owner == sdp) {
1314 		/*
1315 		 * Current owner is faulting on owned lock segment OR
1316 		 * Current owner is faulting on unlock page and has no waiters
1317 		 * Then can give the mapping to current owner
1318 		 */
1319 	    if ((sdp->lockseg == dhp) || (lp->sleepers == 0)) {
1320 		DEBUGF(4, (CE_CONT, "lock owner faulting\n"));
1321 		return (give_mapping(lp, sdp, rw));
1322 	    } else {
1323 		/*
1324 		 * Owner must be writing to unlock page and there are waiters.
1325 		 * other cases have been checked earlier.
1326 		 * Release the lock, owner, and owners mappings
1327 		 * As the owner is trying to write to the unlock page, leave
1328 		 * it with a trashpage mapping and wake up the sleepers
1329 		 */
1330 		ASSERT((dhp == sdp->unlockseg) && (lp->sleepers != 0));
1331 		DEBUGF(4, (CE_CONT, " owner fault on unlock seg w/ sleeper\n"));
1332 		return (lock_giveup(lp, 1));
1333 	    }
1334 	}
1335 
1336 	ASSERT(owner != sdp);
1337 
1338 	/*
1339 	 * If old owner faulting on trash unlock mapping,
1340 	 * load hat mappings to trash page
1341 	 * RFE: non-owners should NOT be faulting on unlock mapping as they
1342 	 * as first supposed to fault on the lock seg. We could give them
1343 	 * a trash page or return error.
1344 	 */
1345 	if ((sdp->unlockseg == dhp) && (sdp->flag & TRASHPAGE)) {
1346 		DEBUGF(4, (CE_CONT, " old owner reloads trash mapping\n"));
1347 		return (devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
1348 			DEVMAP_ACCESS, rw));
1349 	}
1350 
1351 	/*
1352 	 * Non-owner faulting. Need to check current LOCK state.
1353 	 *
1354 	 * Before reading lock value in LOCK(lp), we must make sure that
1355 	 * the owner cannot change its value before we change mappings
1356 	 * or else we could end up either with a hung process
1357 	 * or more than one process thinking they have the lock.
1358 	 * We do that by unloading the owner's mappings
1359 	 */
1360 	DEBUGF(4, (CE_CONT, " owner loses mappings to check lock state\n"));
1361 	err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
1362 	err |= devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
1363 	if (err != 0)
1364 		return (err);	/* unable to remove owner mapping */
1365 
1366 	/*
1367 	 * If lock is not held, then current owner mappings were
1368 	 * unloaded above and we can give the lock to the new owner
1369 	 */
1370 	if (LOCK(lp) == 0) {
1371 		DEBUGF(4, (CE_CONT,
1372 			"Free lock (%p): Giving mapping to new owner %d\n",
1373 			(void *)lp, ddi_get_pid()));
1374 		return (give_mapping(lp, sdp, rw));
1375 	}
1376 
1377 	DEBUGF(4, (CE_CONT, "  lock held, sleeping\n"));
1378 
1379 	/*
1380 	 * A non-owning process tried to write (presumably to the lockpage,
1381 	 * but it doesn't matter) but the lock is held; we need to sleep for
1382 	 * the lock while there is an owner.
1383 	 */
1384 
1385 	lp->sleepers++;
1386 	while ((owner = lp->owner) != NULL) {
1387 		int rval;
1388 
1389 		if ((lp->timeout == 0) || (owner->flag & SY_NOTIMEOUT)) {
1390 			/*
1391 			 * No timeout has been specified for this lock;
1392 			 * we'll simply sleep on the condition variable.
1393 			 */
1394 			rval = cv_wait_sig(&lp->locksleep, &lp->mutex);
1395 		} else {
1396 			/*
1397 			 * A timeout _has_ been specified for this lock. We need
1398 			 * to wake up and possibly steal this lock if the owner
1399 			 * does not let it go. Note that all sleepers on a lock
1400 			 * with a timeout wait; the sleeper with the earliest
1401 			 * timeout will wakeup, and potentially steal the lock
1402 			 * Stealing the lock will cause a broadcast on the
1403 			 * locksleep cv and thus kick the other timed waiters
1404 			 * and cause everyone to restart in a new timedwait
1405 			 */
1406 			rval = cv_timedwait_sig(&lp->locksleep,
1407 			    &lp->mutex, ddi_get_lbolt() + lp->timeout);
1408 		}
1409 
1410 		/*
1411 		 * Timeout and still old owner - steal lock
1412 		 * Force-Release lock and give old owner a trashpage mapping
1413 		 */
1414 		if ((rval == -1) && (lp->owner == owner)) {
1415 			/*
1416 			 * if any errors in lock_giveup, go back and sleep/retry
1417 			 * If successful, will break out of loop
1418 			 */
1419 			cmn_err(CE_NOTE, "Process %d timed out on lock %d\n",
1420 				ddi_get_pid(), lp->cookie);
1421 			(void) lock_giveup(lp, 1);
1422 		} else if (rval == 0) { /* signal pending */
1423 			cmn_err(CE_NOTE,
1424 			    "Process %d signalled while waiting on lock %d\n",
1425 			    ddi_get_pid(), lp->cookie);
1426 			lp->sleepers--;
1427 			return (FC_MAKE_ERR(EINTR));
1428 		}
1429 	}
1430 
1431 	lp->sleepers--;
1432 	/*
1433 	 * Give mapping to this process and save a fault later
1434 	 */
1435 	return (give_mapping(lp, sdp, rw));
1436 }
1437 
1438 /*
1439  * Utility: give a valid mapping to lock and unlock pages to current process.
1440  * Caller responsible for unloading old owner's mappings
1441  */
1442 
1443 static int
1444 give_mapping(SegLock *lp, SegProc *sdp, uint_t rw)
1445 {
1446 	int err = 0;
1447 
1448 	ASSERT(MUTEX_HELD(&lp->mutex));
1449 	ASSERT(!((lp->owner == NULL) && (LOCK(lp) != 0)));
1450 	/* give_mapping is always called with sdp in current process context */
1451 	ASSERT(ID(sdp) == CURPROC_ID);
1452 
1453 	/* remap any old trash mappings */
1454 	if (sdp->flag & TRASHPAGE) {
1455 		/* current owner should not have a trash mapping */
1456 		ASSERT(sdp != lp->owner);
1457 
1458 		DEBUGF(4, (CE_CONT,
1459 		    "new owner %d remapping old trash mapping\n",
1460 		    ddi_get_pid()));
1461 		if ((err = devmap_umem_remap(sdp->unlockseg, winlock_dip,
1462 		    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
1463 			/*
1464 			 * unable to remap old trash page,
1465 			 * abort before changing owner
1466 			 */
1467 			DEBUGF(4, (CE_CONT,
1468 			    "aborting: error in umem_remap %d\n", err));
1469 			return (err);
1470 		}
1471 		sdp->flag &= ~TRASHPAGE;
1472 	}
1473 
1474 	/* we have a new owner now */
1475 	lp->owner = sdp;
1476 
1477 	if ((err = devmap_load(sdp->lockseg, lp->cookie, PAGESIZE,
1478 	    DEVMAP_ACCESS, rw)) != 0) {
1479 		return (err);
1480 	}
1481 	DEBUGF(4, (CE_CONT, "new owner %d gets lock mapping", ddi_get_pid()));
1482 
1483 	if (lp->sleepers) {
1484 		/* Force unload unlock mapping if there are waiters */
1485 		DEBUGF(4, (CE_CONT,
1486 		    " lock has %d sleepers => remove unlock mapping\n",
1487 		    lp->sleepers));
1488 		err = devmap_unload(sdp->unlockseg, lp->cookie, PAGESIZE);
1489 	} else {
1490 		/*
1491 		 * while here, give new owner a valid mapping to unlock
1492 		 * page so we don't get called again.
1493 		 */
1494 		DEBUGF(4, (CE_CONT, " and unlock mapping\n"));
1495 		err = devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
1496 			DEVMAP_ACCESS, PROT_WRITE);
1497 	}
1498 	return (err);
1499 }
1500 
1501 /*
1502  * Unload owner's mappings, release the lock and wakeup any sleepers
1503  * If trash, then the old owner is given a trash mapping
1504  *	=> old owner held lock too long and caused a timeout
1505  */
1506 static int
1507 lock_giveup(SegLock *lp, int trash)
1508 {
1509 	SegProc *owner = lp->owner;
1510 
1511 	DEBUGF(4, (CE_CONT, "winlock_giveup: lp=%p, owner=%p, trash %d\n",
1512 	    (void *)lp, (void *)ID(lp->owner), trash));
1513 
1514 	ASSERT(MUTEX_HELD(&lp->mutex));
1515 	ASSERT(owner != NULL);
1516 
1517 	/*
1518 	 * owner loses lockpage/unlockpage mappings and gains a
1519 	 * trashpage mapping, if needed.
1520 	 */
1521 	if (!trash) {
1522 		/*
1523 		 * We do not handle errors in devmap_unload in the !trash case,
1524 		 * as the process is attempting to unmap/exit or otherwise
1525 		 * release the lock. Errors in unloading the mapping are not
1526 		 * going to affect that (unmap does not take error return).
1527 		 */
1528 		(void) devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
1529 		(void) devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
1530 	} else {
1531 		int err;
1532 
1533 		if (err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE)) {
1534 			/* error unloading lockseg mapping. abort giveup */
1535 			return (err);
1536 		}
1537 
1538 		/*
1539 		 * old owner gets mapping to trash page so it can continue
1540 		 * devmap_umem_remap does a hat_unload (and does it holding
1541 		 * the right locks), so no need to devmap_unload on unlockseg
1542 		 */
1543 		if ((err = devmap_umem_remap(owner->unlockseg, winlock_dip,
1544 		    trashpage_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
1545 			/* error remapping to trash page, abort giveup */
1546 			return (err);
1547 		}
1548 		owner->flag |= TRASHPAGE;
1549 		/*
1550 		 * Preload mapping to trash page by calling devmap_load
1551 		 * However, devmap_load can only be called on the faulting
1552 		 * process context and not on the owner's process context
1553 		 * we preload only if we happen to be in owner process context
1554 		 * Other processes will fault on the unlock mapping
1555 		 * and be given a trash mapping at that time.
1556 		 */
1557 		if (ID(owner) == CURPROC_ID) {
1558 		    (void) devmap_load(owner->unlockseg, lp->cookie, PAGESIZE,
1559 			DEVMAP_ACCESS, PROT_WRITE);
1560 		}
1561 	}
1562 
1563 	lp->owner = NULL;
1564 
1565 	/* Clear the lock value in underlying page so new owner can grab it */
1566 	LOCK(lp) = 0;
1567 
1568 	if (lp->sleepers) {
1569 		DEBUGF(4, (CE_CONT, "  waking up, lp=%p\n", (void *)lp));
1570 		cv_broadcast(&lp->locksleep);
1571 	}
1572 	return (0);
1573 }
1574 
1575 /*
1576  * destroy all allocated memory.
1577  */
1578 
1579 static void
1580 lock_destroyall(void)
1581 {
1582 	SegLock	*lp, *lpnext;
1583 
1584 	ASSERT(MUTEX_HELD(&winlock_mutex));
1585 	ASSERT(lock_list == NULL);
1586 
1587 	DEBUGF(1, (CE_CONT, "Lock list empty. Releasing free list\n"));
1588 	for (lp = lock_free_list; lp != NULL; lp = lpnext) {
1589 		mutex_enter(&lp->mutex);
1590 		lpnext =  lp->next;
1591 		ASSERT(lp->clients == NULL);
1592 		ASSERT(lp->owner == NULL);
1593 		ASSERT(lp->alloccount == 0);
1594 		mutex_destroy(&lp->mutex);
1595 		cv_destroy(&lp->locksleep);
1596 		kmem_free(lp, sizeof (SegLock));
1597 	}
1598 	lock_free_list = NULL;
1599 	next_lock = 0;
1600 }
1601 
1602 
1603 /* RFE: create mdb walkers instead of dump routines? */
1604 static void
1605 seglock_dump_all(void)
1606 {
1607 	SegLock	*lp;
1608 
1609 	mutex_enter(&winlock_mutex);
1610 	cmn_err(CE_CONT, "ID\tKEY\tNALLOC\tATTCH\tOWNED\tLOCK\tWAITER\n");
1611 
1612 	cmn_err(CE_CONT, "Lock List:\n");
1613 	for (lp = lock_list; lp != NULL; lp = lp->next) {
1614 		mutex_enter(&lp->mutex);
1615 		cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
1616 		    lp->cookie, lp->key, lp->alloccount,
1617 		    lp->clients ? 'Y' : 'N',
1618 		    lp->owner ? 'Y' : 'N',
1619 		    lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
1620 		    lp->sleepers);
1621 		mutex_exit(&lp->mutex);
1622 	}
1623 	cmn_err(CE_CONT, "Free Lock List:\n");
1624 	for (lp = lock_free_list; lp != NULL; lp = lp->next) {
1625 		mutex_enter(&lp->mutex);
1626 		cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
1627 		    lp->cookie, lp->key, lp->alloccount,
1628 		    lp->clients ? 'Y' : 'N',
1629 		    lp->owner ? 'Y' : 'N',
1630 		    lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
1631 		    lp->sleepers);
1632 		mutex_exit(&lp->mutex);
1633 	}
1634 
1635 #ifdef DEBUG
1636 	if (lock_debug < 3) {
1637 		mutex_exit(&winlock_mutex);
1638 		return;
1639 	}
1640 
1641 	for (lp = lock_list; lp != NULL; lp = lp->next) {
1642 		SegProc	*sdp;
1643 
1644 		mutex_enter(&lp->mutex);
1645 		cmn_err(CE_CONT,
1646 		    "lock %p, key=%d, cookie=%d, nalloc=%u, lock=%d, wait=%d\n",
1647 		    (void *)lp, lp->key, lp->cookie, lp->alloccount,
1648 		    lp->lockptr != 0 ? LOCK(lp) : -1, lp->sleepers);
1649 
1650 		cmn_err(CE_CONT,
1651 		    "style=%d, lockptr=%p, timeout=%ld, clients=%p, owner=%p\n",
1652 		    lp->style, (void *)lp->lockptr, lp->timeout,
1653 		    (void *)lp->clients, (void *)lp->owner);
1654 
1655 
1656 		for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
1657 			cmn_err(CE_CONT, "  client %p%s, lp=%p, flag=%x, "
1658 			    "process tag=%p, lockseg=%p, unlockseg=%p\n",
1659 			    (void *)sdp, sdp == lp->owner ? " (owner)" : "",
1660 			    (void *)sdp->lp, sdp->flag, (void *)ID(sdp),
1661 			    (void *)sdp->lockseg, (void *)sdp->unlockseg);
1662 		}
1663 		mutex_exit(&lp->mutex);
1664 	}
1665 #endif
1666 	mutex_exit(&winlock_mutex);
1667 }
1668 
1669 #include <sys/modctl.h>
1670 
1671 static struct modldrv modldrv = {
1672 	&mod_driverops,		/* Type of module.  This one is a driver */
1673 	"Winlock Driver v%I%",	/* Name of the module */
1674 	&winlock_ops,		/* driver ops */
1675 };
1676 
1677 static struct modlinkage modlinkage = {
1678 	MODREV_1,
1679 	(void *)&modldrv,
1680 	0,
1681 	0,
1682 	0
1683 };
1684 
1685 int
1686 _init(void)
1687 {
1688 	int e;
1689 
1690 	mutex_init(&winlock_mutex, NULL, MUTEX_DEFAULT, NULL);
1691 	e = mod_install(&modlinkage);
1692 	if (e) {
1693 		mutex_destroy(&winlock_mutex);
1694 	}
1695 	return (e);
1696 }
1697 
1698 
1699 int
1700 _info(struct modinfo *modinfop)
1701 {
1702 	return (mod_info(&modlinkage, modinfop));
1703 }
1704 
1705 int
1706 _fini(void)
1707 {
1708 	int	e;
1709 
1710 	e = mod_remove(&modlinkage);
1711 	if (e == 0) {
1712 		mutex_destroy(&winlock_mutex);
1713 	}
1714 	return (e);
1715 }
1716