xref: /titanic_44/usr/src/uts/common/os/ddi.c (revision 1979231e1e29c981e5d1e6cee60f2db46d052b00)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 /*
34  * UNIX Device Driver Interface functions
35  *
36  * This file contains functions that are to be added to the kernel
37  * to put the interface presented to drivers in conformance with
38  * the DDI standard. Of the functions added to the kernel, 17 are
39  * function equivalents of existing macros in sysmacros.h,
40  * stream.h, and param.h
41  *
42  * 17 additional functions -- drv_getparm(), drv_setparm(),
43  * getrbuf(), freerbuf(),
44  * getemajor(), geteminor(), etoimajor(), itoemajor(), drv_usectohz(),
45  * drv_hztousec(), drv_usecwait(), drv_priv(), and kvtoppid() --
46  * are specified by DDI to exist in the kernel and are implemented here.
47  *
48  * Note that putnext() and put() are not in this file. The C version of
49  * these routines are in uts/common/os/putnext.c and assembly versions
50  * might exist for some architectures.
51  */
52 
53 #include <sys/types.h>
54 #include <sys/param.h>
55 #include <sys/t_lock.h>
56 #include <sys/time.h>
57 #include <sys/systm.h>
58 #include <sys/cpuvar.h>
59 #include <sys/signal.h>
60 #include <sys/pcb.h>
61 #include <sys/user.h>
62 #include <sys/errno.h>
63 #include <sys/buf.h>
64 #include <sys/proc.h>
65 #include <sys/cmn_err.h>
66 #include <sys/stream.h>
67 #include <sys/strsubr.h>
68 #include <sys/uio.h>
69 #include <sys/kmem.h>
70 #include <sys/conf.h>
71 #include <sys/cred.h>
72 #include <sys/vnode.h>
73 #include <sys/file.h>
74 #include <sys/poll.h>
75 #include <sys/session.h>
76 #include <sys/ddi.h>
77 #include <sys/sunddi.h>
78 #include <sys/esunddi.h>
79 #include <sys/mkdev.h>
80 #include <sys/debug.h>
81 #include <sys/vtrace.h>
82 
83 /*
84  * return internal major number corresponding to device
85  * number (new format) argument
86  */
87 major_t
88 getmajor(dev_t dev)
89 {
90 #ifdef _LP64
91 	return ((major_t)((dev >> NBITSMINOR64) & MAXMAJ64));
92 #else
93 	return ((major_t)((dev >> NBITSMINOR) & MAXMAJ));
94 #endif
95 }
96 
97 /*
98  * return external major number corresponding to device
99  * number (new format) argument
100  */
101 major_t
102 getemajor(dev_t dev)
103 {
104 #ifdef _LP64
105 	return ((major_t)((dev >> NBITSMINOR64) & MAXMAJ64));
106 #else
107 	return ((major_t)((dev >> NBITSMINOR) & MAXMAJ));
108 #endif
109 }
110 
111 /*
112  * return internal minor number corresponding to device
113  * number (new format) argument
114  */
115 minor_t
116 getminor(dev_t dev)
117 {
118 #ifdef _LP64
119 	return ((minor_t)(dev & MAXMIN64));
120 #else
121 	return ((minor_t)(dev & MAXMIN));
122 #endif
123 }
124 
125 /*
126  * return external minor number corresponding to device
127  * number (new format) argument
128  */
129 minor_t
130 geteminor(dev_t dev)
131 {
132 #ifdef _LP64
133 	return ((minor_t)(dev & MAXMIN64));
134 #else
135 	return ((minor_t)(dev & MAXMIN));
136 #endif
137 }
138 
139 /*
140  * return internal major number corresponding to external
141  * major number.
142  */
143 int
144 etoimajor(major_t emajnum)
145 {
146 #ifdef _LP64
147 	if (emajnum >= devcnt)
148 		return (-1); /* invalid external major */
149 #else
150 	if (emajnum > MAXMAJ || emajnum >= devcnt)
151 		return (-1); /* invalid external major */
152 #endif
153 	return ((int)emajnum);
154 }
155 
156 /*
157  * return external major number corresponding to internal
158  * major number argument or -1 if no external major number
159  * can be found after lastemaj that maps to the internal
160  * major number. Pass a lastemaj val of -1 to start
161  * the search initially. (Typical use of this function is
162  * of the form:
163  *
164  *	lastemaj = -1;
165  *	while ((lastemaj = itoemajor(imag, lastemaj)) != -1)
166  *		{ process major number }
167  */
168 int
169 itoemajor(major_t imajnum, int lastemaj)
170 {
171 	if (imajnum >= devcnt)
172 		return (-1);
173 
174 	/*
175 	 * if lastemaj == -1 then start from beginning of
176 	 * the (imaginary) MAJOR table
177 	 */
178 	if (lastemaj < -1)
179 		return (-1);
180 
181 	/*
182 	 * given that there's a 1-1 mapping of internal to external
183 	 * major numbers, searching is somewhat pointless ... let's
184 	 * just go there directly.
185 	 */
186 	if (++lastemaj < devcnt && imajnum < devcnt)
187 		return (imajnum);
188 	return (-1);
189 }
190 
191 /*
192  * encode external major and minor number arguments into a
193  * new format device number
194  */
195 dev_t
196 makedevice(major_t maj, minor_t minor)
197 {
198 #ifdef _LP64
199 	return (((dev_t)maj << NBITSMINOR64) | (minor & MAXMIN64));
200 #else
201 	return (((dev_t)maj << NBITSMINOR) | (minor & MAXMIN));
202 #endif
203 }
204 
205 /*
206  * cmpdev - compress new device format to old device format
207  */
208 o_dev_t
209 cmpdev(dev_t dev)
210 {
211 	major_t major_d;
212 	minor_t minor_d;
213 
214 #ifdef _LP64
215 	major_d = dev >> NBITSMINOR64;
216 	minor_d = dev & MAXMIN64;
217 #else
218 	major_d = dev >> NBITSMINOR;
219 	minor_d = dev & MAXMIN;
220 #endif
221 	if (major_d > OMAXMAJ || minor_d > OMAXMIN)
222 		return ((o_dev_t)NODEV);
223 	return ((o_dev_t)((major_d << ONBITSMINOR) | minor_d));
224 }
225 
226 dev_t
227 expdev(dev_t dev)
228 {
229 	major_t major_d;
230 	minor_t minor_d;
231 
232 	major_d = ((dev >> ONBITSMINOR) & OMAXMAJ);
233 	minor_d = (dev & OMAXMIN);
234 #ifdef _LP64
235 	return ((((dev_t)major_d << NBITSMINOR64) | minor_d));
236 #else
237 	return ((((dev_t)major_d << NBITSMINOR) | minor_d));
238 #endif
239 }
240 
241 /*
242  * return true (1) if the message type input is a data
243  * message type, 0 otherwise
244  */
245 #undef datamsg
246 int
247 datamsg(unsigned char db_type)
248 {
249 	return (db_type == M_DATA || db_type == M_PROTO ||
250 		db_type == M_PCPROTO || db_type == M_DELAY);
251 }
252 
253 /*
254  * return a pointer to the other queue in the queue pair of qp
255  */
256 queue_t *
257 OTHERQ(queue_t *q)
258 {
259 	return (_OTHERQ(q));
260 }
261 
262 /*
263  * return a pointer to the read queue in the queue pair of qp.
264  */
265 queue_t *
266 RD(queue_t *q)
267 {
268 		return (_RD(q));
269 
270 }
271 
272 /*
273  * return a pointer to the write queue in the queue pair of qp.
274  */
275 int
276 SAMESTR(queue_t *q)
277 {
278 	return (_SAMESTR(q));
279 }
280 
281 /*
282  * return a pointer to the write queue in the queue pair of qp.
283  */
284 queue_t *
285 WR(queue_t *q)
286 {
287 	return (_WR(q));
288 }
289 
290 /*
291  * store value of kernel parameter associated with parm
292  */
293 int
294 drv_getparm(unsigned int parm, void *valuep)
295 {
296 	time_t now;
297 
298 	switch (parm) {
299 	case UPROCP:
300 		*(proc_t **)valuep = ttoproc(curthread);
301 		break;
302 	case PPGRP:
303 		*(pid_t *)valuep = ttoproc(curthread)->p_pgrp;
304 		break;
305 	case LBOLT:
306 		*(clock_t *)valuep = lbolt;
307 		break;
308 	case TIME:
309 		if ((now = gethrestime_sec()) == 0) {
310 			timestruc_t ts;
311 			mutex_enter(&tod_lock);
312 			ts = tod_get();
313 			mutex_exit(&tod_lock);
314 			*(time_t *)valuep = ts.tv_sec;
315 		} else {
316 			*(time_t *)valuep = now;
317 		}
318 		break;
319 	case PPID:
320 		*(pid_t *)valuep = ttoproc(curthread)->p_pid;
321 		break;
322 	case PSID:
323 		*(pid_t *)valuep = ttoproc(curthread)->p_sessp->s_sid;
324 		break;
325 	case UCRED:
326 		*(cred_t **)valuep = CRED();
327 		break;
328 	default:
329 		return (-1);
330 	}
331 
332 	return (0);
333 }
334 
335 /*
336  * set value of kernel parameter associated with parm
337  */
338 int
339 drv_setparm(unsigned int parm, unsigned long value)
340 {
341 	switch (parm) {
342 	case SYSRINT:
343 		CPU_STATS_ADDQ(CPU, sys, rcvint, value);
344 		break;
345 	case SYSXINT:
346 		CPU_STATS_ADDQ(CPU, sys, xmtint, value);
347 		break;
348 	case SYSMINT:
349 		CPU_STATS_ADDQ(CPU, sys, mdmint, value);
350 		break;
351 	case SYSRAWC:
352 		CPU_STATS_ADDQ(CPU, sys, rawch, value);
353 		break;
354 	case SYSCANC:
355 		CPU_STATS_ADDQ(CPU, sys, canch, value);
356 		break;
357 	case SYSOUTC:
358 		CPU_STATS_ADDQ(CPU, sys, outch, value);
359 		break;
360 	default:
361 		return (-1);
362 	}
363 
364 	return (0);
365 }
366 
367 /*
368  * allocate space for buffer header and return pointer to it.
369  * preferred means of obtaining space for a local buf header.
370  * returns pointer to buf upon success, NULL for failure
371  */
372 struct buf *
373 getrbuf(int sleep)
374 {
375 	struct buf *bp;
376 
377 	bp = kmem_alloc(sizeof (struct buf), sleep);
378 	if (bp == NULL)
379 		return (NULL);
380 	bioinit(bp);
381 
382 	return (bp);
383 }
384 
385 /*
386  * free up space allocated by getrbuf()
387  */
388 void
389 freerbuf(struct buf *bp)
390 {
391 	biofini(bp);
392 	kmem_free(bp, sizeof (struct buf));
393 }
394 
395 /*
396  * convert byte count input to logical page units
397  * (byte counts that are not a page-size multiple
398  * are rounded down)
399  */
400 pgcnt_t
401 btop(size_t numbytes)
402 {
403 	return (numbytes >> PAGESHIFT);
404 }
405 
406 /*
407  * convert byte count input to logical page units
408  * (byte counts that are not a page-size multiple
409  * are rounded up)
410  */
411 pgcnt_t
412 btopr(size_t numbytes)
413 {
414 	return ((numbytes + PAGEOFFSET) >> PAGESHIFT);
415 }
416 
417 /*
418  * convert size in pages to bytes.
419  */
420 size_t
421 ptob(pgcnt_t numpages)
422 {
423 	return (numpages << PAGESHIFT);
424 }
425 
426 #define	MAXCLOCK_T LONG_MAX
427 
428 /*
429  * Convert from system time units (hz) to microseconds.
430  *
431  * If ticks <= 0, return 0.
432  * If converting ticks to usecs would overflow, return MAXCLOCK_T.
433  * Otherwise, convert ticks to microseconds.
434  */
435 clock_t
436 drv_hztousec(clock_t ticks)
437 {
438 	if (ticks <= 0)
439 		return (0);
440 
441 	if (ticks > MAXCLOCK_T / usec_per_tick)
442 		return (MAXCLOCK_T);
443 
444 	return (TICK_TO_USEC(ticks));
445 }
446 
447 
448 /*
449  * Convert from microseconds to system time units (hz), rounded up.
450  *
451  * If ticks <= 0, return 0.
452  * Otherwise, convert microseconds to ticks, rounding up.
453  */
454 clock_t
455 drv_usectohz(clock_t microsecs)
456 {
457 	if (microsecs <= 0)
458 		return (0);
459 
460 	return (USEC_TO_TICK_ROUNDUP(microsecs));
461 }
462 
463 #ifdef	sun
464 /*
465  * drv_usecwait implemented in each architecture's machine
466  * specific code somewhere. For sparc, it is the alternate entry
467  * to usec_delay (eventually usec_delay goes away). See
468  * sparc/os/ml/sparc_subr.s
469  */
470 #endif
471 
472 /*
473  * bcanputnext, canputnext assume called from timeout, bufcall,
474  * or esballoc free routines.  since these are driven by
475  * clock interrupts, instead of system calls the appropriate plumbing
476  * locks have not been acquired.
477  */
478 int
479 bcanputnext(queue_t *q, unsigned char band)
480 {
481 	int	ret;
482 
483 	claimstr(q);
484 	ret = bcanput(q->q_next, band);
485 	releasestr(q);
486 	return (ret);
487 }
488 
/*
 * canputnext - test whether the next queue beyond q that has a service
 * procedure is flow controlled.
 *
 * Returns 1 when that queue does not have QFULL set, 0 when it does.
 * On the 0 path QWANTW is also set on the full queue.
 */
int
canputnext(queue_t *q)
{
	queue_t	*qofsq = q;	/* the caller's queue; q is re-pointed below */
	struct stdata *stp = STREAM(q);
	kmutex_t *sdlock;

	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_IN,
	    "canputnext?:%p\n", q);

	/*
	 * Use one of the stream's ciputctrl locks, picked from the current
	 * CPU's cpu_seqid, when the stream has them; otherwise use the
	 * stream-wide sd_reflock.
	 */
	if (stp->sd_ciputctrl != NULL) {
		int ix = CPU->cpu_seqid & stp->sd_nciputctrl;
		sdlock = &stp->sd_ciputctrl[ix].ciputctrl_lock;
		mutex_enter(sdlock);
	} else
		mutex_enter(sdlock = &stp->sd_reflock);

	/* get next module forward with a service queue */
	q = q->q_next->q_nfsrv;
	ASSERT(q != NULL);

	/* this is for loopback transports, they should not do a canputnext */
	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(qofsq));

	/* Fast path: not full, so answer without touching QLOCK. */
	if (!(q->q_flag & QFULL)) {
		mutex_exit(sdlock);
		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT,
		    "canputnext:%p %d", q, 1);
		return (1);
	}

	/* The reference count below is only modified under sd_reflock. */
	if (sdlock != &stp->sd_reflock) {
		mutex_exit(sdlock);
		mutex_enter(&stp->sd_reflock);
	}

	/* the above is the most frequently used path */
	/*
	 * Take a reference on the stream; it is dropped by the releasestr()
	 * calls below.  NOTE(review): presumably the same reference that
	 * claimstr() takes -- confirm against its definition.
	 */
	stp->sd_refcnt++;
	ASSERT(stp->sd_refcnt != 0);	/* Wraparound */
	mutex_exit(&stp->sd_reflock);

	/* Re-test QFULL under QLOCK; it may have cleared in the meantime. */
	mutex_enter(QLOCK(q));
	if (q->q_flag & QFULL) {
		q->q_flag |= QWANTW;
		mutex_exit(QLOCK(q));
		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT,
		    "canputnext:%p %d", q, 0);
		releasestr(qofsq);

		return (0);
	}
	mutex_exit(QLOCK(q));
	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT, "canputnext:%p %d", q, 1);
	releasestr(qofsq);

	return (1);
}
546 
547 
548 /*
549  * Open has progressed to the point where it is safe to send/receive messages.
550  *
551  * "qprocson enables the put and service routines of the driver
552  * or module... Prior to the call to qprocson, the put and service
553  * routines of a newly pushed module or newly opened driver are
554  * disabled.  For the module, messages flow around it as if it
555  * were not present in the stream... qprocson must be called by
556  * the first open of a module or driver after allocation and
557  * initialization of any resource on which the put and service
558  * routines depend."
559  *
560  * Note that before calling qprocson a module/driver could itself cause its
561  * put or service procedures to be run by using put() or qenable().
562  */
563 void
564 qprocson(queue_t *q)
565 {
566 	ASSERT(q->q_flag & QREADR);
567 	/*
568 	 * Do not call insertq() if it is a re-open.  But if _QINSERTING
569 	 * is set, q_next will not be NULL and we need to call insertq().
570 	 */
571 	if ((q->q_next == NULL && WR(q)->q_next == NULL) ||
572 	    (q->q_flag & _QINSERTING))
573 		insertq(STREAM(q), q);
574 }
575 
576 /*
577  * Close has reached a point where it can no longer allow put/service
578  * into the queue.
579  *
580  * "qprocsoff disables the put and service routines of the driver
581  * or module... When the routines are disabled in a module, messages
582  * flow around the module as if it were not present in the stream.
583  * qprocsoff must be called by the close routine of a driver or module
584  * before deallocating any resources on which the driver/module's
585  * put and service routines depend.  qprocsoff will remove the
586  * queue's service routines from the list of service routines to be
587  * run and waits until any concurrent put or service routines are
588  * finished."
589  *
590  * Note that after calling qprocsoff a module/driver could itself cause its
591  * put procedures to be run by using put().
592  */
593 void
594 qprocsoff(queue_t *q)
595 {
596 	ASSERT(q->q_flag & QREADR);
597 	if (q->q_flag & QWCLOSE) {
598 		/* Called more than once */
599 		return;
600 	}
601 	disable_svc(q);
602 	removeq(q);
603 }
604 
605 /*
606  * "freezestr() freezes the state of the entire STREAM  containing
607  *  the  queue  pair  q.  A frozen STREAM blocks any thread
608  *  attempting to enter any open, close, put or service  routine
609  *  belonging  to  any  queue instance in the STREAM, and blocks
610  *  any thread currently within the STREAM if it attempts to put
611  *  messages  onto  or take messages off of any queue within the
612  *  STREAM (with the sole exception  of  the  caller).   Threads
613  *  blocked  by  this  mechanism  remain  so until the STREAM is
614  *  thawed by a call to unfreezestr().
615  *
616  * Use strblock to set SQ_FROZEN in all syncqs in the stream (prevents
617  * further entry into put, service, open, and close procedures) and
618  * grab (and hold) all the QLOCKs in the stream (to block putq, getq etc.)
619  *
620  * Note: this has to be the only code that acquires one QLOCK while holding
621  * another QLOCK (otherwise we would have locking hirarchy/ordering violations.)
622  */
623 void
624 freezestr(queue_t *q)
625 {
626 	struct stdata *stp = STREAM(q);
627 
628 	/*
629 	 * Increment refcnt to prevent q_next from changing during the strblock
630 	 * as well as while the stream is frozen.
631 	 */
632 	claimstr(RD(q));
633 
634 	strblock(q);
635 	ASSERT(stp->sd_freezer == NULL);
636 	stp->sd_freezer = curthread;
637 	for (q = stp->sd_wrq; q != NULL; q = SAMESTR(q) ? q->q_next : NULL) {
638 		mutex_enter(QLOCK(q));
639 		mutex_enter(QLOCK(RD(q)));
640 	}
641 }
642 
643 /*
644  * Undo what freezestr did.
645  * Have to drop the QLOCKs before the strunblock since strunblock will
646  * potentially call other put procedures.
647  */
648 void
649 unfreezestr(queue_t *q)
650 {
651 	struct stdata *stp = STREAM(q);
652 	queue_t	*q1;
653 
654 	for (q1 = stp->sd_wrq; q1 != NULL;
655 	    q1 = SAMESTR(q1) ? q1->q_next : NULL) {
656 		mutex_exit(QLOCK(q1));
657 		mutex_exit(QLOCK(RD(q1)));
658 	}
659 	ASSERT(stp->sd_freezer == curthread);
660 	stp->sd_freezer = NULL;
661 	strunblock(q);
662 	releasestr(RD(q));
663 }
664 
/*
 * Used by open and close procedures to "sleep" waiting for messages to
 * arrive. Note: can only be used in open and close procedures.
 *
 * Lower the gate and let in either messages on the syncq (if there are
 * any) or put/service procedures.
 *
 * If the queue has an outer perimeter this will not prevent entry into this
 * syncq (since outer_enter does not set SQ_WRITER on the syncq that gets the
 * exclusive access to the outer perimeter.)
 *
 * Return 0 if the cv_wait_sig was interrupted; otherwise non-zero (1 when
 * the syncq was drained instead of sleeping, else the cv_wait_sig result).
 *
 * It only makes sense to grab sq_putlocks for !SQ_CIOC sync queues because
 * otherwise put entry points were not blocked in the first place. If this is
 * SQ_CIOC then qwait is used to wait for service procedure to run since syncq
 * is always SQ_CIPUT if it is SQ_CIOC.
 *
 * Note that SQ_EXCL is dropped and SQ_WANTEXWAKEUP set in sq_flags
 * atomically under sq_putlocks to make sure putnext will not miss a pending
 * wakeup.
 */
int
qwait_sig(queue_t *q)
{
	syncq_t		*sq, *outer;
	uint_t		flags;
	int		ret = 1;
	int		is_sq_cioc;

	/*
	 * Perform the same operations as a leavesq(sq, SQ_OPENCLOSE)
	 * while detecting all cases where the perimeter is entered
	 * so that qwait_sig can return to the caller.
	 *
	 * Drain the syncq if possible. Otherwise reset SQ_EXCL and
	 * wait for a thread to leave the syncq.
	 */
	sq = q->q_syncq;
	ASSERT(sq);
	/* Remember the syncq type once; it selects the putlock handling. */
	is_sq_cioc = (sq->sq_type & SQ_CIOC) ? 1 : 0;
	ASSERT(sq->sq_outer == NULL || sq->sq_outer->sq_flags & SQ_WRITER);
	outer = sq->sq_outer;
	/*
	 * XXX this does not work if there is only an outer perimeter.
	 * The semantics of qwait/qwait_sig are undefined in this case.
	 */
	if (outer)
		outer_exit(outer);

	mutex_enter(SQLOCK(sq));
	if (is_sq_cioc == 0) {
		SQ_PUTLOCKS_ENTER(sq);
	}
	/* Work on a local copy of the flags; it is stored back below. */
	flags = sq->sq_flags;
	/*
	 * Drop SQ_EXCL and sq_count but hold the SQLOCK
	 * to prevent any undetected entry and exit into the perimeter.
	 */
	ASSERT(sq->sq_count > 0);
	sq->sq_count--;

	if (is_sq_cioc == 0) {
		ASSERT(flags & SQ_EXCL);
		flags &= ~SQ_EXCL;
	}
	/*
	 * Unblock any thread blocked in an entersq or outer_enter.
	 * Note: we do not unblock a thread waiting in qwait/qwait_sig,
	 * since that could lead to livelock with two threads in
	 * qwait for the same (per module) inner perimeter.
	 */
	if (flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		flags &= ~SQ_WANTWAKEUP;
	}
	sq->sq_flags = flags;
	/* Work is already queued on the syncq: drain it instead of sleeping. */
	if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_EXIT(sq);
		}
		/* drain_syncq() drops SQLOCK */
		drain_syncq(sq);
		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
		entersq(sq, SQ_OPENCLOSE);
		return (1);
	}
	/*
	 * Sleep on sq_exitwait to only be woken up when threads leave the
	 * put or service procedures. We can not sleep on sq_wait since an
	 * outer_exit in a qwait running in the same outer perimeter would
	 * cause a livelock "ping-pong" between two or more qwait'ers.
	 */
	do {
		sq->sq_flags |= SQ_WANTEXWAKEUP;
		/* The putlocks are not held across the sleep. */
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_EXIT(sq);
		}
		ret = cv_wait_sig(&sq->sq_exitwait, SQLOCK(sq));
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_ENTER(sq);
		}
		/*
		 * Go around again only if cv_wait_sig returned non-zero and
		 * SQ_WANTEXWAKEUP is still set, i.e. nobody has cleared the
		 * flag on our behalf yet.
		 */
	} while (ret && (sq->sq_flags & SQ_WANTEXWAKEUP));
	if (is_sq_cioc == 0) {
		SQ_PUTLOCKS_EXIT(sq);
	}
	mutex_exit(SQLOCK(sq));

	/*
	 * Re-enter the perimeters again
	 */
	entersq(sq, SQ_OPENCLOSE);
	return (ret);
}
779 
/*
 * Used by open and close procedures to "sleep" waiting for messages to
 * arrive. Note: can only be used in open and close procedures.
 *
 * Lower the gate and let in either messages on the syncq (if there are
 * any) or put/service procedures.
 *
 * If the queue has an outer perimeter this will not prevent entry into this
 * syncq (since outer_enter does not set SQ_WRITER on the syncq that gets the
 * exclusive access to the outer perimeter.)
 *
 * Unlike qwait_sig() the sleep uses cv_wait(), so there is no
 * interrupted-by-signal result to report and the function returns void.
 *
 * It only makes sense to grab sq_putlocks for !SQ_CIOC sync queues because
 * otherwise put entry points were not blocked in the first place. If this is
 * SQ_CIOC then qwait is used to wait for service procedure to run since syncq
 * is always SQ_CIPUT if it is SQ_CIOC.
 *
 * Note that SQ_EXCL is dropped and SQ_WANTEXWAKEUP set in sq_flags
 * atomically under sq_putlocks to make sure putnext will not miss a pending
 * wakeup.
 */
void
qwait(queue_t *q)
{
	syncq_t		*sq, *outer;
	uint_t		flags;
	int		is_sq_cioc;

	/*
	 * Perform the same operations as a leavesq(sq, SQ_OPENCLOSE)
	 * while detecting all cases where the perimeter is entered
	 * so that qwait can return to the caller.
	 *
	 * Drain the syncq if possible. Otherwise reset SQ_EXCL and
	 * wait for a thread to leave the syncq.
	 */
	sq = q->q_syncq;
	ASSERT(sq);
	/* Remember the syncq type once; it selects the putlock handling. */
	is_sq_cioc = (sq->sq_type & SQ_CIOC) ? 1 : 0;
	ASSERT(sq->sq_outer == NULL || sq->sq_outer->sq_flags & SQ_WRITER);
	outer = sq->sq_outer;
	/*
	 * XXX this does not work if there is only an outer perimeter.
	 * The semantics of qwait/qwait_sig are undefined in this case.
	 */
	if (outer)
		outer_exit(outer);

	mutex_enter(SQLOCK(sq));
	if (is_sq_cioc == 0) {
		SQ_PUTLOCKS_ENTER(sq);
	}
	/* Work on a local copy of the flags; it is stored back below. */
	flags = sq->sq_flags;
	/*
	 * Drop SQ_EXCL and sq_count but hold the SQLOCK
	 * to prevent any undetected entry and exit into the perimeter.
	 */
	ASSERT(sq->sq_count > 0);
	sq->sq_count--;

	if (is_sq_cioc == 0) {
		ASSERT(flags & SQ_EXCL);
		flags &= ~SQ_EXCL;
	}
	/*
	 * Unblock any thread blocked in an entersq or outer_enter.
	 * Note: we do not unblock a thread waiting in qwait/qwait_sig,
	 * since that could lead to livelock with two threads in
	 * qwait for the same (per module) inner perimeter.
	 */
	if (flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		flags &= ~SQ_WANTWAKEUP;
	}
	sq->sq_flags = flags;
	/* Work is already queued on the syncq: drain it instead of sleeping. */
	if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_EXIT(sq);
		}
		/* drain_syncq() drops SQLOCK */
		drain_syncq(sq);
		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
		entersq(sq, SQ_OPENCLOSE);
		return;
	}
	/*
	 * Sleep on sq_exitwait to only be woken up when threads leave the
	 * put or service procedures. We can not sleep on sq_wait since an
	 * outer_exit in a qwait running in the same outer perimeter would
	 * cause a livelock "ping-pong" between two or more qwait'ers.
	 */
	do {
		sq->sq_flags |= SQ_WANTEXWAKEUP;
		/* The putlocks are not held across the sleep. */
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_EXIT(sq);
		}
		cv_wait(&sq->sq_exitwait, SQLOCK(sq));
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_ENTER(sq);
		}
		/* Keep waiting while SQ_WANTEXWAKEUP is still set. */
	} while (sq->sq_flags & SQ_WANTEXWAKEUP);
	if (is_sq_cioc == 0) {
		SQ_PUTLOCKS_EXIT(sq);
	}
	mutex_exit(SQLOCK(sq));

	/*
	 * Re-enter the perimeters again
	 */
	entersq(sq, SQ_OPENCLOSE);
}
890 
/*
 * Used for the synchronous streams entrypoints when sleeping outside
 * the perimeters. Must never be called from regular put entrypoint.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT sync
 * queues). If it is a CIPUT sync queue, put entry points were not blocked in
 * the first place by rwnext/infonext, which are treated as put entrypoints
 * for perimeter synchronization purposes.
 *
 * Returns B_TRUE if the sleep ended because cv_wait_sig() returned a value
 * <= 0; B_FALSE otherwise, including when the syncq was drained instead of
 * sleeping.
 *
 * Consolidation private.
 */
boolean_t
qwait_rw(queue_t *q)
{
	syncq_t		*sq;
	ulong_t		flags;
	boolean_t	gotsignal = B_FALSE;

	/*
	 * Perform the same operations as a leavesq(sq, SQ_PUT)
	 * while detecting all cases where the perimeter is entered
	 * so that qwait_rw can return to the caller.
	 *
	 * Drain the syncq if possible. Otherwise reset SQ_EXCL and
	 * wait for a thread to leave the syncq.
	 */
	sq = q->q_syncq;
	ASSERT(sq);

	mutex_enter(SQLOCK(sq));
	/* Work on a local copy of the flags; it is stored back below. */
	flags = sq->sq_flags;
	/*
	 * Drop SQ_EXCL and sq_count but hold the SQLOCK to prevent any
	 * undetected entry and exit into the perimeter.
	 */
	ASSERT(sq->sq_count > 0);
	sq->sq_count--;
	if (!(sq->sq_type & SQ_CIPUT)) {
		ASSERT(flags & SQ_EXCL);
		flags &= ~SQ_EXCL;
	}
	/*
	 * Unblock any thread blocked in an entersq or outer_enter.
	 * Note: we do not unblock a thread waiting in qwait/qwait_sig,
	 * since that could lead to livelock with two threads in
	 * qwait for the same (per module) inner perimeter.
	 */
	if (flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		flags &= ~SQ_WANTWAKEUP;
	}
	sq->sq_flags = flags;
	/* Work is already queued on the syncq: drain it instead of sleeping. */
	if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
		/* drain_syncq() drops SQLOCK */
		drain_syncq(sq);
		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
		entersq(sq, SQ_PUT);
		return (B_FALSE);
	}
	/*
	 * Sleep on sq_exitwait to only be woken up when threads leave the
	 * put or service procedures. We can not sleep on sq_wait since an
	 * outer_exit in a qwait running in the same outer perimeter would
	 * cause a livelock "ping-pong" between two or more qwait'ers.
	 */
	do {
		sq->sq_flags |= SQ_WANTEXWAKEUP;
		if (cv_wait_sig(&sq->sq_exitwait, SQLOCK(sq)) <= 0) {
			/*
			 * Giving up on the wait: withdraw our wakeup
			 * request before leaving the loop.
			 */
			sq->sq_flags &= ~SQ_WANTEXWAKEUP;
			gotsignal = B_TRUE;
			break;
		}
	} while (sq->sq_flags & SQ_WANTEXWAKEUP);
	mutex_exit(SQLOCK(sq));

	/*
	 * Re-enter the perimeters again
	 */
	entersq(sq, SQ_PUT);
	return (gotsignal);
}
972 
973 /*
974  * Asynchronously upgrade to exclusive access at either the inner or
975  * outer perimeter.
976  */
977 void
978 qwriter(queue_t *q, mblk_t *mp, void (*func)(), int perim)
979 {
980 	if (perim == PERIM_INNER)
981 		qwriter_inner(q, mp, func);
982 	else if (perim == PERIM_OUTER)
983 		qwriter_outer(q, mp, func);
984 	else
985 		panic("qwriter: wrong \"perimeter\" parameter");
986 }
987 
988 /*
989  * Schedule a synchronous streams timeout
990  */
991 timeout_id_t
992 qtimeout(queue_t *q, void (*func)(void *), void *arg, clock_t tim)
993 {
994 	syncq_t		*sq;
995 	callbparams_t	*cbp;
996 	timeout_id_t	tid;
997 
998 	sq = q->q_syncq;
999 	/*
1000 	 * you don't want the timeout firing before its params are set up
1001 	 * callbparams_alloc() acquires SQLOCK(sq)
1002 	 * qtimeout() can't fail and can't sleep, so panic if memory is not
1003 	 * available.
1004 	 */
1005 	cbp = callbparams_alloc(sq, func, arg, KM_NOSLEEP | KM_PANIC);
1006 	/*
1007 	 * the callbflags in the sq use the same flags. They get anded
1008 	 * in the callbwrapper to determine if a qun* of this callback type
1009 	 * is required. This is not a request to cancel.
1010 	 */
1011 	cbp->cbp_flags = SQ_CANCEL_TOUT;
1012 	/* check new timeout version return codes */
1013 	tid = timeout(qcallbwrapper, cbp, tim);
1014 	cbp->cbp_id = (callbparams_id_t)tid;
1015 	mutex_exit(SQLOCK(sq));
1016 	/* use local id because the cbp memory could be free by now */
1017 	return (tid);
1018 }
1019 
1020 bufcall_id_t
1021 qbufcall(queue_t *q, size_t size, uint_t pri, void (*func)(void *), void *arg)
1022 {
1023 	syncq_t		*sq;
1024 	callbparams_t	*cbp;
1025 	bufcall_id_t	bid;
1026 
1027 	sq = q->q_syncq;
1028 	/*
1029 	 * you don't want the timeout firing before its params are set up
1030 	 * callbparams_alloc() acquires SQLOCK(sq) if successful.
1031 	 */
1032 	cbp = callbparams_alloc(sq, func, arg, KM_NOSLEEP);
1033 	if (cbp == NULL)
1034 		return ((bufcall_id_t)0);
1035 
1036 	/*
1037 	 * the callbflags in the sq use the same flags. They get anded
1038 	 * in the callbwrapper to determine if a qun* of this callback type
1039 	 * is required. This is not a request to cancel.
1040 	 */
1041 	cbp->cbp_flags = SQ_CANCEL_BUFCALL;
1042 	/* check new timeout version return codes */
1043 	bid = bufcall(size, pri, qcallbwrapper, cbp);
1044 	cbp->cbp_id = (callbparams_id_t)bid;
1045 	if (bid == 0) {
1046 		callbparams_free(sq, cbp);
1047 	}
1048 	mutex_exit(SQLOCK(sq));
1049 	/* use local id because the params memory could be free by now */
1050 	return (bid);
1051 }
1052 
/*
 * Cancel a timeout callback which enters the inner perimeter.
 * Cancelling of all callback types on a given syncq is serialized.
 * The SQ_CALLB_BYPASSED flag indicates that the callback fn did
 * not execute. The quntimeout return value needs to reflect this.
 * As with our existing callback programming model - callbacks must
 * be cancelled before a close completes - so ensuring that the sq
 * is valid when the callback wrapper is executed.
 *
 * Returns the value from untimeout(), except that 0 is returned when
 * SQ_CALLB_BYPASSED was set in sq_callbflags during the cancel.
 */
clock_t
quntimeout(queue_t *q, timeout_id_t id)
{
	syncq_t *sq = q->q_syncq;
	clock_t ret;

	mutex_enter(SQLOCK(sq));
	/* callbacks are processed serially on each syncq */
	while (sq->sq_callbflags & SQ_CALLB_CANCEL_MASK) {
		sq->sq_flags |= SQ_WANTWAKEUP;
		cv_wait(&sq->sq_wait, SQLOCK(sq));
	}
	/* Publish which callback is being cancelled, and of what type. */
	sq->sq_cancelid = (callbparams_id_t)id;
	sq->sq_callbflags = SQ_CANCEL_TOUT;
	if (sq->sq_flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		sq->sq_flags &= ~SQ_WANTWAKEUP;
	}
	/* SQLOCK is not held across the untimeout() call. */
	mutex_exit(SQLOCK(sq));
	ret = untimeout(id);
	mutex_enter(SQLOCK(sq));
	if (ret != -1) {
		/* The wrapper was never called - need to free based on id */
		callbparams_free_id(sq, (callbparams_id_t)id, SQ_CANCEL_TOUT);
	}
	if (sq->sq_callbflags & SQ_CALLB_BYPASSED) {
		ret = 0;	/* this was how much time left */
	}
	/* Cancel finished: clear the state and let the next canceller in. */
	sq->sq_callbflags = 0;
	if (sq->sq_flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		sq->sq_flags &= ~SQ_WANTWAKEUP;
	}
	mutex_exit(SQLOCK(sq));
	return (ret);
}
1098 
1099 
1100 void
1101 qunbufcall(queue_t *q, bufcall_id_t id)
1102 {
1103 	syncq_t *sq = q->q_syncq;
1104 
1105 	mutex_enter(SQLOCK(sq));
1106 	/* callbacks are processed serially on each syncq */
1107 	while (sq->sq_callbflags & SQ_CALLB_CANCEL_MASK) {
1108 		sq->sq_flags |= SQ_WANTWAKEUP;
1109 		cv_wait(&sq->sq_wait, SQLOCK(sq));
1110 	}
1111 	sq->sq_cancelid = (callbparams_id_t)id;
1112 	sq->sq_callbflags = SQ_CANCEL_BUFCALL;
1113 	if (sq->sq_flags & SQ_WANTWAKEUP) {
1114 		cv_broadcast(&sq->sq_wait);
1115 		sq->sq_flags &= ~SQ_WANTWAKEUP;
1116 	}
1117 	mutex_exit(SQLOCK(sq));
1118 	unbufcall(id);
1119 	mutex_enter(SQLOCK(sq));
1120 	/*
1121 	 * No indication from unbufcall if the callback has already run.
1122 	 * Always attempt to free it.
1123 	 */
1124 	callbparams_free_id(sq, (callbparams_id_t)id, SQ_CANCEL_BUFCALL);
1125 	sq->sq_callbflags = 0;
1126 	if (sq->sq_flags & SQ_WANTWAKEUP) {
1127 		cv_broadcast(&sq->sq_wait);
1128 		sq->sq_flags &= ~SQ_WANTWAKEUP;
1129 	}
1130 	mutex_exit(SQLOCK(sq));
1131 }
1132 
1133 /*
1134  * Associate the stream with an instance of the bottom driver.  This
1135  * function is called by APIs that establish or modify the hardware
1136  * association (ppa) of an open stream.  Two examples of such
1137  * post-open(9E) APIs are the dlpi(7p) DL_ATTACH_REQ message, and the
1138  * ndd(1M) "instance=" ioctl(2).  This interface may be called from a
1139  * stream driver's wput procedure and from within syncq perimeters,
1140  * so it can't block.
1141  *
1142  * The qassociate() "model" is that it should drive attach(9E), yet it
1143  * can't really do that because driving attach(9E) is a blocking
1144  * operation.  Instead, the qassociate() implementation has complex
1145  * dependencies on the implementation behavior of other parts of the
1146  * kernel to ensure all appropriate instances (ones that have not been
1147  * made inaccessible by DR) are attached at stream open() time, and
1148  * that they will not autodetach.  The code relies on the fact that an
1149  * open() of a stream that ends up using qassociate() always occurs on
1150  * a minor node created with CLONE_DEV.  The open() comes through
1151  * clnopen() and since clnopen() calls ddi_hold_installed_driver() we
1152  * attach all instances and mark them DN_NO_AUTODETACH (given
1153  * DN_DRIVER_HELD is maintained correctly).
1154  *
1155  * Since qassociate() can't really drive attach(9E), there are corner
1156  * cases where the compromise described above leads to qassociate()
1157  * returning failure.  This can happen when administrative functions
1158  * that cause detach(9E), such as "update_drv" or "modunload -i", are
1159  * performed on the driver between the time the stream was opened and
1160  * the time its hardware association was established.  Although this can
1161  * theoretically be an arbitrary amount of time, in practice the window
1162  * is usually quite small, since applications almost always issue their
1163  * hardware association request immediately after opening the stream,
1164  * and do not typically switch association while open.  When these
1165  * corner cases occur, and qassociate() finds the requested instance
1166  * detached, it will return failure.  This failure should be propagated
1167  * to the requesting administrative application using the appropriate
1168  * post-open(9E) API error mechanism.
1169  *
1170  * All qassociate() callers are expected to check for and gracefully handle
1171  * failure return, propagating errors back to the requesting administrative
1172  * application.
1173  */
1174 int
1175 qassociate(queue_t *q, int instance)
1176 {
1177 	vnode_t *vp;
1178 	major_t major;
1179 	dev_info_t *dip;
1180 
1181 	if (instance == -1) {
1182 		ddi_assoc_queue_with_devi(q, NULL);
1183 		return (0);
1184 	}
1185 
1186 	vp = STREAM(q)->sd_vnode;
1187 	major = getmajor(vp->v_rdev);
1188 	dip = ddi_hold_devi_by_instance(major, instance,
1189 	    E_DDI_HOLD_DEVI_NOATTACH);
1190 	if (dip == NULL)
1191 		return (-1);
1192 
1193 	ddi_assoc_queue_with_devi(q, dip);
1194 	ddi_release_devi(dip);
1195 	return (0);
1196 }
1197 
1198 /*
1199  * This routine is the SVR4MP 'replacement' for
1200  * hat_getkpfnum.  The only major difference is
1201  * the return value for illegal addresses - since
1202  * sunm_getkpfnum() and srmmu_getkpfnum() both
1203  * return '-1' for bogus mappings, we can (more or
1204  * less) return the value directly.
1205  */
1206 ppid_t
1207 kvtoppid(caddr_t addr)
1208 {
1209 	return ((ppid_t)hat_getpfnum(kas.a_hat, addr));
1210 }
1211 
1212 /*
1213  * This is used to set the timeout value for cv_timed_wait() or
1214  * cv_timedwait_sig().
1215  */
1216 void
1217 time_to_wait(clock_t *now, clock_t time)
1218 {
1219 	*now = lbolt + time;
1220 }
1221