/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * UNIX Device Driver Interface functions
 *
 * This file contains functions that are added to the kernel to bring
 * the interface presented to drivers into conformance with the DDI
 * standard.  Of the functions added to the kernel, 17 are function
 * equivalents of existing macros in sysmacros.h, stream.h, and param.h.
 *
 * Additional functions -- drv_getparm(), drv_setparm(),
 * getrbuf(), freerbuf(),
 * getemajor(), geteminor(), etoimajor(), itoemajor(), drv_usectohz(),
 * drv_hztousec(), drv_usecwait(), drv_priv(), and kvtoppid() --
 * are specified by the DDI to exist in the kernel and are implemented here.
 *
 * Note that putnext() and put() are not in this file.  The C versions of
 * these routines are in uts/common/os/putnext.c, and assembly versions
 * might exist for some architectures.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/signal.h>
#include <sys/pcb.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/session.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/mkdev.h>
#include <sys/debug.h>
#include <sys/vtrace.h>

/*
 * return internal major number corresponding to device
 * number (new format) argument
 */
major_t
getmajor(dev_t dev)
{
#ifdef _LP64
	return ((major_t)((dev >> NBITSMINOR64) & MAXMAJ64));
#else
	return ((major_t)((dev >> NBITSMINOR) & MAXMAJ));
#endif
}

/*
 * return external major number corresponding to device
 * number (new format) argument
 */
major_t
getemajor(dev_t dev)
{
#ifdef _LP64
	return ((major_t)((dev >> NBITSMINOR64) & MAXMAJ64));
#else
	return ((major_t)((dev >> NBITSMINOR) & MAXMAJ));
#endif
}

/*
 * return internal minor number corresponding to device
 * number (new format) argument
 */
minor_t
getminor(dev_t dev)
{
#ifdef _LP64
	return ((minor_t)(dev & MAXMIN64));
#else
	return ((minor_t)(dev & MAXMIN));
#endif
}

/*
 * return external minor number corresponding to device
 * number (new format) argument
 */
minor_t
geteminor(dev_t dev)
{
#ifdef _LP64
	return ((minor_t)(dev & MAXMIN64));
#else
	return ((minor_t)(dev & MAXMIN));
#endif
}

/*
 * return internal major number corresponding to external
 * major number.
 */
int
etoimajor(major_t emajnum)
{
#ifdef _LP64
	if (emajnum >= devcnt)
		return (-1); /* invalid external major */
#else
	if (emajnum > MAXMAJ || emajnum >= devcnt)
		return (-1); /* invalid external major */
#endif
	return ((int)emajnum);
}

/*
 * return external major number corresponding to internal
 * major number argument, or -1 if no external major number
 * can be found after lastemaj that maps to the internal
 * major number.  Pass a lastemaj val of -1 to start
 * the search initially.  Typical use of this function is
 * of the form:
 *
 *	lastemaj = -1;
 *	while ((lastemaj = itoemajor(imaj, lastemaj)) != -1)
 *		{ process major number }
 */
int
itoemajor(major_t imajnum, int lastemaj)
{
	if (imajnum >= devcnt)
		return (-1);

	/*
	 * if lastemaj == -1 then start from the beginning of
	 * the (imaginary) MAJOR table
	 */
	if (lastemaj < -1)
		return (-1);

	/*
	 * given that there's a 1-1 mapping of internal to external
	 * major numbers, searching is somewhat pointless ... let's
	 * just go there directly.
	 */
	if (++lastemaj < devcnt && imajnum < devcnt)
		return (imajnum);
	return (-1);
}

/*
 * encode external major and minor number arguments into a
 * new format device number
 */
dev_t
makedevice(major_t maj, minor_t minor)
{
#ifdef _LP64
	return (((dev_t)maj << NBITSMINOR64) | (minor & MAXMIN64));
#else
	return (((dev_t)maj << NBITSMINOR) | (minor & MAXMIN));
#endif
}
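
/*
 * Illustrative (non-normative) sketch of how makedevice() pairs with
 * getmajor()/getminor(), e.g. in a driver's getinfo(9E) or open(9E) path;
 * the local names are hypothetical.
 *
 *	major_t my_major = ddi_driver_major(dip);
 *	dev_t dev = makedevice(my_major, (minor_t)instance);
 *
 *	ASSERT(getmajor(dev) == my_major);
 *	ASSERT(getminor(dev) == (minor_t)instance);
 */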

/*
 * cmpdev - compress new device format to old device format
 */
o_dev_t
cmpdev(dev_t dev)
{
	major_t major_d;
	minor_t minor_d;

#ifdef _LP64
	major_d = dev >> NBITSMINOR64;
	minor_d = dev & MAXMIN64;
#else
	major_d = dev >> NBITSMINOR;
	minor_d = dev & MAXMIN;
#endif
	if (major_d > OMAXMAJ || minor_d > OMAXMIN)
		return ((o_dev_t)NODEV);
	return ((o_dev_t)((major_d << ONBITSMINOR) | minor_d));
}

dev_t
expdev(dev_t dev)
{
	major_t major_d;
	minor_t minor_d;

	major_d = ((dev >> ONBITSMINOR) & OMAXMAJ);
	minor_d = (dev & OMAXMIN);
#ifdef _LP64
	return ((((dev_t)major_d << NBITSMINOR64) | minor_d));
#else
	return ((((dev_t)major_d << NBITSMINOR) | minor_d));
#endif
}

/*
 * return true (1) if the message type input is a data
 * message type, 0 otherwise
 */
#undef datamsg
int
datamsg(unsigned char db_type)
{
	return (db_type == M_DATA || db_type == M_PROTO ||
	    db_type == M_PCPROTO || db_type == M_DELAY);
}

/*
 * return a pointer to the other queue in the queue pair of qp
 */
queue_t *
OTHERQ(queue_t *q)
{
	return (_OTHERQ(q));
}

/*
 * return a pointer to the read queue in the queue pair of qp.
 */
queue_t *
RD(queue_t *q)
{
	return (_RD(q));
}

/*
 * return 1 if the next queue in the stream is the same type as q
 * (i.e. the stream continues past q), 0 otherwise.
 */
int
SAMESTR(queue_t *q)
{
	return (_SAMESTR(q));
}

/*
 * return a pointer to the write queue in the queue pair of qp.
 */
queue_t *
WR(queue_t *q)
{
	return (_WR(q));
}
/*
 * store value of kernel parameter associated with parm
 */
int
drv_getparm(unsigned int parm, void *valuep)
{
	proc_t	*p = curproc;
	time_t	now;

	switch (parm) {
	case UPROCP:
		*(proc_t **)valuep = p;
		break;
	case PPGRP:
		mutex_enter(&p->p_lock);
		*(pid_t *)valuep = p->p_pgrp;
		mutex_exit(&p->p_lock);
		break;
	case LBOLT:
		*(clock_t *)valuep = ddi_get_lbolt();
		break;
	case TIME:
		if ((now = gethrestime_sec()) == 0) {
			timestruc_t ts;
			mutex_enter(&tod_lock);
			ts = tod_get();
			mutex_exit(&tod_lock);
			*(time_t *)valuep = ts.tv_sec;
		} else {
			*(time_t *)valuep = now;
		}
		break;
	case PPID:
		*(pid_t *)valuep = p->p_pid;
		break;
	case PSID:
		mutex_enter(&p->p_splock);
		*(pid_t *)valuep = p->p_sessp->s_sid;
		mutex_exit(&p->p_splock);
		break;
	case UCRED:
		*(cred_t **)valuep = CRED();
		break;
	default:
		return (-1);
	}

	return (0);
}
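
/*
 * Illustrative (non-normative) sketch of typical drv_getparm() usage from a
 * driver; the local variable names are hypothetical.
 *
 *	time_t now;
 *	clock_t ticks;
 *
 *	if (drv_getparm(TIME, &now) == -1 ||
 *	    drv_getparm(LBOLT, &ticks) == -1)
 *		return (EIO);
 */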

/*
 * set value of kernel parameter associated with parm
 */
int
drv_setparm(unsigned int parm, unsigned long value)
{
	switch (parm) {
	case SYSRINT:
		CPU_STATS_ADDQ(CPU, sys, rcvint, value);
		break;
	case SYSXINT:
		CPU_STATS_ADDQ(CPU, sys, xmtint, value);
		break;
	case SYSMINT:
		CPU_STATS_ADDQ(CPU, sys, mdmint, value);
		break;
	case SYSRAWC:
		CPU_STATS_ADDQ(CPU, sys, rawch, value);
		break;
	case SYSCANC:
		CPU_STATS_ADDQ(CPU, sys, canch, value);
		break;
	case SYSOUTC:
		CPU_STATS_ADDQ(CPU, sys, outch, value);
		break;
	default:
		return (-1);
	}

	return (0);
}

/*
 * allocate space for buffer header and return pointer to it.
 * preferred means of obtaining space for a local buf header.
 * returns pointer to buf upon success, NULL for failure
 */
struct buf *
getrbuf(int sleep)
{
	struct buf *bp;

	bp = kmem_alloc(sizeof (struct buf), sleep);
	if (bp == NULL)
		return (NULL);
	bioinit(bp);

	return (bp);
}

/*
 * free up space allocated by getrbuf()
 */
void
freerbuf(struct buf *bp)
{
	biofini(bp);
	kmem_free(bp, sizeof (struct buf));
}
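
/*
 * Illustrative (non-normative) sketch of the usual getrbuf()/freerbuf()
 * pairing; my_iodone() is a hypothetical completion callback and the
 * transfer setup is elided.
 *
 *	struct buf *bp;
 *
 *	if ((bp = getrbuf(KM_NOSLEEP)) == NULL)
 *		return (ENOMEM);
 *	bp->b_iodone = my_iodone;
 *	... set up and issue the transfer; freerbuf(bp) when done with it ...
 */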

/*
 * convert byte count input to logical page units
 * (byte counts that are not a page-size multiple
 * are rounded down)
 */
pgcnt_t
btop(size_t numbytes)
{
	return (numbytes >> PAGESHIFT);
}

/*
 * convert byte count input to logical page units
 * (byte counts that are not a page-size multiple
 * are rounded up)
 */
pgcnt_t
btopr(size_t numbytes)
{
	return ((numbytes + PAGEOFFSET) >> PAGESHIFT);
}

/*
 * convert size in pages to bytes.
 */
size_t
ptob(pgcnt_t numpages)
{
	return (numpages << PAGESHIFT);
}
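
/*
 * Worked example, assuming a hypothetical 8K page size (PAGESHIFT == 13):
 *
 *	btop(8192)  == 1	btop(8193)  == 1	(rounds down)
 *	btopr(8192) == 1	btopr(8193) == 2	(rounds up)
 *	ptob(2)     == 16384
 */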

#define	MAXCLOCK_T	LONG_MAX

/*
 * Convert from system time units (hz) to microseconds.
 *
 * If ticks <= 0, return 0.
 * If converting ticks to usecs would overflow, return MAXCLOCK_T.
 * Otherwise, convert ticks to microseconds.
 */
clock_t
drv_hztousec(clock_t ticks)
{
	if (ticks <= 0)
		return (0);

	if (ticks > MAXCLOCK_T / usec_per_tick)
		return (MAXCLOCK_T);

	return (TICK_TO_USEC(ticks));
}


/*
 * Convert from microseconds to system time units (hz), rounded up.
 *
 * If microsecs <= 0, return 0.
 * Otherwise, convert microseconds to ticks, rounding up.
 */
clock_t
drv_usectohz(clock_t microsecs)
{
	if (microsecs <= 0)
		return (0);

	return (USEC_TO_TICK_ROUNDUP(microsecs));
}
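
/*
 * Illustrative (non-normative) sketch: drivers express delays in
 * microseconds and convert to ticks before calling timeout(9F) or
 * delay(9F); my_tick_handler and the 50ms value are hypothetical.
 *
 *	timeout_id_t id;
 *
 *	id = timeout(my_tick_handler, arg, drv_usectohz(50000));
 *	...
 *	(void) untimeout(id);
 */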

#ifdef	sun
/*
 * drv_usecwait is implemented in each architecture's machine-specific
 * code somewhere.  For sparc, it is the alternate entry to usec_delay
 * (eventually usec_delay goes away).  See sparc/os/ml/sparc_subr.s
 */
#endif

/*
 * bcanputnext() and canputnext() assume they are called from timeout,
 * bufcall, or esballoc free routines.  Since these are driven by clock
 * interrupts rather than system calls, the appropriate plumbing locks
 * have not been acquired.
 */
int
bcanputnext(queue_t *q, unsigned char band)
{
	int	ret;

	claimstr(q);
	ret = bcanput(q->q_next, band);
	releasestr(q);
	return (ret);
}

int
canputnext(queue_t *q)
{
	queue_t	*qofsq = q;
	struct stdata *stp = STREAM(q);
	kmutex_t *sdlock;

	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_IN,
	    "canputnext?:%p\n", q);

	if (stp->sd_ciputctrl != NULL) {
		int ix = CPU->cpu_seqid & stp->sd_nciputctrl;
		sdlock = &stp->sd_ciputctrl[ix].ciputctrl_lock;
		mutex_enter(sdlock);
	} else
		mutex_enter(sdlock = &stp->sd_reflock);

	/* get next module forward with a service queue */
	q = q->q_next->q_nfsrv;
	ASSERT(q != NULL);

	/* this is for loopback transports, they should not do a canputnext */
	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(qofsq));

	if (!(q->q_flag & QFULL)) {
		mutex_exit(sdlock);
		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT,
		    "canputnext:%p %d", q, 1);
		return (1);
	}

	if (sdlock != &stp->sd_reflock) {
		mutex_exit(sdlock);
		mutex_enter(&stp->sd_reflock);
	}

	/* the above is the most frequently used path */
	stp->sd_refcnt++;
	ASSERT(stp->sd_refcnt != 0);	/* Wraparound */
	mutex_exit(&stp->sd_reflock);

	mutex_enter(QLOCK(q));
	if (q->q_flag & QFULL) {
		q->q_flag |= QWANTW;
		mutex_exit(QLOCK(q));
		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT,
		    "canputnext:%p %d", q, 0);
		releasestr(qofsq);

		return (0);
	}
	mutex_exit(QLOCK(q));
	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT, "canputnext:%p %d", q, 1);
	releasestr(qofsq);

	return (1);
}
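
/*
 * Illustrative (non-normative) sketch of the usual flow-control idiom in a
 * module's write service routine (the function name is hypothetical): check
 * canputnext() before forwarding, and put the message back with putbq() when
 * the downstream queue is full.
 *
 *	int
 *	mywsrv(queue_t *q)
 *	{
 *		mblk_t *mp;
 *
 *		while ((mp = getq(q)) != NULL) {
 *			if (!canputnext(q)) {
 *				(void) putbq(q, mp);
 *				break;
 *			}
 *			putnext(q, mp);
 *		}
 *		return (0);
 *	}
 */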


/*
 * Open has progressed to the point where it is safe to send/receive messages.
 *
 * "qprocson enables the put and service routines of the driver
 * or module... Prior to the call to qprocson, the put and service
 * routines of a newly pushed module or newly opened driver are
 * disabled.  For the module, messages flow around it as if it
 * were not present in the stream... qprocson must be called by
 * the first open of a module or driver after allocation and
 * initialization of any resource on which the put and service
 * routines depend."
 *
 * Note that before calling qprocson a module/driver could itself cause its
 * put or service procedures to be run by using put() or qenable().
 */
void
qprocson(queue_t *q)
{
	ASSERT(q->q_flag & QREADR);
	/*
	 * Do not call insertq() if it is a re-open.  But if _QINSERTING
	 * is set, q_next will not be NULL and we need to call insertq().
	 */
	if ((q->q_next == NULL && WR(q)->q_next == NULL) ||
	    (q->q_flag & _QINSERTING))
		insertq(STREAM(q), q);
}
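
/*
 * Illustrative (non-normative) sketch of where qprocson() sits in a STREAMS
 * open(9E) routine; my_state_t and myopen are hypothetical and error handling
 * is elided.  Per the comment above, per-stream state is set up first and the
 * put/service routines are enabled last.
 *
 *	int
 *	myopen(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *crp)
 *	{
 *		my_state_t *sp = kmem_zalloc(sizeof (*sp), KM_SLEEP);
 *
 *		q->q_ptr = WR(q)->q_ptr = sp;
 *		qprocson(q);
 *		return (0);
 *	}
 */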

/*
 * Close has reached a point where it can no longer allow put/service
 * into the queue.
 *
 * "qprocsoff disables the put and service routines of the driver
 * or module... When the routines are disabled in a module, messages
 * flow around the module as if it were not present in the stream.
 * qprocsoff must be called by the close routine of a driver or module
 * before deallocating any resources on which the driver/module's
 * put and service routines depend.  qprocsoff will remove the
 * queue's service routines from the list of service routines to be
 * run and waits until any concurrent put or service routines are
 * finished."
 *
 * Note that after calling qprocsoff a module/driver could itself cause its
 * put procedures to be run by using put().
 */
void
qprocsoff(queue_t *q)
{
	ASSERT(q->q_flag & QREADR);
	if (q->q_flag & QWCLOSE) {
		/* Called more than once */
		return;
	}
	disable_svc(q);
	removeq(q);
}
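
/*
 * Illustrative (non-normative) sketch of the matching close(9E) routine;
 * names are hypothetical.  Per the comment above, qprocsoff() is called
 * before the resources used by the put/service routines are torn down.
 *
 *	int
 *	myclose(queue_t *q, int flag, cred_t *crp)
 *	{
 *		my_state_t *sp = q->q_ptr;
 *
 *		qprocsoff(q);
 *		q->q_ptr = WR(q)->q_ptr = NULL;
 *		kmem_free(sp, sizeof (*sp));
 *		return (0);
 *	}
 */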

/*
 * "freezestr() freezes the state of the entire STREAM containing
 * the queue pair q.  A frozen STREAM blocks any thread
 * attempting to enter any open, close, put or service routine
 * belonging to any queue instance in the STREAM, and blocks
 * any thread currently within the STREAM if it attempts to put
 * messages onto or take messages off of any queue within the
 * STREAM (with the sole exception of the caller).  Threads
 * blocked by this mechanism remain so until the STREAM is
 * thawed by a call to unfreezestr()."
 *
 * Use strblock to set SQ_FROZEN in all syncqs in the stream (prevents
 * further entry into put, service, open, and close procedures) and
 * grab (and hold) all the QLOCKs in the stream (to block putq, getq etc.)
 *
 * Note: this has to be the only code that acquires one QLOCK while holding
 * another QLOCK (otherwise we would have locking hierarchy/ordering
 * violations.)
 */
void
freezestr(queue_t *q)
{
	struct stdata *stp = STREAM(q);

	/*
	 * Increment refcnt to prevent q_next from changing during the strblock
	 * as well as while the stream is frozen.
	 */
	claimstr(RD(q));

	strblock(q);
	ASSERT(stp->sd_freezer == NULL);
	stp->sd_freezer = curthread;
	for (q = stp->sd_wrq; q != NULL; q = SAMESTR(q) ? q->q_next : NULL) {
		mutex_enter(QLOCK(q));
		mutex_enter(QLOCK(RD(q)));
	}
}

/*
 * Undo what freezestr did.
 * Have to drop the QLOCKs before the strunblock since strunblock will
 * potentially call other put procedures.
 */
void
unfreezestr(queue_t *q)
{
	struct stdata *stp = STREAM(q);
	queue_t *q1;

	for (q1 = stp->sd_wrq; q1 != NULL;
	    q1 = SAMESTR(q1) ? q1->q_next : NULL) {
		mutex_exit(QLOCK(q1));
		mutex_exit(QLOCK(RD(q1)));
	}
	ASSERT(stp->sd_freezer == curthread);
	stp->sd_freezer = NULL;
	strunblock(q);
	releasestr(RD(q));
}

/*
 * Used by open and close procedures to "sleep" waiting for messages to
 * arrive.  Note: can only be used in open and close procedures.
 *
 * Lower the gate and let in either messages on the syncq (if there are
 * any) or put/service procedures.
 *
 * If the queue has an outer perimeter this will not prevent entry into this
 * syncq (since outer_enter does not set SQ_WRITER on the syncq that gets the
 * exclusive access to the outer perimeter.)
 *
 * Return 0 if the cv_wait_sig was interrupted; otherwise 1.
 *
 * It only makes sense to grab sq_putlocks for !SQ_CIOC sync queues because
 * otherwise put entry points were not blocked in the first place.  If this is
 * SQ_CIOC then qwait is used to wait for the service procedure to run, since
 * the syncq is always SQ_CIPUT if it is SQ_CIOC.
 *
 * Note that SQ_EXCL is dropped and SQ_WANTEXWAKEUP set in sq_flags
 * atomically under sq_putlocks to make sure putnext will not miss a pending
 * wakeup.
 */
int
qwait_sig(queue_t *q)
{
	syncq_t	*sq, *outer;
	uint_t	flags;
	int	ret = 1;
	int	is_sq_cioc;

	/*
	 * Perform the same operations as a leavesq(sq, SQ_OPENCLOSE)
	 * while detecting all cases where the perimeter is entered
	 * so that qwait_sig can return to the caller.
	 *
	 * Drain the syncq if possible.  Otherwise reset SQ_EXCL and
	 * wait for a thread to leave the syncq.
	 */
	sq = q->q_syncq;
	ASSERT(sq);
	is_sq_cioc = (sq->sq_type & SQ_CIOC) ? 1 : 0;
	ASSERT(sq->sq_outer == NULL || sq->sq_outer->sq_flags & SQ_WRITER);
	outer = sq->sq_outer;
	/*
	 * XXX this does not work if there is only an outer perimeter.
	 * The semantics of qwait/qwait_sig are undefined in this case.
	 */
	if (outer)
		outer_exit(outer);

	mutex_enter(SQLOCK(sq));
	if (is_sq_cioc == 0) {
		SQ_PUTLOCKS_ENTER(sq);
	}
	flags = sq->sq_flags;
	/*
	 * Drop SQ_EXCL and sq_count but hold the SQLOCK
	 * to prevent any undetected entry and exit into the perimeter.
	 */
	ASSERT(sq->sq_count > 0);
	sq->sq_count--;

	if (is_sq_cioc == 0) {
		ASSERT(flags & SQ_EXCL);
		flags &= ~SQ_EXCL;
	}
	/*
	 * Unblock any thread blocked in an entersq or outer_enter.
	 * Note: we do not unblock a thread waiting in qwait/qwait_sig,
	 * since that could lead to livelock with two threads in
	 * qwait for the same (per module) inner perimeter.
	 */
	if (flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		flags &= ~SQ_WANTWAKEUP;
	}
	sq->sq_flags = flags;
	if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_EXIT(sq);
		}
		/* drain_syncq() drops SQLOCK */
		drain_syncq(sq);
		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
		entersq(sq, SQ_OPENCLOSE);
		return (1);
	}
	/*
	 * Sleep on sq_exitwait to only be woken up when threads leave the
	 * put or service procedures.  We can not sleep on sq_wait since an
	 * outer_exit in a qwait running in the same outer perimeter would
	 * cause a livelock "ping-pong" between two or more qwait'ers.
	 */
	do {
		sq->sq_flags |= SQ_WANTEXWAKEUP;
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_EXIT(sq);
		}
		ret = cv_wait_sig(&sq->sq_exitwait, SQLOCK(sq));
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_ENTER(sq);
		}
	} while (ret && (sq->sq_flags & SQ_WANTEXWAKEUP));
	if (is_sq_cioc == 0) {
		SQ_PUTLOCKS_EXIT(sq);
	}
	mutex_exit(SQLOCK(sq));

	/*
	 * Re-enter the perimeters again
	 */
	entersq(sq, SQ_OPENCLOSE);
	return (ret);
}
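
/*
 * Illustrative (non-normative) sketch of qwait_sig() use in an open routine
 * that needs a reply message before it can finish; sp->sp_mp is hypothetical
 * per-stream state filled in by the read-side put routine.  qwait_sig()
 * returns 0 when interrupted by a signal, in which case the open backs out.
 *
 *	while (sp->sp_mp == NULL) {
 *		if (!qwait_sig(q)) {
 *			qprocsoff(q);
 *			return (EINTR);
 *		}
 *	}
 */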

/*
 * Used by open and close procedures to "sleep" waiting for messages to
 * arrive.  Note: can only be used in open and close procedures.
 *
 * Lower the gate and let in either messages on the syncq (if there are
 * any) or put/service procedures.
 *
 * If the queue has an outer perimeter this will not prevent entry into this
 * syncq (since outer_enter does not set SQ_WRITER on the syncq that gets the
 * exclusive access to the outer perimeter.)
 *
 * It only makes sense to grab sq_putlocks for !SQ_CIOC sync queues because
 * otherwise put entry points were not blocked in the first place.  If this is
 * SQ_CIOC then qwait is used to wait for the service procedure to run, since
 * the syncq is always SQ_CIPUT if it is SQ_CIOC.
 *
 * Note that SQ_EXCL is dropped and SQ_WANTEXWAKEUP set in sq_flags
 * atomically under sq_putlocks to make sure putnext will not miss a pending
 * wakeup.
 */
void
qwait(queue_t *q)
{
	syncq_t	*sq, *outer;
	uint_t	flags;
	int	is_sq_cioc;

	/*
	 * Perform the same operations as a leavesq(sq, SQ_OPENCLOSE)
	 * while detecting all cases where the perimeter is entered
	 * so that qwait can return to the caller.
	 *
	 * Drain the syncq if possible.  Otherwise reset SQ_EXCL and
	 * wait for a thread to leave the syncq.
	 */
	sq = q->q_syncq;
	ASSERT(sq);
	is_sq_cioc = (sq->sq_type & SQ_CIOC) ? 1 : 0;
	ASSERT(sq->sq_outer == NULL || sq->sq_outer->sq_flags & SQ_WRITER);
	outer = sq->sq_outer;
	/*
	 * XXX this does not work if there is only an outer perimeter.
	 * The semantics of qwait/qwait_sig are undefined in this case.
	 */
	if (outer)
		outer_exit(outer);

	mutex_enter(SQLOCK(sq));
	if (is_sq_cioc == 0) {
		SQ_PUTLOCKS_ENTER(sq);
	}
	flags = sq->sq_flags;
	/*
	 * Drop SQ_EXCL and sq_count but hold the SQLOCK
	 * to prevent any undetected entry and exit into the perimeter.
	 */
	ASSERT(sq->sq_count > 0);
	sq->sq_count--;

	if (is_sq_cioc == 0) {
		ASSERT(flags & SQ_EXCL);
		flags &= ~SQ_EXCL;
	}
	/*
	 * Unblock any thread blocked in an entersq or outer_enter.
	 * Note: we do not unblock a thread waiting in qwait/qwait_sig,
	 * since that could lead to livelock with two threads in
	 * qwait for the same (per module) inner perimeter.
	 */
	if (flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		flags &= ~SQ_WANTWAKEUP;
	}
	sq->sq_flags = flags;
	if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_EXIT(sq);
		}
		/* drain_syncq() drops SQLOCK */
		drain_syncq(sq);
		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
		entersq(sq, SQ_OPENCLOSE);
		return;
	}
	/*
	 * Sleep on sq_exitwait to only be woken up when threads leave the
	 * put or service procedures.  We can not sleep on sq_wait since an
	 * outer_exit in a qwait running in the same outer perimeter would
	 * cause a livelock "ping-pong" between two or more qwait'ers.
	 */
	do {
		sq->sq_flags |= SQ_WANTEXWAKEUP;
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_EXIT(sq);
		}
		cv_wait(&sq->sq_exitwait, SQLOCK(sq));
		if (is_sq_cioc == 0) {
			SQ_PUTLOCKS_ENTER(sq);
		}
	} while (sq->sq_flags & SQ_WANTEXWAKEUP);
	if (is_sq_cioc == 0) {
		SQ_PUTLOCKS_EXIT(sq);
	}
	mutex_exit(SQLOCK(sq));

	/*
	 * Re-enter the perimeters again
	 */
	entersq(sq, SQ_OPENCLOSE);
}

/*
 * Used for the synchronous streams entrypoints when sleeping outside
 * the perimeters.  Must never be called from a regular put entrypoint.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT sync
 * queues).  If it is a CIPUT sync queue, put entry points were not blocked in
 * the first place by rwnext/infonext, which are treated as put entrypoints
 * for perimeter synchronization purposes.
 *
 * Consolidation private.
 */
boolean_t
qwait_rw(queue_t *q)
{
	syncq_t	*sq;
	ulong_t	flags;
	boolean_t gotsignal = B_FALSE;

	/*
	 * Perform the same operations as a leavesq(sq, SQ_PUT)
	 * while detecting all cases where the perimeter is entered
	 * so that qwait_rw can return to the caller.
	 *
	 * Drain the syncq if possible.  Otherwise reset SQ_EXCL and
	 * wait for a thread to leave the syncq.
	 */
	sq = q->q_syncq;
	ASSERT(sq);

	mutex_enter(SQLOCK(sq));
	flags = sq->sq_flags;
	/*
	 * Drop SQ_EXCL and sq_count but hold the SQLOCK to prevent any
	 * undetected entry and exit into the perimeter.
	 */
	ASSERT(sq->sq_count > 0);
	sq->sq_count--;
	if (!(sq->sq_type & SQ_CIPUT)) {
		ASSERT(flags & SQ_EXCL);
		flags &= ~SQ_EXCL;
	}
	/*
	 * Unblock any thread blocked in an entersq or outer_enter.
	 * Note: we do not unblock a thread waiting in qwait/qwait_sig,
	 * since that could lead to livelock with two threads in
	 * qwait for the same (per module) inner perimeter.
	 */
	if (flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		flags &= ~SQ_WANTWAKEUP;
	}
	sq->sq_flags = flags;
	if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
		/* drain_syncq() drops SQLOCK */
		drain_syncq(sq);
		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
		entersq(sq, SQ_PUT);
		return (B_FALSE);
	}
	/*
	 * Sleep on sq_exitwait to only be woken up when threads leave the
	 * put or service procedures.  We can not sleep on sq_wait since an
	 * outer_exit in a qwait running in the same outer perimeter would
	 * cause a livelock "ping-pong" between two or more qwait'ers.
	 */
	do {
		sq->sq_flags |= SQ_WANTEXWAKEUP;
		if (cv_wait_sig(&sq->sq_exitwait, SQLOCK(sq)) <= 0) {
			sq->sq_flags &= ~SQ_WANTEXWAKEUP;
			gotsignal = B_TRUE;
			break;
		}
	} while (sq->sq_flags & SQ_WANTEXWAKEUP);
	mutex_exit(SQLOCK(sq));

	/*
	 * Re-enter the perimeters again
	 */
	entersq(sq, SQ_PUT);
	return (gotsignal);
}

/*
 * Asynchronously upgrade to exclusive access at either the inner or
 * outer perimeter.
 */
void
qwriter(queue_t *q, mblk_t *mp, void (*func)(), int perim)
{
	if (perim == PERIM_INNER)
		qwriter_inner(q, mp, func);
	else if (perim == PERIM_OUTER)
		qwriter_outer(q, mp, func);
	else
		panic("qwriter: wrong \"perimeter\" parameter");
}

/*
 * Schedule a synchronous streams timeout
 */
timeout_id_t
qtimeout(queue_t *q, void (*func)(void *), void *arg, clock_t tim)
{
	syncq_t		*sq;
	callbparams_t	*cbp;
	timeout_id_t	tid;

	sq = q->q_syncq;
	/*
	 * We don't want the timeout firing before its params are set up;
	 * callbparams_alloc() acquires SQLOCK(sq).
	 * qtimeout() can't fail and can't sleep, so panic if memory is not
	 * available.
	 */
	cbp = callbparams_alloc(sq, func, arg, KM_NOSLEEP | KM_PANIC);
	/*
	 * the callbflags in the sq use the same flags.  They get ANDed
	 * in the callbwrapper to determine if a qun* of this callback type
	 * is required.  This is not a request to cancel.
	 */
	cbp->cbp_flags = SQ_CANCEL_TOUT;
	/* check new timeout version return codes */
	tid = timeout(qcallbwrapper, cbp, tim);
	cbp->cbp_id = (callbparams_id_t)tid;
	mutex_exit(SQLOCK(sq));
	/* use local id because the cbp memory could be free by now */
	return (tid);
}
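
/*
 * Illustrative (non-normative) sketch of the usual qtimeout()/quntimeout()
 * pairing in a synchronous streams module; sp is hypothetical per-stream
 * state and my_tick() a hypothetical callback that runs inside the perimeter.
 *
 *	sp->sp_tid = qtimeout(q, my_tick, sp, drv_usectohz(1000000));
 *	...
 *	if (sp->sp_tid != 0) {
 *		(void) quntimeout(q, sp->sp_tid);
 *		sp->sp_tid = 0;
 *	}
 */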

bufcall_id_t
qbufcall(queue_t *q, size_t size, uint_t pri, void (*func)(void *), void *arg)
{
	syncq_t		*sq;
	callbparams_t	*cbp;
	bufcall_id_t	bid;

	sq = q->q_syncq;
	/*
	 * We don't want the bufcall firing before its params are set up;
	 * callbparams_alloc() acquires SQLOCK(sq) if successful.
	 */
	cbp = callbparams_alloc(sq, func, arg, KM_NOSLEEP);
	if (cbp == NULL)
		return ((bufcall_id_t)0);

	/*
	 * the callbflags in the sq use the same flags.  They get ANDed
	 * in the callbwrapper to determine if a qun* of this callback type
	 * is required.  This is not a request to cancel.
	 */
	cbp->cbp_flags = SQ_CANCEL_BUFCALL;
	/* check new timeout version return codes */
	bid = bufcall(size, pri, qcallbwrapper, cbp);
	cbp->cbp_id = (callbparams_id_t)bid;
	if (bid == 0) {
		callbparams_free(sq, cbp);
	}
	mutex_exit(SQLOCK(sq));
	/* use local id because the params memory could be free by now */
	return (bid);
}
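
/*
 * Illustrative (non-normative) sketch of the classic allocb-failure recovery
 * idiom using qbufcall(); passing qenable as the callback simply reschedules
 * the service routine once an allocation of the given size may succeed.
 * sp->sp_bufcall_id is hypothetical per-stream state.
 *
 *	if ((bp = allocb(len, BPRI_MED)) == NULL) {
 *		sp->sp_bufcall_id = qbufcall(q, len, BPRI_MED,
 *		    (void (*)(void *))qenable, q);
 *		return (0);
 *	}
 */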

/*
 * Cancel a timeout callback which enters the inner perimeter.
 * Cancelling of all callback types on a given syncq is serialized.
 * The SQ_CALLB_BYPASSED flag indicates that the callback fn did
 * not execute.  The quntimeout return value needs to reflect this.
 * As with our existing callback programming model, callbacks must
 * be cancelled before a close completes, thus ensuring that the sq
 * is valid when the callback wrapper is executed.
 */
clock_t
quntimeout(queue_t *q, timeout_id_t id)
{
	syncq_t *sq = q->q_syncq;
	clock_t ret;

	mutex_enter(SQLOCK(sq));
	/* callbacks are processed serially on each syncq */
	while (sq->sq_callbflags & SQ_CALLB_CANCEL_MASK) {
		sq->sq_flags |= SQ_WANTWAKEUP;
		cv_wait(&sq->sq_wait, SQLOCK(sq));
	}
	sq->sq_cancelid = (callbparams_id_t)id;
	sq->sq_callbflags = SQ_CANCEL_TOUT;
	if (sq->sq_flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		sq->sq_flags &= ~SQ_WANTWAKEUP;
	}
	mutex_exit(SQLOCK(sq));
	ret = untimeout(id);
	mutex_enter(SQLOCK(sq));
	if (ret != -1) {
		/* The wrapper was never called - need to free based on id */
		callbparams_free_id(sq, (callbparams_id_t)id, SQ_CANCEL_TOUT);
	}
	if (sq->sq_callbflags & SQ_CALLB_BYPASSED) {
		ret = 0;	/* this was how much time was left */
	}
	sq->sq_callbflags = 0;
	if (sq->sq_flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		sq->sq_flags &= ~SQ_WANTWAKEUP;
	}
	mutex_exit(SQLOCK(sq));
	return (ret);
}


void
qunbufcall(queue_t *q, bufcall_id_t id)
{
	syncq_t *sq = q->q_syncq;

	mutex_enter(SQLOCK(sq));
	/* callbacks are processed serially on each syncq */
	while (sq->sq_callbflags & SQ_CALLB_CANCEL_MASK) {
		sq->sq_flags |= SQ_WANTWAKEUP;
		cv_wait(&sq->sq_wait, SQLOCK(sq));
	}
	sq->sq_cancelid = (callbparams_id_t)id;
	sq->sq_callbflags = SQ_CANCEL_BUFCALL;
	if (sq->sq_flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		sq->sq_flags &= ~SQ_WANTWAKEUP;
	}
	mutex_exit(SQLOCK(sq));
	unbufcall(id);
	mutex_enter(SQLOCK(sq));
	/*
	 * No indication from unbufcall if the callback has already run.
	 * Always attempt to free it.
	 */
	callbparams_free_id(sq, (callbparams_id_t)id, SQ_CANCEL_BUFCALL);
	sq->sq_callbflags = 0;
	if (sq->sq_flags & SQ_WANTWAKEUP) {
		cv_broadcast(&sq->sq_wait);
		sq->sq_flags &= ~SQ_WANTWAKEUP;
	}
	mutex_exit(SQLOCK(sq));
}

/*
 * Associate the stream with an instance of the bottom driver.  This
 * function is called by APIs that establish or modify the hardware
 * association (ppa) of an open stream.  Two examples of such
 * post-open(9E) APIs are the dlpi(7p) DL_ATTACH_REQ message, and the
 * ndd(1M) "instance=" ioctl(2).  This interface may be called from a
 * stream driver's wput procedure and from within syncq perimeters,
 * so it can't block.
 *
 * The qassociate() "model" is that it should drive attach(9E), yet it
 * can't really do that because driving attach(9E) is a blocking
 * operation.  Instead, the qassociate() implementation has complex
 * dependencies on the implementation behavior of other parts of the
 * kernel to ensure all appropriate instances (ones that have not been
 * made inaccessible by DR) are attached at stream open() time, and
 * that they will not autodetach.  The code relies on the fact that an
 * open() of a stream that ends up using qassociate() always occurs on
 * a minor node created with CLONE_DEV.  The open() comes through
 * clnopen() and since clnopen() calls ddi_hold_installed_driver() we
 * attach all instances and mark them DN_NO_AUTODETACH (given
 * DN_DRIVER_HELD is maintained correctly).
 *
 * Since qassociate() can't really drive attach(9E), there are corner
 * cases where the compromise described above leads to qassociate()
 * returning failure.  This can happen when administrative functions
 * that cause detach(9E), such as "update_drv" or "modunload -i", are
 * performed on the driver between the time the stream was opened and
 * the time its hardware association was established.  Although this can
 * theoretically be an arbitrary amount of time, in practice the window
 * is usually quite small, since applications almost always issue their
 * hardware association request immediately after opening the stream,
 * and do not typically switch association while open.  When these
 * corner cases occur, and qassociate() finds the requested instance
 * detached, it will return failure.  This failure should be propagated
 * to the requesting administrative application using the appropriate
 * post-open(9E) API error mechanism.
 *
 * All qassociate() callers are expected to check for and gracefully handle
 * failure return, propagating errors back to the requesting administrative
 * application.
 */
int
qassociate(queue_t *q, int instance)
{
	vnode_t *vp;
	major_t major;
	dev_info_t *dip;

	if (instance == -1) {
		ddi_assoc_queue_with_devi(q, NULL);
		return (0);
	}

	vp = STREAM(q)->sd_vnode;
	major = getmajor(vp->v_rdev);
	dip = ddi_hold_devi_by_instance(major, instance,
	    E_DDI_HOLD_DEVI_NOATTACH);
	if (dip == NULL)
		return (-1);

	ddi_assoc_queue_with_devi(q, dip);
	ddi_release_devi(dip);
	return (0);
}
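
/*
 * Illustrative (non-normative) sketch of how a DLPI driver's wput path might
 * use qassociate() while handling DL_ATTACH_REQ; ppa and the surrounding
 * error handling are hypothetical.
 *
 *	if (qassociate(q, (int)ppa) != 0) {
 *		... ack the request upstream with DL_ERROR_ACK/DL_BADPPA ...
 *		return;
 *	}
 *	... record the association and acknowledge with DL_OK_ACK ...
 */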

/*
 * This routine is the SVR4MP 'replacement' for
 * hat_getkpfnum.  The only major difference is
 * the return value for illegal addresses - since
 * sunm_getkpfnum() and srmmu_getkpfnum() both
 * return '-1' for bogus mappings, we can (more or
 * less) return the value directly.
 */
ppid_t
kvtoppid(caddr_t addr)
{
	return ((ppid_t)hat_getpfnum(kas.a_hat, addr));
}

/*
 * This is used to set the timeout value for cv_timedwait() or
 * cv_timedwait_sig().
 */
void
time_to_wait(clock_t *now, clock_t time)
{
	*now = ddi_get_lbolt() + time;
}