xref: /illumos-gate/usr/src/uts/common/io/logindmux.c (revision 4f364e7c95ee7fd9d5bbeddc1940e92405bb0e72)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * Description: logindmux.c
29  *
30  * The logindmux driver is used with login modules (like telmod/rlmod).
 * This is a 1x1 cloning mux and two of these muxes are used. The lower link
 * of one of the muxes receives input from the network and the lower link of
 * the other mux receives input from the pseudo-terminal subsystem.
34  *
35  * The logdmux_qexch_lock mutex manages the race between LOGDMX_IOC_QEXCHANGE,
36  * logdmuxunlink() and logdmuxclose(), so that the instance selected as a peer
37  * in LOGDMX_IOC_QEXCHANGE cannot be unlinked or closed until the qexchange
38  * is complete; see the inline comments in the code for details.
39  *
40  * The logdmux_peerq_lock mutex manages the race between logdmuxlwsrv() and
41  * logdmuxlrput() (when null'ing tmxp->peerq during LOGDMUX_UNLINK_REQ
42  * processing).
43  *
44  * The logdmux_minor_lock mutex serializes the growth of logdmux_minor_arena
45  * (the arena is grown gradually rather than allocated all at once so that
46  * minor numbers are recycled sooner; for simplicity it is never shrunk).
47  *
48  * The unlink operation is implemented using protocol messages that flow
49  * between the two logindmux peer instances. The instance processing the
50  * I_UNLINK ioctl will send a LOGDMUX_UNLINK_REQ protocol message to its
51  * peer to indicate that it wishes to unlink; the peer will process this
52  * message in its lrput, null its tmxp->peerq and then send a
53  * LOGDMUX_UNLINK_RESP protocol message in reply to indicate that the
54  * unlink can proceed; having received the reply in its lrput, the
55  * instance processing the I_UNLINK can then continue. To ensure that only
56  * one of the peer instances will be actively processing an I_UNLINK at
57  * any one time, a single structure (an unlinkinfo_t containing a mutex,
58  * state variable and pointer to an M_CTL mblk) is allocated during
59  * the processing of the LOGDMX_IOC_QEXCHANGE ioctl. The two instances, if
60  * trying to unlink simultaneously, will race to get control of this
61  * structure which contains the resources necessary to process the
62  * I_UNLINK. The instance that wins this race will be able to continue
63  * with the unlink whilst the other instance will be obliged to wait.
64  */
65 
66 #include <sys/types.h>
67 #include <sys/param.h>
68 #include <sys/errno.h>
69 #include <sys/debug.h>
70 #include <sys/stropts.h>
71 #include <sys/stream.h>
72 #include <sys/logindmux.h>
73 #include <sys/logindmux_impl.h>
74 #include <sys/stat.h>
75 #include <sys/kmem.h>
76 #include <sys/vmem.h>
77 #include <sys/strsun.h>
78 #include <sys/sysmacros.h>
79 #include <sys/mkdev.h>
80 #include <sys/ddi.h>
81 #include <sys/sunddi.h>
82 #include <sys/modctl.h>
83 #include <sys/termios.h>
84 #include <sys/cmn_err.h>
85 
/*
 * STREAMS open/close routines and the put/service procedures for the
 * upper and lower halves of the mux, plus the unlinkinfo_t allocator.
 */
static int logdmuxopen(queue_t *, dev_t *, int, int, cred_t *);
static int logdmuxclose(queue_t *, int, cred_t *);
static int logdmuxursrv(queue_t *);
static int logdmuxuwput(queue_t *, mblk_t *);
static int logdmuxlrput(queue_t *, mblk_t *);
static int logdmuxlrsrv(queue_t *);
static int logdmuxlwsrv(queue_t *);
static int logdmuxuwsrv(queue_t *);
static int logdmux_alloc_unlinkinfo(struct tmx *, struct tmx *);

/*
 * I_LINK/I_UNLINK processing, the unlink polling timer, the allocation
 * failure recovery helper and the data-only queue flush.
 */
static void logdmuxlink(queue_t *, mblk_t *);
static void logdmuxunlink(queue_t *, mblk_t *);
static void logdmux_finish_unlink(queue_t *, mblk_t *);
static void logdmux_unlink_timer(void *arg);
static void recover(queue_t *, mblk_t *, size_t);
static void flushq_dataonly(queue_t *);

/* Held while a peer is looked up, unlinked without a peer, or closed. */
static kmutex_t logdmux_qexch_lock;
/* Held while tmxp->peerq is cleared in lrput or examined in lwsrv. */
static kmutex_t logdmux_peerq_lock;
/* Held in logdmuxopen() while the minor arena is grown and allocated from. */
static kmutex_t logdmux_minor_lock;
static minor_t	logdmux_maxminor = 256;	/* grown as necessary */
static vmem_t	*logdmux_minor_arena;	/* identifier arena for minors */
static void	*logdmux_statep;	/* soft state anchor (struct tmx) */
109 
/*
 * Module information shared by all four queues of the mux.  The
 * initializers are positional; the per-field notes below follow the
 * field order documented in module_info(9S).
 */
static struct module_info logdmuxm_info = {
	LOGDMX_ID,	/* module id */
	"logindmux",	/* module name */
	0,		/* minimum packet size */
	256,		/* maximum packet size */
	512,		/* high-water mark */
	256		/* low-water mark */
};

/*
 * Upper read side: no put procedure; logdmuxursrv() as the service
 * procedure; this is also where the open and close routines are hooked in.
 */
static struct qinit logdmuxurinit = {
	NULL,
	logdmuxursrv,
	logdmuxopen,
	logdmuxclose,
	NULL,
	&logdmuxm_info
};

/*
 * Upper write side: logdmuxuwput() / logdmuxuwsrv().
 */
static struct qinit logdmuxuwinit = {
	logdmuxuwput,
	logdmuxuwsrv,
	NULL,
	NULL,
	NULL,
	&logdmuxm_info
};

/*
 * Lower read side: logdmuxlrput() / logdmuxlrsrv().
 */
static struct qinit logdmuxlrinit = {
	logdmuxlrput,
	logdmuxlrsrv,
	NULL,
	NULL,
	NULL,
	&logdmuxm_info
};

/*
 * Lower write side: no put procedure, only the logdmuxlwsrv() service
 * procedure.
 */
static struct qinit logdmuxlwinit = {
	NULL,
	logdmuxlwsrv,
	NULL,
	NULL,
	NULL,
	&logdmuxm_info
};

/*
 * Ties the four qinit tables together: upper read, upper write, lower
 * read, lower write, in that order.
 */
struct streamtab logdmuxinfo = {
	&logdmuxurinit,
	&logdmuxuwinit,
	&logdmuxlrinit,
	&logdmuxlwinit
};
161 
/*
 * Autoconfiguration entry points and the devinfo node recorded by
 * logdmux_attach() for later use by logdmux_info().
 */
static int logdmux_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int logdmux_attach(dev_info_t *, ddi_attach_cmd_t);
static int logdmux_detach(dev_info_t *, ddi_detach_cmd_t);
static dev_info_t *logdmux_dip;

/*
 * Device operations for this STREAMS driver; the stream is described by
 * logdmuxinfo and the driver is flagged D_MP | D_MTPERQ.
 */
DDI_DEFINE_STREAM_OPS(logdmux_ops, nulldev, nulldev, logdmux_attach,
    logdmux_detach, nulldev, logdmux_info, D_MP | D_MTPERQ, &logdmuxinfo,
    ddi_quiesce_not_needed);

/*
 * Loadable-module linkage: a single driver module described by modldrv.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"logindmux driver",
	&logdmux_ops
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};
180 
181 int
182 _init(void)
183 {
184 	int	ret;
185 
186 	mutex_init(&logdmux_peerq_lock, NULL, MUTEX_DRIVER, NULL);
187 	mutex_init(&logdmux_qexch_lock, NULL, MUTEX_DRIVER, NULL);
188 
189 	if ((ret = mod_install(&modlinkage)) != 0) {
190 		mutex_destroy(&logdmux_peerq_lock);
191 		mutex_destroy(&logdmux_qexch_lock);
192 		return (ret);
193 	}
194 
195 	logdmux_minor_arena = vmem_create("logdmux_minor", (void *)1,
196 	    logdmux_maxminor, 1, NULL, NULL, NULL, 0,
197 	    VM_SLEEP | VMC_IDENTIFIER);
198 	(void) ddi_soft_state_init(&logdmux_statep, sizeof (struct tmx), 1);
199 
200 	return (0);
201 }
202 
203 int
204 _fini(void)
205 {
206 	int	ret;
207 
208 	if ((ret = mod_remove(&modlinkage)) == 0) {
209 		mutex_destroy(&logdmux_peerq_lock);
210 		mutex_destroy(&logdmux_qexch_lock);
211 		ddi_soft_state_fini(&logdmux_statep);
212 		vmem_destroy(logdmux_minor_arena);
213 		logdmux_minor_arena = NULL;
214 	}
215 
216 	return (ret);
217 }
218 
/*
 * Module information entry point: hands the request to mod_info() together
 * with this module's modlinkage and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
224 
225 static int
226 logdmux_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
227 {
228 	if (cmd != DDI_ATTACH)
229 		return (DDI_FAILURE);
230 
231 	if (ddi_create_minor_node(devi, "logindmux", S_IFCHR, 0, DDI_PSEUDO,
232 	    CLONE_DEV) == DDI_FAILURE)
233 		return (DDI_FAILURE);
234 
235 	logdmux_dip = devi;
236 	return (DDI_SUCCESS);
237 }
238 
239 static int
240 logdmux_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
241 {
242 	if (cmd != DDI_DETACH)
243 		return (DDI_FAILURE);
244 
245 	ddi_remove_minor_node(devi, NULL);
246 	return (DDI_SUCCESS);
247 }
248 
249 /* ARGSUSED */
250 static int
251 logdmux_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
252 {
253 	int error;
254 
255 	switch (infocmd) {
256 	case DDI_INFO_DEVT2DEVINFO:
257 		if (logdmux_dip == NULL) {
258 			error = DDI_FAILURE;
259 		} else {
260 			*result = logdmux_dip;
261 			error = DDI_SUCCESS;
262 		}
263 		break;
264 	case DDI_INFO_DEVT2INSTANCE:
265 		*result = (void *)0;
266 		error = DDI_SUCCESS;
267 		break;
268 	default:
269 		error = DDI_FAILURE;
270 	}
271 	return (error);
272 }
273 
274 /*
275  * Logindmux open routine
276  */
277 /*ARGSUSED*/
278 static int
279 logdmuxopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
280 {
281 	struct	tmx *tmxp;
282 	minor_t	minor, omaxminor;
283 
284 	if (sflag != CLONEOPEN)
285 		return (EINVAL);
286 
287 	mutex_enter(&logdmux_minor_lock);
288 	if (vmem_size(logdmux_minor_arena, VMEM_FREE) == 0) {
289 		/*
290 		 * The arena has been exhausted; grow by powers of two
291 		 * up to MAXMIN; bail if we've run out of minors.
292 		 */
293 		if (logdmux_maxminor == MAXMIN) {
294 			mutex_exit(&logdmux_minor_lock);
295 			return (ENOMEM);
296 		}
297 
298 		omaxminor = logdmux_maxminor;
299 		logdmux_maxminor = MIN(logdmux_maxminor << 1, MAXMIN);
300 
301 		(void) vmem_add(logdmux_minor_arena,
302 		    (void *)(uintptr_t)(omaxminor + 1),
303 		    logdmux_maxminor - omaxminor, VM_SLEEP);
304 	}
305 	minor = (minor_t)(uintptr_t)
306 	    vmem_alloc(logdmux_minor_arena, 1, VM_SLEEP);
307 	mutex_exit(&logdmux_minor_lock);
308 
309 	if (ddi_soft_state_zalloc(logdmux_statep, minor) == DDI_FAILURE) {
310 		vmem_free(logdmux_minor_arena, (void *)(uintptr_t)minor, 1);
311 		return (ENOMEM);
312 	}
313 
314 	tmxp = ddi_get_soft_state(logdmux_statep, minor);
315 	tmxp->rdq = q;
316 	tmxp->muxq = NULL;
317 	tmxp->peerq = NULL;
318 	tmxp->unlinkinfop = NULL;
319 	tmxp->dev0 = minor;
320 
321 	*devp = makedevice(getmajor(*devp), tmxp->dev0);
322 	q->q_ptr = tmxp;
323 	WR(q)->q_ptr = tmxp;
324 
325 	qprocson(q);
326 	return (0);
327 }
328 
329 /*
330  * Logindmux close routine gets called when telnet connection is closed
331  */
332 /*ARGSUSED*/
333 static int
334 logdmuxclose(queue_t *q, int flag, cred_t *crp)
335 {
336 	struct tmx	*tmxp = q->q_ptr;
337 	minor_t		minor = tmxp->dev0;
338 
339 	ASSERT(tmxp->muxq == NULL);
340 	ASSERT(tmxp->peerq == NULL);
341 
342 	qprocsoff(q);
343 	if (tmxp->wbufcid != 0) {
344 		qunbufcall(q, tmxp->wbufcid);
345 		tmxp->wbufcid = 0;
346 	}
347 	if (tmxp->rbufcid != 0) {
348 		qunbufcall(q, tmxp->rbufcid);
349 		tmxp->rbufcid = 0;
350 	}
351 	if (tmxp->rtimoutid != 0) {
352 		(void) quntimeout(q, tmxp->rtimoutid);
353 		tmxp->rtimoutid = 0;
354 	}
355 	if (tmxp->wtimoutid != 0) {
356 		(void) quntimeout(q, tmxp->wtimoutid);
357 		tmxp->wtimoutid = 0;
358 	}
359 	if (tmxp->utimoutid != 0) {
360 		(void) quntimeout(q, tmxp->utimoutid);
361 		tmxp->utimoutid = 0;
362 	}
363 
364 	/*
365 	 * Hold logdmux_qexch_lock to prevent another thread that might be
366 	 * in LOGDMX_IOC_QEXCHANGE from looking up our state while we're
367 	 * disposing of it.
368 	 */
369 	mutex_enter(&logdmux_qexch_lock);
370 	ddi_soft_state_free(logdmux_statep, minor);
371 	vmem_free(logdmux_minor_arena, (void *)(uintptr_t)minor, 1);
372 	mutex_exit(&logdmux_qexch_lock);
373 
374 	q->q_ptr = NULL;
375 	WR(q)->q_ptr = NULL;
376 
377 	return (0);
378 }
379 
380 /*
381  * Upper read service routine
382  */
383 static int
384 logdmuxursrv(queue_t *q)
385 {
386 	struct tmx *tmxp = q->q_ptr;
387 
388 	if (tmxp->muxq != NULL)
389 		qenable(RD(tmxp->muxq));
390 	return (0);
391 }
392 
/*
 * Upper write put routine; called when the daemon above sends data or
 * ioctl messages to the upper mux queue.
 *
 *   M_IOCTL    LOGDMX_IOC_QEXCHANGE, I_LINK and I_UNLINK are handled here
 *              (the latter two via logdmuxlink()/logdmuxunlink()); any
 *              other ioctl is passed to the lower stream, or NAKed with
 *              EINVAL if nothing is linked.
 *   M_DATA     on the non-ptm instance, wrapped in a one-byte M_CTL
 *              carrying M_CTL_MAGIC_NUMBER, then treated like M_PROTO.
 *   M_PROTO,
 *   M_PCPROTO  passed to the lower stream, subject to flow control for
 *              ordinary-priority messages.
 *   M_FLUSH    flushes our write queue if FLUSHW is set, then is passed
 *              down or, if nothing is linked, turned around for FLUSHR.
 *
 * Anything else is logged and freed.  Always returns 0.
 */
static int
logdmuxuwput(queue_t *q, mblk_t *mp)
{
	queue_t		*qp;
	mblk_t		*newmp;
	struct iocblk	*ioc;
	minor_t		minor;
	STRUCT_HANDLE(protocol_arg, protoh);
	struct tmx	*tmxp, *tmxpeerp;
	int		error;

	tmxp = q->q_ptr;

	switch (mp->b_datap->db_type) {

	case M_IOCTL:
		ASSERT(MBLKL(mp) == sizeof (struct iocblk));

		ioc = (struct iocblk *)mp->b_rptr;
		switch (ioc->ioc_cmd) {
		/*
		 * This is a special ioctl which exchanges q info
		 * of the two peers, connected to netf and ptmx.
		 */
		case LOGDMX_IOC_QEXCHANGE:
			/*
			 * Make sure the whole protocol_arg (sized for the
			 * caller's data model) is present in b_cont.
			 */
			error = miocpullup(mp,
			    SIZEOF_STRUCT(protocol_arg, ioc->ioc_flag));
			if (error != 0) {
				miocnak(q, mp, 0, error);
				break;
			}
			STRUCT_SET_HANDLE(protoh, ioc->ioc_flag,
			    (struct protocol_arg *)mp->b_cont->b_rptr);
#ifdef _SYSCALL32_IMPL
			/* A 32-bit caller passes a compressed dev32_t. */
			if ((ioc->ioc_flag & DATAMODEL_MASK) ==
			    DATAMODEL_ILP32) {
				minor = getminor(expldev(
				    STRUCT_FGET(protoh, dev)));
			} else
#endif
			{
				minor = getminor(STRUCT_FGET(protoh, dev));
			}

			/*
			 * The second argument to ddi_get_soft_state() is
			 * interpreted as an `int', so prohibit negative
			 * values.
			 */
			if ((int)minor < 0) {
				miocnak(q, mp, 0, EINVAL);
				break;
			}

			/*
			 * We must hold logdmux_qexch_lock while looking up
			 * the proposed peer to prevent another thread from
			 * simultaneously I_UNLINKing or closing it.
			 */
			mutex_enter(&logdmux_qexch_lock);

			/*
			 * For LOGDMX_IOC_QEXCHANGE to succeed, our peer must
			 * exist (and not be us), and both we and our peer
			 * must be I_LINKed (i.e., muxq must not be NULL) and
			 * not already have a peer.
			 */
			tmxpeerp = ddi_get_soft_state(logdmux_statep, minor);
			if (tmxpeerp == NULL || tmxpeerp == tmxp ||
			    tmxpeerp->muxq == NULL || tmxpeerp->peerq != NULL ||
			    tmxp->muxq == NULL || tmxp->peerq != NULL) {
				mutex_exit(&logdmux_qexch_lock);
				miocnak(q, mp, 0, EINVAL);
				break;
			}

			/*
			 * If `flag' is set then exchange queues and assume
			 * tmxp refers to the ptmx stream.
			 */
			if (STRUCT_FGET(protoh, flag)) {
				/*
				 * Allocate and populate the structure we
				 * need when processing an I_UNLINK ioctl.
				 * Give both logindmux instances a pointer
				 * to it from their tmx structure.
				 */
				if ((error = logdmux_alloc_unlinkinfo(
				    tmxp, tmxpeerp)) != 0) {
					mutex_exit(&logdmux_qexch_lock);
					miocnak(q, mp, 0, error);
					break;
				}
				/* Each side's peerq is the other's lower queue. */
				tmxp->peerq = tmxpeerp->muxq;
				tmxpeerp->peerq = tmxp->muxq;
				tmxp->isptm = B_TRUE;
			}
			mutex_exit(&logdmux_qexch_lock);
			miocack(q, mp, 0, 0);
			break;

		case I_LINK:
			ASSERT(MBLKL(mp->b_cont) == sizeof (struct linkblk));
			logdmuxlink(q, mp);
			break;

		case I_UNLINK:
			ASSERT(MBLKL(mp->b_cont) == sizeof (struct linkblk));
			logdmuxunlink(q, mp);
			break;

		default:
			/* Unknown ioctls belong to the lower stream, if any. */
			if (tmxp->muxq == NULL) {
				miocnak(q, mp, 0, EINVAL);
				return (0);
			}
			putnext(tmxp->muxq, mp);
			break;
		}

		break;

	case M_DATA:
		if (!tmxp->isptm) {
			/*
			 * Not the ptm side: prepend a one-byte M_CTL
			 * holding M_CTL_MAGIC_NUMBER.  If no buffer is
			 * available, recover() requeues the message and
			 * arranges for a later retry.
			 */
			if ((newmp = allocb(sizeof (char), BPRI_MED)) == NULL) {
				recover(q, mp, sizeof (char));
				return (0);
			}
			newmp->b_datap->db_type = M_CTL;
			*newmp->b_wptr++ = M_CTL_MAGIC_NUMBER;
			newmp->b_cont = mp;
			mp = newmp;
		}
		/* FALLTHRU */

	case M_PROTO:
	case M_PCPROTO:
		qp = tmxp->muxq;
		if (qp == NULL) {
			merror(q, mp, EINVAL);
			return (0);
		}

		/*
		 * Ordinary-priority messages are queued if something is
		 * already queued ahead of them or the lower stream is
		 * flow controlled; logdmuxuwsrv() forwards them later.
		 */
		if (queclass(mp) < QPCTL) {
			if (q->q_first != NULL || !canputnext(qp)) {
				(void) putq(q, mp);
				return (0);
			}
		}
		putnext(qp, mp);
		break;

	case M_FLUSH:
		if (*mp->b_rptr & FLUSHW)
			flushq(q, FLUSHALL);

		if (tmxp->muxq != NULL) {
			putnext(tmxp->muxq, mp);
			return (0);
		}

		/* Nothing linked below: turn the flush around ourselves. */
		*mp->b_rptr &= ~FLUSHW;
		if (*mp->b_rptr & FLUSHR)
			qreply(q, mp);
		else
			freemsg(mp);
		break;

	default:
		cmn_err(CE_NOTE, "logdmuxuwput: received unexpected message"
		    " of type 0x%x", mp->b_datap->db_type);
		freemsg(mp);
	}
	return (0);
}
572 
573 /*
574  * Upper write service routine
575  */
576 static int
577 logdmuxuwsrv(queue_t *q)
578 {
579 	mblk_t		*mp, *newmp;
580 	queue_t		*qp;
581 	struct tmx	*tmxp = q->q_ptr;
582 
583 	while ((mp = getq(q)) != NULL) {
584 		switch (mp->b_datap->db_type) {
585 		case M_DATA:
586 			if (!tmxp->isptm) {
587 				if ((newmp = allocb(sizeof (char), BPRI_MED)) ==
588 				    NULL) {
589 					recover(q, mp, sizeof (char));
590 					return (0);
591 				}
592 				newmp->b_datap->db_type = M_CTL;
593 				*newmp->b_wptr++ = M_CTL_MAGIC_NUMBER;
594 				newmp->b_cont = mp;
595 				mp = newmp;
596 			}
597 			/* FALLTHRU */
598 
599 		case M_CTL:
600 		case M_PROTO:
601 			if (tmxp->muxq == NULL) {
602 				merror(q, mp, EIO);
603 				break;
604 			}
605 			qp = tmxp->muxq;
606 			if (!canputnext(qp)) {
607 				(void) putbq(q, mp);
608 				return (0);
609 			}
610 			putnext(qp, mp);
611 			break;
612 
613 
614 		default:
615 			cmn_err(CE_NOTE, "logdmuxuwsrv: received unexpected"
616 			    " message of type 0x%x", mp->b_datap->db_type);
617 			freemsg(mp);
618 		}
619 	}
620 	return (0);
621 }
622 
/*
 * Logindmux lower read put routine.  Depending on the message type, a
 * message arriving from the lower stream is routed either to the peer
 * instance's lower queue (tmxp->peerq) or up to the daemon (tmxp->rdq):
 *
 *   - recognized terminal M_IOCTLs, M_DATA and M_HANGUP go to the peer;
 *   - M_CTL goes up to the daemon, except for logindmux unlink protocol
 *     messages, which are consumed here;
 *   - M_IOCACK, M_IOCNAK, M_PROTO, M_PCPROTO, M_PCSIG and M_SETOPTS go up
 *     to the daemon;
 *   - M_ERROR is dropped on the ptm side and turned into an M_HANGUP for
 *     the peer otherwise;
 *   - M_FLUSH goes up if marked with MSGMARK, otherwise to the peer with
 *     the FLUSHR/FLUSHW bits swapped.
 *
 * Messages are discarded while the instance has no lower stream or no
 * peer.  Always returns 0.
 */
static int
logdmuxlrput(queue_t *q, mblk_t *mp)
{
	mblk_t		*savemp;
	queue_t 	*qp;
	struct iocblk	*ioc;
	struct tmx	*tmxp = q->q_ptr;
	uchar_t		flush;
	uint_t		*messagep;
	unlinkinfo_t	*unlinkinfop = tmxp->unlinkinfop;

	if (tmxp->muxq == NULL || tmxp->peerq == NULL) {
		freemsg(mp);
		return (0);
	}

	/*
	 * If there's already a message on our queue and the incoming
	 * message is not of a high-priority, enqueue the message --
	 * but not if it's a logindmux protocol message.
	 */
	if ((q->q_first != NULL) && (queclass(mp) < QPCTL) &&
	    (!LOGDMUX_PROTO_MBLK(mp))) {
		(void) putq(q, mp);
		return (0);
	}

	switch (mp->b_datap->db_type) {

	case M_IOCTL:
		ioc = (struct iocblk *)mp->b_rptr;
		switch (ioc->ioc_cmd) {

		/* Terminal ioctls that are forwarded to the peer. */
		case TIOCSWINSZ:
		case TCSETAF:
		case TCSETSF:
		case TCSETA:
		case TCSETAW:
		case TCSETS:
		case TCSETSW:
		case TCSBRK:
		case TIOCSTI:
			qp = tmxp->peerq;
			break;

		default:
			cmn_err(CE_NOTE, "logdmuxlrput: received unexpected"
			    " request for ioctl 0x%x", ioc->ioc_cmd);

			/* NAK unrecognized ioctl's. */
			miocnak(q, mp, 0, 0);
			return (0);
		}
		break;

	case M_DATA:
	case M_HANGUP:
		qp = tmxp->peerq;
		break;

	case M_CTL:
		/*
		 * The protocol messages that flow between the peers
		 * to implement the unlink functionality are M_CTLs
		 * which have the M_IOCTL/I_UNLINK mblk of the ioctl
		 * attached via b_cont.  LOGDMUX_PROTO_MBLK() uses
		 * this to determine whether a particular M_CTL is a
		 * peer protocol message.
		 */
		if (LOGDMUX_PROTO_MBLK(mp)) {
			messagep = (uint_t *)mp->b_rptr;

			switch (*messagep) {

			case LOGDMUX_UNLINK_REQ:
				/*
				 * We've received a message from our
				 * peer indicating that it wants to
				 * unlink.
				 */
				*messagep = LOGDMUX_UNLINK_RESP;
				qp = tmxp->peerq;

				/*
				 * Drop our reference to the peer under
				 * logdmux_peerq_lock, which logdmuxlwsrv()
				 * holds while it examines peerq.
				 */
				mutex_enter(&logdmux_peerq_lock);
				tmxp->peerq = NULL;
				mutex_exit(&logdmux_peerq_lock);

				/* Send the response to the peer's lower RQ. */
				put(RD(qp), mp);
				return (0);

			case LOGDMUX_UNLINK_RESP:
				/*
				 * We've received a positive response
				 * from our peer to an earlier
				 * LOGDMUX_UNLINK_REQ that we sent.
				 * We can now carry on with the unlink.
				 */
				qp = tmxp->rdq;
				mutex_enter(&unlinkinfop->state_lock);
				ASSERT(unlinkinfop->state ==
				    LOGDMUX_UNLINK_PENDING);
				unlinkinfop->state = LOGDMUX_UNLINKED;
				mutex_exit(&unlinkinfop->state_lock);
				/* b_cont is the original I_UNLINK M_IOCTL. */
				logdmux_finish_unlink(WR(qp), mp->b_cont);
				return (0);
			}
		}

		/* An ordinary M_CTL: it goes up to the daemon. */
		qp = tmxp->rdq;
		if (q->q_first != NULL || !canputnext(qp)) {
			(void) putq(q, mp);
			return (0);
		}
		/*
		 * A one-byte M_CTL holding M_CTL_MAGIC_NUMBER is only a
		 * wrapper: strip it and pass up what it carries.
		 */
		if ((MBLKL(mp) == 1) && (*mp->b_rptr == M_CTL_MAGIC_NUMBER)) {
			savemp = mp->b_cont;
			freeb(mp);
			mp = savemp;
		}
		putnext(qp, mp);
		return (0);

	case M_IOCACK:
	case M_IOCNAK:
	case M_PROTO:
	case M_PCPROTO:
	case M_PCSIG:
	case M_SETOPTS:
		qp = tmxp->rdq;
		break;

	case M_ERROR:
		if (tmxp->isptm) {
			/*
			 * This error is from ptm.  We could tell TCP to
			 * shutdown the connection, but it's easier to just
			 * wait for the daemon to get SIGCHLD and close from
			 * above.
			 */
			freemsg(mp);
			return (0);
		}
		/*
		 * This is from TCP.  Don't really know why we'd
		 * get this, but we have a pretty good idea what
		 * to do:  Send M_HANGUP to the pty.
		 */
		mp->b_datap->db_type = M_HANGUP;
		mp->b_wptr = mp->b_rptr;
		qp = tmxp->peerq;
		break;

	case M_FLUSH:
		if (*mp->b_rptr & FLUSHR)
			flushq_dataonly(q);

		if (mp->b_flag & MSGMARK) {
			/*
			 * This M_FLUSH has been marked by the module
			 * below as intended for the upper queue,
			 * not the peer queue.
			 */
			qp = tmxp->rdq;
			mp->b_flag &= ~MSGMARK;
		} else {
			/*
			 * Wrap this M_FLUSH through the mux.
			 * The FLUSHR and FLUSHW bits must be
			 * reversed.
			 */
			qp = tmxp->peerq;
			flush = *mp->b_rptr;
			*mp->b_rptr &= ~(FLUSHR | FLUSHW);
			if (flush & FLUSHW)
				*mp->b_rptr |= FLUSHR;
			if (flush & FLUSHR)
				*mp->b_rptr |= FLUSHW;
		}
		break;

	case M_START:
	case M_STOP:
	case M_STARTI:
	case M_STOPI:
		freemsg(mp);
		return (0);

	default:
		cmn_err(CE_NOTE, "logdmuxlrput: received unexpected "
		    "message of type 0x%x", mp->b_datap->db_type);
		freemsg(mp);
		return (0);
	}
	/*
	 * Common exit: ordinary-priority messages honor flow control on
	 * the chosen destination and are queued for logdmuxlrsrv() if they
	 * cannot be passed on now; high-priority ones always go through.
	 */
	if (queclass(mp) < QPCTL) {
		if (q->q_first != NULL || !canputnext(qp)) {
			(void) putq(q, mp);
			return (0);
		}
	}
	putnext(qp, mp);
	return (0);
}
830 
/*
 * Lower read service routine.  Drains the messages that logdmuxlrput()
 * had to queue, applying the same routing: recognized terminal ioctls,
 * M_DATA and M_HANGUP go to the peer's lower queue; M_CTL, M_PROTO and
 * M_SETOPTS go up to the daemon.  Draining stops, with the message put
 * back, as soon as the destination is flow controlled.  Messages are
 * discarded while the instance has no lower stream or no peer.
 */
static int
logdmuxlrsrv(queue_t *q)
{
	mblk_t		*mp, *savemp;
	queue_t 	*qp;
	struct iocblk	*ioc;
	struct tmx	*tmxp = q->q_ptr;

	while ((mp = getq(q)) != NULL) {
		if (tmxp->muxq == NULL || tmxp->peerq == NULL) {
			freemsg(mp);
			continue;
		}

		switch (mp->b_datap->db_type) {

		case M_IOCTL:
			ioc = (struct iocblk *)mp->b_rptr;

			switch (ioc->ioc_cmd) {

			/* Terminal ioctls that are forwarded to the peer. */
			case TIOCSWINSZ:
			case TCSETAF:
			case TCSETSF:
			case TCSETA:
			case TCSETAW:
			case TCSETS:
			case TCSETSW:
			case TCSBRK:
			case TIOCSTI:
				qp = tmxp->peerq;
				break;

			default:
				cmn_err(CE_NOTE, "logdmuxlrsrv: received "
				    "unexpected request for ioctl 0x%x",
				    ioc->ioc_cmd);

				/* NAK unrecognized ioctl's. */
				miocnak(q, mp, 0, 0);
				continue;
			}
			break;

		case M_DATA:
		case M_HANGUP:
			qp = tmxp->peerq;
			break;

		case M_CTL:
			qp = tmxp->rdq;
			if (!canputnext(qp)) {
				(void) putbq(q, mp);
				return (0);
			}
			/*
			 * A one-byte M_CTL holding M_CTL_MAGIC_NUMBER is
			 * only a wrapper: strip it and pass up what it
			 * carries.
			 */
			if (MBLKL(mp) == 1 &&
			    (*mp->b_rptr == M_CTL_MAGIC_NUMBER)) {
				savemp = mp->b_cont;
				freeb(mp);
				mp = savemp;
			}
			putnext(qp, mp);
			continue;

		case M_PROTO:
		case M_SETOPTS:
			qp = tmxp->rdq;
			break;

		default:
			cmn_err(CE_NOTE, "logdmuxlrsrv: received unexpected "
			    "message of type 0x%x", mp->b_datap->db_type);
			freemsg(mp);
			continue;
		}
		/* Only ordinary-priority messages are expected on the queue. */
		ASSERT(queclass(mp) < QPCTL);
		if (!canputnext(qp)) {
			(void) putbq(q, mp);
			return (0);
		}
		putnext(qp, mp);
	}
	return (0);
}
918 
919 /*
920  * Lower side write service procedure.  No messages are ever placed on
921  * the write queue here, this just back-enables all of the upper side
922  * write service procedures.
923  */
924 static int
925 logdmuxlwsrv(queue_t *q)
926 {
927 	struct tmx *tmxp = q->q_ptr;
928 
929 	/*
930 	 * Qenable upper write queue and find out which lower
931 	 * queue needs to be restarted with flow control.
932 	 * Qenable the peer queue so canputnext will
933 	 * succeed on next call to logdmuxlrput.
934 	 */
935 	qenable(WR(tmxp->rdq));
936 
937 	mutex_enter(&logdmux_peerq_lock);
938 	if (tmxp->peerq != NULL)
939 		qenable(RD(tmxp->peerq));
940 	mutex_exit(&logdmux_peerq_lock);
941 
942 	return (0);
943 }
944 
945 /*
946  * This routine does I_LINK operation.
947  */
948 static void
949 logdmuxlink(queue_t *q, mblk_t *mp)
950 {
951 	struct tmx	*tmxp = q->q_ptr;
952 	struct linkblk	*lp = (struct linkblk *)mp->b_cont->b_rptr;
953 
954 	/*
955 	 * Fail if we're already linked.
956 	 */
957 	if (tmxp->muxq != NULL) {
958 		miocnak(q, mp, 0, EINVAL);
959 		return;
960 	}
961 
962 	tmxp->muxq = lp->l_qbot;
963 	tmxp->muxq->q_ptr = tmxp;
964 	RD(tmxp->muxq)->q_ptr = tmxp;
965 
966 	miocack(q, mp, 0, 0);
967 }
968 
969 /*
970  * logdmuxunlink() is called from logdmuxuwput() and is the first of two
971  * functions which process an I_UNLINK ioctl. logdmuxunlink() will determine
972  * the state of logindmux peer linkage and, based on this, control when the
973  * second function, logdmux_finish_unlink(), is called.  It's
974  * logdmux_finish_unlink() that's sending the M_IOCACK upstream and
975  * resetting the link state.
976  */
977 static void
978 logdmuxunlink(queue_t *q, mblk_t *mp)
979 {
980 	struct tmx	*tmxp = q->q_ptr;
981 	unlinkinfo_t	*unlinkinfop;
982 
983 	/*
984 	 * If we don't have a peer, just unlink.  Note that this check needs
985 	 * to be done under logdmux_qexch_lock to prevent racing with
986 	 * LOGDMX_IOC_QEXCHANGE, and we *must* set muxq to NULL prior to
987 	 * releasing the lock so that LOGDMX_IOC_QEXCHANGE will not consider
988 	 * us as a possible peer anymore (if it already considers us to be a
989 	 * peer, then unlinkinfop will not be NULL) -- NULLing muxq precludes
990 	 * use of logdmux_finish_unlink() here.
991 	 */
992 	mutex_enter(&logdmux_qexch_lock);
993 	unlinkinfop = tmxp->unlinkinfop;
994 	if (unlinkinfop == NULL) {
995 		ASSERT(tmxp->peerq == NULL);
996 		tmxp->muxq = NULL;
997 		mutex_exit(&logdmux_qexch_lock);
998 		miocack(q, mp, 0, 0);
999 		return;
1000 	}
1001 	mutex_exit(&logdmux_qexch_lock);
1002 
1003 	mutex_enter(&unlinkinfop->state_lock);
1004 
1005 	switch (unlinkinfop->state) {
1006 
1007 	case LOGDMUX_LINKED:
1008 		/*
1009 		 * We're the first instance to process an I_UNLINK --
1010 		 * ie, the peer instance is still there. We'll change
1011 		 * the state so that only one instance is executing an
1012 		 * I_UNLINK at any one time.
1013 		 */
1014 		unlinkinfop->state = LOGDMUX_UNLINK_PENDING;
1015 		mutex_exit(&unlinkinfop->state_lock);
1016 		/*
1017 		 * Attach the original M_IOCTL message to a
1018 		 * LOGDMUX_UNLINK_REQ message and send it to our peer to
1019 		 * tell it to unlink from us. When it has completed the
1020 		 * task, it will send us a LOGDMUX_UNLINK_RESP message
1021 		 * with the original M_IOCTL still attached, which will be
1022 		 * processed in our logdmuxlrput(). At that point, we will
1023 		 * call logdmux_finish_unlink() to complete the unlink
1024 		 * operation using the attached M_IOCTL.
1025 		 */
1026 		unlinkinfop->prot_mp->b_cont = mp;
1027 		/*
1028 		 * Put the M_CTL directly to the peer's lower RQ.
1029 		 */
1030 		put(RD(tmxp->peerq), unlinkinfop->prot_mp);
1031 		break;
1032 
1033 	case LOGDMUX_UNLINK_PENDING:
1034 		mutex_exit(&unlinkinfop->state_lock);
1035 		/*
1036 		 * Our peer is actively processing an I_UNLINK itself.
1037 		 * We have to wait for the peer to complete and we use
1038 		 * qtimeout as a way to poll for its completion.
1039 		 * We save a reference to our mblk so that we can send
1040 		 * it upstream once our peer is done.
1041 		 */
1042 		tmxp->unlink_mp = mp;
1043 		tmxp->utimoutid = qtimeout(q, logdmux_unlink_timer, q,
1044 		    drv_usectohz(LOGDMUX_POLL_WAIT));
1045 		break;
1046 
1047 	case LOGDMUX_UNLINKED:
1048 		/*
1049 		 * Our peer is no longer linked so we can proceed.
1050 		 */
1051 		mutex_exit(&unlinkinfop->state_lock);
1052 		mutex_destroy(&unlinkinfop->state_lock);
1053 		freeb(unlinkinfop->prot_mp);
1054 		kmem_free(unlinkinfop, sizeof (unlinkinfo_t));
1055 		logdmux_finish_unlink(q, mp);
1056 		break;
1057 
1058 	default:
1059 		mutex_exit(&unlinkinfop->state_lock);
1060 		cmn_err(CE_PANIC,
1061 		    "logdmuxunlink: peer linkage is in an unrecognized state");
1062 		break;
1063 	}
1064 }
1065 
1066 /*
1067  * Finish the unlink operation.  Note that no locks should be held since
1068  * this routine calls into other queues.
1069  */
1070 static void
1071 logdmux_finish_unlink(queue_t *q, mblk_t *unlink_mp)
1072 {
1073 	struct tmx *tmxp = q->q_ptr;
1074 	mblk_t *mp;
1075 
1076 	/*
1077 	 * Flush any write side data downstream.
1078 	 */
1079 	while ((mp = getq(WR(q))) != NULL)
1080 		putnext(tmxp->muxq, mp);
1081 
1082 	/*
1083 	 * Note that we do not NULL out q_ptr since another thread (e.g., a
1084 	 * STREAMS service thread) might call logdmuxlrput() between the time
1085 	 * we exit the logindmux perimeter and the time the STREAMS framework
1086 	 * resets q_ptr to stdata (since muxq is set to NULL, any messages
1087 	 * will just be discarded).
1088 	 */
1089 	tmxp->muxq = NULL;
1090 	tmxp->unlinkinfop = NULL;
1091 	tmxp->peerq = NULL;
1092 	miocack(q, unlink_mp, 0, 0);
1093 }
1094 
1095 /*
1096  * logdmux_unlink_timer() is executed by qtimeout(). This function will
1097  * check unlinkinfop->state to determine whether the peer has completed
1098  * its I_UNLINK. If it hasn't, we use qtimeout() to initiate another poll.
1099  */
1100 static void
1101 logdmux_unlink_timer(void *arg)
1102 {
1103 	queue_t		*q = arg;
1104 	struct	tmx	*tmxp = q->q_ptr;
1105 	unlinkinfo_t	*unlinkinfop = tmxp->unlinkinfop;
1106 
1107 	tmxp->utimoutid = 0;
1108 
1109 	mutex_enter(&unlinkinfop->state_lock);
1110 
1111 	if (unlinkinfop->state != LOGDMUX_UNLINKED) {
1112 		ASSERT(unlinkinfop->state == LOGDMUX_UNLINK_PENDING);
1113 		mutex_exit(&unlinkinfop->state_lock);
1114 		/*
1115 		 * We need to wait longer for our peer to complete.
1116 		 */
1117 		tmxp->utimoutid = qtimeout(q, logdmux_unlink_timer, q,
1118 		    drv_usectohz(LOGDMUX_POLL_WAIT));
1119 	} else {
1120 		/*
1121 		 * Our peer is no longer linked so we can proceed with
1122 		 * the cleanup.
1123 		 */
1124 		mutex_exit(&unlinkinfop->state_lock);
1125 		mutex_destroy(&unlinkinfop->state_lock);
1126 		freeb(unlinkinfop->prot_mp);
1127 		kmem_free(unlinkinfop, sizeof (unlinkinfo_t));
1128 		logdmux_finish_unlink(q, tmxp->unlink_mp);
1129 	}
1130 }
1131 
1132 static void
1133 logdmux_timer(void *arg)
1134 {
1135 	queue_t		*q = arg;
1136 	struct tmx	*tmxp = q->q_ptr;
1137 
1138 	ASSERT(tmxp != NULL);
1139 
1140 	if (q->q_flag & QREADR) {
1141 		ASSERT(tmxp->rtimoutid != 0);
1142 		tmxp->rtimoutid = 0;
1143 	} else {
1144 		ASSERT(tmxp->wtimoutid != 0);
1145 		tmxp->wtimoutid = 0;
1146 	}
1147 	enableok(q);
1148 	qenable(q);
1149 }
1150 
1151 static void
1152 logdmux_buffer(void *arg)
1153 {
1154 	queue_t		*q = arg;
1155 	struct tmx	*tmxp = q->q_ptr;
1156 
1157 	ASSERT(tmxp != NULL);
1158 
1159 	if (q->q_flag & QREADR) {
1160 		ASSERT(tmxp->rbufcid != 0);
1161 		tmxp->rbufcid = 0;
1162 	} else {
1163 		ASSERT(tmxp->wbufcid != 0);
1164 		tmxp->wbufcid = 0;
1165 	}
1166 	enableok(q);
1167 	qenable(q);
1168 }
1169 
1170 static void
1171 recover(queue_t *q, mblk_t *mp, size_t size)
1172 {
1173 	timeout_id_t	tid;
1174 	bufcall_id_t	bid;
1175 	struct	tmx	*tmxp = q->q_ptr;
1176 
1177 	/*
1178 	 * Avoid re-enabling the queue.
1179 	 */
1180 	ASSERT(queclass(mp) < QPCTL);
1181 	ASSERT(WR(q)->q_next == NULL); /* Called from upper queue only */
1182 	noenable(q);
1183 	(void) putbq(q, mp);
1184 
1185 	/*
1186 	 * Make sure there is at most one outstanding request per queue.
1187 	 */
1188 	if (q->q_flag & QREADR) {
1189 		if (tmxp->rtimoutid != 0 || tmxp->rbufcid != 0)
1190 			return;
1191 	} else {
1192 		if (tmxp->wtimoutid != 0 || tmxp->wbufcid != 0)
1193 			return;
1194 	}
1195 	if (!(bid = qbufcall(RD(q), size, BPRI_MED, logdmux_buffer, q))) {
1196 		tid = qtimeout(RD(q), logdmux_timer, q, drv_usectohz(SIMWAIT));
1197 		if (q->q_flag & QREADR)
1198 			tmxp->rtimoutid = tid;
1199 		else
1200 			tmxp->wtimoutid = tid;
1201 	} else	{
1202 		if (q->q_flag & QREADR)
1203 			tmxp->rbufcid = bid;
1204 		else
1205 			tmxp->wbufcid = bid;
1206 	}
1207 }
1208 
1209 static void
1210 flushq_dataonly(queue_t *q)
1211 {
1212 	mblk_t *mp, *nmp;
1213 
1214 	/*
1215 	 * Since we are already in the perimeter, and we are not a put-shared
1216 	 * perimeter, we don't need to freeze the stream or anything to
1217 	 * be ensured of exclusivity.
1218 	 */
1219 	mp = q->q_first;
1220 	while (mp != NULL) {
1221 		if (mp->b_datap->db_type == M_DATA) {
1222 			nmp = mp->b_next;
1223 			rmvq(q, mp);
1224 			freemsg(mp);
1225 			mp = nmp;
1226 		} else {
1227 			mp = mp->b_next;
1228 		}
1229 	}
1230 }
1231 
1232 /*
1233  * logdmux_alloc_unlinkinfo() is called from logdmuxuwput() during the
1234  * processing of a LOGDMX_IOC_QEXCHANGE ioctl() to allocate the
1235  * unlinkinfo_t which is needed during the processing of an I_UNLINK.
1236  */
1237 static int
1238 logdmux_alloc_unlinkinfo(struct tmx *t0, struct tmx *t1)
1239 {
1240 	unlinkinfo_t	*p;
1241 	uint_t		*messagep;
1242 
1243 	if ((p = kmem_zalloc(sizeof (unlinkinfo_t), KM_NOSLEEP)) == NULL)
1244 		return (ENOSR);
1245 
1246 	if ((p->prot_mp = allocb(sizeof (uint_t), BPRI_MED)) == NULL) {
1247 		kmem_free(p, sizeof (unlinkinfo_t));
1248 		return (ENOSR);
1249 	}
1250 
1251 	DB_TYPE(p->prot_mp) = M_CTL;
1252 	messagep = (uint_t *)p->prot_mp->b_wptr;
1253 	*messagep = LOGDMUX_UNLINK_REQ;
1254 	p->prot_mp->b_wptr += sizeof (*messagep);
1255 	p->state = LOGDMUX_LINKED;
1256 	mutex_init(&p->state_lock, NULL, MUTEX_DRIVER, NULL);
1257 
1258 	t0->unlinkinfop = t1->unlinkinfop = p;
1259 
1260 	return (0);
1261 }
1262