xref: /illumos-gate/usr/src/uts/common/io/logindmux.c (revision 24da5b34f49324ed742a340010ed5bd3d4e06625)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Description: logindmux.c
31  *
32  * The logindmux driver is used with login modules (such as telmod/rlmod).
33  * It is a 1x1 cloning mux, and two instances are used together: the lower
34  * link of one instance receives input from the network, while the lower
35  * link of the other receives input from the pseudo-terminal subsystem.
36  *
37  * The logdmux_qexch_lock mutex manages the race between LOGDMX_IOC_QEXCHANGE,
38  * logdmuxunlink() and logdmuxclose(), so that the instance selected as a peer
39  * in LOGDMX_IOC_QEXCHANGE cannot be unlinked or closed until the qexchange
40  * is complete; see the inline comments in the code for details.
41  *
42  * The logdmux_peerq_lock mutex manages the race between logdmuxlwsrv() and
43  * logdmuxlrput() (when null'ing tmxp->peerq during LOGDMUX_UNLINK_REQ
44  * processing).
45  *
46  * The logdmux_minor_lock mutex serializes the growth of logdmux_minor_arena
47  * (the arena is grown gradually rather than allocated all at once so that
48  * minor numbers are recycled sooner; for simplicity it is never shrunk).
49  *
50  * The unlink operation is implemented using protocol messages that flow
51  * between the two logindmux peer instances. The instance processing the
52  * I_UNLINK ioctl will send a LOGDMUX_UNLINK_REQ protocol message to its
53  * peer to indicate that it wishes to unlink; the peer will process this
54  * message in its lrput, null its tmxp->peerq and then send a
55  * LOGDMUX_UNLINK_RESP protocol message in reply to indicate that the
56  * unlink can proceed; having received the reply in its lrput, the
57  * instance processing the I_UNLINK can then continue. To ensure that only
58  * one of the peer instances will be actively processing an I_UNLINK at
59  * any one time, a single structure (an unlinkinfo_t containing a mutex,
60  * state variable and pointer to an M_CTL mblk) is allocated during
61  * the processing of the LOGDMX_IOC_QEXCHANGE ioctl. The two instances, if
62  * trying to unlink simultaneously, will race to get control of this
63  * structure which contains the resources necessary to process the
64  * I_UNLINK. The instance that wins this race will be able to continue
65  * with the unlink whilst the other instance will be obliged to wait.
66  */
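/*
 * Illustrative only -- a hedged sketch that is not part of this driver: a
 * user-level daemon such as in.telnetd might plumb the two logindmux
 * instances roughly as follows (netfd and ptmfd are assumed to be the
 * already-open TCP and pty-master descriptors, and error handling is
 * elided).  What the driver itself requires is that both instances are
 * I_LINKed before LOGDMX_IOC_QEXCHANGE, and that the exchange is issued
 * with `flag' set on the instance whose lower stream is the ptm side,
 * naming the peer instance's device.
 *
 *	int netmux = open("/dev/logindmux", O_RDWR);	(network side)
 *	int ptymux = open("/dev/logindmux", O_RDWR);	(ptm side)
 *	struct protocol_arg pa;
 *	struct stat st;
 *
 *	(void) ioctl(netmux, I_LINK, netfd);		(TCP stream below)
 *	(void) ioctl(ptymux, I_LINK, ptmfd);		(ptm stream below)
 *
 *	(void) fstat(netmux, &st);			(peer instance's dev)
 *	pa.dev = st.st_rdev;
 *	pa.flag = 1;
 *	(void) ioctl(ptymux, LOGDMX_IOC_QEXCHANGE, &pa);
 */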
67 
68 #include <sys/types.h>
69 #include <sys/param.h>
70 #include <sys/errno.h>
71 #include <sys/debug.h>
72 #include <sys/stropts.h>
73 #include <sys/stream.h>
74 #include <sys/logindmux.h>
75 #include <sys/logindmux_impl.h>
76 #include <sys/stat.h>
77 #include <sys/kmem.h>
78 #include <sys/vmem.h>
79 #include <sys/strsun.h>
80 #include <sys/sysmacros.h>
81 #include <sys/mkdev.h>
82 #include <sys/ddi.h>
83 #include <sys/sunddi.h>
84 #include <sys/modctl.h>
85 #include <sys/termios.h>
86 #include <sys/cmn_err.h>
87 
88 static int logdmuxopen(queue_t *, dev_t *, int, int, cred_t *);
89 static int logdmuxclose(queue_t *, int, cred_t *);
90 static int logdmuxursrv(queue_t *);
91 static int logdmuxuwput(queue_t *, mblk_t *);
92 static int logdmuxlrput(queue_t *, mblk_t *);
93 static int logdmuxlrsrv(queue_t *);
94 static int logdmuxlwsrv(queue_t *);
95 static int logdmuxuwsrv(queue_t *);
96 static int logdmux_alloc_unlinkinfo(struct tmx *, struct tmx *);
97 
98 static void logdmuxlink(queue_t *, mblk_t *);
99 static void logdmuxunlink(queue_t *, mblk_t *);
100 static void logdmux_finish_unlink(queue_t *, mblk_t *);
101 static void logdmux_unlink_timer(void *arg);
102 static void recover(queue_t *, mblk_t *, size_t);
103 static void flushq_dataonly(queue_t *);
104 
105 static kmutex_t logdmux_qexch_lock;
106 static kmutex_t logdmux_peerq_lock;
107 static kmutex_t logdmux_minor_lock;
108 static minor_t	logdmux_maxminor = 256;	/* grown as necessary */
109 static vmem_t	*logdmux_minor_arena;
110 static void	*logdmux_statep;
111 
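/*
 * Module info: module id, module name, minimum and maximum packet sizes
 * (0 and 256 bytes), and high- and low-water marks (512 and 256 bytes).
 */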
112 static struct module_info logdmuxm_info = {
113 	LOGDMX_ID,
114 	"logindmux",
115 	0,
116 	256,
117 	512,
118 	256
119 };
120 
121 static struct qinit logdmuxurinit = {
122 	NULL,
123 	logdmuxursrv,
124 	logdmuxopen,
125 	logdmuxclose,
126 	NULL,
127 	&logdmuxm_info
128 };
129 
130 static struct qinit logdmuxuwinit = {
131 	logdmuxuwput,
132 	logdmuxuwsrv,
133 	NULL,
134 	NULL,
135 	NULL,
136 	&logdmuxm_info
137 };
138 
139 static struct qinit logdmuxlrinit = {
140 	logdmuxlrput,
141 	logdmuxlrsrv,
142 	NULL,
143 	NULL,
144 	NULL,
145 	&logdmuxm_info
146 };
147 
148 static struct qinit logdmuxlwinit = {
149 	NULL,
150 	logdmuxlwsrv,
151 	NULL,
152 	NULL,
153 	NULL,
154 	&logdmuxm_info
155 };
156 
157 struct streamtab logdmuxinfo = {
158 	&logdmuxurinit,
159 	&logdmuxuwinit,
160 	&logdmuxlrinit,
161 	&logdmuxlwinit
162 };
163 
164 static int logdmux_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
165 static int logdmux_attach(dev_info_t *, ddi_attach_cmd_t);
166 static int logdmux_detach(dev_info_t *, ddi_detach_cmd_t);
167 static dev_info_t *logdmux_dip;
168 
169 DDI_DEFINE_STREAM_OPS(logdmux_ops, nulldev, nulldev, logdmux_attach,
170     logdmux_detach, nulldev, logdmux_info, D_MP | D_MTPERQ, &logdmuxinfo);
171 
172 static struct modldrv modldrv = {
173 	&mod_driverops,
174 	"logindmux driver %I%",
175 	&logdmux_ops
176 };
177 
178 static struct modlinkage modlinkage = {
179 	MODREV_1, &modldrv, NULL
180 };
181 
182 int
183 _init(void)
184 {
185 	int	ret;
186 
187 	mutex_init(&logdmux_peerq_lock, NULL, MUTEX_DRIVER, NULL);
188 	mutex_init(&logdmux_qexch_lock, NULL, MUTEX_DRIVER, NULL);
189 
190 	if ((ret = mod_install(&modlinkage)) != 0) {
191 		mutex_destroy(&logdmux_peerq_lock);
192 		mutex_destroy(&logdmux_qexch_lock);
193 		return (ret);
194 	}
195 
196 	logdmux_minor_arena = vmem_create("logdmux_minor", (void *)1,
197 	    logdmux_maxminor, 1, NULL, NULL, NULL, 0,
198 	    VM_SLEEP | VMC_IDENTIFIER);
199 	(void) ddi_soft_state_init(&logdmux_statep, sizeof (struct tmx), 1);
200 
201 	return (0);
202 }
203 
204 int
205 _fini(void)
206 {
207 	int	ret;
208 
209 	if ((ret = mod_remove(&modlinkage)) == 0) {
210 		mutex_destroy(&logdmux_peerq_lock);
211 		mutex_destroy(&logdmux_qexch_lock);
212 		ddi_soft_state_fini(&logdmux_statep);
213 		vmem_destroy(logdmux_minor_arena);
214 		logdmux_minor_arena = NULL;
215 	}
216 
217 	return (ret);
218 }
219 
220 int
221 _info(struct modinfo *modinfop)
222 {
223 	return (mod_info(&modlinkage, modinfop));
224 }
225 
226 static int
227 logdmux_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
228 {
229 	if (cmd != DDI_ATTACH)
230 		return (DDI_FAILURE);
231 
232 	if (ddi_create_minor_node(devi, "logindmux", S_IFCHR, 0, DDI_PSEUDO,
233 	    CLONE_DEV) == DDI_FAILURE)
234 		return (DDI_FAILURE);
235 
236 	logdmux_dip = devi;
237 	return (DDI_SUCCESS);
238 }
239 
240 static int
241 logdmux_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
242 {
243 	if (cmd != DDI_DETACH)
244 		return (DDI_FAILURE);
245 
246 	ddi_remove_minor_node(devi, NULL);
247 	return (DDI_SUCCESS);
248 }
249 
250 /* ARGSUSED */
251 static int
252 logdmux_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
253 {
254 	int error;
255 
256 	switch (infocmd) {
257 	case DDI_INFO_DEVT2DEVINFO:
258 		if (logdmux_dip == NULL) {
259 			error = DDI_FAILURE;
260 		} else {
261 			*result = logdmux_dip;
262 			error = DDI_SUCCESS;
263 		}
264 		break;
265 	case DDI_INFO_DEVT2INSTANCE:
266 		*result = (void *)0;
267 		error = DDI_SUCCESS;
268 		break;
269 	default:
270 		error = DDI_FAILURE;
271 	}
272 	return (error);
273 }
274 
275 /*
276  * Logindmux open routine
277  */
278 /*ARGSUSED*/
279 static int
280 logdmuxopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
281 {
282 	struct	tmx *tmxp;
283 	minor_t	minor, omaxminor;
284 
285 	if (sflag != CLONEOPEN)
286 		return (EINVAL);
287 
288 	mutex_enter(&logdmux_minor_lock);
289 	if (vmem_size(logdmux_minor_arena, VMEM_FREE) == 0) {
290 		/*
291 		 * The arena has been exhausted; grow by powers of two
292 		 * up to MAXMIN; bail if we've run out of minors.
293 		 */
294 		if (logdmux_maxminor == MAXMIN) {
295 			mutex_exit(&logdmux_minor_lock);
296 			return (ENOMEM);
297 		}
298 
299 		omaxminor = logdmux_maxminor;
300 		logdmux_maxminor = MIN(logdmux_maxminor << 1, MAXMIN);
301 
302 		(void) vmem_add(logdmux_minor_arena,
303 		    (void *)(uintptr_t)(omaxminor + 1),
304 		    logdmux_maxminor - omaxminor, VM_SLEEP);
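		/*
		 * For example, starting from the initial arena of minors
		 * [1, 256], the first growth adds [257, 512], the next
		 * [513, 1024], and so on up to MAXMIN.
		 */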
305 	}
306 	minor = (minor_t)(uintptr_t)
307 	    vmem_alloc(logdmux_minor_arena, 1, VM_SLEEP);
308 	mutex_exit(&logdmux_minor_lock);
309 
310 	if (ddi_soft_state_zalloc(logdmux_statep, minor) == DDI_FAILURE) {
311 		vmem_free(logdmux_minor_arena, (void *)(uintptr_t)minor, 1);
312 		return (ENOMEM);
313 	}
314 
315 	tmxp = ddi_get_soft_state(logdmux_statep, minor);
316 	tmxp->rdq = q;
317 	tmxp->muxq = NULL;
318 	tmxp->peerq = NULL;
319 	tmxp->unlinkinfop = NULL;
320 	tmxp->dev0 = minor;
321 
322 	*devp = makedevice(getmajor(*devp), tmxp->dev0);
323 	q->q_ptr = tmxp;
324 	WR(q)->q_ptr = tmxp;
325 
326 	qprocson(q);
327 	return (0);
328 }
329 
330 /*
331  * Logindmux close routine gets called when the telnet connection is closed
332  */
333 /*ARGSUSED*/
334 static int
335 logdmuxclose(queue_t *q, int flag, cred_t *crp)
336 {
337 	struct tmx	*tmxp = q->q_ptr;
338 	minor_t		minor = tmxp->dev0;
339 
340 	ASSERT(tmxp->muxq == NULL);
341 	ASSERT(tmxp->peerq == NULL);
342 
343 	qprocsoff(q);
344 	if (tmxp->wbufcid != 0) {
345 		qunbufcall(q, tmxp->wbufcid);
346 		tmxp->wbufcid = 0;
347 	}
348 	if (tmxp->rbufcid != 0) {
349 		qunbufcall(q, tmxp->rbufcid);
350 		tmxp->rbufcid = 0;
351 	}
352 	if (tmxp->rtimoutid != 0) {
353 		(void) quntimeout(q, tmxp->rtimoutid);
354 		tmxp->rtimoutid = 0;
355 	}
356 	if (tmxp->wtimoutid != 0) {
357 		(void) quntimeout(q, tmxp->wtimoutid);
358 		tmxp->wtimoutid = 0;
359 	}
360 	if (tmxp->utimoutid != 0) {
361 		(void) quntimeout(q, tmxp->utimoutid);
362 		tmxp->utimoutid = 0;
363 	}
364 
365 	/*
366 	 * Hold logdmux_qexch_lock to prevent another thread that might be
367 	 * in LOGDMX_IOC_QEXCHANGE from looking up our state while we're
368 	 * disposing of it.
369 	 */
370 	mutex_enter(&logdmux_qexch_lock);
371 	ddi_soft_state_free(logdmux_statep, minor);
372 	vmem_free(logdmux_minor_arena, (void *)(uintptr_t)minor, 1);
373 	mutex_exit(&logdmux_qexch_lock);
374 
375 	q->q_ptr = NULL;
376 	WR(q)->q_ptr = NULL;
377 
378 	return (0);
379 }
380 
381 /*
382  * Upper read service routine
383  */
384 static int
385 logdmuxursrv(queue_t *q)
386 {
387 	struct tmx *tmxp = q->q_ptr;
388 
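	/*
	 * Nothing is ever queued here; we run only when the stream head
	 * back-enables us after the daemon drains data.  Kick the lower
	 * read queue so that any messages it queued due to flow control
	 * can resume flowing upstream.
	 */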
389 	if (tmxp->muxq != NULL)
390 		qenable(RD(tmxp->muxq));
391 	return (0);
392 }
393 
394 /*
395  * This routine gets called when the telnet daemon sends data or ioctl
396  * messages to the upper mux queue.
397  */
398 static int
399 logdmuxuwput(queue_t *q, mblk_t *mp)
400 {
401 	queue_t		*qp;
402 	mblk_t		*newmp;
403 	struct iocblk	*ioc;
404 	minor_t		minor;
405 	STRUCT_HANDLE(protocol_arg, protoh);
406 	struct tmx	*tmxp, *tmxpeerp;
407 	int		error;
408 
409 	tmxp = q->q_ptr;
410 
411 	switch (mp->b_datap->db_type) {
412 
413 	case M_IOCTL:
414 		ASSERT(MBLKL(mp) == sizeof (struct iocblk));
415 
416 		ioc = (struct iocblk *)mp->b_rptr;
417 		switch (ioc->ioc_cmd) {
418 		/*
419 		 * This is a special ioctl which exchanges q info
420 		 * of the two peers, connected to netf and ptmx.
421 		 */
422 		case LOGDMX_IOC_QEXCHANGE:
423 			error = miocpullup(mp,
424 			    SIZEOF_STRUCT(protocol_arg, ioc->ioc_flag));
425 			if (error != 0) {
426 				miocnak(q, mp, 0, error);
427 				break;
428 			}
429 			STRUCT_SET_HANDLE(protoh, ioc->ioc_flag,
430 			    (struct protocol_arg *)mp->b_cont->b_rptr);
431 #ifdef _SYSCALL32_IMPL
432 			if ((ioc->ioc_flag & DATAMODEL_MASK) ==
433 			    DATAMODEL_ILP32) {
434 				minor = getminor(expldev(
435 				    STRUCT_FGET(protoh, dev)));
436 			} else
437 #endif
438 			{
439 				minor = getminor(STRUCT_FGET(protoh, dev));
440 			}
441 
442 			/*
443 			 * The second argument to ddi_get_soft_state() is
444 			 * interpreted as an `int', so prohibit negative
445 			 * values.
446 			 */
447 			if ((int)minor < 0) {
448 				miocnak(q, mp, 0, EINVAL);
449 				break;
450 			}
451 
452 			/*
453 			 * We must hold logdmux_qexch_lock while looking up
454 			 * the proposed peer to prevent another thread from
455 			 * simultaneously I_UNLINKing or closing it.
456 			 */
457 			mutex_enter(&logdmux_qexch_lock);
458 
459 			/*
460 			 * For LOGDMX_IOC_QEXCHANGE to succeed, our peer must
461 			 * exist (and not be us), and both we and our peer
462 			 * must be I_LINKed (i.e., muxq must not be NULL) and
463 			 * not already have a peer.
464 			 */
465 			tmxpeerp = ddi_get_soft_state(logdmux_statep, minor);
466 			if (tmxpeerp == NULL || tmxpeerp == tmxp ||
467 			    tmxpeerp->muxq == NULL || tmxpeerp->peerq != NULL ||
468 			    tmxp->muxq == NULL || tmxp->peerq != NULL) {
469 				mutex_exit(&logdmux_qexch_lock);
470 				miocnak(q, mp, 0, EINVAL);
471 				break;
472 			}
473 
474 			/*
475 			 * If `flag' is set then exchange queues and assume
476 			 * tmxp refers to the ptmx stream.
477 			 */
478 			if (STRUCT_FGET(protoh, flag)) {
479 				/*
480 				 * Allocate and populate the structure we
481 				 * need when processing an I_UNLINK ioctl.
482 				 * Give both logindmux instances a pointer
483 				 * to it from their tmx structure.
484 				 */
485 				if ((error = logdmux_alloc_unlinkinfo(
486 				    tmxp, tmxpeerp)) != 0) {
487 					mutex_exit(&logdmux_qexch_lock);
488 					miocnak(q, mp, 0, error);
489 					break;
490 				}
491 				tmxp->peerq = tmxpeerp->muxq;
492 				tmxpeerp->peerq = tmxp->muxq;
493 				tmxp->isptm = B_TRUE;
494 			}
495 			mutex_exit(&logdmux_qexch_lock);
496 			miocack(q, mp, 0, 0);
497 			break;
498 
499 		case I_LINK:
500 			ASSERT(MBLKL(mp->b_cont) == sizeof (struct linkblk));
501 			logdmuxlink(q, mp);
502 			break;
503 
504 		case I_UNLINK:
505 			ASSERT(MBLKL(mp->b_cont) == sizeof (struct linkblk));
506 			logdmuxunlink(q, mp);
507 			break;
508 
509 		default:
510 			if (tmxp->muxq == NULL) {
511 				miocnak(q, mp, 0, EINVAL);
512 				return (0);
513 			}
514 			putnext(tmxp->muxq, mp);
515 			break;
516 		}
517 
518 		break;
519 
520 	case M_DATA:
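		/*
		 * On the network-side instance (i.e. not the ptm side),
		 * data written by the daemon is wrapped in an M_CTL tagged
		 * with M_CTL_MAGIC_NUMBER before being sent down,
		 * presumably so that the login module below (e.g. telmod)
		 * can distinguish daemon-originated data; the matching
		 * wrapper on inbound data is stripped in logdmuxlrput()
		 * and logdmuxlrsrv() before it is passed up to the daemon.
		 */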
521 		if (!tmxp->isptm) {
522 			if ((newmp = allocb(sizeof (char), BPRI_MED)) == NULL) {
523 				recover(q, mp, sizeof (char));
524 				return (0);
525 			}
526 			newmp->b_datap->db_type = M_CTL;
527 			*newmp->b_wptr++ = M_CTL_MAGIC_NUMBER;
528 			newmp->b_cont = mp;
529 			mp = newmp;
530 		}
531 		/* FALLTHRU */
532 
533 	case M_PROTO:
534 	case M_PCPROTO:
535 		qp = tmxp->muxq;
536 		if (qp == NULL) {
537 			merror(q, mp, EINVAL);
538 			return (0);
539 		}
540 
541 		if (queclass(mp) < QPCTL) {
542 			if (q->q_first != NULL || !canputnext(qp)) {
543 				(void) putq(q, mp);
544 				return (0);
545 			}
546 		}
547 		putnext(qp, mp);
548 		break;
549 
550 	case M_FLUSH:
551 		if (*mp->b_rptr & FLUSHW)
552 			flushq(q, FLUSHALL);
553 
554 		if (tmxp->muxq != NULL) {
555 			putnext(tmxp->muxq, mp);
556 			return (0);
557 		}
558 
559 		*mp->b_rptr &= ~FLUSHW;
560 		if (*mp->b_rptr & FLUSHR)
561 			qreply(q, mp);
562 		else
563 			freemsg(mp);
564 		break;
565 
566 	default:
567 		cmn_err(CE_NOTE, "logdmuxuwput: received unexpected message"
568 		    " of type 0x%x", mp->b_datap->db_type);
569 		freemsg(mp);
570 	}
571 	return (0);
572 }
573 
574 /*
575  * Upper write service routine
576  */
577 static int
578 logdmuxuwsrv(queue_t *q)
579 {
580 	mblk_t		*mp, *newmp;
581 	queue_t		*qp;
582 	struct tmx	*tmxp = q->q_ptr;
583 
584 	while ((mp = getq(q)) != NULL) {
585 		switch (mp->b_datap->db_type) {
586 		case M_DATA:
587 			if (!tmxp->isptm) {
588 				if ((newmp = allocb(sizeof (char), BPRI_MED)) ==
589 				    NULL) {
590 					recover(q, mp, sizeof (char));
591 					return (0);
592 				}
593 				newmp->b_datap->db_type = M_CTL;
594 				*newmp->b_wptr++ = M_CTL_MAGIC_NUMBER;
595 				newmp->b_cont = mp;
596 				mp = newmp;
597 			}
598 			/* FALLTHRU */
599 
600 		case M_CTL:
601 		case M_PROTO:
602 			if (tmxp->muxq == NULL) {
603 				merror(q, mp, EIO);
604 				break;
605 			}
606 			qp = tmxp->muxq;
607 			if (!canputnext(qp)) {
608 				(void) putbq(q, mp);
609 				return (0);
610 			}
611 			putnext(qp, mp);
612 			break;
613 
614 
615 		default:
616 			cmn_err(CE_NOTE, "logdmuxuwsrv: received unexpected"
617 			    " message of type 0x%x", mp->b_datap->db_type);
618 			freemsg(mp);
619 		}
620 	}
621 	return (0);
622 }
623 
624 /*
625  * The logindmux lower put routine determines which of the two lower queues
626  * a message arrived on and forwards data to the peer queue.  M_CTL messages
627  * carrying protocol data are unwrapped and sent up to the daemon; ioctls
628  * and other message types are routed to the peer or up to the daemon.
629  */
630 static int
631 logdmuxlrput(queue_t *q, mblk_t *mp)
632 {
633 	mblk_t		*savemp;
634 	queue_t 	*qp;
635 	struct iocblk	*ioc;
636 	struct tmx	*tmxp = q->q_ptr;
637 	uchar_t		flush;
638 	uint_t		*messagep;
639 	unlinkinfo_t	*unlinkinfop = tmxp->unlinkinfop;
640 
641 	if (tmxp->muxq == NULL || tmxp->peerq == NULL) {
642 		freemsg(mp);
643 		return (0);
644 	}
645 
646 	/*
647 	 * If there's already a message on our queue and the incoming
648 	 * message is not high-priority, enqueue the message -- but not
649 	 * if it's a logindmux protocol message.
650 	 */
651 	if ((q->q_first != NULL) && (queclass(mp) < QPCTL) &&
652 	    (!LOGDMUX_PROTO_MBLK(mp))) {
653 		(void) putq(q, mp);
654 		return (0);
655 	}
656 
657 	switch (mp->b_datap->db_type) {
658 
659 	case M_IOCTL:
660 		ioc = (struct iocblk *)mp->b_rptr;
661 		switch (ioc->ioc_cmd) {
662 
663 		case TIOCSWINSZ:
664 		case TCSETAF:
665 		case TCSETSF:
666 		case TCSETA:
667 		case TCSETAW:
668 		case TCSETS:
669 		case TCSETSW:
670 		case TCSBRK:
671 		case TIOCSTI:
672 			qp = tmxp->peerq;
673 			break;
674 
675 		default:
676 			cmn_err(CE_NOTE, "logdmuxlrput: received unexpected"
677 			    " request for ioctl 0x%x", ioc->ioc_cmd);
678 
679 			/* NAK unrecognized ioctls. */
680 			miocnak(q, mp, 0, 0);
681 			return (0);
682 		}
683 		break;
684 
685 	case M_DATA:
686 	case M_HANGUP:
687 		qp = tmxp->peerq;
688 		break;
689 
690 	case M_CTL:
691 		/*
692 		 * The protocol messages that flow between the peers
693 		 * to implement the unlink functionality are M_CTLs
694 		 * which have the M_IOCTL/I_UNLINK mblk of the ioctl
695 		 * attached via b_cont.  LOGDMUX_PROTO_MBLK() uses
696 		 * this to determine whether a particular M_CTL is a
697 		 * peer protocol message.
698 		 */
699 		if (LOGDMUX_PROTO_MBLK(mp)) {
700 			messagep = (uint_t *)mp->b_rptr;
701 
702 			switch (*messagep) {
703 
704 			case LOGDMUX_UNLINK_REQ:
705 				/*
706 				 * We've received a message from our
707 				 * peer indicating that it wants to
708 				 * unlink.
709 				 */
710 				*messagep = LOGDMUX_UNLINK_RESP;
711 				qp = tmxp->peerq;
712 
713 				mutex_enter(&logdmux_peerq_lock);
714 				tmxp->peerq = NULL;
715 				mutex_exit(&logdmux_peerq_lock);
716 
717 				put(RD(qp), mp);
718 				return (0);
719 
720 			case LOGDMUX_UNLINK_RESP:
721 				/*
722 				 * We've received a positive response
723 				 * from our peer to an earlier
724 				 * LOGDMUX_UNLINK_REQ that we sent.
725 				 * We can now carry on with the unlink.
726 				 */
727 				qp = tmxp->rdq;
728 				mutex_enter(&unlinkinfop->state_lock);
729 				ASSERT(unlinkinfop->state ==
730 				    LOGDMUX_UNLINK_PENDING);
731 				unlinkinfop->state = LOGDMUX_UNLINKED;
732 				mutex_exit(&unlinkinfop->state_lock);
733 				logdmux_finish_unlink(WR(qp), mp->b_cont);
734 				return (0);
735 			}
736 		}
737 
738 		qp = tmxp->rdq;
739 		if (q->q_first != NULL || !canputnext(qp)) {
740 			(void) putq(q, mp);
741 			return (0);
742 		}
743 		if ((MBLKL(mp) == 1) && (*mp->b_rptr == M_CTL_MAGIC_NUMBER)) {
744 			savemp = mp->b_cont;
745 			freeb(mp);
746 			mp = savemp;
747 		}
748 		putnext(qp, mp);
749 		return (0);
750 
751 	case M_IOCACK:
752 	case M_IOCNAK:
753 	case M_PROTO:
754 	case M_PCPROTO:
755 	case M_PCSIG:
756 	case M_SETOPTS:
757 		qp = tmxp->rdq;
758 		break;
759 
760 	case M_ERROR:
761 		if (tmxp->isptm) {
762 			/*
763 			 * This error is from ptm.  We could tell TCP to
764 			 * shutdown the connection, but it's easier to just
765 			 * wait for the daemon to get SIGCHLD and close from
766 			 * above.
767 			 */
768 			freemsg(mp);
769 			return (0);
770 		}
771 		/*
772 		 * This is from TCP.  Don't really know why we'd
773 		 * get this, but we have a pretty good idea what
774 		 * to do:  Send M_HANGUP to the pty.
775 		 */
776 		mp->b_datap->db_type = M_HANGUP;
777 		mp->b_wptr = mp->b_rptr;
778 		qp = tmxp->peerq;
779 		break;
780 
781 	case M_FLUSH:
782 		if (*mp->b_rptr & FLUSHR)
783 			flushq_dataonly(q);
784 
785 		if (mp->b_flag & MSGMARK) {
786 			/*
787 			 * This M_FLUSH has been marked by the module
788 			 * below as intended for the upper queue,
789 			 * not the peer queue.
790 			 */
791 			qp = tmxp->rdq;
792 			mp->b_flag &= ~MSGMARK;
793 		} else {
794 			/*
795 			 * Wrap this M_FLUSH through the mux.
796 			 * The FLUSHR and FLUSHW bits must be
797 			 * reversed.
798 			 */
799 			qp = tmxp->peerq;
800 			flush = *mp->b_rptr;
801 			*mp->b_rptr &= ~(FLUSHR | FLUSHW);
802 			if (flush & FLUSHW)
803 				*mp->b_rptr |= FLUSHR;
804 			if (flush & FLUSHR)
805 				*mp->b_rptr |= FLUSHW;
806 		}
807 		break;
808 
809 	case M_START:
810 	case M_STOP:
811 	case M_STARTI:
812 	case M_STOPI:
813 		freemsg(mp);
814 		return (0);
815 
816 	default:
817 		cmn_err(CE_NOTE, "logdmuxlrput: received unexpected "
818 		    "message of type 0x%x", mp->b_datap->db_type);
819 		freemsg(mp);
820 		return (0);
821 	}
822 	if (queclass(mp) < QPCTL) {
823 		if (q->q_first != NULL || !canputnext(qp)) {
824 			(void) putq(q, mp);
825 			return (0);
826 		}
827 	}
828 	putnext(qp, mp);
829 	return (0);
830 }
831 
832 /*
833  * Lower read service routine
834  */
835 static int
836 logdmuxlrsrv(queue_t *q)
837 {
838 	mblk_t		*mp, *savemp;
839 	queue_t 	*qp;
840 	struct iocblk	*ioc;
841 	struct tmx	*tmxp = q->q_ptr;
842 
843 	while ((mp = getq(q)) != NULL) {
844 		if (tmxp->muxq == NULL || tmxp->peerq == NULL) {
845 			freemsg(mp);
846 			continue;
847 		}
848 
849 		switch (mp->b_datap->db_type) {
850 
851 		case M_IOCTL:
852 			ioc = (struct iocblk *)mp->b_rptr;
853 
854 			switch (ioc->ioc_cmd) {
855 
856 			case TIOCSWINSZ:
857 			case TCSETAF:
858 			case TCSETSF:
859 			case TCSETA:
860 			case TCSETAW:
861 			case TCSETS:
862 			case TCSETSW:
863 			case TCSBRK:
864 			case TIOCSTI:
865 				qp = tmxp->peerq;
866 				break;
867 
868 			default:
869 				cmn_err(CE_NOTE, "logdmuxlrsrv: received "
870 				    "unexpected request for ioctl 0x%x",
871 				    ioc->ioc_cmd);
872 
873 				/* NAK unrecognized ioctls. */
874 				miocnak(q, mp, 0, 0);
875 				continue;
876 			}
877 			break;
878 
879 		case M_DATA:
880 		case M_HANGUP:
881 			qp = tmxp->peerq;
882 			break;
883 
884 		case M_CTL:
885 			qp = tmxp->rdq;
886 			if (!canputnext(qp)) {
887 				(void) putbq(q, mp);
888 				return (0);
889 			}
890 			if (MBLKL(mp) == 1 &&
891 			    (*mp->b_rptr == M_CTL_MAGIC_NUMBER)) {
892 				savemp = mp->b_cont;
893 				freeb(mp);
894 				mp = savemp;
895 			}
896 			putnext(qp, mp);
897 			continue;
898 
899 		case M_PROTO:
900 		case M_SETOPTS:
901 			qp = tmxp->rdq;
902 			break;
903 
904 		default:
905 			cmn_err(CE_NOTE, "logdmuxlrsrv: received unexpected "
906 			    "message of type 0x%x", mp->b_datap->db_type);
907 			freemsg(mp);
908 			continue;
909 		}
910 		ASSERT(queclass(mp) < QPCTL);
911 		if (!canputnext(qp)) {
912 			(void) putbq(q, mp);
913 			return (0);
914 		}
915 		putnext(qp, mp);
916 	}
917 	return (0);
918 }
919 
920 /*
921  * Lower side write service procedure.  No messages are ever placed on
922  * the write queue here; this routine just back-enables the upper side
923  * write queue and the peer instance's lower read queue.
924  */
925 static int
926 logdmuxlwsrv(queue_t *q)
927 {
928 	struct tmx *tmxp = q->q_ptr;
929 
930 	/*
931 	 * Enable our upper write queue, and enable the peer's lower
932 	 * read queue, which may have queued messages while this lower
933 	 * stream was flow-controlled, so that they can now be passed
934 	 * along via canputnext()/putnext().
935 	 */
936 	qenable(WR(tmxp->rdq));
937 
938 	mutex_enter(&logdmux_peerq_lock);
939 	if (tmxp->peerq != NULL)
940 		qenable(RD(tmxp->peerq));
941 	mutex_exit(&logdmux_peerq_lock);
942 
943 	return (0);
944 }
945 
946 /*
947  * This routine does I_LINK operation.
948  */
949 static void
950 logdmuxlink(queue_t *q, mblk_t *mp)
951 {
952 	struct tmx	*tmxp = q->q_ptr;
953 	struct linkblk	*lp = (struct linkblk *)mp->b_cont->b_rptr;
954 
955 	/*
956 	 * Fail if we're already linked.
957 	 */
958 	if (tmxp->muxq != NULL) {
959 		miocnak(q, mp, 0, EINVAL);
960 		return;
961 	}
962 
963 	tmxp->muxq = lp->l_qbot;
964 	tmxp->muxq->q_ptr = tmxp;
965 	RD(tmxp->muxq)->q_ptr = tmxp;
966 
967 	miocack(q, mp, 0, 0);
968 }
969 
970 /*
971  * logdmuxunlink() is called from logdmuxuwput() and is the first of two
972  * functions which process an I_UNLINK ioctl. logdmuxunlink() will determine
973  * the state of logindmux peer linkage and, based on this, control when the
974  * second function, logdmux_finish_unlink(), is called.  It is
975  * logdmux_finish_unlink() that sends the M_IOCACK upstream and
976  * resets the link state.
977  */
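/*
 * The peer-linkage state (kept in the shared unlinkinfo_t) moves through
 * three values: LOGDMUX_LINKED while both peers are linked and neither is
 * unlinking, LOGDMUX_UNLINK_PENDING once the first instance to process an
 * I_UNLINK has sent LOGDMUX_UNLINK_REQ to its peer, and LOGDMUX_UNLINKED
 * once the LOGDMUX_UNLINK_RESP has been received in logdmuxlrput().  An
 * instance that finds the state already LOGDMUX_UNLINK_PENDING polls via
 * qtimeout() until its peer finishes.
 */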
978 static void
979 logdmuxunlink(queue_t *q, mblk_t *mp)
980 {
981 	struct tmx	*tmxp = q->q_ptr;
982 	unlinkinfo_t	*unlinkinfop;
983 
984 	/*
985 	 * If we don't have a peer, just unlink.  Note that this check needs
986 	 * to be done under logdmux_qexch_lock to prevent racing with
987 	 * LOGDMX_IOC_QEXCHANGE, and we *must* set muxq to NULL prior to
988 	 * releasing the lock so that LOGDMX_IOC_QEXCHANGE will not consider
989 	 * us as a possible peer anymore (if it already considers us to be a
990 	 * peer, then unlinkinfop will not be NULL) -- NULLing muxq precludes
991 	 * use of logdmux_finish_unlink() here.
992 	 */
993 	mutex_enter(&logdmux_qexch_lock);
994 	unlinkinfop = tmxp->unlinkinfop;
995 	if (unlinkinfop == NULL) {
996 		ASSERT(tmxp->peerq == NULL);
997 		tmxp->muxq = NULL;
998 		mutex_exit(&logdmux_qexch_lock);
999 		miocack(q, mp, 0, 0);
1000 		return;
1001 	}
1002 	mutex_exit(&logdmux_qexch_lock);
1003 
1004 	mutex_enter(&unlinkinfop->state_lock);
1005 
1006 	switch (unlinkinfop->state) {
1007 
1008 	case LOGDMUX_LINKED:
1009 		/*
1010 		 * We're the first instance to process an I_UNLINK --
1011 		 * ie, the peer instance is still there. We'll change
1012 		 * the state so that only one instance is executing an
1013 		 * I_UNLINK at any one time.
1014 		 */
1015 		unlinkinfop->state = LOGDMUX_UNLINK_PENDING;
1016 		mutex_exit(&unlinkinfop->state_lock);
1017 		/*
1018 		 * Attach the original M_IOCTL message to a
1019 		 * LOGDMUX_UNLINK_REQ message and send it to our peer to
1020 		 * tell it to unlink from us. When it has completed the
1021 		 * task, it will send us a LOGDMUX_UNLINK_RESP message
1022 		 * with the original M_IOCTL still attached, which will be
1023 		 * processed in our logdmuxlrput(). At that point, we will
1024 		 * call logdmux_finish_unlink() to complete the unlink
1025 		 * operation using the attached M_IOCTL.
1026 		 */
1027 		unlinkinfop->prot_mp->b_cont = mp;
1028 		/*
1029 		 * Put the M_CTL directly to the peer's lower RQ.
1030 		 */
1031 		put(RD(tmxp->peerq), unlinkinfop->prot_mp);
1032 		break;
1033 
1034 	case LOGDMUX_UNLINK_PENDING:
1035 		mutex_exit(&unlinkinfop->state_lock);
1036 		/*
1037 		 * Our peer is actively processing an I_UNLINK itself.
1038 		 * We have to wait for the peer to complete and we use
1039 		 * qtimeout as a way to poll for its completion.
1040 		 * We save a reference to our mblk so that we can send
1041 		 * it upstream once our peer is done.
1042 		 */
1043 		tmxp->unlink_mp = mp;
1044 		tmxp->utimoutid = qtimeout(q, logdmux_unlink_timer, q,
1045 		    drv_usectohz(LOGDMUX_POLL_WAIT));
1046 		break;
1047 
1048 	case LOGDMUX_UNLINKED:
1049 		/*
1050 		 * Our peer is no longer linked so we can proceed.
1051 		 */
1052 		mutex_exit(&unlinkinfop->state_lock);
1053 		mutex_destroy(&unlinkinfop->state_lock);
1054 		freeb(unlinkinfop->prot_mp);
1055 		kmem_free(unlinkinfop, sizeof (unlinkinfo_t));
1056 		logdmux_finish_unlink(q, mp);
1057 		break;
1058 
1059 	default:
1060 		mutex_exit(&unlinkinfop->state_lock);
1061 		cmn_err(CE_PANIC,
1062 		    "logdmuxunlink: peer linkage is in an unrecognized state");
1063 		break;
1064 	}
1065 }
1066 
1067 /*
1068  * Finish the unlink operation.  Note that no locks should be held since
1069  * this routine calls into other queues.
1070  */
1071 static void
1072 logdmux_finish_unlink(queue_t *q, mblk_t *unlink_mp)
1073 {
1074 	struct tmx *tmxp = q->q_ptr;
1075 	mblk_t *mp;
1076 
1077 	/*
1078 	 * Flush any write side data downstream.
1079 	 */
1080 	while ((mp = getq(WR(q))) != NULL)
1081 		putnext(tmxp->muxq, mp);
1082 
1083 	/*
1084 	 * Note that we do not NULL out q_ptr since another thread (e.g., a
1085 	 * STREAMS service thread) might call logdmuxlrput() between the time
1086 	 * we exit the logindmux perimeter and the time the STREAMS framework
1087 	 * resets q_ptr to stdata (since muxq is set to NULL, any messages
1088 	 * will just be discarded).
1089 	 */
1090 	tmxp->muxq = NULL;
1091 	tmxp->unlinkinfop = NULL;
1092 	tmxp->peerq = NULL;
1093 	miocack(q, unlink_mp, 0, 0);
1094 }
1095 
1096 /*
1097  * logdmux_unlink_timer() is executed by qtimeout(). This function will
1098  * check unlinkinfop->state to determine whether the peer has completed
1099  * its I_UNLINK. If it hasn't, we use qtimeout() to initiate another poll.
1100  */
1101 static void
1102 logdmux_unlink_timer(void *arg)
1103 {
1104 	queue_t		*q = arg;
1105 	struct	tmx	*tmxp = q->q_ptr;
1106 	unlinkinfo_t	*unlinkinfop = tmxp->unlinkinfop;
1107 
1108 	tmxp->utimoutid = 0;
1109 
1110 	mutex_enter(&unlinkinfop->state_lock);
1111 
1112 	if (unlinkinfop->state != LOGDMUX_UNLINKED) {
1113 		ASSERT(unlinkinfop->state == LOGDMUX_UNLINK_PENDING);
1114 		mutex_exit(&unlinkinfop->state_lock);
1115 		/*
1116 		 * We need to wait longer for our peer to complete.
1117 		 */
1118 		tmxp->utimoutid = qtimeout(q, logdmux_unlink_timer, q,
1119 		    drv_usectohz(LOGDMUX_POLL_WAIT));
1120 	} else {
1121 		/*
1122 		 * Our peer is no longer linked so we can proceed with
1123 		 * the cleanup.
1124 		 */
1125 		mutex_exit(&unlinkinfop->state_lock);
1126 		mutex_destroy(&unlinkinfop->state_lock);
1127 		freeb(unlinkinfop->prot_mp);
1128 		kmem_free(unlinkinfop, sizeof (unlinkinfo_t));
1129 		logdmux_finish_unlink(q, tmxp->unlink_mp);
1130 	}
1131 }
1132 
1133 static void
1134 logdmux_timer(void *arg)
1135 {
1136 	queue_t		*q = arg;
1137 	struct tmx	*tmxp = q->q_ptr;
1138 
1139 	ASSERT(tmxp != NULL);
1140 
1141 	if (q->q_flag & QREADR) {
1142 		ASSERT(tmxp->rtimoutid != 0);
1143 		tmxp->rtimoutid = 0;
1144 	} else {
1145 		ASSERT(tmxp->wtimoutid != 0);
1146 		tmxp->wtimoutid = 0;
1147 	}
1148 	enableok(q);
1149 	qenable(q);
1150 }
1151 
1152 static void
1153 logdmux_buffer(void *arg)
1154 {
1155 	queue_t		*q = arg;
1156 	struct tmx	*tmxp = q->q_ptr;
1157 
1158 	ASSERT(tmxp != NULL);
1159 
1160 	if (q->q_flag & QREADR) {
1161 		ASSERT(tmxp->rbufcid != 0);
1162 		tmxp->rbufcid = 0;
1163 	} else {
1164 		ASSERT(tmxp->wbufcid != 0);
1165 		tmxp->wbufcid = 0;
1166 	}
1167 	enableok(q);
1168 	qenable(q);
1169 }
1170 
1171 static void
1172 recover(queue_t *q, mblk_t *mp, size_t size)
1173 {
1174 	timeout_id_t	tid;
1175 	bufcall_id_t	bid;
1176 	struct	tmx	*tmxp = q->q_ptr;
1177 
1178 	/*
1179 	 * Avoid re-enabling the queue.
1180 	 */
1181 	ASSERT(queclass(mp) < QPCTL);
1182 	ASSERT(WR(q)->q_next == NULL); /* Called from upper queue only */
1183 	noenable(q);
1184 	(void) putbq(q, mp);
1185 
1186 	/*
1187 	 * Make sure there is at most one outstanding request per queue.
1188 	 */
1189 	if (q->q_flag & QREADR) {
1190 		if (tmxp->rtimoutid != 0 || tmxp->rbufcid != 0)
1191 			return;
1192 	} else {
1193 		if (tmxp->wtimoutid != 0 || tmxp->wbufcid != 0)
1194 			return;
1195 	}
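	/*
	 * Ask for a callback when a buffer of the required size may be
	 * available; if the bufcall itself cannot be allocated, fall back
	 * to polling with qtimeout() instead.
	 */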
1196 	if (!(bid = qbufcall(RD(q), size, BPRI_MED, logdmux_buffer, q))) {
1197 		tid = qtimeout(RD(q), logdmux_timer, q, drv_usectohz(SIMWAIT));
1198 		if (q->q_flag & QREADR)
1199 			tmxp->rtimoutid = tid;
1200 		else
1201 			tmxp->wtimoutid = tid;
1202 	} else	{
1203 		if (q->q_flag & QREADR)
1204 			tmxp->rbufcid = bid;
1205 		else
1206 			tmxp->wbufcid = bid;
1207 	}
1208 }
1209 
1210 static void
1211 flushq_dataonly(queue_t *q)
1212 {
1213 	mblk_t *mp, *nmp;
1214 
1215 	/*
1216 	 * Since we are already in the perimeter, and we are not a put-shared
1217	 * perimeter, we don't need to freeze the stream to be assured
1218	 * of exclusive access.
1219 	 */
1220 	mp = q->q_first;
1221 	while (mp != NULL) {
1222 		if (mp->b_datap->db_type == M_DATA) {
1223 			nmp = mp->b_next;
1224 			rmvq(q, mp);
1225 			freemsg(mp);
1226 			mp = nmp;
1227 		} else {
1228 			mp = mp->b_next;
1229 		}
1230 	}
1231 }
1232 
1233 /*
1234  * logdmux_alloc_unlinkinfo() is called from logdmuxuwput() during the
1235  * processing of a LOGDMX_IOC_QEXCHANGE ioctl() to allocate the
1236  * unlinkinfo_t which is needed during the processing of an I_UNLINK.
1237  */
1238 static int
1239 logdmux_alloc_unlinkinfo(struct tmx *t0, struct tmx *t1)
1240 {
1241 	unlinkinfo_t	*p;
1242 	uint_t		*messagep;
1243 
1244 	if ((p = kmem_zalloc(sizeof (unlinkinfo_t), KM_NOSLEEP)) == NULL)
1245 		return (ENOSR);
1246 
1247 	if ((p->prot_mp = allocb(sizeof (uint_t), BPRI_MED)) == NULL) {
1248 		kmem_free(p, sizeof (unlinkinfo_t));
1249 		return (ENOSR);
1250 	}
1251 
1252 	DB_TYPE(p->prot_mp) = M_CTL;
1253 	messagep = (uint_t *)p->prot_mp->b_wptr;
1254 	*messagep = LOGDMUX_UNLINK_REQ;
1255 	p->prot_mp->b_wptr += sizeof (*messagep);
1256 	p->state = LOGDMUX_LINKED;
1257 	mutex_init(&p->state_lock, NULL, MUTEX_DRIVER, NULL);
1258 
1259 	t0->unlinkinfop = t1->unlinkinfop = p;
1260 
1261 	return (0);
1262 }
1263