xref: /titanic_44/usr/src/uts/common/io/tl.c (revision 5679c89fcd2facbb4334df8870d3d7a4d2b11673)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Multithreaded STREAMS Local Transport Provider.
28  *
29  * OVERVIEW
30  * ========
31  *
32  * This driver provides TLI as well as socket semantics.  It provides
33  * connectionless, connection oriented, and connection oriented with orderly
34  * release transports for TLI and sockets. Each transport type has separate name
35  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
36  * this removes any name space conflicts when binding to socket style transport
37  * addresses.
38  *
39  * NOTE: There is one exception: Socket ticots and ticotsord transports share
40  * the same namespace. In fact, sockets always use ticotsord type transport.
41  *
42  * The driver mode is specified during open() by the minor number used for
43  * open.
44  *
45  *  The sockets in addition have the following semantic differences:
46  *  No support for passing up credentials (TL_SET[U]CRED).
47  *
48  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
49  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
50  *	T_OPTDATA_IND.
51  *
52  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
53  *	a T_CONN_RES is received from the acceptor. This means that a socket
54  *	connect will complete before the peer has called accept.
55  *
56  *
57  * MULTITHREADING
58  * ==============
59  *
60  * The driver does not use STREAMS protection mechanisms. Instead it uses a
61  * generic "serializer" abstraction. Most of the operations are executed behind
62  * the serializer and are, essentially single-threaded. All functions executed
63  * behind the same serializer are strictly serialized. So if one thread calls
64  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
65  * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
66  * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1)
67  * or bar(mp2, arg1); foo(mp1, arg1); but foo() and bar() will never run at
68  * the same time.
69  *
70  * Connectionless transports use a single serializer per transport type (one
71  * for TLI and one for sockets). Connection-oriented transports use
72  * finer-grained serializers.
73  *
74  * All COTS-type endpoints start their life with private serializers. During
75  * connection request processing the endpoint serializer is switched to the
76  * listener's serializer and the rest of T_CONN_REQ processing is done on the
77  * listener serializer. During T_CONN_RES processing the eager serializer is
78  * switched from listener to acceptor serializer and after that point all
79  * processing for eager and acceptor happens on this serializer. To avoid races
80  * with endpoint closes while its serializer may be changing closes are blocked
81  * while serializers are manipulated.
82  *
83  * References accounting
84  * ---------------------
85  *
86  * Endpoints are reference counted and freed when the last reference is
87  * dropped. Functions within the serializer may access an endpoint state even
88  * after an endpoint closed. The te_closing being set on the endpoint indicates
89  * that the endpoint entered its close routine.
90  *
91  * One reference is held for each opened endpoint instance. The reference
92  * counter is incremented when the endpoint is linked to another endpoint and
93  * decremented when the link disappears. It is also incremented when the
94  * endpoint is found by the hash table lookup. This increment is atomic with the
95  * lookup itself and happens while the hash table read lock is held.
96  *
97  * Close synchronization
98  * ---------------------
99  *
100  * During close the endpoint is marked as closing using te_closing flag. It is
101  * usually enough to check for te_closing flag since all other state changes
102  * happen after this flag is set and the close entered serializer. Immediately
103  * after setting te_closing flag tl_close() enters serializer and waits until
104  * the callback finishes. This allows all functions called within serializer to
105  * simply check te_closing without any locks.
106  *
107  * Serializer management.
108  * ---------------------
109  *
110  * For COTS transports serializers are created when the endpoint is constructed
111  * and destroyed when the endpoint is destructed. CLTS transports use global
112  * serializers - one for sockets and one for TLI.
113  *
114  * COTS serializers have separate reference counts to deal with several
115  * endpoints sharing the same serializer. There is a subtle problem related to
116  * the serializer destruction. The serializer should never be destroyed by any
117  * function executed inside serializer. This means that close has to wait till
118  * all serializer activity for this endpoint is finished before it can drop the
119  * last reference on the endpoint (which may as well free the serializer).  This
120  * is only relevant for COTS transports which manage serializers
121  * dynamically. For CLTS transports close may complete without waiting for all
122  * serializer activity to finish since serializer is only destroyed at driver
123  * detach time.
124  *
125  * COTS endpoints keep track of the number of outstanding requests on the
126  * serializer for the endpoint. The code handling accept() avoids changing
127  * client serializer if it has any pending messages on the serializer and
128  * instead moves acceptor to listener's serializer.
129  *
130  *
131  * Use of hash tables
132  * ------------------
133  *
134  * The driver uses modhash hash table implementation. Each transport uses two
135  * hash tables - one for finding endpoints by acceptor ID and another one for
136  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
137  * pair of hash tables since sockets only use TICOTSORD.
138  *
139  * All hash tables lookups increment a reference count for returned endpoints,
140  * so we may safely check the endpoint state even when the endpoint is removed
141  * from the hash by another thread immediately after it is found.
142  *
143  *
144  * CLOSE processing
145  * ================
146  *
147  * The driver enters serializer twice on close(). The close sequence is the
148  * following:
149  *
150  * 1) Wait until closing is safe (te_closewait becomes zero)
151  *	This step is needed to prevent close during serializer switches. In most
152  *	cases (close happening after connection establishment) te_closewait is
153  *	zero.
154  * 2) Set te_closing.
155  * 3) Call tl_close_ser() within serializer and wait for it to complete.
156  *
157  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
158  *	It also needs to clear write-side q_next pointers - this should be done
159  *	before qprocsoff().
160  *
161  *    This synchronous serializer entry during close is needed to ensure that
162  *    the queue is valid everywhere inside the serializer.
163  *
164  *    Note that in many cases close will execute tl_close_ser() synchronously,
165  *    so it will not wait at all.
166  *
167  * 4) Calls qprocsoff().
168  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
169  *	complete (for COTS transports). For CLTS transport there is no wait.
170  *
171  *	tl_close_finish_ser() finishes the close process and wakes up waiting
172  *	close if there is any.
173  *
174  *    Note that in most cases close will enter tl_close_finish_ser()
175  *    synchronously and will not wait at all.
176  *
177  *
178  * Flow Control
179  * ============
180  *
181  * The driver implements both read and write side service routines. No one calls
182  * putq() on the read queue. The read side service routine tl_rsrv() is called
183  * when the read side stream is back-enabled. It enters serializer synchronously
184  * (waits till serializer processing is complete). Within serializer it
185  * back-enables all endpoints blocked by the queue for connection-less
186  * transports and enables write side service processing for the peer for
187  * connection-oriented transports.
188  *
189  * Read and write side service routines use special mblk_sized space in the
190  * endpoint structure to enter perimeter.
191  *
192  * Write-side flow control
193  * -----------------------
194  *
195  * Write side flow control is a bit tricky. The driver needs to deal with two
196  * message queues - the explicit STREAMS message queue maintained by
197  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
198  * queues should be synchronized to preserve message ordering and should
199  * maintain a single order determined by the order in which messages enter
200  * tl_wput(). In order to maintain the ordering between these two queues the
201  * STREAMS queue is only manipulated within the serializer, so the ordering is
202  * provided by the serializer.
203  *
204  * Functions called from the tl_wsrv() sometimes may call putbq(). To
205  * immediately stop any further processing of the STREAMS message queues the
206  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
207  * side service processing stops when the flag is set.
208  *
209  * The tl_wsrv() function enters serializer synchronously and waits for it to
210  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
211  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
212  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
213  * always bounded by the amount of messages on the STREAMS queue at the time
214  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
215  * queue from another serialized entry which can't happen in parallel. This
216  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
217  * of it draining forever while writer places new messages on the STREAMS
218  * queue).
219  *
220  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
221  *
222  *
223  * Unix Domain Sockets
224  * ===================
225  *
226  * The driver knows the structure of Unix Domain sockets addresses and treats
227  * them differently from generic TLI addresses. For sockets implicit binds are
228  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
229  * instead of using address length of zero. Explicit binds specify
230  * SOU_MAGIC_EXPLICIT as magic.
231  *
232  * For implicit binds we always use minor number as soua_vp part of the address
233  * and avoid any hash table lookups. This saves two hash tables lookups per
234  * anonymous bind.
235  *
236  * For explicit address we hash the vnode pointer instead of hashing the
237  * full-scale address+zone+length. Hashing by pointer is more efficient than
238  * hashing by the full address.
239  *
240  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
241  * tep structure, so it should be never freed.
242  *
243  * Also for sockets the driver always uses minor number as acceptor id.
244  *
245  * TPI VIOLATIONS
246  * --------------
247  *
248  * This driver violates TPI in several respects for Unix Domain Sockets:
249  *
250  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
251  *	is requested and the endpoint is already in use. There is no point in
252  *	generating an unused address since this address will be rejected by
253  *	sockfs anyway. For implicit binds it always generates a new address
254  *	(sets soua_vp to its minor number).
255  *
256  * 2) It always uses minor number as acceptor ID and never uses queue
257  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
258  *	message and they do not use the queue pointer.
259  *
260  * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog
261  *	followed by listen(). The listen() should be issued with non-zero
262  *	backlog, so sotpi_listen() issues unbind request followed by bind
263  *	request to the same address but with a non-zero qlen value. Both
264  *	tl_bind() and tl_unbind() require write lock on the hash table to
265  *	insert/remove the address. The driver does not remove the address from
266  *	the hash for endpoints that are bound to the explicit address and have
267  *	backlog of zero. During T_BIND_REQ processing if the address requested
268  *	is equal to the address the endpoint already has it updates the backlog
269  *	without reinserting the address in the hash table. This optimization
270  *	avoids two hash table updates for each listener created. It also
271  *	avoids the problem of a "stolen" address when another listener may use
272  *	the same address between the unbind and bind and suddenly listen() fails
273  *	because address is in use even though the bind() succeeded.
274  *
275  *
276  * CONNECTIONLESS TRANSPORTS
277  * =========================
278  *
279  * Connectionless transports all share the same serializer (one for TLI and one
280  * for Sockets). Functions executing behind serializer can check or modify state
281  * of any endpoint.
282  *
283  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
284  * te_lastep field. The next time X talks to some address A it checks whether A
285  * is the same as Y's address and if it is there is no need to lookup Y. If the
286  * address is different or the state of Y is not appropriate (e.g. closed or not
287  * idle) X does a lookup using tl_find_peer() and caches the new address.
288  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
289  * on the endpoint found.
290  *
291  * During close of endpoint Y it doesn't try to remove itself from other
292  * endpoints caches. They will detect that Y is gone and will search the peer
293  * endpoint again.
294  *
295  * Flow Control Handling.
296  * ----------------------
297  *
298  * Each connectionless endpoint keeps a list of endpoints which are
299  * flow-controlled by its queue. It also keeps a pointer to the queue which
300  * flow-controls itself.  Whenever flow control releases for endpoint X it
301  * enables all queues from the list. During close it also back-enables everyone
302  * in the list. If X is flow-controlled when it is closing it removes itself
303  * from the peer's list.
304  *
305  * DATA STRUCTURES
306  * ===============
307  *
308  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
309  * endpoint state. For connection-oriented transports it keeps a list
310  * of pending connections (tl_icon_t). For connectionless transports it keeps a
311  * list of endpoints flow controlled by this one.
312  *
313  * Each transport type is represented by a per-transport data structure
314  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
315  * endpoint address hash tables for each transport. It also contains pointer to
316  * transport serializer for connectionless transports.
317  *
318  * Each endpoint keeps a link to its transport structure, so the code can find
319  * all per-transport information quickly.
320  */
321 
322 #include	<sys/types.h>
323 #include	<sys/inttypes.h>
324 #include	<sys/stream.h>
325 #include	<sys/stropts.h>
326 #define	_SUN_TPI_VERSION 2
327 #include	<sys/tihdr.h>
328 #include	<sys/strlog.h>
329 #include	<sys/debug.h>
330 #include	<sys/cred.h>
331 #include	<sys/errno.h>
332 #include	<sys/kmem.h>
333 #include	<sys/id_space.h>
334 #include	<sys/modhash.h>
335 #include	<sys/mkdev.h>
336 #include	<sys/tl.h>
337 #include	<sys/stat.h>
338 #include	<sys/conf.h>
339 #include	<sys/modctl.h>
340 #include	<sys/strsun.h>
341 #include	<sys/socket.h>
342 #include	<sys/socketvar.h>
343 #include	<sys/sysmacros.h>
344 #include	<sys/xti_xtiopt.h>
345 #include	<sys/ddi.h>
346 #include	<sys/sunddi.h>
347 #include	<sys/zone.h>
348 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
349 #include	<inet/optcom.h>
350 #include	<sys/strsubr.h>
351 #include	<sys/ucred.h>
352 #include	<sys/suntpi.h>
353 #include	<sys/list.h>
354 #include	<sys/serializer.h>
355 
356 /*
357  * TBD List
358  * 14. Eliminate state changes through table
359  * 16. AF_UNIX socket options
360  * 17. connect() for ticlts
361  * 18. support for "netstat" to show AF_UNIX plus TLI local
362  *	transport connections
363  * 21. sanity check to flushing on sending M_ERROR
364  */
365 
366 /*
367  * CONSTANT DECLARATIONS
368  * --------------------
369  */
370 
371 /*
372  * Local declarations
373  */
/* Next TPI state for event EV while in state ST, read from ti_statetbl. */
374 #define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]
375 
376 #define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
377 #define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
378 #define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgsz" is unlimited (0) */
379 /*
380  * Hash tables size (initial value of the tl_hash_size variable).
381  */
382 #define	TL_HASH_SIZE 311
383 
384 /*
385  * Definitions for module_info (the values placed in tl_minfo).
386  */
387 #define		TL_ID		(104)		/* module ID number */
388 #define		TL_NAME		"tl"		/* module name */
389 #define		TL_MINPSZ	(0)		/* min packet size */
390 #define		TL_MAXPSZ	INFPSZ 		/* max packet size ZZZ */
391 #define		TL_HIWAT	(16*1024)	/* hi water mark */
392 #define		TL_LOWAT	(256)		/* lo water mark */
393 /*
394  * Definition of minor numbers/modes for new transport provider modes.
395  * We view the socket use as a separate mode to get a separate name space.
 *
 * The three TLI modes take the values 0..2. TL_SOCKET (4) is a separate bit
 * that is OR-ed with a TLI mode to form the socket variants (4..6), so every
 * mode value fits within TL_MINOR_MASK (0x7).
396  */
397 #define		TL_TICOTS	0	/* connection oriented transport */
398 #define		TL_TICOTSORD 	1	/* COTS w/ orderly release */
399 #define		TL_TICLTS 	2	/* connectionless transport */
400 #define		TL_UNUSED	3
401 #define		TL_SOCKET	4	/* Socket */
402 #define		TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
403 #define		TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
404 #define		TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)
405 
406 #define		TL_MINOR_MASK	0x7
/* NOTE(review): evaluates to 3; presumably the base of the minor id space - confirm at its use. */
407 #define		TL_MINOR_START	(TL_TICLTS + 1)
408 
409 /*
410  * LOCAL MACROS
 *
 * T_ALIGN(p) rounds p up to the next multiple of sizeof (t_scalar_t) using
 * P2ROUNDUP.
411  */
412 #define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
413 
414 /*
415  * EXTERNAL VARIABLE DECLARATIONS
416  * -----------------------------
417  */
418 /*
419  * state table defined in the OS space.c
 * Dimensioned [TE_NOEVENTS][TS_NOSTATES], i.e. indexed by event first and
 * by current state second.
420  */
421 extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
422 
423 /*
424  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
425  */
/* Queue procedures: open/close and the put/service routines. */
426 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
427 static int tl_close(queue_t *, int, cred_t *);
428 static void tl_wput(queue_t *, mblk_t *);
429 static void tl_wsrv(queue_t *);
430 static void tl_rsrv(queue_t *);
431 
/* Driver attach/detach/getinfo entry points. */
432 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
433 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
434 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
435 
436 
437 /*
438  * GLOBAL DATA STRUCTURES AND VARIABLES
439  * -----------------------------------
440  */
441 
442 /*
443  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
444  * For now, we only manage the SO_RECVUCRED option but we also have
445  * harmless dummy options to make things work with some common code we access.
 *
 * Each entry is a positional opdes_t initializer, so the order of the values
 * inside an entry must not change.
 * NOTE(review): the meaning of each position is defined by opdes_t in
 * <inet/optcom.h>, which is not visible here - confirm before editing.
446  */
447 opdes_t	tl_opt_arr[] = {
448 	/* The SO_TYPE is needed for the hack below */
449 	{
450 		SO_TYPE,
451 		SOL_SOCKET,
452 		OA_R,
453 		OA_R,
454 		OP_NP,
455 		OP_PASSNEXT,
456 		sizeof (t_scalar_t),
457 		0
458 	},
	/* SO_RECVUCRED: the one option this driver actually manages. */
459 	{
460 		SO_RECVUCRED,
461 		SOL_SOCKET,
462 		OA_RW,
463 		OA_RW,
464 		OP_NP,
465 		OP_PASSNEXT,
466 		sizeof (int),
467 		0
468 	}
469 };
470 
471 /*
472  * Table of all supported levels
473  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
474  * any supported options so we need this info separately.
475  *
476  * This is needed only for topmost tpi providers.
 *
 * Of the levels listed, only SOL_SOCKET has entries in tl_opt_arr.
477  */
478 optlevel_t	tl_valid_levels_arr[] = {
479 	XTI_GENERIC,
480 	SOL_SOCKET,
481 	TL_PROT_LEVEL
482 };
483 
/* Number of entries in tl_valid_levels_arr. */
484 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
485 /*
486  * Current upper bound on the amount of space needed to return all options.
487  * Additional options with data size of sizeof(long) are handled automatically.
488  * Others must be accounted for by hand.
 *
 * The bound is: 4 bytes of data per option (count << 2), one struct opthdr
 * per option, 64 bytes of slack, and the T_optmgmt_ack header itself.
489  */
490 #define	TL_MAX_OPT_BUF_LEN						\
491 		((A_CNT(tl_opt_arr) << 2) +				\
492 		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
493 		64 + sizeof (struct T_optmgmt_ack))
494 
/* Number of entries in tl_opt_arr. */
495 #define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)
496 
497 /*
498  *	transport addr structure
 *
 *	An address is the triple (zone, length, bytes); ta_abuf points at
 *	ta_alen bytes of address data.
499  */
500 typedef struct tl_addr {
501 	zoneid_t	ta_zoneid;		/* Zone scope of address */
502 	t_scalar_t	ta_alen;		/* length of abuf */
503 	void		*ta_abuf;		/* the addr itself */
504 } tl_addr_t;
505 
506 /*
507  * Refcounted version of serializer.
 * Pairs a serializer_t with a reference count so that one serializer can be
 * shared by several endpoints.
508  */
509 typedef struct tl_serializer {
510 	uint_t		ts_refcnt;	/* references held on this wrapper */
511 	serializer_t	*ts_serializer;	/* the underlying serializer */
512 } tl_serializer_t;
513 
514 /*
515  * Each transport type has a separate state.
516  * Per-transport state.
 * Instances live in the tl_transports[] table; the hash and serializer
 * pointers start out NULL there.
517  */
518 typedef struct tl_transport_state {
519 	char		*tr_name;	/* transport name, e.g. "ticots" */
520 	minor_t		tr_minor;	/* mode value (TL_TICOTS, ...) */
521 	uint32_t	tr_defaddr;	/* initialized to TL_DFADDR */
522 	mod_hash_t	*tr_ai_hash;	/* endpoints by acceptor ID */
523 	mod_hash_t	*tr_addr_hash;	/* endpoints by address */
524 	tl_serializer_t	*tr_serializer;	/* transport-wide serializer */
525 } tl_transport_state_t;
526 
/* Initial value of tr_defaddr for every transport. */
527 #define	TL_DFADDR 0x1000
528 
/*
 * Per-transport state table. Rows are listed in increasing mode order, so the
 * array index of each row equals its tr_minor value (0..6). The hash table and
 * serializer pointers are NULL here and are set up at run time.
 * Every row spells out all six members, including tr_serializer.
 */
529 static tl_transport_state_t tl_transports[] = {
530 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
531 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
532 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
533 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
534 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
535 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
536 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
537 };
538 
/* Number of rows in tl_transports. */
539 #define	TL_MAXTRANSPORT A_CNT(tl_transports)
540 
/* Forward declaration; the structure itself is defined further down. */
541 struct tl_endpt;
542 typedef struct tl_endpt tl_endpt_t;
543 
/* Signature shared by the per-message handlers: (message, endpoint). */
544 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
545 
546 /*
547  * Data structure used to represent pending connects.
548  * Records enough information so that the connecting peer can close
549  * before the connection gets accepted.
550  */
551 typedef struct tl_icon {
552 	list_node_t	ti_node;	/* list linkage */
553 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
554 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
555 	t_scalar_t	ti_seqno;	/* Sequence number */
556 } tl_icon_t;
557 
/* Short name for the socket address structure and its size. */
558 typedef struct so_ux_addr soux_addr_t;
559 #define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)
560 
561 /*
562  * Maximum number of unaccepted connection indications allowed per listener.
 * tl_maxqlen is a non-static variable initialized to TL_MAXQLEN.
563  */
564 #define	TL_MAXQLEN	4096
565 int tl_maxqlen = TL_MAXQLEN;
566 
567 /*
568  *	transport endpoint structure
 *
 *	One instance holds all the state of an endpoint. Several members are
 *	reached through the accessor #defines interleaved with the fields
 *	below (te_seqno, te_serializer, te_magic, te_alen, te_nicon, ...), so
 *	those names are macros rather than real members. The COTS-only and
 *	CLTS-only state share storage in the _te_transport_state union.
569  */
570 struct tl_endpt {
571 	queue_t		*te_rq;		/* stream read queue */
572 	queue_t		*te_wq;		/* stream write queue */
573 	uint32_t	te_refcnt;	/* endpoint reference count */
574 	int32_t 	te_state;	/* TPI state of endpoint */
575 	minor_t		te_minor;	/* minor number */
576 #define	te_seqno	te_minor	/* seqno is an alias for the minor */
577 	uint_t		te_flag;	/* flag field */
578 	boolean_t	te_nowsrv;	/* set by TL_PUTBQ, cleared by TL_PUTQ/QENABLE */
579 	tl_serializer_t	*te_ser;	/* Serializer to use */
580 #define	te_serializer	te_ser->ts_serializer
581 
582 	soux_addr_t	te_uxaddr;	/* Socket address */
583 #define	te_magic	te_uxaddr.soua_magic
584 #define	te_vp		te_uxaddr.soua_vp
585 	tl_addr_t	te_ap;		/* addr bound to this endpt */
586 #define	te_zoneid te_ap.ta_zoneid
587 #define	te_alen	te_ap.ta_alen
588 #define	te_abuf	te_ap.ta_abuf
589 
590 	tl_transport_state_t *te_transport;	/* per-transport state */
591 #define	te_addrhash	te_transport->tr_addr_hash
592 #define	te_aihash	te_transport->tr_ai_hash
593 #define	te_defaddr	te_transport->tr_defaddr
594 	cred_t		*te_credp;	/* endpoint user credentials */
595 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
596 
597 	/*
598 	 * State specific for connection-oriented and connectionless transports.
599 	 */
600 	union {
601 		/* Connection-oriented state. */
602 		struct {
603 			t_uscalar_t _te_nicon;	/* count of conn requests */
604 			t_uscalar_t _te_qlen;	/* max conn requests */
605 			tl_endpt_t  *_te_oconp;	/* conn request pending */
606 			tl_endpt_t  *_te_conp;	/* connected endpt */
			/*
			 * NOTE(review): padding present only on non-ILP32
			 * builds; presumably keeps _te_iconp at the same
			 * offset as _te_flowlist below - confirm.
			 */
607 #ifndef _ILP32
608 			void	    *_te_pad;
609 #endif
610 			list_t	_te_iconp;	/* list of conn ind. pending */
611 		} _te_cots_state;
612 		/* Connection-less state. */
613 		struct {
614 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
615 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
616 			list_node_t _te_flows;	/* lists of connections */
617 			list_t  _te_flowlist;	/* Who flowcontrols on me */
618 		} _te_clts_state;
619 	} _te_transport_state;
620 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
621 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
622 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
623 #define	te_conp		_te_transport_state._te_cots_state._te_conp
624 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
625 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
626 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
627 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
628 #define	te_flows	_te_transport_state._te_clts_state._te_flows
629 
630 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
631 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
632 	pid_t		te_cpid;	/* cached pid of endpoint */
633 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
634 	/*
635 	 * Pieces of the endpoint state needed for closing.
636 	 */
637 	kmutex_t	te_closelock;
638 	kcondvar_t	te_closecv;
639 	uint8_t		te_closing;	/* The endpoint started closing */
640 	uint8_t		te_closewait;	/* Wait in close until zero */
641 	mblk_t		te_closemp;	/* for entering serializer on close */
642 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
643 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
644 	kmutex_t	te_srv_lock;
645 	kcondvar_t	te_srv_cv;
646 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
647 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
648 	/*
649 	 * Pieces of the endpoint state needed for serializer transitions.
650 	 */
651 	kmutex_t	te_ser_lock;	/* Protects the count below */
652 	uint_t		te_ser_count;	/* Number of messages on serializer */
653 };
654 
655 /*
656  * Flag values. Lower 4 bits specify the transport used.
657  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
658  * they allow to identify the endpoint more easily.
 * All values below are distinct single bits of te_flag, starting at 0x10.
659  */
660 #define	TL_LISTENER	0x00010	/* the listener endpoint */
661 #define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
662 #define	TL_EAGER	0x00040	/* connecting endpoint */
663 #define	TL_ACCEPTED	0x00080	/* accepted connection */
664 #define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
665 #define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
666 #define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
667 #define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
668 #define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
669 /*
670  * Boolean checks for the endpoint type.
 * These test the mode bits kept in the low bits of te_flag: TL_TICLTS is the
 * 0x2 bit, TL_TICOTSORD the 0x1 bit and TL_SOCKET the 0x4 bit. IS_COTS is
 * simply the negation of IS_CLTS.
671  */
672 #define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
673 #define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
674 #define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
675 #define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
676 
/* Pid recorded in the message, or the endpoint's cached pid when that is -1. */
677 #define	TLPID(mp, tep)	(DB_CPID(mp) == -1 ? (tep)->te_cpid : DB_CPID(mp))
678 
679 /*
680  * Certain operations are always used together. These macros reduce the chance
681  * of missing a part of a combination.
 *
 * All of these expand to a brace-enclosed block (not do { } while (0)) and
 * evaluate their argument more than once, so pass a plain lvalue without side
 * effects. TL_REMOVE_PEER relies on TL_UNCONNECT expanding to a complete
 * block, which is why it invokes it without a trailing semicolon.
682  */
/* Drop the reference held through x and clear the pointer. */
683 #define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
/* Same as TL_UNCONNECT, but a no-op when x is already NULL. */
684 #define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
685 
/*
 * Put mp back on the write queue and set te_nowsrv. Asserts that close has
 * not yet entered the serializer for this endpoint.
 */
686 #define	TL_PUTBQ(x, mp) {		\
687 	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
688 	(x)->te_nowsrv = B_TRUE;	\
689 	(void) putbq((x)->te_wq, mp);	\
690 }
691 
/* Both clear te_nowsrv, then enable the write queue or queue mp on it. */
692 #define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
693 #define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
694 
695 /*
696  * STREAMS driver glue data structures.
 * tl_minfo is shared by both the read-side and write-side qinit tables.
697  */
698 static	struct	module_info	tl_minfo = {
699 	TL_ID,			/* mi_idnum */
700 	TL_NAME,		/* mi_idname */
701 	TL_MINPSZ,		/* mi_minpsz */
702 	TL_MAXPSZ,		/* mi_maxpsz */
703 	TL_HIWAT,		/* mi_hiwat */
704 	TL_LOWAT		/* mi_lowat */
705 };
706 
/*
 * Read-side queue procedures. There is no read put procedure; the read side
 * has only a service routine plus the open and close entry points. tl_rsrv
 * returns void, hence the cast to the qinit function pointer type.
 */
707 static	struct	qinit	tl_rinit = {
708 	NULL,			/* qi_putp */
709 	(int (*)())tl_rsrv,	/* qi_srvp */
710 	tl_open,		/* qi_qopen */
711 	tl_close,		/* qi_qclose */
712 	NULL,			/* qi_qadmin */
713 	&tl_minfo,		/* qi_minfo */
714 	NULL			/* qi_mstat */
715 };
716 
/*
 * Write-side queue procedures: put and service routines only. Both return
 * void, hence the casts to the qinit function pointer type.
 */
717 static	struct	qinit	tl_winit = {
718 	(int (*)())tl_wput,	/* qi_putp */
719 	(int (*)())tl_wsrv,	/* qi_srvp */
720 	NULL,			/* qi_qopen */
721 	NULL,			/* qi_qclose */
722 	NULL,			/* qi_qadmin */
723 	&tl_minfo,		/* qi_minfo */
724 	NULL			/* qi_mstat */
725 };
726 
/* Stream table tying the two qinit tables together; no multiplexor entries. */
727 static	struct streamtab	tlinfo = {
728 	&tl_rinit,		/* st_rdinit */
729 	&tl_winit,		/* st_wrinit */
730 	NULL,			/* st_muxrinit */
731 	NULL			/* st_muxwrinit */
732 };
733 
/*
 * Define tl_devops: attach/detach/getinfo are the tl_* routines declared
 * above, the stream table is tlinfo, and the remaining slots are nulldev.
 * Quiesce is declared as not supported.
 */
734 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
735     nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
736 
/* Driver linkage structure: module type, description string and dev_ops. */
737 static struct modldrv modldrv = {
738 	&mod_driverops,		/* Type of module -- pseudo driver here */
739 	"TPI Local Transport (tl)",
740 	&tl_devops,		/* driver ops */
741 };
742 
743 /*
744  * Module linkage information for the kernel.
 * A NULL-terminated list containing the single modldrv above.
745  */
746 static struct modlinkage modlinkage = {
747 	MODREV_1,
748 	&modldrv,
749 	NULL
750 };
751 
752 /*
753  * Templates for response to info request
754  * Check sanity of unlimited connect data etc.
 *
 * The provider flags are currently identical for both service types.
755  */
756 
757 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
758 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
759 
/*
 * T_INFO_ACK template for connection-oriented endpoints. All size limits are
 * T_INFINITE; TIDU_size is left 0 here to be filled in at run time.
 * NOTE(review): CURRENT_state is -1 in the template; presumably also filled in
 * per endpoint before the ack is sent - confirm in the info request handler.
 */
760 static struct T_info_ack tl_cots_info_ack =
761 	{
762 		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
763 		T_INFINITE,	/* TSDU size */
764 		T_INFINITE,	/* ETSDU size */
765 		T_INFINITE,	/* CDATA_size */
766 		T_INFINITE,	/* DDATA_size */
767 		T_INFINITE,	/* ADDR_size  */
768 		T_INFINITE,	/* OPT_size */
769 		0,		/* TIDU_size - fill at run time */
770 		T_COTS,		/* SERV_type */
771 		-1,		/* CURRENT_state */
772 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
773 	};
774 
/*
 * T_INFO_ACK template for connectionless endpoints. TSDU_size and TIDU_size
 * are left 0 here to be filled in at run time; expedited, connect and
 * disconnect data are marked not supported (-2).
 * NOTE(review): CURRENT_state is -1 in the template; presumably filled in per
 * endpoint before the ack is sent - confirm in the info request handler.
 */
775 static struct T_info_ack tl_clts_info_ack =
776 	{
777 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
778 		0,		/* TSDU_size - fill at run time */
779 		-2,		/* ETSDU_size -2 => not supported */
780 		-2,		/* CDATA_size -2 => not supported */
781 		-2,		/* DDATA_size  -2 => not supported */
782 		-1,		/* ADDR_size -1 => unlimited */
783 		-1,		/* OPT_size */
784 		0,		/* TIDU_size - fill at run time */
785 		T_CLTS,		/* SERV_type */
786 		-1,		/* CURRENT_state */
787 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
788 	};
789 
790 /*
791  * private copy of devinfo pointer used in tl_info
792  */
793 static dev_info_t *tl_dip;
794 
795 /*
796  * Endpoints cache.
 * kmem cache handle; presumably the cache tl_endpt_t objects come from.
797  */
798 static kmem_cache_t *tl_cache;
799 /*
800  * Minor number space.
801  */
802 static id_space_t *tl_minors;
803 
804 /*
805  * Default Data Unit size.
806  */
807 static t_scalar_t tl_tidusz;
808 
809 /*
810  * Size of hash tables.
 * Starts out as TL_HASH_SIZE.
811  */
812 static size_t tl_hash_size = TL_HASH_SIZE;
813 
814 /*
815  * Debug and test variable ONLY. Turn off T_CONN_IND queueing
816  * for sockets.
817  */
818 static int tl_disable_early_connect = 0;
/* NOTE(review): no initializer or description here; presumably a debug counter - confirm at its use. */
819 static int tl_client_closing_when_accepting;
820 
/* NOTE(review): presumably counts accepts that did not switch serializers - confirm at its use. */
821 static int tl_serializer_noswitch;
822 
823 /*
824  * LOCAL FUNCTION PROTOTYPES
825  * -------------------------
826  */
827 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
828 static void tl_do_proto(mblk_t *, tl_endpt_t *);
829 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
830 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
831 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
832 	t_scalar_t);
833 static void tl_bind(mblk_t *, tl_endpt_t *);
834 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
835 static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
836 static void tl_unbind(mblk_t *, tl_endpt_t *);
837 static void tl_optmgmt(queue_t *, mblk_t *);
838 static void tl_conn_req(queue_t *, mblk_t *);
839 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
840 static void tl_conn_res(mblk_t *, tl_endpt_t *);
841 static void tl_discon_req(mblk_t *, tl_endpt_t *);
842 static void tl_capability_req(mblk_t *, tl_endpt_t *);
843 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
844 static void tl_info_req(mblk_t *, tl_endpt_t *);
845 static void tl_addr_req(mblk_t *, tl_endpt_t *);
846 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
847 static void tl_data(mblk_t  *, tl_endpt_t *);
848 static void tl_exdata(mblk_t *, tl_endpt_t *);
849 static void tl_ordrel(mblk_t *, tl_endpt_t *);
850 static void tl_unitdata(mblk_t *, tl_endpt_t *);
851 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
852 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
853 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
854 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
855 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
856 static void tl_cl_backenable(tl_endpt_t *);
857 static void tl_co_unconnect(tl_endpt_t *);
858 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
859 static void tl_discon_ind(tl_endpt_t *, uint32_t);
860 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
861 static mblk_t *tl_ordrel_ind_alloc(void);
862 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
863 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
864 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
865 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
866 static void tl_icon_freemsgs(mblk_t **);
867 static void tl_merror(queue_t *, mblk_t *, int);
868 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
869 static int tl_default_opt(queue_t *, int, int, uchar_t *);
870 static int tl_get_opt(queue_t *, int, int, uchar_t *);
871 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
872     uchar_t *, void *, cred_t *, mblk_t *);
873 static void tl_memrecover(queue_t *, mblk_t *, size_t);
874 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
875 static void tl_free(tl_endpt_t *);
876 static int  tl_constructor(void *, void *, int);
877 static void tl_destructor(void *, void *);
878 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
879 static tl_serializer_t *tl_serializer_alloc(int);
880 static void tl_serializer_refhold(tl_serializer_t *);
881 static void tl_serializer_refrele(tl_serializer_t *);
882 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
883 static void tl_serializer_exit(tl_endpt_t *);
884 static boolean_t tl_noclose(tl_endpt_t *);
885 static void tl_closeok(tl_endpt_t *);
886 static void tl_refhold(tl_endpt_t *);
887 static void tl_refrele(tl_endpt_t *);
888 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
889 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
890 static void tl_close_ser(mblk_t *, tl_endpt_t *);
891 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
892 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
893 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
894 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
895 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
896 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
897 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
898 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
899 static void tl_addr_unbind(tl_endpt_t *);
900 
901 /*
902  * Intialize option database object for TL
903  */
904 
905 optdb_obj_t tl_opt_obj = {
906 	tl_default_opt,		/* TL default value function pointer */
907 	tl_get_opt,		/* TL get function pointer */
908 	tl_set_opt,		/* TL set function pointer */
909 	B_TRUE,			/* TL is tpi provider */
910 	TL_OPT_ARR_CNT,		/* TL option database count of entries */
911 	tl_opt_arr,		/* TL option database */
912 	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
913 	tl_valid_levels_arr	/* TL valid level array */
914 };
915 
/*
 * Logical operations.
 *
 * IMPLY(X, Y) means that X implies Y i.e. when X is true, Y
 * should also be true.  Y is only evaluated when X is true.
 *
 * EQUIV(X, Y) is logical equivalence. Both X and Y should be true or false at
 * the same time.  Any non-zero value counts as true.  Each argument is
 * evaluated exactly once, so arguments with side effects are safe.
 */
#define	IMPLY(X, Y)	(!(X) || (Y))
#define	EQUIV(X, Y)	(!!(X) == !!(Y))
927 
928 /*
929  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
930  * ---------------------------------------
931  */
932 
933 /*
934  * Loadable module routines
935  */
936 int
937 _init(void)
938 {
939 	return (mod_install(&modlinkage));
940 }
941 
942 int
943 _fini(void)
944 {
945 	return (mod_remove(&modlinkage));
946 }
947 
948 int
949 _info(struct modinfo *modinfop)
950 {
951 	return (mod_info(&modlinkage, modinfop));
952 }
953 
954 /*
955  * Driver Entry Points and Other routines
956  */
957 static int
958 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
959 {
960 	int i;
961 	char name[32];
962 
963 	/*
964 	 * Resume from a checkpoint state.
965 	 */
966 	if (cmd == DDI_RESUME)
967 		return (DDI_SUCCESS);
968 
969 	if (cmd != DDI_ATTACH)
970 		return (DDI_FAILURE);
971 
972 	/*
973 	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
974 	 * streams message sizes can be unlimited. We use a defined constant
975 	 * instead.
976 	 */
977 	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
978 
979 	/*
980 	 * Create subdevices for each transport.
981 	 */
982 	for (i = 0; i < TL_UNUSED; i++) {
983 		if (ddi_create_minor_node(devi,
984 		    tl_transports[i].tr_name,
985 		    S_IFCHR, tl_transports[i].tr_minor,
986 		    DDI_PSEUDO, NULL) == DDI_FAILURE) {
987 			ddi_remove_minor_node(devi, NULL);
988 			return (DDI_FAILURE);
989 		}
990 	}
991 
992 	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
993 	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
994 
995 	if (tl_cache == NULL) {
996 		ddi_remove_minor_node(devi, NULL);
997 		return (DDI_FAILURE);
998 	}
999 
1000 	tl_minors = id_space_create("tl_minor_space",
1001 	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
1002 
1003 	/*
1004 	 * Create ID space for minor numbers
1005 	 */
1006 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1007 		tl_transport_state_t *t = &tl_transports[i];
1008 
1009 		if (i == TL_UNUSED)
1010 			continue;
1011 
1012 		/* Socket COTSORD shares namespace with COTS */
1013 		if (i == TL_SOCK_COTSORD) {
1014 			t->tr_ai_hash =
1015 			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
1016 			ASSERT(t->tr_ai_hash != NULL);
1017 			t->tr_addr_hash =
1018 			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
1019 			ASSERT(t->tr_addr_hash != NULL);
1020 			continue;
1021 		}
1022 
1023 		/*
1024 		 * Create hash tables.
1025 		 */
1026 		(void) snprintf(name, sizeof (name), "%s_ai_hash",
1027 		    t->tr_name);
1028 #ifdef _ILP32
1029 		if (i & TL_SOCKET)
1030 			t->tr_ai_hash =
1031 			    mod_hash_create_idhash(name, tl_hash_size - 1,
1032 			    mod_hash_null_valdtor);
1033 		else
1034 			t->tr_ai_hash =
1035 			    mod_hash_create_ptrhash(name, tl_hash_size,
1036 			    mod_hash_null_valdtor, sizeof (queue_t));
1037 #else
1038 		t->tr_ai_hash =
1039 		    mod_hash_create_idhash(name, tl_hash_size - 1,
1040 		    mod_hash_null_valdtor);
1041 #endif /* _ILP32 */
1042 
1043 		if (i & TL_SOCKET) {
1044 			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1045 			    t->tr_name);
1046 			t->tr_addr_hash = mod_hash_create_ptrhash(name,
1047 			    tl_hash_size, mod_hash_null_valdtor,
1048 			    sizeof (uintptr_t));
1049 		} else {
1050 			(void) snprintf(name, sizeof (name), "%s_addr_hash",
1051 			    t->tr_name);
1052 			t->tr_addr_hash = mod_hash_create_extended(name,
1053 			    tl_hash_size, mod_hash_null_keydtor,
1054 			    mod_hash_null_valdtor,
1055 			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1056 		}
1057 
1058 		/* Create serializer for connectionless transports. */
1059 		if (i & TL_TICLTS)
1060 			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1061 	}
1062 
1063 	tl_dip = devi;
1064 
1065 	return (DDI_SUCCESS);
1066 }
1067 
1068 static int
1069 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1070 {
1071 	int i;
1072 
1073 	if (cmd == DDI_SUSPEND)
1074 		return (DDI_SUCCESS);
1075 
1076 	if (cmd != DDI_DETACH)
1077 		return (DDI_FAILURE);
1078 
1079 	/*
1080 	 * Destroy arenas and hash tables.
1081 	 */
1082 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1083 		tl_transport_state_t *t = &tl_transports[i];
1084 
1085 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1086 			continue;
1087 
1088 		ASSERT(EQUIV(i & TL_TICLTS, t->tr_serializer != NULL));
1089 		if (t->tr_serializer != NULL) {
1090 			tl_serializer_refrele(t->tr_serializer);
1091 			t->tr_serializer = NULL;
1092 		}
1093 
1094 #ifdef _ILP32
1095 		if (i & TL_SOCKET)
1096 			mod_hash_destroy_idhash(t->tr_ai_hash);
1097 		else
1098 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1099 #else
1100 		mod_hash_destroy_idhash(t->tr_ai_hash);
1101 #endif /* _ILP32 */
1102 		t->tr_ai_hash = NULL;
1103 		if (i & TL_SOCKET)
1104 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1105 		else
1106 			mod_hash_destroy_hash(t->tr_addr_hash);
1107 		t->tr_addr_hash = NULL;
1108 	}
1109 
1110 	kmem_cache_destroy(tl_cache);
1111 	tl_cache = NULL;
1112 	id_space_destroy(tl_minors);
1113 	tl_minors = NULL;
1114 	ddi_remove_minor_node(devi, NULL);
1115 	return (DDI_SUCCESS);
1116 }
1117 
1118 /* ARGSUSED */
1119 static int
1120 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1121 {
1122 
1123 	int retcode = DDI_FAILURE;
1124 
1125 	switch (infocmd) {
1126 
1127 	case DDI_INFO_DEVT2DEVINFO:
1128 		if (tl_dip != NULL) {
1129 			*result = (void *)tl_dip;
1130 			retcode = DDI_SUCCESS;
1131 		}
1132 		break;
1133 
1134 	case DDI_INFO_DEVT2INSTANCE:
1135 		*result = (void *)0;
1136 		retcode = DDI_SUCCESS;
1137 		break;
1138 
1139 	default:
1140 		break;
1141 	}
1142 	return (retcode);
1143 }
1144 
1145 /*
1146  * Endpoint reference management.
1147  */
1148 static void
1149 tl_refhold(tl_endpt_t *tep)
1150 {
1151 	atomic_add_32(&tep->te_refcnt, 1);
1152 }
1153 
1154 static void
1155 tl_refrele(tl_endpt_t *tep)
1156 {
1157 	ASSERT(tep->te_refcnt != 0);
1158 
1159 	if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1160 		tl_free(tep);
1161 }
1162 
1163 /*ARGSUSED*/
1164 static int
1165 tl_constructor(void *buf, void *cdrarg, int kmflags)
1166 {
1167 	tl_endpt_t *tep = buf;
1168 
1169 	bzero(tep, sizeof (tl_endpt_t));
1170 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1171 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1172 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1173 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1174 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1175 
1176 	return (0);
1177 }
1178 
1179 /*ARGSUSED*/
1180 static void
1181 tl_destructor(void *buf, void *cdrarg)
1182 {
1183 	tl_endpt_t *tep = buf;
1184 
1185 	mutex_destroy(&tep->te_closelock);
1186 	cv_destroy(&tep->te_closecv);
1187 	mutex_destroy(&tep->te_srv_lock);
1188 	cv_destroy(&tep->te_srv_cv);
1189 	mutex_destroy(&tep->te_ser_lock);
1190 }
1191 
1192 static void
1193 tl_free(tl_endpt_t *tep)
1194 {
1195 	ASSERT(tep->te_refcnt == 0);
1196 	ASSERT(tep->te_transport != NULL);
1197 	ASSERT(tep->te_rq == NULL);
1198 	ASSERT(tep->te_wq == NULL);
1199 	ASSERT(tep->te_ser != NULL);
1200 	ASSERT(tep->te_ser_count == 0);
1201 	ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1202 
1203 	if (IS_SOCKET(tep)) {
1204 		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1205 		ASSERT(tep->te_abuf == &tep->te_uxaddr);
1206 		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1207 		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1208 	} else if (tep->te_abuf != NULL) {
1209 		kmem_free(tep->te_abuf, tep->te_alen);
1210 		tep->te_alen = -1; /* uninitialized */
1211 		tep->te_abuf = NULL;
1212 	} else {
1213 		ASSERT(tep->te_alen == -1);
1214 	}
1215 
1216 	id_free(tl_minors, tep->te_minor);
1217 	ASSERT(tep->te_credp == NULL);
1218 
1219 	if (tep->te_hash_hndl != NULL)
1220 		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1221 
1222 	if (IS_COTS(tep)) {
1223 		TL_REMOVE_PEER(tep->te_conp);
1224 		TL_REMOVE_PEER(tep->te_oconp);
1225 		tl_serializer_refrele(tep->te_ser);
1226 		tep->te_ser = NULL;
1227 		ASSERT(tep->te_nicon == 0);
1228 		ASSERT(list_head(&tep->te_iconp) == NULL);
1229 	} else {
1230 		ASSERT(tep->te_lastep == NULL);
1231 		ASSERT(list_head(&tep->te_flowlist) == NULL);
1232 		ASSERT(tep->te_flowq == NULL);
1233 	}
1234 
1235 	ASSERT(tep->te_bufcid == 0);
1236 	ASSERT(tep->te_timoutid == 0);
1237 	bzero(&tep->te_ap, sizeof (tep->te_ap));
1238 	tep->te_acceptor_id = 0;
1239 
1240 	ASSERT(tep->te_closewait == 0);
1241 	ASSERT(!tep->te_rsrv_active);
1242 	ASSERT(!tep->te_wsrv_active);
1243 	tep->te_closing = 0;
1244 	tep->te_nowsrv = B_FALSE;
1245 	tep->te_flag = 0;
1246 
1247 	kmem_cache_free(tl_cache, tep);
1248 }
1249 
1250 /*
1251  * Allocate/free reference-counted wrappers for serializers.
1252  */
1253 static tl_serializer_t *
1254 tl_serializer_alloc(int flags)
1255 {
1256 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1257 	serializer_t *ser;
1258 
1259 	if (s == NULL)
1260 		return (NULL);
1261 
1262 	ser = serializer_create(flags);
1263 
1264 	if (ser == NULL) {
1265 		kmem_free(s, sizeof (tl_serializer_t));
1266 		return (NULL);
1267 	}
1268 
1269 	s->ts_refcnt = 1;
1270 	s->ts_serializer = ser;
1271 	return (s);
1272 }
1273 
1274 static void
1275 tl_serializer_refhold(tl_serializer_t *s)
1276 {
1277 	atomic_add_32(&s->ts_refcnt, 1);
1278 }
1279 
1280 static void
1281 tl_serializer_refrele(tl_serializer_t *s)
1282 {
1283 	if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1284 		serializer_destroy(s->ts_serializer);
1285 		kmem_free(s, sizeof (tl_serializer_t));
1286 	}
1287 }
1288 
1289 /*
1290  * Post a request on the endpoint serializer. For COTS transports keep track of
1291  * the number of pending requests.
1292  */
1293 static void
1294 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1295 {
1296 	if (IS_COTS(tep)) {
1297 		mutex_enter(&tep->te_ser_lock);
1298 		tep->te_ser_count++;
1299 		mutex_exit(&tep->te_ser_lock);
1300 	}
1301 	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1302 }
1303 
1304 /*
1305  * Complete processing the request on the serializer. Decrement the counter for
1306  * pending requests for COTS transports.
1307  */
1308 static void
1309 tl_serializer_exit(tl_endpt_t *tep)
1310 {
1311 	if (IS_COTS(tep)) {
1312 		mutex_enter(&tep->te_ser_lock);
1313 		ASSERT(tep->te_ser_count != 0);
1314 		tep->te_ser_count--;
1315 		mutex_exit(&tep->te_ser_lock);
1316 	}
1317 }
1318 
1319 /*
1320  * Hash management functions.
1321  */
1322 
1323 /*
1324  * Return TRUE if two addresses are equal, false otherwise.
1325  */
1326 static boolean_t
1327 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1328 {
1329 	return ((ap1->ta_alen > 0) &&
1330 	    (ap1->ta_alen == ap2->ta_alen) &&
1331 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1332 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1333 }
1334 
1335 /*
1336  * This function is called whenever an endpoint is found in the hash table.
1337  */
1338 /* ARGSUSED0 */
1339 static void
1340 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1341 {
1342 	tl_refhold((tl_endpt_t *)val);
1343 }
1344 
1345 /*
1346  * Address hash function.
1347  */
1348 /* ARGSUSED */
1349 static uint_t
1350 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1351 {
1352 	tl_addr_t *ap = (tl_addr_t *)key;
1353 	size_t	len = ap->ta_alen;
1354 	uchar_t *p = ap->ta_abuf;
1355 	uint_t i, g;
1356 
1357 	ASSERT((len > 0) && (p != NULL));
1358 
1359 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1360 		i = (i << 4) + (*p);
1361 		if ((g = (i & 0xf0000000U)) != 0) {
1362 			i ^= (g >> 24);
1363 			i ^= g;
1364 		}
1365 	}
1366 	return (i);
1367 }
1368 
1369 /*
1370  * This function is used by hash lookups. It compares two generic addresses.
1371  */
1372 static int
1373 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1374 {
1375 #ifdef 	DEBUG
1376 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1377 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1378 
1379 	ASSERT(key1 != NULL);
1380 	ASSERT(key2 != NULL);
1381 
1382 	ASSERT(ap1->ta_abuf != NULL);
1383 	ASSERT(ap2->ta_abuf != NULL);
1384 	ASSERT(ap1->ta_alen > 0);
1385 	ASSERT(ap2->ta_alen > 0);
1386 #endif
1387 
1388 	return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1389 }
1390 
1391 /*
1392  * Prevent endpoint from closing if possible.
1393  * Return B_TRUE on success, B_FALSE on failure.
1394  */
1395 static boolean_t
1396 tl_noclose(tl_endpt_t *tep)
1397 {
1398 	boolean_t rc = B_FALSE;
1399 
1400 	mutex_enter(&tep->te_closelock);
1401 	if (! tep->te_closing) {
1402 		ASSERT(tep->te_closewait == 0);
1403 		tep->te_closewait++;
1404 		rc = B_TRUE;
1405 	}
1406 	mutex_exit(&tep->te_closelock);
1407 	return (rc);
1408 }
1409 
1410 /*
1411  * Allow endpoint to close if needed.
1412  */
1413 static void
1414 tl_closeok(tl_endpt_t *tep)
1415 {
1416 	ASSERT(tep->te_closewait > 0);
1417 	mutex_enter(&tep->te_closelock);
1418 	ASSERT(tep->te_closewait == 1);
1419 	tep->te_closewait--;
1420 	cv_signal(&tep->te_closecv);
1421 	mutex_exit(&tep->te_closelock);
1422 }
1423 
1424 /*
1425  * STREAMS open entry point.
1426  */
1427 /* ARGSUSED */
1428 static int
1429 tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
1430 {
1431 	tl_endpt_t *tep;
1432 	minor_t	    minor = getminor(*devp);
1433 
1434 	/*
1435 	 * Driver is called directly. Both CLONEOPEN and MODOPEN
1436 	 * are illegal
1437 	 */
1438 	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1439 		return (ENXIO);
1440 
1441 	if (rq->q_ptr != NULL)
1442 		return (0);
1443 
1444 	/* Minor number should specify the mode used for the driver. */
1445 	if ((minor >= TL_UNUSED))
1446 		return (ENXIO);
1447 
1448 	if (oflag & SO_SOCKSTR) {
1449 		minor |= TL_SOCKET;
1450 	}
1451 
1452 	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1453 	tep->te_refcnt = 1;
1454 	tep->te_cpid = curproc->p_pid;
1455 	rq->q_ptr = WR(rq)->q_ptr = tep;
1456 	tep->te_state = TS_UNBND;
1457 	tep->te_credp = credp;
1458 	crhold(credp);
1459 	tep->te_zoneid = getzoneid();
1460 
1461 	tep->te_flag = minor & TL_MINOR_MASK;
1462 	tep->te_transport = &tl_transports[minor];
1463 
1464 	/* Allocate a unique minor number for this instance. */
1465 	tep->te_minor = (minor_t)id_alloc(tl_minors);
1466 
1467 	/* Reserve hash handle for bind(). */
1468 	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1469 
1470 	/* Transport-specific initialization */
1471 	if (IS_COTS(tep)) {
1472 		/* Use private serializer */
1473 		tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1474 
1475 		/* Create list for pending connections */
1476 		list_create(&tep->te_iconp, sizeof (tl_icon_t),
1477 		    offsetof(tl_icon_t, ti_node));
1478 		tep->te_qlen = 0;
1479 		tep->te_nicon = 0;
1480 		tep->te_oconp = NULL;
1481 		tep->te_conp = NULL;
1482 	} else {
1483 		/* Use shared serializer */
1484 		tep->te_ser = tep->te_transport->tr_serializer;
1485 		bzero(&tep->te_flows, sizeof (list_node_t));
1486 		/* Create list for flow control */
1487 		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1488 		    offsetof(tl_endpt_t, te_flows));
1489 		tep->te_flowq = NULL;
1490 		tep->te_lastep = NULL;
1491 
1492 	}
1493 
1494 	/* Initialize endpoint address */
1495 	if (IS_SOCKET(tep)) {
1496 		/* Socket-specific address handling. */
1497 		tep->te_alen = TL_SOUX_ADDRLEN;
1498 		tep->te_abuf = &tep->te_uxaddr;
1499 		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1500 		tep->te_magic = SOU_MAGIC_IMPLICIT;
1501 	} else {
1502 		tep->te_alen = -1;
1503 		tep->te_abuf = NULL;
1504 	}
1505 
1506 	/* clone the driver */
1507 	*devp = makedevice(getmajor(*devp), tep->te_minor);
1508 
1509 	tep->te_rq = rq;
1510 	tep->te_wq = WR(rq);
1511 
1512 #ifdef	_ILP32
1513 	if (IS_SOCKET(tep))
1514 		tep->te_acceptor_id = tep->te_minor;
1515 	else
1516 		tep->te_acceptor_id = (t_uscalar_t)rq;
1517 #else
1518 	tep->te_acceptor_id = tep->te_minor;
1519 #endif	/* _ILP32 */
1520 
1521 
1522 	qprocson(rq);
1523 
1524 	/*
1525 	 * Insert acceptor ID in the hash. The AI hash always sleeps on
1526 	 * insertion so insertion can't fail.
1527 	 */
1528 	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1529 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1530 	    (mod_hash_val_t)tep);
1531 
1532 	return (0);
1533 }
1534 
1535 /* ARGSUSED1 */
1536 static int
1537 tl_close(queue_t *rq, int flag,	cred_t *credp)
1538 {
1539 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1540 	tl_endpt_t *elp = NULL;
1541 	queue_t *wq = tep->te_wq;
1542 	int rc;
1543 
1544 	ASSERT(wq == WR(rq));
1545 
1546 	/*
1547 	 * Remove the endpoint from acceptor hash.
1548 	 */
1549 	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1550 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1551 	    (mod_hash_val_t *)&elp);
1552 	ASSERT(rc == 0 && tep == elp);
1553 	if ((rc != 0) || (tep != elp)) {
1554 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
1555 		    SL_TRACE|SL_ERROR,
1556 		    "tl_close:inconsistency in AI hash"));
1557 	}
1558 
1559 	/*
1560 	 * Wait till close is safe, then mark endpoint as closing.
1561 	 */
1562 	mutex_enter(&tep->te_closelock);
1563 	while (tep->te_closewait)
1564 		cv_wait(&tep->te_closecv, &tep->te_closelock);
1565 	tep->te_closing = B_TRUE;
1566 	/*
1567 	 * Will wait for the serializer part of the close to finish, so set
1568 	 * te_closewait now.
1569 	 */
1570 	tep->te_closewait = 1;
1571 	tep->te_nowsrv = B_FALSE;
1572 	mutex_exit(&tep->te_closelock);
1573 
1574 	/*
1575 	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1576 	 * It is safe because close will wait for tl_close_ser to finish.
1577 	 */
1578 	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1579 
1580 	/*
1581 	 * Wait for the first phase of close to complete before qprocsoff().
1582 	 */
1583 	mutex_enter(&tep->te_closelock);
1584 	while (tep->te_closewait)
1585 		cv_wait(&tep->te_closecv, &tep->te_closelock);
1586 	mutex_exit(&tep->te_closelock);
1587 
1588 	qprocsoff(rq);
1589 
1590 	if (tep->te_bufcid) {
1591 		qunbufcall(rq, tep->te_bufcid);
1592 		tep->te_bufcid = 0;
1593 	}
1594 	if (tep->te_timoutid) {
1595 		(void) quntimeout(rq, tep->te_timoutid);
1596 		tep->te_timoutid = 0;
1597 	}
1598 
1599 	/*
1600 	 * Finish close behind serializer.
1601 	 *
1602 	 * For a CLTS endpoint increase a refcount and continue close processing
1603 	 * with serializer protection. This processing may happen asynchronously
1604 	 * with the completion of tl_close().
1605 	 *
1606 	 * Fot a COTS endpoint wait before destroying tep since the serializer
1607 	 * may go away together with tep and we need to destroy serializer
1608 	 * outside of serializer context.
1609 	 */
1610 	ASSERT(tep->te_closewait == 0);
1611 	if (IS_COTS(tep))
1612 		tep->te_closewait = 1;
1613 	else
1614 		tl_refhold(tep);
1615 
1616 	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1617 
1618 	/*
1619 	 * For connection-oriented transports wait for all serializer activity
1620 	 * to settle down.
1621 	 */
1622 	if (IS_COTS(tep)) {
1623 		mutex_enter(&tep->te_closelock);
1624 		while (tep->te_closewait)
1625 			cv_wait(&tep->te_closecv, &tep->te_closelock);
1626 		mutex_exit(&tep->te_closelock);
1627 	}
1628 
1629 	crfree(tep->te_credp);
1630 	tep->te_credp = NULL;
1631 	tep->te_wq = NULL;
1632 	tl_refrele(tep);
1633 	/*
1634 	 * tep is likely to be destroyed now, so can't reference it any more.
1635 	 */
1636 
1637 	rq->q_ptr = wq->q_ptr = NULL;
1638 	return (0);
1639 }
1640 
1641 /*
1642  * First phase of close processing done behind the serializer.
1643  *
1644  * Do not drop the reference in the end - tl_close() wants this reference to
1645  * stay.
1646  */
1647 /* ARGSUSED0 */
1648 static void
1649 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1650 {
1651 	ASSERT(tep->te_closing);
1652 	ASSERT(tep->te_closewait == 1);
1653 	ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1654 
1655 	tep->te_flag |= TL_CLOSE_SER;
1656 
1657 	/*
1658 	 * Drain out all messages on queue except for TL_TICOTS where the
1659 	 * abortive release semantics permit discarding of data on close
1660 	 */
1661 	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1662 		tl_wsrv_ser(NULL, tep);
1663 	}
1664 
1665 	/* Remove address from hash table. */
1666 	tl_addr_unbind(tep);
1667 	/*
1668 	 * qprocsoff() gets confused when q->q_next is not NULL on the write
1669 	 * queue of the driver, so clear these before qprocsoff() is called.
1670 	 * Also clear q_next for the peer since this queue is going away.
1671 	 */
1672 	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1673 		tl_endpt_t *peer_tep = tep->te_conp;
1674 
1675 		tep->te_wq->q_next = NULL;
1676 		if ((peer_tep != NULL) && !peer_tep->te_closing)
1677 			peer_tep->te_wq->q_next = NULL;
1678 	}
1679 
1680 	tep->te_rq = NULL;
1681 
1682 	/* wake up tl_close() */
1683 	tl_closeok(tep);
1684 	tl_serializer_exit(tep);
1685 }
1686 
1687 /*
1688  * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1689  * the reference for CLTS.
1690  *
1691  * Called from serializer. Should drop reference count for CLTS only.
1692  */
1693 /* ARGSUSED0 */
1694 static void
1695 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1696 {
1697 	ASSERT(tep->te_closing);
1698 	ASSERT(IMPLY(IS_CLTS(tep), tep->te_closewait == 0));
1699 	ASSERT(IMPLY(IS_COTS(tep), tep->te_closewait == 1));
1700 
1701 	tep->te_state = -1;	/* Uninitialized */
1702 	if (IS_COTS(tep)) {
1703 		tl_co_unconnect(tep);
1704 	} else {
1705 		/* Connectionless specific cleanup */
1706 		TL_REMOVE_PEER(tep->te_lastep);
1707 		/*
1708 		 * Backenable anybody that is flow controlled waiting for
1709 		 * this endpoint.
1710 		 */
1711 		tl_cl_backenable(tep);
1712 		if (tep->te_flowq != NULL) {
1713 			list_remove(&(tep->te_flowq->te_flowlist), tep);
1714 			tep->te_flowq = NULL;
1715 		}
1716 	}
1717 
1718 	tl_serializer_exit(tep);
1719 	if (IS_COTS(tep))
1720 		tl_closeok(tep);
1721 	else
1722 		tl_refrele(tep);
1723 }
1724 
1725 /*
1726  * STREAMS write-side put procedure.
1727  * Enter serializer for most of the processing.
1728  *
1729  * The T_CONN_REQ is processed outside of serializer.
1730  */
1731 static void
1732 tl_wput(queue_t *wq, mblk_t *mp)
1733 {
1734 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
1735 	ssize_t			msz = MBLKL(mp);
1736 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
1737 	tlproc_t		*tl_proc = NULL;
1738 
1739 	switch (DB_TYPE(mp)) {
1740 	case M_DATA:
1741 		/* Only valid for connection-oriented transports */
1742 		if (IS_CLTS(tep)) {
1743 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1744 			    SL_TRACE|SL_ERROR,
1745 			    "tl_wput:M_DATA invalid for ticlts driver"));
1746 			tl_merror(wq, mp, EPROTO);
1747 			return;
1748 		}
1749 		tl_proc = tl_wput_data_ser;
1750 		break;
1751 
1752 	case M_IOCTL:
1753 		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1754 		case TL_IOC_CREDOPT:
1755 			/* FALLTHROUGH */
1756 		case TL_IOC_UCREDOPT:
1757 			/*
1758 			 * Serialize endpoint state change.
1759 			 */
1760 			tl_proc = tl_do_ioctl_ser;
1761 			break;
1762 
1763 		default:
1764 			miocnak(wq, mp, 0, EINVAL);
1765 			return;
1766 		}
1767 		break;
1768 
1769 	case M_FLUSH:
1770 		/*
1771 		 * do canonical M_FLUSH processing
1772 		 */
1773 		if (*mp->b_rptr & FLUSHW) {
1774 			flushq(wq, FLUSHALL);
1775 			*mp->b_rptr &= ~FLUSHW;
1776 		}
1777 		if (*mp->b_rptr & FLUSHR) {
1778 			flushq(RD(wq), FLUSHALL);
1779 			qreply(wq, mp);
1780 		} else {
1781 			freemsg(mp);
1782 		}
1783 		return;
1784 
1785 	case M_PROTO:
1786 		if (msz < sizeof (prim->type)) {
1787 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1788 			    SL_TRACE|SL_ERROR,
1789 			    "tl_wput:M_PROTO data too short"));
1790 			tl_merror(wq, mp, EPROTO);
1791 			return;
1792 		}
1793 		switch (prim->type) {
1794 		case T_OPTMGMT_REQ:
1795 		case T_SVR4_OPTMGMT_REQ:
1796 			/*
1797 			 * Process TPI option management requests immediately
1798 			 * in put procedure regardless of in-order processing
1799 			 * of already queued messages.
1800 			 * (Note: This driver supports AF_UNIX socket
1801 			 * implementation.  Unless we implement this processing,
1802 			 * setsockopt() on socket endpoint will block on flow
1803 			 * controlled endpoints which it should not. That is
1804 			 * required for successful execution of VSU socket tests
1805 			 * and is consistent with BSD socket behavior).
1806 			 */
1807 			tl_optmgmt(wq, mp);
1808 			return;
1809 		case O_T_BIND_REQ:
1810 		case T_BIND_REQ:
1811 			tl_proc = tl_bind_ser;
1812 			break;
1813 		case T_CONN_REQ:
1814 			if (IS_CLTS(tep)) {
1815 				tl_merror(wq, mp, EPROTO);
1816 				return;
1817 			}
1818 			tl_conn_req(wq, mp);
1819 			return;
1820 		case T_DATA_REQ:
1821 		case T_OPTDATA_REQ:
1822 		case T_EXDATA_REQ:
1823 		case T_ORDREL_REQ:
1824 			tl_proc = tl_putq_ser;
1825 			break;
1826 		case T_UNITDATA_REQ:
1827 			if (IS_COTS(tep) ||
1828 			    (msz < sizeof (struct T_unitdata_req))) {
1829 				tl_merror(wq, mp, EPROTO);
1830 				return;
1831 			}
1832 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1833 				tl_proc = tl_unitdata_ser;
1834 			} else {
1835 				tl_proc = tl_putq_ser;
1836 			}
1837 			break;
1838 		default:
1839 			/*
1840 			 * process in service procedure if message already
1841 			 * queued (maintain in-order processing)
1842 			 */
1843 			if (wq->q_first != NULL) {
1844 				tl_proc = tl_putq_ser;
1845 			} else {
1846 				tl_proc = tl_wput_ser;
1847 			}
1848 			break;
1849 		}
1850 		break;
1851 
1852 	case M_PCPROTO:
1853 		/*
1854 		 * Check that the message has enough data to figure out TPI
1855 		 * primitive.
1856 		 */
1857 		if (msz < sizeof (prim->type)) {
1858 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1859 			    SL_TRACE|SL_ERROR,
1860 			    "tl_wput:M_PCROTO data too short"));
1861 			tl_merror(wq, mp, EPROTO);
1862 			return;
1863 		}
1864 		switch (prim->type) {
1865 		case T_CAPABILITY_REQ:
1866 			tl_capability_req(mp, tep);
1867 			return;
1868 		case T_INFO_REQ:
1869 			tl_proc = tl_info_req_ser;
1870 			break;
1871 		default:
1872 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1873 			    SL_TRACE|SL_ERROR,
1874 			    "tl_wput:unknown TPI msg primitive"));
1875 			tl_merror(wq, mp, EPROTO);
1876 			return;
1877 		}
1878 		break;
1879 	default:
1880 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1881 		    "tl_wput:default:unexpected Streams message"));
1882 		freemsg(mp);
1883 		return;
1884 	}
1885 
1886 	/*
1887 	 * Continue processing via serializer.
1888 	 */
1889 	ASSERT(tl_proc != NULL);
1890 	tl_refhold(tep);
1891 	tl_serializer_enter(tep, tl_proc, mp);
1892 }
1893 
1894 /*
1895  * Place message on the queue while preserving order.
1896  */
1897 static void
1898 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1899 {
1900 	if (tep->te_closing) {
1901 		tl_wput_ser(mp, tep);
1902 	} else {
1903 		TL_PUTQ(tep, mp);
1904 		tl_serializer_exit(tep);
1905 		tl_refrele(tep);
1906 	}
1907 
1908 }
1909 
1910 static void
1911 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1912 {
1913 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1914 
1915 	switch (DB_TYPE(mp)) {
1916 	case M_DATA:
1917 		tl_data(mp, tep);
1918 		break;
1919 	case M_PROTO:
1920 		tl_do_proto(mp, tep);
1921 		break;
1922 	default:
1923 		freemsg(mp);
1924 		break;
1925 	}
1926 }
1927 
1928 /*
1929  * Write side put procedure called from serializer.
1930  */
1931 static void
1932 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1933 {
1934 	tl_wput_common_ser(mp, tep);
1935 	tl_serializer_exit(tep);
1936 	tl_refrele(tep);
1937 }
1938 
1939 /*
1940  * M_DATA processing. Called from serializer.
1941  */
1942 static void
1943 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1944 {
1945 	tl_endpt_t	*peer_tep = tep->te_conp;
1946 	queue_t		*peer_rq;
1947 
1948 	ASSERT(DB_TYPE(mp) == M_DATA);
1949 	ASSERT(IS_COTS(tep));
1950 
1951 	ASSERT(IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer));
1952 
1953 	/*
1954 	 * fastpath for data. Ignore flow control if tep is closing.
1955 	 */
1956 	if ((peer_tep != NULL) &&
1957 	    !peer_tep->te_closing &&
1958 	    ((tep->te_state == TS_DATA_XFER) ||
1959 	    (tep->te_state == TS_WREQ_ORDREL)) &&
1960 	    (tep->te_wq != NULL) &&
1961 	    (tep->te_wq->q_first == NULL) &&
1962 	    ((peer_tep->te_state == TS_DATA_XFER) ||
1963 	    (peer_tep->te_state == TS_WREQ_ORDREL))	&&
1964 	    ((peer_rq = peer_tep->te_rq) != NULL) &&
1965 	    (canputnext(peer_rq) || tep->te_closing)) {
1966 		putnext(peer_rq, mp);
1967 	} else if (tep->te_closing) {
1968 		/*
1969 		 * It is possible that by the time we got here tep started to
1970 		 * close. If the write queue is not empty, and the state is
1971 		 * TS_DATA_XFER the data should be delivered in order, so we
1972 		 * call putq() instead of freeing the data.
1973 		 */
1974 		if ((tep->te_wq != NULL) &&
1975 		    ((tep->te_state == TS_DATA_XFER) ||
1976 		    (tep->te_state == TS_WREQ_ORDREL))) {
1977 			TL_PUTQ(tep, mp);
1978 		} else {
1979 			freemsg(mp);
1980 		}
1981 	} else {
1982 		TL_PUTQ(tep, mp);
1983 	}
1984 
1985 	tl_serializer_exit(tep);
1986 	tl_refrele(tep);
1987 }
1988 
1989 /*
1990  * Write side service routine.
1991  *
1992  * All actual processing happens within serializer which is entered
1993  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1994  * messages that need processing may have arrived, so tl_wsrv repeats until
1995  * queue is empty or te_nowsrv is set.
1996  */
1997 static void
1998 tl_wsrv(queue_t *wq)
1999 {
2000 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2001 
2002 	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
2003 		mutex_enter(&tep->te_srv_lock);
2004 		ASSERT(tep->te_wsrv_active == B_FALSE);
2005 		tep->te_wsrv_active = B_TRUE;
2006 		mutex_exit(&tep->te_srv_lock);
2007 
2008 		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2009 
2010 		/*
2011 		 * Wait for serializer job to complete.
2012 		 */
2013 		mutex_enter(&tep->te_srv_lock);
2014 		while (tep->te_wsrv_active) {
2015 			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2016 		}
2017 		cv_signal(&tep->te_srv_cv);
2018 		mutex_exit(&tep->te_srv_lock);
2019 	}
2020 }
2021 
2022 /*
2023  * Serialized write side processing of the STREAMS queue.
2024  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2025  * is NULL.
2026  */
2027 static void
2028 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2029 {
2030 	mblk_t *mp;
2031 	queue_t *wq = tep->te_wq;
2032 
2033 	ASSERT(wq != NULL);
2034 	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2035 		tl_wput_common_ser(mp, tep);
2036 	}
2037 
2038 	/*
2039 	 * Wakeup service routine unless called from close.
2040 	 * If ser_mp is specified, the caller is tl_wsrv().
2041 	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2042 	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2043 	 * be no matching tl_serializer_exit() in this case.
2044 	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2045 	 * waiting on te_srv_cv.
2046 	 */
2047 	if (ser_mp != NULL) {
2048 		/*
2049 		 * We are called from tl_wsrv.
2050 		 */
2051 		mutex_enter(&tep->te_srv_lock);
2052 		ASSERT(tep->te_wsrv_active);
2053 		tep->te_wsrv_active = B_FALSE;
2054 		cv_signal(&tep->te_srv_cv);
2055 		mutex_exit(&tep->te_srv_lock);
2056 		tl_serializer_exit(tep);
2057 	}
2058 }
2059 
2060 /*
2061  * Called when the stream is backenabled. Enter serializer and qenable everyone
2062  * flow controlled by tep.
2063  *
2064  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2065  * is possible that two instances of tl_rsrv will be running reusing the same
2066  * rsrv mblk.
2067  */
2068 static void
2069 tl_rsrv(queue_t *rq)
2070 {
2071 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2072 
2073 	ASSERT(rq->q_first == NULL);
2074 	ASSERT(tep->te_rsrv_active == 0);
2075 
2076 	tep->te_rsrv_active = B_TRUE;
2077 	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2078 	/*
2079 	 * Wait for serializer job to complete.
2080 	 */
2081 	mutex_enter(&tep->te_srv_lock);
2082 	while (tep->te_rsrv_active) {
2083 		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2084 	}
2085 	cv_signal(&tep->te_srv_cv);
2086 	mutex_exit(&tep->te_srv_lock);
2087 }
2088 
2089 /* ARGSUSED */
2090 static void
2091 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2092 {
2093 	tl_endpt_t *peer_tep;
2094 
2095 	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2096 		tl_cl_backenable(tep);
2097 	} else if (
2098 	    IS_COTS(tep) &&
2099 	    ((peer_tep = tep->te_conp) != NULL) &&
2100 	    !peer_tep->te_closing &&
2101 	    ((tep->te_state == TS_DATA_XFER) ||
2102 	    (tep->te_state == TS_WIND_ORDREL)||
2103 	    (tep->te_state == TS_WREQ_ORDREL))) {
2104 		TL_QENABLE(peer_tep);
2105 	}
2106 
2107 	/*
2108 	 * Wakeup read side service routine.
2109 	 */
2110 	mutex_enter(&tep->te_srv_lock);
2111 	ASSERT(tep->te_rsrv_active);
2112 	tep->te_rsrv_active = B_FALSE;
2113 	cv_signal(&tep->te_srv_cv);
2114 	mutex_exit(&tep->te_srv_lock);
2115 	tl_serializer_exit(tep);
2116 }
2117 
2118 /*
2119  * process M_PROTO messages. Always called from serializer.
2120  */
2121 static void
2122 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2123 {
2124 	ssize_t			msz = MBLKL(mp);
2125 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2126 
2127 	/* Message size was validated by tl_wput(). */
2128 	ASSERT(msz >= sizeof (prim->type));
2129 
2130 	switch (prim->type) {
2131 	case T_UNBIND_REQ:
2132 		tl_unbind(mp, tep);
2133 		break;
2134 
2135 	case T_ADDR_REQ:
2136 		tl_addr_req(mp, tep);
2137 		break;
2138 
2139 	case O_T_CONN_RES:
2140 	case T_CONN_RES:
2141 		if (IS_CLTS(tep)) {
2142 			tl_merror(tep->te_wq, mp, EPROTO);
2143 			break;
2144 		}
2145 		tl_conn_res(mp, tep);
2146 		break;
2147 
2148 	case T_DISCON_REQ:
2149 		if (IS_CLTS(tep)) {
2150 			tl_merror(tep->te_wq, mp, EPROTO);
2151 			break;
2152 		}
2153 		tl_discon_req(mp, tep);
2154 		break;
2155 
2156 	case T_DATA_REQ:
2157 		if (IS_CLTS(tep)) {
2158 			tl_merror(tep->te_wq, mp, EPROTO);
2159 			break;
2160 		}
2161 		tl_data(mp, tep);
2162 		break;
2163 
2164 	case T_OPTDATA_REQ:
2165 		if (IS_CLTS(tep)) {
2166 			tl_merror(tep->te_wq, mp, EPROTO);
2167 			break;
2168 		}
2169 		tl_data(mp, tep);
2170 		break;
2171 
2172 	case T_EXDATA_REQ:
2173 		if (IS_CLTS(tep)) {
2174 			tl_merror(tep->te_wq, mp, EPROTO);
2175 			break;
2176 		}
2177 		tl_exdata(mp, tep);
2178 		break;
2179 
2180 	case T_ORDREL_REQ:
2181 		if (! IS_COTSORD(tep)) {
2182 			tl_merror(tep->te_wq, mp, EPROTO);
2183 			break;
2184 		}
2185 		tl_ordrel(mp, tep);
2186 		break;
2187 
2188 	case T_UNITDATA_REQ:
2189 		if (IS_COTS(tep)) {
2190 			tl_merror(tep->te_wq, mp, EPROTO);
2191 			break;
2192 		}
2193 		tl_unitdata(mp, tep);
2194 		break;
2195 
2196 	default:
2197 		tl_merror(tep->te_wq, mp, EPROTO);
2198 		break;
2199 	}
2200 }
2201 
2202 /*
2203  * Process ioctl from serializer.
2204  * This is a wrapper around tl_do_ioctl().
2205  */
2206 static void
2207 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2208 {
2209 	if (! tep->te_closing)
2210 		tl_do_ioctl(mp, tep);
2211 	else
2212 		freemsg(mp);
2213 
2214 	tl_serializer_exit(tep);
2215 	tl_refrele(tep);
2216 }
2217 
2218 static void
2219 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2220 {
2221 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2222 	int cmd = iocbp->ioc_cmd;
2223 	queue_t *wq = tep->te_wq;
2224 	int error;
2225 	int thisopt, otheropt;
2226 
2227 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2228 
2229 	switch (cmd) {
2230 	case TL_IOC_CREDOPT:
2231 		if (cmd == TL_IOC_CREDOPT) {
2232 			thisopt = TL_SETCRED;
2233 			otheropt = TL_SETUCRED;
2234 		} else {
2235 			/* FALLTHROUGH */
2236 	case TL_IOC_UCREDOPT:
2237 			thisopt = TL_SETUCRED;
2238 			otheropt = TL_SETCRED;
2239 		}
2240 		/*
2241 		 * The credentials passing does not apply to sockets.
2242 		 * Only one of the cred options can be set at a given time.
2243 		 */
2244 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2245 			miocnak(wq, mp, 0, EINVAL);
2246 			return;
2247 		}
2248 
2249 		/*
2250 		 * Turn on generation of credential options for
2251 		 * T_conn_req, T_conn_con, T_unidata_ind.
2252 		 */
2253 		error = miocpullup(mp, sizeof (uint32_t));
2254 		if (error != 0) {
2255 			miocnak(wq, mp, 0, error);
2256 			return;
2257 		}
2258 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2259 			miocnak(wq, mp, 0, EINVAL);
2260 			return;
2261 		}
2262 
2263 		if (*(uint32_t *)mp->b_cont->b_rptr)
2264 			tep->te_flag |= thisopt;
2265 		else
2266 			tep->te_flag &= ~thisopt;
2267 
2268 		miocack(wq, mp, 0, 0);
2269 		break;
2270 
2271 	default:
2272 		/* Should not be here */
2273 		miocnak(wq, mp, 0, EINVAL);
2274 		break;
2275 	}
2276 }
2277 
2278 
2279 /*
2280  * send T_ERROR_ACK
2281  * Note: assumes enough memory or caller passed big enough mp
2282  *	- no recovery from allocb failures
2283  */
2284 
2285 static void
2286 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2287     t_scalar_t unix_err, t_scalar_t type)
2288 {
2289 	struct T_error_ack *err_ack;
2290 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2291 	    M_PCPROTO, T_ERROR_ACK);
2292 
2293 	if (ackmp == NULL) {
2294 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2295 		    "tl_error_ack:out of mblk memory"));
2296 		tl_merror(wq, NULL, ENOSR);
2297 		return;
2298 	}
2299 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2300 	err_ack->ERROR_prim = type;
2301 	err_ack->TLI_error = tli_err;
2302 	err_ack->UNIX_error = unix_err;
2303 
2304 	/*
2305 	 * send error ack message
2306 	 */
2307 	qreply(wq, ackmp);
2308 }
2309 
2310 
2311 
2312 /*
2313  * send T_OK_ACK
2314  * Note: assumes enough memory or caller passed big enough mp
2315  *	- no recovery from allocb failures
2316  */
2317 static void
2318 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2319 {
2320 	struct T_ok_ack *ok_ack;
2321 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2322 	    M_PCPROTO, T_OK_ACK);
2323 
2324 	if (ackmp == NULL) {
2325 		tl_merror(wq, NULL, ENOMEM);
2326 		return;
2327 	}
2328 
2329 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2330 	ok_ack->CORRECT_prim = type;
2331 
2332 	(void) qreply(wq, ackmp);
2333 }
2334 
2335 /*
2336  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2337  * This is a wrapper around tl_bind().
2338  */
2339 static void
2340 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2341 {
2342 	if (! tep->te_closing)
2343 		tl_bind(mp, tep);
2344 	else
2345 		freemsg(mp);
2346 
2347 	tl_serializer_exit(tep);
2348 	tl_refrele(tep);
2349 }
2350 
2351 /*
2352  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2353  * Assumes that the endpoint is in the unbound.
2354  */
2355 static void
2356 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2357 {
2358 	queue_t			*wq = tep->te_wq;
2359 	struct T_bind_ack	*b_ack;
2360 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2361 	mblk_t			*ackmp, *bamp;
2362 	soux_addr_t		ux_addr;
2363 	t_uscalar_t		qlen = 0;
2364 	t_scalar_t		alen, aoff;
2365 	tl_addr_t		addr_req;
2366 	void			*addr_startp;
2367 	ssize_t			msz = MBLKL(mp), basize;
2368 	t_scalar_t		tli_err = 0, unix_err = 0;
2369 	t_scalar_t		save_prim_type = bind->PRIM_type;
2370 	t_scalar_t		save_state = tep->te_state;
2371 
2372 	if (tep->te_state != TS_UNBND) {
2373 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2374 		    SL_TRACE|SL_ERROR,
2375 		    "tl_wput:bind_request:out of state, state=%d",
2376 		    tep->te_state));
2377 		tli_err = TOUTSTATE;
2378 		goto error;
2379 	}
2380 
2381 	if (msz < sizeof (struct T_bind_req)) {
2382 		tli_err = TSYSERR; unix_err = EINVAL;
2383 		goto error;
2384 	}
2385 
2386 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2387 
2388 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2389 	    (bind->PRIM_type == T_BIND_REQ));
2390 
2391 	alen = bind->ADDR_length;
2392 	aoff = bind->ADDR_offset;
2393 
2394 	/* negotiate max conn req pending */
2395 	if (IS_COTS(tep)) {
2396 		qlen = bind->CONIND_number;
2397 		if (qlen > tl_maxqlen)
2398 			qlen = tl_maxqlen;
2399 	}
2400 
2401 	/*
2402 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2403 	 * and bound again.
2404 	 */
2405 	if ((tep->te_hash_hndl == NULL) &&
2406 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2407 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2408 	    &tep->te_hash_hndl) != 0) {
2409 		tli_err = TSYSERR; unix_err = ENOSR;
2410 		goto error;
2411 	}
2412 
2413 	/*
2414 	 * Verify address correctness.
2415 	 */
2416 	if (IS_SOCKET(tep)) {
2417 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2418 
2419 		if ((alen != TL_SOUX_ADDRLEN) ||
2420 		    (aoff < 0) ||
2421 		    (aoff + alen > msz)) {
2422 			(void) (STRLOG(TL_ID, tep->te_minor,
2423 			    1, SL_TRACE|SL_ERROR,
2424 			    "tl_bind: invalid socket addr"));
2425 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2426 			tli_err = TSYSERR; unix_err = EINVAL;
2427 			goto error;
2428 		}
2429 		/* Copy address from message to local buffer. */
2430 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2431 		/*
2432 		 * Check that we got correct address from sockets
2433 		 */
2434 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2435 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2436 			(void) (STRLOG(TL_ID, tep->te_minor,
2437 			    1, SL_TRACE|SL_ERROR,
2438 			    "tl_bind: invalid socket magic"));
2439 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2440 			tli_err = TSYSERR; unix_err = EINVAL;
2441 			goto error;
2442 		}
2443 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2444 		    (ux_addr.soua_vp != NULL)) {
2445 			(void) (STRLOG(TL_ID, tep->te_minor,
2446 			    1, SL_TRACE|SL_ERROR,
2447 			    "tl_bind: implicit addr non-empty"));
2448 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2449 			tli_err = TSYSERR; unix_err = EINVAL;
2450 			goto error;
2451 		}
2452 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2453 		    (ux_addr.soua_vp == NULL)) {
2454 			(void) (STRLOG(TL_ID, tep->te_minor,
2455 			    1, SL_TRACE|SL_ERROR,
2456 			    "tl_bind: explicit addr empty"));
2457 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2458 			tli_err = TSYSERR; unix_err = EINVAL;
2459 			goto error;
2460 		}
2461 	} else {
2462 		if ((alen > 0) && ((aoff < 0) ||
2463 		    ((ssize_t)(aoff + alen) > msz) ||
2464 		    ((aoff + alen) < 0))) {
2465 			(void) (STRLOG(TL_ID, tep->te_minor,
2466 			    1, SL_TRACE|SL_ERROR,
2467 			    "tl_bind: invalid message"));
2468 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2469 			tli_err = TSYSERR; unix_err = EINVAL;
2470 			goto error;
2471 		}
2472 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2473 			(void) (STRLOG(TL_ID, tep->te_minor,
2474 			    1, SL_TRACE|SL_ERROR,
2475 			    "tl_bind: bad addr in  message"));
2476 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2477 			tli_err = TBADADDR;
2478 			goto error;
2479 		}
2480 #ifdef DEBUG
2481 		/*
2482 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2483 		 * if (! assertion)
2484 		 *	log warning;
2485 		 */
2486 		if (! ((alen == 0 && aoff == 0) ||
2487 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2488 			(void) (STRLOG(TL_ID, tep->te_minor,
2489 				    3, SL_TRACE|SL_ERROR,
2490 				    "tl_bind: addr overlaps TPI message"));
2491 		}
2492 #endif
2493 	}
2494 
2495 	/*
2496 	 * Bind the address provided or allocate one if requested.
2497 	 * Allow rebinds with a new qlen value.
2498 	 */
2499 	if (IS_SOCKET(tep)) {
2500 		/*
2501 		 * For anonymous requests the te_ap is already set up properly
2502 		 * so use minor number as an address.
2503 		 * For explicit requests need to check whether the address is
2504 		 * already in use.
2505 		 */
2506 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2507 			int rc;
2508 
2509 			if (tep->te_flag & TL_ADDRHASHED) {
2510 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2511 				if (tep->te_vp == ux_addr.soua_vp)
2512 					goto skip_addr_bind;
2513 				else /* Rebind to a new address. */
2514 					tl_addr_unbind(tep);
2515 			}
2516 			/*
2517 			 * Insert address in the hash if it is not already
2518 			 * there.  Since we use preallocated handle, the insert
2519 			 * can fail only if the key is already present.
2520 			 */
2521 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2522 			    (mod_hash_key_t)ux_addr.soua_vp,
2523 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2524 
2525 			if (rc != 0) {
2526 				ASSERT(rc == MH_ERR_DUPLICATE);
2527 				/*
2528 				 * Violate O_T_BIND_REQ semantics and fail with
2529 				 * TADDRBUSY - sockets will not use any address
2530 				 * other than supplied one for explicit binds.
2531 				 */
2532 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2533 				    SL_TRACE|SL_ERROR,
2534 				    "tl_bind:requested addr %p is busy",
2535 				    ux_addr.soua_vp));
2536 				tli_err = TADDRBUSY; unix_err = 0;
2537 				goto error;
2538 			}
2539 			tep->te_uxaddr = ux_addr;
2540 			tep->te_flag |= TL_ADDRHASHED;
2541 			tep->te_hash_hndl = NULL;
2542 		}
2543 	} else if (alen == 0) {
2544 		/*
2545 		 * assign any free address
2546 		 */
2547 		if (! tl_get_any_addr(tep, NULL)) {
2548 			(void) (STRLOG(TL_ID, tep->te_minor,
2549 			    1, SL_TRACE|SL_ERROR,
2550 			    "tl_bind:failed to get buffer for any "
2551 			    "address"));
2552 			tli_err = TSYSERR; unix_err = ENOSR;
2553 			goto error;
2554 		}
2555 	} else {
2556 		addr_req.ta_alen = alen;
2557 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2558 		addr_req.ta_zoneid = tep->te_zoneid;
2559 
2560 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2561 		if (tep->te_abuf == NULL) {
2562 			tli_err = TSYSERR; unix_err = ENOSR;
2563 			goto error;
2564 		}
2565 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2566 		tep->te_alen = alen;
2567 
2568 		if (mod_hash_insert_reserve(tep->te_addrhash,
2569 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2570 		    tep->te_hash_hndl) != 0) {
2571 			if (save_prim_type == T_BIND_REQ) {
2572 				/*
2573 				 * The bind semantics for this primitive
2574 				 * require a failure if the exact address
2575 				 * requested is busy
2576 				 */
2577 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2578 				    SL_TRACE|SL_ERROR,
2579 				    "tl_bind:requested addr is busy"));
2580 				tli_err = TADDRBUSY; unix_err = 0;
2581 				goto error;
2582 			}
2583 
2584 			/*
2585 			 * O_T_BIND_REQ semantics say if address if requested
2586 			 * address is busy, bind to any available free address
2587 			 */
2588 			if (! tl_get_any_addr(tep, &addr_req)) {
2589 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2590 				    SL_TRACE|SL_ERROR,
2591 				    "tl_bind:unable to get any addr buf"));
2592 				tli_err = TSYSERR; unix_err = ENOMEM;
2593 				goto error;
2594 			}
2595 		} else {
2596 			tep->te_flag |= TL_ADDRHASHED;
2597 			tep->te_hash_hndl = NULL;
2598 		}
2599 	}
2600 
2601 	ASSERT(tep->te_alen >= 0);
2602 
2603 skip_addr_bind:
2604 	/*
2605 	 * prepare T_BIND_ACK TPI message
2606 	 */
2607 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2608 	bamp = reallocb(mp, basize, 0);
2609 	if (bamp == NULL) {
2610 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2611 		    "tl_wput:tl_bind: allocb failed"));
2612 		/*
2613 		 * roll back state changes
2614 		 */
2615 		tl_addr_unbind(tep);
2616 		tep->te_state = TS_UNBND;
2617 		tl_memrecover(wq, mp, basize);
2618 		return;
2619 	}
2620 
2621 	DB_TYPE(bamp) = M_PCPROTO;
2622 	bamp->b_wptr = bamp->b_rptr + basize;
2623 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2624 	b_ack->PRIM_type = T_BIND_ACK;
2625 	b_ack->CONIND_number = qlen;
2626 	b_ack->ADDR_length = tep->te_alen;
2627 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2628 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2629 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2630 
2631 	if (IS_COTS(tep)) {
2632 		tep->te_qlen = qlen;
2633 		if (qlen > 0)
2634 			tep->te_flag |= TL_LISTENER;
2635 	}
2636 
2637 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2638 	/*
2639 	 * send T_BIND_ACK message
2640 	 */
2641 	(void) qreply(wq, bamp);
2642 	return;
2643 
2644 error:
2645 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2646 	if (ackmp == NULL) {
2647 		/*
2648 		 * roll back state changes
2649 		 */
2650 		tep->te_state = save_state;
2651 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2652 		return;
2653 	}
2654 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2655 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2656 }
2657 
2658 /*
2659  * Process T_UNBIND_REQ.
2660  * Called from serializer.
2661  */
2662 static void
2663 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2664 {
2665 	queue_t *wq;
2666 	mblk_t *ackmp;
2667 
2668 	if (tep->te_closing) {
2669 		freemsg(mp);
2670 		return;
2671 	}
2672 
2673 	wq = tep->te_wq;
2674 
2675 	/*
2676 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2677 	 * ==> allocate for T_ERROR_ACK (known max)
2678 	 */
2679 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2680 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2681 		return;
2682 	}
2683 	/*
2684 	 * memory resources committed
2685 	 * Note: no message validation. T_UNBIND_REQ message is
2686 	 * same size as PRIM_type field so already verified earlier.
2687 	 */
2688 
2689 	/*
2690 	 * validate state
2691 	 */
2692 	if (tep->te_state != TS_IDLE) {
2693 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2694 		    SL_TRACE|SL_ERROR,
2695 		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2696 		    tep->te_state));
2697 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2698 		return;
2699 	}
2700 	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2701 
2702 	/*
2703 	 * TPI says on T_UNBIND_REQ:
2704 	 *    send up a M_FLUSH to flush both
2705 	 *    read and write queues
2706 	 */
2707 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2708 
2709 	if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2710 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2711 
2712 		/*
2713 		 * Sockets use bind with qlen==0 followed by bind() to
2714 		 * the same address with qlen > 0 for listeners.
2715 		 * We allow rebind with a new qlen value.
2716 		 */
2717 		tl_addr_unbind(tep);
2718 	}
2719 
2720 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2721 	/*
2722 	 * send  T_OK_ACK
2723 	 */
2724 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2725 }
2726 
2727 
2728 /*
2729  * Option management code from drv/ip is used here
2730  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2731  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2732  *	However, that is what we want as that option is 'unorthodox'
2733  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2734  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2735  * Note2: use of optcom_req means this routine is an exception to
2736  *	 recovery from allocb() failures.
2737  */
2738 
2739 static void
2740 tl_optmgmt(queue_t *wq, mblk_t *mp)
2741 {
2742 	tl_endpt_t *tep;
2743 	mblk_t *ackmp;
2744 	union T_primitives *prim;
2745 
2746 	tep = (tl_endpt_t *)wq->q_ptr;
2747 	prim = (union T_primitives *)mp->b_rptr;
2748 
2749 	/*  all states OK for AF_UNIX options ? */
2750 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2751 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2752 		/*
2753 		 * Broken TLI semantics that options can only be managed
2754 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2755 		 * tests this TLI (mis)feature using this device driver.
2756 		 */
2757 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2758 		    SL_TRACE|SL_ERROR,
2759 		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2760 		    tep->te_state));
2761 		/*
2762 		 * preallocate memory for T_ERROR_ACK
2763 		 */
2764 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2765 		if (! ackmp) {
2766 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2767 			return;
2768 		}
2769 
2770 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2771 		freemsg(mp);
2772 		return;
2773 	}
2774 
2775 	/*
2776 	 * call common option management routine from drv/ip
2777 	 */
2778 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2779 		(void) svr4_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj,
2780 		    B_FALSE);
2781 	} else {
2782 		ASSERT(prim->type == T_OPTMGMT_REQ);
2783 		(void) tpi_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj,
2784 		    B_FALSE);
2785 	}
2786 }
2787 
2788 /*
2789  * Handle T_conn_req - the driver part of accept().
2790  * If TL_SET[U]CRED generate the credentials options.
2791  * If this is a socket pass through options unmodified.
2792  * For sockets generate the T_CONN_CON here instead of
2793  * waiting for the T_CONN_RES.
2794  */
2795 static void
2796 tl_conn_req(queue_t *wq, mblk_t *mp)
2797 {
2798 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2799 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2800 	ssize_t			msz = MBLKL(mp);
2801 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2802 	tl_endpt_t		*peer_tep = NULL;
2803 	mblk_t			*ackmp;
2804 	mblk_t			*dimp;
2805 	struct T_discon_ind	*di;
2806 	soux_addr_t		ux_addr;
2807 	tl_addr_t		dst;
2808 
2809 	ASSERT(IS_COTS(tep));
2810 
2811 	if (tep->te_closing) {
2812 		freemsg(mp);
2813 		return;
2814 	}
2815 
2816 	/*
2817 	 * preallocate memory for:
2818 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2819 	 *	==> known max T_ERROR_ACK
2820 	 * 2. max of T_DISCON_IND and T_CONN_IND
2821 	 */
2822 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2823 	if (! ackmp) {
2824 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2825 		return;
2826 	}
2827 	/*
2828 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2829 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2830 	 */
2831 
2832 	if (tep->te_state != TS_IDLE) {
2833 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2834 		    SL_TRACE|SL_ERROR,
2835 		    "tl_wput:T_CONN_REQ:out of state, state=%d",
2836 		    tep->te_state));
2837 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2838 		freemsg(mp);
2839 		return;
2840 	}
2841 
2842 	/*
2843 	 * validate the message
2844 	 * Note: dereference fields in struct inside message only
2845 	 * after validating the message length.
2846 	 */
2847 	if (msz < sizeof (struct T_conn_req)) {
2848 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2849 		    "tl_conn_req:invalid message length"));
2850 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2851 		freemsg(mp);
2852 		return;
2853 	}
2854 	alen = creq->DEST_length;
2855 	aoff = creq->DEST_offset;
2856 	olen = creq->OPT_length;
2857 	ooff = creq->OPT_offset;
2858 	if (olen == 0)
2859 		ooff = 0;
2860 
2861 	if (IS_SOCKET(tep)) {
2862 		if ((alen != TL_SOUX_ADDRLEN) ||
2863 		    (aoff < 0) ||
2864 		    (aoff + alen > msz) ||
2865 		    (alen > msz - sizeof (struct T_conn_req))) {
2866 			(void) (STRLOG(TL_ID, tep->te_minor,
2867 				    1, SL_TRACE|SL_ERROR,
2868 				    "tl_conn_req: invalid socket addr"));
2869 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2870 			freemsg(mp);
2871 			return;
2872 		}
2873 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2874 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2875 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2876 			(void) (STRLOG(TL_ID, tep->te_minor,
2877 			    1, SL_TRACE|SL_ERROR,
2878 			    "tl_conn_req: invalid socket magic"));
2879 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2880 			freemsg(mp);
2881 			return;
2882 		}
2883 	} else {
2884 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2885 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2886 		    ooff + olen < 0)) ||
2887 		    olen < 0 || ooff < 0) {
2888 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2889 			    SL_TRACE|SL_ERROR,
2890 			    "tl_conn_req:invalid message"));
2891 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2892 			freemsg(mp);
2893 			return;
2894 		}
2895 
2896 		if (alen <= 0 || aoff < 0 ||
2897 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2898 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2899 				    SL_TRACE|SL_ERROR,
2900 				    "tl_conn_req:bad addr in message, "
2901 				    "alen=%d, msz=%ld",
2902 				    alen, msz));
2903 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2904 			freemsg(mp);
2905 			return;
2906 		}
2907 #ifdef DEBUG
2908 		/*
2909 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2910 		 * if (! assertion)
2911 		 *	log warning;
2912 		 */
2913 		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2914 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
2915 			    SL_TRACE|SL_ERROR,
2916 			    "tl_conn_req: addr overlaps TPI message"));
2917 		}
2918 #endif
2919 		if (olen) {
2920 			/*
2921 			 * no opts in connect req
2922 			 * supported in this provider except for sockets.
2923 			 */
2924 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2925 			    SL_TRACE|SL_ERROR,
2926 			    "tl_conn_req:options not supported "
2927 			    "in message"));
2928 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2929 			freemsg(mp);
2930 			return;
2931 		}
2932 	}
2933 
2934 	/*
2935 	 * Prevent tep from closing on us.
2936 	 */
2937 	if (! tl_noclose(tep)) {
2938 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2939 		    "tl_conn_req:endpoint is closing"));
2940 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2941 		freemsg(mp);
2942 		return;
2943 	}
2944 
2945 	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2946 	/*
2947 	 * get endpoint to connect to
2948 	 * check that peer with DEST addr is bound to addr
2949 	 * and has CONIND_number > 0
2950 	 */
2951 	dst.ta_alen = alen;
2952 	dst.ta_abuf = mp->b_rptr + aoff;
2953 	dst.ta_zoneid = tep->te_zoneid;
2954 
2955 	/*
2956 	 * Verify if remote addr is in use
2957 	 */
2958 	peer_tep = (IS_SOCKET(tep) ?
2959 	    tl_sock_find_peer(tep, &ux_addr) :
2960 	    tl_find_peer(tep, &dst));
2961 
2962 	if (peer_tep == NULL) {
2963 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2964 		    "tl_conn_req:no one at connect address"));
2965 		err = ECONNREFUSED;
2966 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2967 		/*
2968 		 * validate that number of incoming connection is
2969 		 * not to capacity on destination endpoint
2970 		 */
2971 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2972 		    "tl_conn_req: qlen overflow connection refused"));
2973 			err = ECONNREFUSED;
2974 	}
2975 
2976 	/*
2977 	 * Send T_DISCON_IND in case of error
2978 	 */
2979 	if (err != 0) {
2980 		if (peer_tep != NULL)
2981 			tl_refrele(peer_tep);
2982 		/* We are still expected to send T_OK_ACK */
2983 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2984 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2985 		tl_closeok(tep);
2986 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2987 		    M_PROTO, T_DISCON_IND);
2988 		if (dimp == NULL) {
2989 			tl_merror(wq, NULL, ENOSR);
2990 			return;
2991 		}
2992 		di = (struct T_discon_ind *)dimp->b_rptr;
2993 		di->DISCON_reason = err;
2994 		di->SEQ_number = BADSEQNUM;
2995 
2996 		tep->te_state = TS_IDLE;
2997 		/*
2998 		 * send T_DISCON_IND message
2999 		 */
3000 		putnext(tep->te_rq, dimp);
3001 		return;
3002 	}
3003 
3004 	ASSERT(IS_COTS(peer_tep));
3005 
3006 	/*
3007 	 * Found the listener. At this point processing will continue on
3008 	 * listener serializer. Close of the endpoint should be blocked while we
3009 	 * switch serializers.
3010 	 */
3011 	tl_serializer_refhold(peer_tep->te_ser);
3012 	tl_serializer_refrele(tep->te_ser);
3013 	tep->te_ser = peer_tep->te_ser;
3014 	ASSERT(tep->te_oconp == NULL);
3015 	tep->te_oconp = peer_tep;
3016 
3017 	/*
3018 	 * It is safe to close now. Close may continue on listener serializer.
3019 	 */
3020 	tl_closeok(tep);
3021 
3022 	/*
3023 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3024 	 * data, so we link mp to ackmp.
3025 	 */
3026 	ackmp->b_cont = mp;
3027 	mp = ackmp;
3028 
3029 	tl_refhold(tep);
3030 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3031 }
3032 
3033 /*
3034  * Finish T_CONN_REQ processing on listener serializer.
 *
 * Arguments:
 *	mp	- the preallocated ack message; the original T_CONN_REQ (with
 *		  any user data) is linked on mp->b_cont.
 *	tep	- the connecting endpoint. tep->te_oconp names the listener.
 *
 * Successful path:
 *	- T_OK_ACK is sent on tep's write queue,
 *	- a tl_icon_t for tep is appended to the listener's te_iconp list and
 *	  the listener's te_nicon is incremented,
 *	- T_CONN_IND is sent up the listener's read queue,
 *	- for sockets (unless tl_disable_early_connect is set) a T_CONN_CON
 *	  copied from the request is sent up tep's read queue right away.
 *
 * Failure paths:
 *	- tep closing: everything is dropped silently,
 *	- listener closing or in the wrong state: T_ERROR_ACK with
 *	  TSYSERR/ECONNREFUSED and tep takes the TE_ERROR_ACK transition,
 *	- allocation failure before the ack: tep is rolled back to TS_IDLE and
 *	  the request is handed to tl_memrecover(),
 *	- tl_resizemp() failure after the ack: tl_merror() with ENOMEM.
 *
 * Every path leaves the serializer with tl_serializer_exit(). Every failure
 * path also calls TL_UNCONNECT() on te_oconp and tl_refrele(tep); the
 * successful path does not call tl_refrele() - see the comment at the end of
 * the function.
3035  */
3036 static void
3037 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3038 {
3039 	queue_t		*wq;
3040 	tl_endpt_t	*peer_tep = tep->te_oconp;
3041 	mblk_t		*confmp, *cimp, *indmp;
3042 	void		*opts = NULL;
3043 	mblk_t		*ackmp = mp;
3044 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3045 	struct T_conn_ind	*ci;
3046 	tl_icon_t	*tip;
3047 	void		*addr_startp;
3048 	t_scalar_t	olen = creq->OPT_length;
3049 	t_scalar_t	ooff = creq->OPT_offset;
3050 	size_t 		ci_msz;
3051 	size_t		size;
3052 
	/*
	 * The connecting endpoint started closing while the request was
	 * waiting for the serializer: nobody is left to ack. freemsg() here
	 * releases both ackmp and the T_CONN_REQ chained on b_cont.
	 */
3053 	if (tep->te_closing) {
3054 		TL_UNCONNECT(tep->te_oconp);
3055 		tl_serializer_exit(tep);
3056 		tl_refrele(tep);
3057 		freemsg(mp);
3058 		return;
3059 	}
3060 
3061 	wq = tep->te_wq;
3062 	tep->te_flag |= TL_EAGER;
3063 
3064 	/*
3065 	 * Extract preallocated ackmp from mp.
	 * From here on mp is the T_CONN_REQ and ackmp stands alone.
3066 	 */
3067 	mp = mp->b_cont;
3068 	ackmp->b_cont = NULL;
3069 
	/* ooff != 0 is used below as "the request carries options". */
3070 	if (olen == 0)
3071 		ooff = 0;
3072 
3073 	if (peer_tep->te_closing ||
3074 	    !((peer_tep->te_state == TS_IDLE) ||
3075 	    (peer_tep->te_state == TS_WRES_CIND))) {
3076 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3077 		    "tl_conn_req:peer in bad state (%d)",
3078 		    peer_tep->te_state));
3079 		TL_UNCONNECT(tep->te_oconp);
		/*
		 * T_ERROR_ACK completes the request: take the TE_ERROR_ACK
		 * transition so that tep does not stay in the state entered
		 * for TE_CONN_REQ by tl_conn_req(). The T_CONN_REQ message
		 * itself is reused for the error ack, so the preallocated
		 * ackmp is not needed.
		 */
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3080 		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3081 		freemsg(ackmp);
3082 		tl_serializer_exit(tep);
3083 		tl_refrele(tep);
3084 		return;
3085 	}
3086 
3087 	/*
3088 	 * preallocate now for T_DISCON_IND or T_CONN_IND
3089 	 */
3090 	/*
3091 	 * calculate length of T_CONN_IND message
	 *
	 * If the listener asked for credentials (TL_SETCRED/TL_SETUCRED) the
	 * T_CONN_IND carries exactly one credential option and any options
	 * from the request are ignored (ooff is cleared).
3092 	 */
3093 	if (peer_tep->te_flag & TL_SETCRED) {
3094 		ooff = 0;
3095 		olen = (t_scalar_t) sizeof (struct opthdr) +
3096 		    OPTLEN(sizeof (tl_credopt_t));
3097 		/* 1 option only */
3098 	} else if (peer_tep->te_flag & TL_SETUCRED) {
3099 		ooff = 0;
3100 		olen = (t_scalar_t)sizeof (struct opthdr) +
3101 		    OPTLEN(ucredsize);
3102 		/* 1 option only */
3103 	}
3104 	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3105 	ci_msz = T_ALIGN(ci_msz) + olen;
3106 	size = max(ci_msz, sizeof (struct T_discon_ind));
3107 
3108 	/*
3109 	 * Save options from mp - we'll need them for T_CONN_IND.
	 * The copy is needed because mp is reused (reallocb) for the
	 * T_CONN_IND below.
3110 	 */
3111 	if (ooff != 0) {
3112 		opts = kmem_alloc(olen, KM_NOSLEEP);
3113 		if (opts == NULL) {
3114 			/*
3115 			 * roll back state changes
3116 			 */
3117 			tep->te_state = TS_IDLE;
3118 			tl_memrecover(wq, mp, size);
3119 			freemsg(ackmp);
3120 			TL_UNCONNECT(tep->te_oconp);
3121 			tl_serializer_exit(tep);
3122 			tl_refrele(tep);
3123 			return;
3124 		}
3125 		/* Copy options to a temp buffer */
3126 		bcopy(mp->b_rptr + ooff, opts, olen);
3127 	}
3128 
3129 	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3130 		/*
3131 		 * Generate a T_CONN_CON that has the identical address
3132 		 * (and options) as the T_CONN_REQ.
3133 		 * NOTE: assumes that the T_conn_req and T_conn_con structures
3134 		 * are isomorphic.
3135 		 */
3136 		confmp = copyb(mp);
3137 		if (! confmp) {
3138 			/*
3139 			 * roll back state changes
3140 			 */
3141 			tep->te_state = TS_IDLE;
3142 			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3143 			freemsg(ackmp);
3144 			if (opts != NULL)
3145 				kmem_free(opts, olen);
3146 			TL_UNCONNECT(tep->te_oconp);
3147 			tl_serializer_exit(tep);
3148 			tl_refrele(tep);
3149 			return;
3150 		}
3151 		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3152 		    T_CONN_CON;
3153 	} else {
3154 		confmp = NULL;
3155 	}
	/* Grow the request message so it can hold the T_CONN_IND. */
3156 	if ((indmp = reallocb(mp, size, 0)) == NULL) {
3157 		/*
3158 		 * roll back state changes
3159 		 */
3160 		tep->te_state = TS_IDLE;
3161 		tl_memrecover(wq, mp, size);
3162 		freemsg(ackmp);
3163 		if (opts != NULL)
3164 			kmem_free(opts, olen);
3165 		freemsg(confmp);
3166 		TL_UNCONNECT(tep->te_oconp);
3167 		tl_serializer_exit(tep);
3168 		tl_refrele(tep);
3169 		return;
3170 	}
3171 
	/* Cell that will represent tep on the listener's pending list. */
3172 	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3173 	if (tip == NULL) {
3174 		/*
3175 		 * roll back state changes
3176 		 */
3177 		tep->te_state = TS_IDLE;
3178 		tl_memrecover(wq, indmp, sizeof (*tip));
3179 		freemsg(ackmp);
3180 		if (opts != NULL)
3181 			kmem_free(opts, olen);
3182 		freemsg(confmp);
3183 		TL_UNCONNECT(tep->te_oconp);
3184 		tl_serializer_exit(tep);
3185 		tl_refrele(tep);
3186 		return;
3187 	}
3188 	tip->ti_mp = NULL;
3189 
3190 	/*
3191 	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3192 	 * and tl_icon_t cell.
3193 	 */
3194 
3195 	/*
3196 	 * ack validity of request and send the peer credential in the ACK.
3197 	 */
3198 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3199 
	/* Stamp the early T_CONN_CON (if any) with the listener's cred/pid. */
3200 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3201 	    confmp != NULL) {
3202 		mblk_setcred(confmp, peer_tep->te_credp);
3203 		DB_CPID(confmp) = peer_tep->te_cpid;
3204 	}
3205 
3206 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3207 
3208 	/*
3209 	 * prepare message to send T_CONN_IND
3210 	 */
3211 	/*
3212 	 * allocate the message - original data blocks retained
3213 	 * in the returned mblk
3214 	 */
3215 	cimp = tl_resizemp(indmp, size);
3216 	if (! cimp) {
3217 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3218 		    "tl_conn_req:con_ind:allocb failure"));
		/* T_OK_ACK has already gone up, so fail with tl_merror(). */
3219 		tl_merror(wq, indmp, ENOMEM);
3220 		TL_UNCONNECT(tep->te_oconp);
3221 		tl_serializer_exit(tep);
3222 		tl_refrele(tep);
3223 		if (opts != NULL)
3224 			kmem_free(opts, olen);
3225 		freemsg(confmp);
3226 		ASSERT(tip->ti_mp == NULL);
3227 		kmem_free(tip, sizeof (*tip));
3228 		return;
3229 	}
3230 
	/*
	 * Build the T_CONN_IND: the source address is tep's bound address and
	 * the sequence number is tep's te_seqno.
	 */
3231 	DB_TYPE(cimp) = M_PROTO;
3232 	ci = (struct T_conn_ind *)cimp->b_rptr;
3233 	ci->PRIM_type  = T_CONN_IND;
3234 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3235 	ci->SRC_length = tep->te_alen;
3236 	ci->SEQ_number = tep->te_seqno;
3237 
3238 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3239 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3240 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3241 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3242 		    ci->SRC_length);
3243 		ci->OPT_length = olen; /* because only 1 option */
3244 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3245 		    DB_CREDDEF(cimp, tep->te_credp),
3246 		    TLPID(cimp, tep),
3247 		    peer_tep->te_flag, peer_tep->te_credp);
3248 	} else if (ooff != 0) {
3249 		/* Copy option from T_CONN_REQ */
3250 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3251 		    ci->SRC_length);
3252 		ci->OPT_length = olen;
3253 		ASSERT(opts != NULL);
3254 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3255 	} else {
3256 		ci->OPT_offset = 0;
3257 		ci->OPT_length = 0;
3258 	}
3259 	if (opts != NULL)
3260 		kmem_free(opts, olen);
3261 
3262 	/*
3263 	 * register connection request with server peer
3264 	 * append to list of incoming connections
3265 	 * increment references for both peer_tep and tep: peer_tep is placed on
3266 	 * te_oconp and tep is placed on listeners queue.
3267 	 */
3268 	tip->ti_tep = tep;
3269 	tip->ti_seqno = tep->te_seqno;
3270 	list_insert_tail(&peer_tep->te_iconp, tip);
3271 	peer_tep->te_nicon++;
3272 
3273 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3274 	/*
3275 	 * send the T_CONN_IND message
3276 	 */
3277 	putnext(peer_tep->te_rq, cimp);
3278 
3279 	/*
3280 	 * Send a T_CONN_CON message for sockets.
3281 	 * Disable the queues until we have reached the correct state!
3282 	 */
3283 	if (confmp != NULL) {
3284 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3285 		noenable(wq);
3286 		putnext(tep->te_rq, confmp);
3287 	}
3288 	/*
3289 	 * Now we need to increment tep reference because tep is referenced by
3290 	 * server list of pending connections. We also need to decrement
3291 	 * reference before exiting serializer. Two operations void each other
3292 	 * so we don't modify reference at all.
3293 	 */
3294 	ASSERT(tep->te_refcnt >= 2);
3295 	ASSERT(peer_tep->te_refcnt >= 2);
3296 	tl_serializer_exit(tep);
3297 }
3298 
3299 
3300 
3301 /*
3302  * Handle T_conn_res on listener stream. Called on listener serializer.
3303  * tl_conn_req has already generated the T_CONN_CON.
3304  * tl_conn_res is called on listener serializer.
3305  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3306  * Switch eager serializer to acceptor's.
3307  *
3308  * If TL_SET[U]CRED generate the credentials options.
3309  * For sockets tl_conn_req has already generated the T_CONN_CON.
 *
 * Arguments:
 *	mp	- T_CONN_RES or O_T_CONN_RES message from the listener.
 *	tep	- the listening endpoint (connection oriented).
 *
 * Outline:
 *	1. Preallocate the ack, then validate listener state, message length,
 *	   options (none are supported) and the sequence number.
 *	2. Look up the acceptor by ACCEPTOR_id and block it from closing.
 *	3. Find the pending connection (tl_icon_t) by SEQ_number; its ti_tep
 *	   is the client, or NULL if the client has already closed.
 *	4. Send T_OK_ACK, link client and acceptor through te_conp, put both
 *	   on one serializer, free the tl_icon_t and, unless an early
 *	   T_CONN_CON was already sent for a socket, send T_CONN_CON to the
 *	   client.
 *
 * Each tl_noclose() that succeeded on the acceptor or the client is paired
 * with tl_closeok() on every return path, and the acceptor reference
 * obtained by the hash lookup is dropped with tl_refrele() on the error and
 * early-return paths.
3310  */
3311 static void
3312 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3313 {
3314 	queue_t			*wq;
3315 	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
3316 	ssize_t			msz = MBLKL(mp);
3317 	t_scalar_t		olen, ooff, err = 0;
3318 	t_scalar_t		prim = cres->PRIM_type;
3319 	uchar_t			*addr_startp;
3320 	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
3321 	tl_icon_t		*tip;
3322 	size_t			size;
3323 	mblk_t			*ackmp, *respmp;
3324 	mblk_t			*dimp, *ccmp = NULL;
3325 	struct T_discon_ind	*di;
3326 	struct T_conn_con	*cc;
	/* B_TRUE only if tl_noclose(cl_ep) succeeded and must be undone. */
3327 	boolean_t		client_noclose_set = B_FALSE;
	/* B_FALSE means the acceptor moves to the client's serializer. */
3328 	boolean_t		switch_client_serializer = B_TRUE;
3329 
3330 	ASSERT(IS_COTS(tep));
3331 
3332 	if (tep->te_closing) {
3333 		freemsg(mp);
3334 		return;
3335 	}
3336 
3337 	wq = tep->te_wq;
3338 
3339 	/*
3340 	 * preallocate memory for:
3341 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3342 	 *	==> known max T_ERROR_ACK
3343 	 * 2. max of T_DISCON_IND and T_CONN_CON
3344 	 */
3345 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3346 	if (! ackmp) {
3347 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3348 		return;
3349 	}
3350 	/*
3351 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3352 	 * will be committed for T_DISCON_IND/T_CONN_CON later
3353 	 */
3354 
3355 
3356 	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3357 
3358 	/*
3359 	 * validate state
3360 	 */
3361 	if (tep->te_state != TS_WRES_CIND) {
3362 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3363 		    SL_TRACE|SL_ERROR,
3364 		    "tl_wput:T_CONN_RES:out of state, state=%d",
3365 		    tep->te_state));
3366 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3367 		freemsg(mp);
3368 		return;
3369 	}
3370 
3371 	/*
3372 	 * validate the message
3373 	 * Note: dereference fields in struct inside message only
3374 	 * after validating the message length.
3375 	 */
3376 	if (msz < sizeof (struct T_conn_res)) {
3377 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3378 		    "tl_conn_res:invalid message length"));
3379 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3380 		freemsg(mp);
3381 		return;
3382 	}
3383 	olen = cres->OPT_length;
3384 	ooff = cres->OPT_offset;
3385 	if (((olen > 0) && ((ooff + olen) > msz))) {
3386 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3387 		    "tl_conn_res:invalid message"));
3388 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3389 		freemsg(mp);
3390 		return;
3391 	}
3392 	if (olen) {
3393 		/*
3394 		 * no opts in connect res
3395 		 * supported in this provider
3396 		 */
3397 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3398 		    "tl_conn_res:options not supported in message"));
3399 		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3400 		freemsg(mp);
3401 		return;
3402 	}
3403 
	/*
	 * The request is accepted for processing. Every error ack sent from
	 * here on is preceded by the TE_ERROR_ACK transition.
	 */
3404 	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3405 	ASSERT(tep->te_state == TS_WACK_CRES);
3406 
	/*
	 * NOTE(review): as written this rejects only sequence numbers in the
	 * range [BADSEQNUM, TL_MINOR_START) - confirm that this is the
	 * intended range.
	 */
3407 	if (cres->SEQ_number < TL_MINOR_START &&
3408 	    cres->SEQ_number >= BADSEQNUM) {
3409 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3410 		    "tl_conn_res:remote endpoint sequence number bad"));
3411 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3412 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3413 		freemsg(mp);
3414 		return;
3415 	}
3416 
3417 	/*
3418 	 * find accepting endpoint. Will have extra reference if found.
3419 	 */
3420 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3421 	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3422 	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3423 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3424 		    "tl_conn_res:bad accepting endpoint"));
3425 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3426 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3427 		freemsg(mp);
3428 		return;
3429 	}
3430 
3431 	/*
3432 	 * Prevent acceptor from closing.
3433 	 */
3434 	if (! tl_noclose(acc_ep)) {
3435 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3436 		    "tl_conn_res:bad accepting endpoint"));
3437 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3438 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3439 		tl_refrele(acc_ep);
3440 		freemsg(mp);
3441 		return;
3442 	}
3443 
3444 	acc_ep->te_flag |= TL_ACCEPTOR;
3445 
3446 	/*
3447 	 * validate that accepting endpoint, if different from listening
3448 	 * has address bound => state is TS_IDLE
3449 	 * TROUBLE in XPG4 !!?
3450 	 */
3451 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3452 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3453 		    "tl_conn_res:accepting endpoint has no address bound,"
3454 		    "state=%d", acc_ep->te_state));
3455 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3456 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3457 		freemsg(mp);
3458 		tl_closeok(acc_ep);
3459 		tl_refrele(acc_ep);
3460 		return;
3461 	}
3462 
3463 	/*
3464 	 * validate if accepting endpt same as listening, then
3465 	 * no other incoming connection should be on the queue
3466 	 */
3467 
3468 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3469 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3470 		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3471 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3472 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3473 		freemsg(mp);
3474 		tl_closeok(acc_ep);
3475 		tl_refrele(acc_ep);
3476 		return;
3477 	}
3478 
3479 	/*
3480 	 * Mark for deletion, the entry corresponding to client
3481 	 * on list of pending connections made by the listener
3482 	 *  search list to see if client is one of the
3483 	 * recorded as a listener.
3484 	 */
3485 	tip = tl_icon_find(tep, cres->SEQ_number);
3486 	if (tip == NULL) {
3487 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3488 		    "tl_conn_res:no client in listener list"));
3489 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3490 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3491 		freemsg(mp);
3492 		tl_closeok(acc_ep);
3493 		tl_refrele(acc_ep);
3494 		return;
3495 	}
3496 
3497 	/*
3498 	 * If ti_tep is NULL the client has already closed. In this case
3499 	 * the code below will avoid any action on the client side
3500 	 * but complete the server and acceptor state transitions.
3501 	 */
3502 	ASSERT(tip->ti_tep == NULL ||
3503 	    tip->ti_tep->te_seqno == cres->SEQ_number);
3504 	cl_ep = tip->ti_tep;
3505 
3506 	/*
3507 	 * If the client is present it is switched from listener's to acceptor's
3508 	 * serializer. We should block client closes while serializers are
3509 	 * being switched.
3510 	 *
3511 	 * It is possible that the client is present but is currently being
3512 	 * closed. There are two possible cases:
3513 	 *
3514 	 * 1) The client has already entered tl_close_finish_ser() and sent
3515 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3516 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3517 	 *
3518 	 * 2) The client started the close but has not entered
3519 	 *    tl_close_finish_ser() yet. In this case, the client is already
3520 	 *    proceeding asynchronously on the listener's serializer, so we're
3521 	 *    forced to change the acceptor to use the listener's serializer to
3522 	 *    ensure that any operations on the acceptor are serialized with
3523 	 *    respect to the close that's in-progress.
3524 	 */
3525 	if (cl_ep != NULL) {
3526 		if (tl_noclose(cl_ep)) {
3527 			client_noclose_set = B_TRUE;
3528 		} else {
3529 			/*
3530 			 * Client is closing. If it has sent the
3531 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3532 			 * we have to let the client continue until it is
3533 			 * sent.
3534 			 *
3535 			 * If we do continue using the client, acceptor will
3536 			 * switch to client's serializer which is used by client
3537 			 * for its close.
			 *
			 * The client is ignored (treated as already closed)
			 * unless it is a socket using early connect whose
			 * te_state is not -1.
			 * NOTE(review): presumably te_state == -1 is how a
			 * closing client marks that T_ORDREL_IND was already
			 * sent - confirm against the close path.
3538 			 */
3539 			tl_client_closing_when_accepting++;
3540 			switch_client_serializer = B_FALSE;
3541 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3542 			    cl_ep->te_state == -1)
3543 				cl_ep = NULL;
3544 		}
3545 	}
3546 
3547 	if (cl_ep != NULL) {
3548 		/*
3549 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3550 		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, err is set only when cl_ep is a
		 * socket whose state is neither TS_WCON_CREQ nor
		 * TS_DATA_XFER; a non-socket client is never rejected here.
		 * Confirm whether that matches the intent stated above.
3551 		 */
3552 		if (cl_ep->te_state != TS_WCON_CREQ &&
3553 		    (cl_ep->te_state != TS_DATA_XFER &&
3554 		    IS_SOCKET(cl_ep))) {
3555 			err = ECONNREFUSED;
3556 			/*
3557 			 * T_DISCON_IND sent later after committing memory
3558 			 * and acking validity of request
3559 			 */
3560 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3561 			    "tl_conn_res:peer in bad state"));
3562 		}
3563 
3564 		/*
3565 		 * preallocate now for T_DISCON_IND or T_CONN_CONN
3566 		 * ack validity of request (T_OK_ACK) after memory committed
3567 		 */
3568 
3569 		if (err)
3570 			size = sizeof (struct T_discon_ind);
3571 		else {
3572 			/*
3573 			 * calculate length of T_CONN_CON message
3574 			 */
3575 			olen = 0;
3576 			if (cl_ep->te_flag & TL_SETCRED) {
3577 				olen = (t_scalar_t)sizeof (struct opthdr) +
3578 				    OPTLEN(sizeof (tl_credopt_t));
3579 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3580 				olen = (t_scalar_t)sizeof (struct opthdr) +
3581 				    OPTLEN(ucredsize);
3582 			}
3583 			size = T_ALIGN(sizeof (struct T_conn_con) +
3584 			    acc_ep->te_alen) + olen;
3585 		}
3586 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3587 			/*
3588 			 * roll back state changes
3589 			 */
3590 			tep->te_state = TS_WRES_CIND;
3591 			tl_memrecover(wq, mp, size);
3592 			freemsg(ackmp);
3593 			if (client_noclose_set)
3594 				tl_closeok(cl_ep);
3595 			tl_closeok(acc_ep);
3596 			tl_refrele(acc_ep);
3597 			return;
3598 		}
		/* The request message now lives on as respmp. */
3599 		mp = NULL;
3600 	}
3601 
3602 	/*
3603 	 * Now ack validity of request
	 *
	 * The event depends on whether this is the last pending connection
	 * and on whether the listener accepts on itself.
3604 	 */
3605 	if (tep->te_nicon == 1) {
3606 		if (tep == acc_ep)
3607 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3608 		else
3609 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3610 	} else
3611 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3612 
3613 	/*
3614 	 * send T_DISCON_IND now if client state validation failed earlier
	 *
	 * err can only be set when cl_ep != NULL, so respmp and size are
	 * valid here.
	 * NOTE(review): this path returns without calling tl_freetip(), so
	 * tip stays on the listener's pending list - confirm this is
	 * intended.
3615 	 */
3616 	if (err) {
3617 		tl_ok_ack(wq, ackmp, prim);
3618 		/*
3619 		 * flush the queues - why always ?
3620 		 */
3621 		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3622 
3623 		dimp = tl_resizemp(respmp, size);
3624 		if (! dimp) {
3625 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3626 			    SL_TRACE|SL_ERROR,
3627 			    "tl_conn_res:con_ind:allocb failure"));
3628 			tl_merror(wq, respmp, ENOMEM);
3629 			tl_closeok(acc_ep);
3630 			if (client_noclose_set)
3631 				tl_closeok(cl_ep);
3632 			tl_refrele(acc_ep);
3633 			return;
3634 		}
3635 		if (dimp->b_cont) {
3636 			/* no user data in provider generated discon ind */
3637 			freemsg(dimp->b_cont);
3638 			dimp->b_cont = NULL;
3639 		}
3640 
3641 		DB_TYPE(dimp) = M_PROTO;
3642 		di = (struct T_discon_ind *)dimp->b_rptr;
3643 		di->PRIM_type  = T_DISCON_IND;
3644 		di->DISCON_reason = err;
3645 		di->SEQ_number = BADSEQNUM;
3646 
3647 		tep->te_state = TS_IDLE;
3648 		/*
3649 		 * send T_DISCON_IND message
3650 		 */
3651 		putnext(acc_ep->te_rq, dimp);
3652 		if (client_noclose_set)
3653 			tl_closeok(cl_ep);
3654 		tl_closeok(acc_ep);
3655 		tl_refrele(acc_ep);
3656 		return;
3657 	}
3658 
3659 	/*
3660 	 * now start connecting the accepting endpoint
3661 	 */
3662 	if (tep != acc_ep)
3663 		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3664 
3665 	if (cl_ep == NULL) {
3666 		/*
3667 		 * The client has already closed. Send up any queued messages
3668 		 * and change the state accordingly.
3669 		 */
3670 		tl_ok_ack(wq, ackmp, prim);
3671 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3672 
3673 		/*
3674 		 * remove endpoint from incoming connection
3675 		 * delete client from list of incoming connections
3676 		 */
3677 		tl_freetip(tep, tip);
		/* mp was not reallocated on this path; release it here. */
3678 		freemsg(mp);
3679 		tl_closeok(acc_ep);
3680 		tl_refrele(acc_ep);
3681 		return;
3682 	} else if (tip->ti_mp != NULL) {
3683 		/*
3684 		 * The client could have queued a T_DISCON_IND which needs
3685 		 * to be sent up.
3686 		 * Note that t_discon_req can not operate the same as
3687 		 * t_data_req since it is not possible for it to putbq
3688 		 * the message and return -1 due to the use of qwriter.
3689 		 */
3690 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3691 	}
3692 
3693 	/*
3694 	 * prepare connect confirm T_CONN_CON message
3695 	 */
3696 
3697 	/*
3698 	 * allocate the message - original data blocks
3699 	 * retained in the returned mblk
	 *
	 * A T_CONN_CON is built only when no early T_CONN_CON was sent, i.e.
	 * for non-sockets or when tl_disable_early_connect is set; otherwise
	 * respmp is simply released.
3700 	 */
3701 	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3702 		ccmp = tl_resizemp(respmp, size);
3703 		if (ccmp == NULL) {
3704 			tl_ok_ack(wq, ackmp, prim);
3705 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3706 			    SL_TRACE|SL_ERROR,
3707 			    "tl_conn_res:conn_con:allocb failure"));
3708 			tl_merror(wq, respmp, ENOMEM);
3709 			tl_closeok(acc_ep);
3710 			if (client_noclose_set)
3711 				tl_closeok(cl_ep);
3712 			tl_refrele(acc_ep);
3713 			return;
3714 		}
3715 
		/* The responding address is the acceptor's bound address. */
3716 		DB_TYPE(ccmp) = M_PROTO;
3717 		cc = (struct T_conn_con *)ccmp->b_rptr;
3718 		cc->PRIM_type  = T_CONN_CON;
3719 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3720 		cc->RES_length = acc_ep->te_alen;
3721 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3722 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3723 		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3724 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3725 			    cc->RES_length);
3726 			cc->OPT_length = olen;
3727 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3728 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3729 			    cl_ep->te_credp);
3730 		} else {
3731 			cc->OPT_offset = 0;
3732 			cc->OPT_length = 0;
3733 		}
3734 		/*
3735 		 * Forward the credential in the packet so it can be picked up
3736 		 * at the higher layers for more complete credential processing
3737 		 */
3738 		mblk_setcred(ccmp, acc_ep->te_credp);
3739 		DB_CPID(ccmp) = acc_ep->te_cpid;
3740 	} else {
3741 		freemsg(respmp);
3742 		respmp = NULL;
3743 	}
3744 
3745 	/*
3746 	 * make connection linking
3747 	 * accepting and client endpoints
3748 	 * No need to increment references:
3749 	 *	on client: it should already have one from tip->ti_tep linkage.
3750 	 *	on acceptor it should already have one from the table lookup.
3751 	 *
3752 	 * At this point both client and acceptor can't close. Set client
3753 	 * serializer to acceptor's.
3754 	 */
3755 	ASSERT(cl_ep->te_refcnt >= 2);
3756 	ASSERT(acc_ep->te_refcnt >= 2);
3757 	ASSERT(cl_ep->te_conp == NULL);
3758 	ASSERT(acc_ep->te_conp == NULL);
3759 	cl_ep->te_conp = acc_ep;
3760 	acc_ep->te_conp = cl_ep;
3761 	ASSERT(cl_ep->te_ser == tep->te_ser);
3762 	if (switch_client_serializer) {
3763 		mutex_enter(&cl_ep->te_ser_lock);
		/*
		 * A non-zero te_ser_count prevents the switch; fall back to
		 * moving the acceptor instead (handled below).
		 */
3764 		if (cl_ep->te_ser_count > 0) {
3765 			switch_client_serializer = B_FALSE;
3766 			tl_serializer_noswitch++;
3767 		} else {
3768 			/*
3769 			 * Move client to the acceptor's serializer.
3770 			 */
3771 			tl_serializer_refhold(acc_ep->te_ser);
3772 			tl_serializer_refrele(cl_ep->te_ser);
3773 			cl_ep->te_ser = acc_ep->te_ser;
3774 		}
3775 		mutex_exit(&cl_ep->te_ser_lock);
3776 	}
3777 	if (!switch_client_serializer) {
3778 		/*
3779 		 * It is not possible to switch client to use acceptor's.
3780 		 * Move acceptor to client's serializer (which is the same as
3781 		 * listener's).
3782 		 */
3783 		tl_serializer_refhold(cl_ep->te_ser);
3784 		tl_serializer_refrele(acc_ep->te_ser);
3785 		acc_ep->te_ser = cl_ep->te_ser;
3786 	}
3787 
	/* The pending-connection links are replaced by the te_conp links. */
3788 	TL_REMOVE_PEER(cl_ep->te_oconp);
3789 	TL_REMOVE_PEER(acc_ep->te_oconp);
3790 
3791 	/*
3792 	 * remove endpoint from incoming connection
3793 	 * delete client from list of incoming connections
3794 	 */
3795 	tip->ti_tep = NULL;
3796 	tl_freetip(tep, tip);
3797 	tl_ok_ack(wq, ackmp, prim);
3798 
3799 	/*
3800 	 * data blocks already linked in reallocb()
3801 	 */
3802 
3803 	/*
3804 	 * link queues so that I_SENDFD will work
3805 	 */
3806 	if (! IS_SOCKET(tep)) {
3807 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3808 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3809 	}
3810 
3811 	/*
3812 	 * send T_CONN_CON up on client side unless it was already
3813 	 * done (for a socket). In cases any data or ordrel req has been
3814 	 * queued make sure that the service procedure runs.
3815 	 */
3816 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3817 		enableok(cl_ep->te_wq);
3818 		TL_QENABLE(cl_ep);
		/* ccmp is never built on this path, so this is a no-op. */
3819 		if (ccmp != NULL)
3820 			freemsg(ccmp);
3821 	} else {
3822 		/*
3823 		 * change client state on TE_CONN_CON event
3824 		 */
3825 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3826 		putnext(cl_ep->te_rq, ccmp);
3827 	}
3828 
3829 	/* Mark the both endpoints as accepted */
3830 	cl_ep->te_flag |= TL_ACCEPTED;
3831 	acc_ep->te_flag |= TL_ACCEPTED;
3832 
3833 	/*
3834 	 * Allow client and acceptor to close.
3835 	 */
3836 	tl_closeok(acc_ep);
3837 	if (client_noclose_set)
3838 		tl_closeok(cl_ep);
3839 }
3840 
3841 
3842 
3843 
3844 static void
3845 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3846 {
3847 	queue_t			*wq;
3848 	struct T_discon_req	*dr;
3849 	ssize_t			msz;
3850 	tl_endpt_t		*peer_tep = tep->te_conp;
3851 	tl_endpt_t		*srv_tep = tep->te_oconp;
3852 	tl_icon_t		*tip;
3853 	size_t			size;
3854 	mblk_t			*ackmp, *dimp, *respmp;
3855 	struct T_discon_ind	*di;
3856 	t_scalar_t		save_state, new_state;
3857 
3858 	if (tep->te_closing) {
3859 		freemsg(mp);
3860 		return;
3861 	}
3862 
3863 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3864 		TL_UNCONNECT(tep->te_conp);
3865 		peer_tep = NULL;
3866 	}
3867 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3868 		TL_UNCONNECT(tep->te_oconp);
3869 		srv_tep = NULL;
3870 	}
3871 
3872 	wq = tep->te_wq;
3873 
3874 	/*
3875 	 * preallocate memory for:
3876 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3877 	 *	==> known max T_ERROR_ACK
3878 	 * 2. for  T_DISCON_IND
3879 	 */
3880 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3881 	if (! ackmp) {
3882 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3883 		return;
3884 	}
3885 	/*
3886 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3887 	 * will be committed for T_DISCON_IND  later
3888 	 */
3889 
3890 	dr = (struct T_discon_req *)mp->b_rptr;
3891 	msz = MBLKL(mp);
3892 
3893 	/*
3894 	 * validate the state
3895 	 */
3896 	save_state = new_state = tep->te_state;
3897 	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3898 	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3899 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3900 		    SL_TRACE|SL_ERROR,
3901 		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
3902 		    tep->te_state));
3903 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3904 		freemsg(mp);
3905 		return;
3906 	}
3907 	/*
3908 	 * Defer committing the state change until it is determined if
3909 	 * the message will be queued with the tl_icon or not.
3910 	 */
3911 	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3912 
3913 	/* validate the message */
3914 	if (msz < sizeof (struct T_discon_req)) {
3915 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3916 		    "tl_discon_req:invalid message"));
3917 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3918 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3919 		freemsg(mp);
3920 		return;
3921 	}
3922 
3923 	/*
3924 	 * if server, then validate that client exists
3925 	 * by connection sequence number etc.
3926 	 */
3927 	if (tep->te_nicon > 0) { /* server */
3928 
3929 		/*
3930 		 * search server list for disconnect client
3931 		 */
3932 		tip = tl_icon_find(tep, dr->SEQ_number);
3933 		if (tip == NULL) {
3934 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
3935 			    SL_TRACE|SL_ERROR,
3936 			    "tl_discon_req:no disconnect endpoint"));
3937 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3938 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3939 			freemsg(mp);
3940 			return;
3941 		}
3942 		/*
3943 		 * If ti_tep is NULL the client has already closed. In this case
3944 		 * the code below will avoid any action on the client side.
3945 		 */
3946 
3947 		ASSERT(IMPLY(tip->ti_tep != NULL,
3948 		    tip->ti_tep->te_seqno == dr->SEQ_number));
3949 		peer_tep = tip->ti_tep;
3950 	}
3951 
3952 	/*
3953 	 * preallocate now for T_DISCON_IND
3954 	 * ack validity of request (T_OK_ACK) after memory committed
3955 	 */
3956 	size = sizeof (struct T_discon_ind);
3957 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
3958 		tl_memrecover(wq, mp, size);
3959 		freemsg(ackmp);
3960 		return;
3961 	}
3962 
3963 	/*
3964 	 * prepare message to ack validity of request
3965 	 */
3966 	if (tep->te_nicon == 0)
3967 		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3968 	else
3969 		if (tep->te_nicon == 1)
3970 			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3971 		else
3972 			new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3973 
3974 	/*
3975 	 * Flushing queues according to TPI. Using the old state.
3976 	 */
3977 	if ((tep->te_nicon <= 1) &&
3978 	    ((save_state == TS_DATA_XFER) ||
3979 	    (save_state == TS_WIND_ORDREL) ||
3980 	    (save_state == TS_WREQ_ORDREL)))
3981 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3982 
3983 	/* send T_OK_ACK up  */
3984 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3985 
3986 	/*
3987 	 * now do disconnect business
3988 	 */
3989 	if (tep->te_nicon > 0) { /* listener */
3990 		if (peer_tep != NULL && !peer_tep->te_closing) {
3991 			/*
3992 			 * disconnect incoming connect request pending to tep
3993 			 */
3994 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
3995 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
3996 				    SL_TRACE|SL_ERROR,
3997 				    "tl_discon_req: reallocb failed"));
3998 				tep->te_state = new_state;
3999 				tl_merror(wq, respmp, ENOMEM);
4000 				return;
4001 			}
4002 			di = (struct T_discon_ind *)dimp->b_rptr;
4003 			di->SEQ_number = BADSEQNUM;
4004 			save_state = peer_tep->te_state;
4005 			peer_tep->te_state = TS_IDLE;
4006 
4007 			TL_REMOVE_PEER(peer_tep->te_oconp);
4008 			enableok(peer_tep->te_wq);
4009 			TL_QENABLE(peer_tep);
4010 		} else {
4011 			freemsg(respmp);
4012 			dimp = NULL;
4013 		}
4014 
4015 		/*
4016 		 * remove endpoint from incoming connection list
4017 		 * - remove disconnect client from list on server
4018 		 */
4019 		tl_freetip(tep, tip);
4020 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4021 		/*
4022 		 * disconnect an outgoing request pending from tep
4023 		 */
4024 
4025 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4026 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4027 			    SL_TRACE|SL_ERROR,
4028 			    "tl_discon_req: reallocb failed"));
4029 			tep->te_state = new_state;
4030 			tl_merror(wq, respmp, ENOMEM);
4031 			return;
4032 		}
4033 		di = (struct T_discon_ind *)dimp->b_rptr;
4034 		DB_TYPE(dimp) = M_PROTO;
4035 		di->PRIM_type  = T_DISCON_IND;
4036 		di->DISCON_reason = ECONNRESET;
4037 		di->SEQ_number = tep->te_seqno;
4038 
4039 		/*
4040 		 * If this is a socket the T_DISCON_IND is queued with
4041 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4042 		 * from the list of pending connections.
4043 		 * Note that when te_oconp is set the peer better have
4044 		 * a t_connind_t for the client.
4045 		 */
4046 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4047 			/*
4048 			 * No need to check that
4049 			 * ti_tep == NULL since the T_DISCON_IND
4050 			 * takes precedence over other queued
4051 			 * messages.
4052 			 */
4053 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4054 			peer_tep = NULL;
4055 			dimp = NULL;
4056 			/*
4057 			 * Can't clear te_oconp since tl_co_unconnect needs
4058 			 * it as a hint not to free the tep.
4059 			 * Keep the state unchanged since tl_conn_res inspects
4060 			 * it.
4061 			 */
4062 			new_state = tep->te_state;
4063 		} else {
4064 			/* Found - delete it */
4065 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4066 			if (tip != NULL) {
4067 				ASSERT(tep == tip->ti_tep);
4068 				save_state = peer_tep->te_state;
4069 				if (peer_tep->te_nicon == 1)
4070 					peer_tep->te_state =
4071 					    NEXTSTATE(TE_DISCON_IND2,
4072 					    peer_tep->te_state);
4073 				else
4074 					peer_tep->te_state =
4075 					    NEXTSTATE(TE_DISCON_IND3,
4076 					    peer_tep->te_state);
4077 				tl_freetip(peer_tep, tip);
4078 			}
4079 			ASSERT(tep->te_oconp != NULL);
4080 			TL_UNCONNECT(tep->te_oconp);
4081 		}
4082 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4083 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4084 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4085 			    SL_TRACE|SL_ERROR,
4086 			    "tl_discon_req: reallocb failed"));
4087 			tep->te_state = new_state;
4088 			tl_merror(wq, respmp, ENOMEM);
4089 			return;
4090 		}
4091 		di = (struct T_discon_ind *)dimp->b_rptr;
4092 		di->SEQ_number = BADSEQNUM;
4093 
4094 		save_state = peer_tep->te_state;
4095 		peer_tep->te_state = TS_IDLE;
4096 	} else {
4097 		/* Not connected */
4098 		tep->te_state = new_state;
4099 		freemsg(respmp);
4100 		return;
4101 	}
4102 
4103 	/* Commit state changes */
4104 	tep->te_state = new_state;
4105 
4106 	if (peer_tep == NULL) {
4107 		ASSERT(dimp == NULL);
4108 		goto done;
4109 	}
4110 	/*
4111 	 * Flush queues on peer before sending up
4112 	 * T_DISCON_IND according to TPI
4113 	 */
4114 
4115 	if ((save_state == TS_DATA_XFER) ||
4116 	    (save_state == TS_WIND_ORDREL) ||
4117 	    (save_state == TS_WREQ_ORDREL))
4118 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4119 
4120 	DB_TYPE(dimp) = M_PROTO;
4121 	di->PRIM_type  = T_DISCON_IND;
4122 	di->DISCON_reason = ECONNRESET;
4123 
4124 	/*
4125 	 * data blocks already linked into dimp by reallocb()
4126 	 */
4127 	/*
4128 	 * send indication message to peer user module
4129 	 */
4130 	ASSERT(dimp != NULL);
4131 	putnext(peer_tep->te_rq, dimp);
4132 done:
4133 	if (tep->te_conp) {	/* disconnect pointers if connected */
4134 		ASSERT(! peer_tep->te_closing);
4135 
4136 		/*
4137 		 * Messages may be queued on peer's write queue
4138 		 * waiting to be processed by its write service
4139 		 * procedure. Before the pointer to the peer transport
4140 		 * structure is set to NULL, qenable the peer's write
4141 		 * queue so that the queued up messages are processed.
4142 		 */
4143 		if ((save_state == TS_DATA_XFER) ||
4144 		    (save_state == TS_WIND_ORDREL) ||
4145 		    (save_state == TS_WREQ_ORDREL))
4146 			TL_QENABLE(peer_tep);
4147 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4148 		TL_UNCONNECT(peer_tep->te_conp);
4149 		if (! IS_SOCKET(tep)) {
4150 			/*
4151 			 * unlink the streams
4152 			 */
4153 			tep->te_wq->q_next = NULL;
4154 			peer_tep->te_wq->q_next = NULL;
4155 		}
4156 		TL_UNCONNECT(tep->te_conp);
4157 	}
4158 }
4159 
4160 
4161 static void
4162 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4163 {
4164 	queue_t			*wq;
4165 	size_t			ack_sz;
4166 	mblk_t			*ackmp;
4167 	struct T_addr_ack	*taa;
4168 
4169 	if (tep->te_closing) {
4170 		freemsg(mp);
4171 		return;
4172 	}
4173 
4174 	wq = tep->te_wq;
4175 
4176 	/*
4177 	 * Note: T_ADDR_REQ message has only PRIM_type field
4178 	 * so it is already validated earlier.
4179 	 */
4180 
4181 	if (IS_CLTS(tep) ||
4182 	    (tep->te_state > TS_WREQ_ORDREL) ||
4183 	    (tep->te_state < TS_DATA_XFER)) {
4184 		/*
4185 		 * Either connectionless or connection oriented but not
4186 		 * in connected data transfer state or half-closed states.
4187 		 */
4188 		ack_sz = sizeof (struct T_addr_ack);
4189 		if (tep->te_state >= TS_IDLE)
4190 			/* is bound */
4191 			ack_sz += tep->te_alen;
4192 		ackmp = reallocb(mp, ack_sz, 0);
4193 		if (ackmp == NULL) {
4194 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4195 			    SL_TRACE|SL_ERROR,
4196 			    "tl_addr_req: reallocb failed"));
4197 			tl_memrecover(wq, mp, ack_sz);
4198 			return;
4199 		}
4200 
4201 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4202 
4203 		bzero(taa, sizeof (struct T_addr_ack));
4204 
4205 		taa->PRIM_type = T_ADDR_ACK;
4206 		ackmp->b_datap->db_type = M_PCPROTO;
4207 		ackmp->b_wptr = (uchar_t *)&taa[1];
4208 
4209 		if (tep->te_state >= TS_IDLE) {
4210 			/* endpoint is bound */
4211 			taa->LOCADDR_length = tep->te_alen;
4212 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4213 
4214 			bcopy(tep->te_abuf, ackmp->b_wptr,
4215 			    tep->te_alen);
4216 			ackmp->b_wptr += tep->te_alen;
4217 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4218 		}
4219 
4220 		(void) qreply(wq, ackmp);
4221 	} else {
4222 		ASSERT(tep->te_state == TS_DATA_XFER ||
4223 		    tep->te_state == TS_WIND_ORDREL ||
4224 		    tep->te_state == TS_WREQ_ORDREL);
4225 		/* connection oriented in data transfer */
4226 		tl_connected_cots_addr_req(mp, tep);
4227 	}
4228 }
4229 
4230 
4231 static void
4232 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4233 {
4234 	tl_endpt_t		*peer_tep;
4235 	size_t			ack_sz;
4236 	mblk_t			*ackmp;
4237 	struct T_addr_ack	*taa;
4238 	uchar_t			*addr_startp;
4239 
4240 	if (tep->te_closing) {
4241 		freemsg(mp);
4242 		return;
4243 	}
4244 
4245 	ASSERT(tep->te_state >= TS_IDLE);
4246 
4247 	ack_sz = sizeof (struct T_addr_ack);
4248 	ack_sz += T_ALIGN(tep->te_alen);
4249 	peer_tep = tep->te_conp;
4250 	ack_sz += peer_tep->te_alen;
4251 
4252 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4253 	if (ackmp == NULL) {
4254 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4255 		    "tl_connected_cots_addr_req: reallocb failed"));
4256 		tl_memrecover(tep->te_wq, mp, ack_sz);
4257 		return;
4258 	}
4259 
4260 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4261 
4262 	/* endpoint is bound */
4263 	taa->LOCADDR_length = tep->te_alen;
4264 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4265 
4266 	addr_startp = (uchar_t *)&taa[1];
4267 
4268 	bcopy(tep->te_abuf, addr_startp,
4269 	    tep->te_alen);
4270 
4271 	taa->REMADDR_length = peer_tep->te_alen;
4272 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4273 	    taa->LOCADDR_length);
4274 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4275 	bcopy(peer_tep->te_abuf, addr_startp,
4276 	    peer_tep->te_alen);
4277 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4278 	    taa->REMADDR_offset + peer_tep->te_alen;
4279 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4280 
4281 	putnext(tep->te_rq, ackmp);
4282 }
4283 
4284 static void
4285 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4286 {
4287 	if (IS_CLTS(tep)) {
4288 		*ia = tl_clts_info_ack;
4289 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4290 	} else {
4291 		*ia = tl_cots_info_ack;
4292 		if (IS_COTSORD(tep))
4293 			ia->SERV_type = T_COTS_ORD;
4294 	}
4295 	ia->TIDU_size = tl_tidusz;
4296 	ia->CURRENT_state = tep->te_state;
4297 }
4298 
4299 /*
4300  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4301  * tl_wput.
4302  */
4303 static void
4304 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4305 {
4306 	mblk_t			*ackmp;
4307 	t_uscalar_t		cap_bits1;
4308 	struct T_capability_ack	*tcap;
4309 
4310 	if (tep->te_closing) {
4311 		freemsg(mp);
4312 		return;
4313 	}
4314 
4315 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4316 
4317 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4318 	    M_PCPROTO, T_CAPABILITY_ACK);
4319 	if (ackmp == NULL) {
4320 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4321 		    "tl_capability_req: reallocb failed"));
4322 		tl_memrecover(tep->te_wq, mp,
4323 		    sizeof (struct T_capability_ack));
4324 		return;
4325 	}
4326 
4327 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4328 	tcap->CAP_bits1 = 0;
4329 
4330 	if (cap_bits1 & TC1_INFO) {
4331 		tl_copy_info(&tcap->INFO_ack, tep);
4332 		tcap->CAP_bits1 |= TC1_INFO;
4333 	}
4334 
4335 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4336 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4337 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4338 	}
4339 
4340 	putnext(tep->te_rq, ackmp);
4341 }
4342 
4343 static void
4344 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4345 {
4346 	if (! tep->te_closing)
4347 		tl_info_req(mp, tep);
4348 	else
4349 		freemsg(mp);
4350 
4351 	tl_serializer_exit(tep);
4352 	tl_refrele(tep);
4353 }
4354 
4355 static void
4356 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4357 {
4358 	mblk_t *ackmp;
4359 
4360 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4361 	    M_PCPROTO, T_INFO_ACK);
4362 	if (ackmp == NULL) {
4363 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4364 		    "tl_info_req: reallocb failed"));
4365 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4366 		return;
4367 	}
4368 
4369 	/*
4370 	 * fill in T_INFO_ACK contents
4371 	 */
4372 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4373 
4374 	/*
4375 	 * send ack message
4376 	 */
4377 	putnext(tep->te_rq, ackmp);
4378 }
4379 
4380 /*
4381  * Handle M_DATA, T_data_req and T_optdata_req.
4382  * If this is a socket pass through T_optdata_req options unmodified.
4383  */
4384 static void
4385 tl_data(mblk_t *mp, tl_endpt_t *tep)
4386 {
4387 	queue_t			*wq = tep->te_wq;
4388 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4389 	ssize_t			msz = MBLKL(mp);
4390 	tl_endpt_t		*peer_tep;
4391 	queue_t			*peer_rq;
4392 	boolean_t		closing = tep->te_closing;
4393 
4394 	if (IS_CLTS(tep)) {
4395 		(void) (STRLOG(TL_ID, tep->te_minor, 2,
4396 		    SL_TRACE|SL_ERROR,
4397 		    "tl_wput:clts:unattached M_DATA"));
4398 		if (!closing) {
4399 			tl_merror(wq, mp, EPROTO);
4400 		} else {
4401 			freemsg(mp);
4402 		}
4403 		return;
4404 	}
4405 
4406 	/*
4407 	 * If the endpoint is closing it should still forward any data to the
4408 	 * peer (if it has one). If it is not allowed to forward it can just
4409 	 * free the message.
4410 	 */
4411 	if (closing &&
4412 	    (tep->te_state != TS_DATA_XFER) &&
4413 	    (tep->te_state != TS_WREQ_ORDREL)) {
4414 		freemsg(mp);
4415 		return;
4416 	}
4417 
4418 	if (DB_TYPE(mp) == M_PROTO) {
4419 		if (prim->type == T_DATA_REQ &&
4420 		    msz < sizeof (struct T_data_req)) {
4421 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4422 				SL_TRACE|SL_ERROR,
4423 				"tl_data:T_DATA_REQ:invalid message"));
4424 			if (!closing) {
4425 				tl_merror(wq, mp, EPROTO);
4426 			} else {
4427 				freemsg(mp);
4428 			}
4429 			return;
4430 		} else if (prim->type == T_OPTDATA_REQ &&
4431 		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4432 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4433 			    SL_TRACE|SL_ERROR,
4434 			    "tl_data:T_OPTDATA_REQ:invalid message"));
4435 			if (!closing) {
4436 				tl_merror(wq, mp, EPROTO);
4437 			} else {
4438 				freemsg(mp);
4439 			}
4440 			return;
4441 		}
4442 	}
4443 
4444 	/*
4445 	 * connection oriented provider
4446 	 */
4447 	switch (tep->te_state) {
4448 	case TS_IDLE:
4449 		/*
4450 		 * Other end not here - do nothing.
4451 		 */
4452 		freemsg(mp);
4453 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4454 		    "tl_data:cots with endpoint idle"));
4455 		return;
4456 
4457 	case TS_DATA_XFER:
4458 		/* valid states */
4459 		if (tep->te_conp != NULL)
4460 			break;
4461 
4462 		if (tep->te_oconp == NULL) {
4463 			if (!closing) {
4464 				tl_merror(wq, mp, EPROTO);
4465 			} else {
4466 				freemsg(mp);
4467 			}
4468 			return;
4469 		}
4470 		/*
4471 		 * For a socket the T_CONN_CON is sent early thus
4472 		 * the peer might not yet have accepted the connection.
4473 		 * If we are closing queue the packet with the T_CONN_IND.
4474 		 * Otherwise defer processing the packet until the peer
4475 		 * accepts the connection.
4476 		 * Note that the queue is noenabled when we go into this
4477 		 * state.
4478 		 */
4479 		if (!closing) {
4480 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4481 			    SL_TRACE|SL_ERROR,
4482 			    "tl_data: ocon"));
4483 			TL_PUTBQ(tep, mp);
4484 			return;
4485 		}
4486 		if (DB_TYPE(mp) == M_PROTO) {
4487 			if (msz < sizeof (t_scalar_t)) {
4488 				freemsg(mp);
4489 				return;
4490 			}
4491 			/* reuse message block - just change REQ to IND */
4492 			if (prim->type == T_DATA_REQ)
4493 				prim->type = T_DATA_IND;
4494 			else
4495 				prim->type = T_OPTDATA_IND;
4496 		}
4497 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4498 		return;
4499 
4500 	case TS_WREQ_ORDREL:
4501 		if (tep->te_conp == NULL) {
4502 			/*
4503 			 * Other end closed - generate discon_ind
4504 			 * with reason 0 to cause an EPIPE but no
4505 			 * read side error on AF_UNIX sockets.
4506 			 */
4507 			freemsg(mp);
4508 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4509 			    SL_TRACE|SL_ERROR,
4510 			    "tl_data: WREQ_ORDREL and no peer"));
4511 			tl_discon_ind(tep, 0);
4512 			return;
4513 		}
4514 		break;
4515 
4516 	default:
4517 		/* invalid state for event TE_DATA_REQ */
4518 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4519 		    "tl_data:cots:out of state"));
4520 		tl_merror(wq, mp, EPROTO);
4521 		return;
4522 	}
4523 	/*
4524 	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4525 	 * (State stays same on this event)
4526 	 */
4527 
4528 	/*
4529 	 * get connected endpoint
4530 	 */
4531 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4532 		freemsg(mp);
4533 		/* Peer closed */
4534 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4535 		    "tl_data: peer gone"));
4536 		return;
4537 	}
4538 
4539 	ASSERT(tep->te_serializer == peer_tep->te_serializer);
4540 	peer_rq = peer_tep->te_rq;
4541 
4542 	/*
4543 	 * Put it back if flow controlled
4544 	 * Note: Messages already on queue when we are closing is bounded
4545 	 * so we can ignore flow control.
4546 	 */
4547 	if (!canputnext(peer_rq) && !closing) {
4548 		TL_PUTBQ(tep, mp);
4549 		return;
4550 	}
4551 
4552 	/*
4553 	 * validate peer state
4554 	 */
4555 	switch (peer_tep->te_state) {
4556 	case TS_DATA_XFER:
4557 	case TS_WIND_ORDREL:
4558 		/* valid states */
4559 		break;
4560 	default:
4561 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4562 		    "tl_data:rx side:invalid state"));
4563 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4564 		return;
4565 	}
4566 	if (DB_TYPE(mp) == M_PROTO) {
4567 		/* reuse message block - just change REQ to IND */
4568 		if (prim->type == T_DATA_REQ)
4569 			prim->type = T_DATA_IND;
4570 		else
4571 			prim->type = T_OPTDATA_IND;
4572 	}
4573 	/*
4574 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4575 	 * (peer state stays same on this event)
4576 	 */
4577 	/*
4578 	 * send data to connected peer
4579 	 */
4580 	putnext(peer_rq, mp);
4581 }
4582 
4583 
4584 
4585 static void
4586 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4587 {
4588 	queue_t			*wq = tep->te_wq;
4589 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4590 	ssize_t			msz = MBLKL(mp);
4591 	tl_endpt_t		*peer_tep;
4592 	queue_t			*peer_rq;
4593 	boolean_t		closing = tep->te_closing;
4594 
4595 	if (msz < sizeof (struct T_exdata_req)) {
4596 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4597 		    "tl_exdata:invalid message"));
4598 		if (!closing) {
4599 			tl_merror(wq, mp, EPROTO);
4600 		} else {
4601 			freemsg(mp);
4602 		}
4603 		return;
4604 	}
4605 
4606 	/*
4607 	 * If the endpoint is closing it should still forward any data to the
4608 	 * peer (if it has one). If it is not allowed to forward it can just
4609 	 * free the message.
4610 	 */
4611 	if (closing &&
4612 	    (tep->te_state != TS_DATA_XFER) &&
4613 	    (tep->te_state != TS_WREQ_ORDREL)) {
4614 		freemsg(mp);
4615 		return;
4616 	}
4617 
4618 	/*
4619 	 * validate state
4620 	 */
4621 	switch (tep->te_state) {
4622 	case TS_IDLE:
4623 		/*
4624 		 * Other end not here - do nothing.
4625 		 */
4626 		freemsg(mp);
4627 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4628 		    "tl_exdata:cots with endpoint idle"));
4629 		return;
4630 
4631 	case TS_DATA_XFER:
4632 		/* valid states */
4633 		if (tep->te_conp != NULL)
4634 			break;
4635 
4636 		if (tep->te_oconp == NULL) {
4637 			if (!closing) {
4638 				tl_merror(wq, mp, EPROTO);
4639 			} else {
4640 				freemsg(mp);
4641 			}
4642 			return;
4643 		}
4644 		/*
4645 		 * For a socket the T_CONN_CON is sent early thus
4646 		 * the peer might not yet have accepted the connection.
4647 		 * If we are closing queue the packet with the T_CONN_IND.
4648 		 * Otherwise defer processing the packet until the peer
4649 		 * accepts the connection.
4650 		 * Note that the queue is noenabled when we go into this
4651 		 * state.
4652 		 */
4653 		if (!closing) {
4654 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4655 			    SL_TRACE|SL_ERROR,
4656 			    "tl_exdata: ocon"));
4657 			TL_PUTBQ(tep, mp);
4658 			return;
4659 		}
4660 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4661 		    "tl_exdata: closing socket ocon"));
4662 		prim->type = T_EXDATA_IND;
4663 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4664 		return;
4665 
4666 	case TS_WREQ_ORDREL:
4667 		if (tep->te_conp == NULL) {
4668 			/*
4669 			 * Other end closed - generate discon_ind
4670 			 * with reason 0 to cause an EPIPE but no
4671 			 * read side error on AF_UNIX sockets.
4672 			 */
4673 			freemsg(mp);
4674 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4675 			    SL_TRACE|SL_ERROR,
4676 			    "tl_exdata: WREQ_ORDREL and no peer"));
4677 			tl_discon_ind(tep, 0);
4678 			return;
4679 		}
4680 		break;
4681 
4682 	default:
4683 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4684 		    SL_TRACE|SL_ERROR,
4685 		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4686 		    tep->te_state));
4687 		tl_merror(wq, mp, EPROTO);
4688 		return;
4689 	}
4690 	/*
4691 	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4692 	 * (state stays same on this event)
4693 	 */
4694 
4695 	/*
4696 	 * get connected endpoint
4697 	 */
4698 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4699 		freemsg(mp);
4700 		/* Peer closed */
4701 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4702 		    "tl_exdata: peer gone"));
4703 		return;
4704 	}
4705 
4706 	peer_rq = peer_tep->te_rq;
4707 
4708 	/*
4709 	 * Put it back if flow controlled
4710 	 * Note: Messages already on queue when we are closing is bounded
4711 	 * so we can ignore flow control.
4712 	 */
4713 	if (!canputnext(peer_rq) && !closing) {
4714 		TL_PUTBQ(tep, mp);
4715 		return;
4716 	}
4717 
4718 	/*
4719 	 * validate state on peer
4720 	 */
4721 	switch (peer_tep->te_state) {
4722 	case TS_DATA_XFER:
4723 	case TS_WIND_ORDREL:
4724 		/* valid states */
4725 		break;
4726 	default:
4727 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4728 		    "tl_exdata:rx side:invalid state"));
4729 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4730 		return;
4731 	}
4732 	/*
4733 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4734 	 * (peer state stays same on this event)
4735 	 */
4736 	/*
4737 	 * reuse message block
4738 	 */
4739 	prim->type = T_EXDATA_IND;
4740 
4741 	/*
4742 	 * send data to connected peer
4743 	 */
4744 	putnext(peer_rq, mp);
4745 }
4746 
4747 
4748 
4749 static void
4750 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4751 {
4752 	queue_t			*wq =  tep->te_wq;
4753 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4754 	ssize_t			msz = MBLKL(mp);
4755 	tl_endpt_t		*peer_tep;
4756 	queue_t			*peer_rq;
4757 	boolean_t		closing = tep->te_closing;
4758 
4759 	if (msz < sizeof (struct T_ordrel_req)) {
4760 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4761 		    "tl_ordrel:invalid message"));
4762 		if (!closing) {
4763 			tl_merror(wq, mp, EPROTO);
4764 		} else {
4765 			freemsg(mp);
4766 		}
4767 		return;
4768 	}
4769 
4770 	/*
4771 	 * validate state
4772 	 */
4773 	switch (tep->te_state) {
4774 	case TS_DATA_XFER:
4775 	case TS_WREQ_ORDREL:
4776 		/* valid states */
4777 		if (tep->te_conp != NULL)
4778 			break;
4779 
4780 		if (tep->te_oconp == NULL)
4781 			break;
4782 
4783 		/*
4784 		 * For a socket the T_CONN_CON is sent early thus
4785 		 * the peer might not yet have accepted the connection.
4786 		 * If we are closing queue the packet with the T_CONN_IND.
4787 		 * Otherwise defer processing the packet until the peer
4788 		 * accepts the connection.
4789 		 * Note that the queue is noenabled when we go into this
4790 		 * state.
4791 		 */
4792 		if (!closing) {
4793 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4794 			    SL_TRACE|SL_ERROR,
4795 			    "tl_ordlrel: ocon"));
4796 			TL_PUTBQ(tep, mp);
4797 			return;
4798 		}
4799 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4800 		    "tl_ordlrel: closing socket ocon"));
4801 		prim->type = T_ORDREL_IND;
4802 		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4803 		return;
4804 
4805 	default:
4806 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4807 		    SL_TRACE|SL_ERROR,
4808 		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4809 		    tep->te_state));
4810 		if (!closing) {
4811 			tl_merror(wq, mp, EPROTO);
4812 		} else {
4813 			freemsg(mp);
4814 		}
4815 		return;
4816 	}
4817 	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4818 
4819 	/*
4820 	 * get connected endpoint
4821 	 */
4822 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4823 		/* Peer closed */
4824 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4825 		    "tl_ordrel: peer gone"));
4826 		freemsg(mp);
4827 		return;
4828 	}
4829 
4830 	peer_rq = peer_tep->te_rq;
4831 
4832 	/*
4833 	 * Put it back if flow controlled except when we are closing.
4834 	 * Note: Messages already on queue when we are closing is bounded
4835 	 * so we can ignore flow control.
4836 	 */
4837 	if (! canputnext(peer_rq) && !closing) {
4838 		TL_PUTBQ(tep, mp);
4839 		return;
4840 	}
4841 
4842 	/*
4843 	 * validate state on peer
4844 	 */
4845 	switch (peer_tep->te_state) {
4846 	case TS_DATA_XFER:
4847 	case TS_WIND_ORDREL:
4848 		/* valid states */
4849 		break;
4850 	default:
4851 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4852 		    "tl_ordrel:rx side:invalid state"));
4853 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4854 		return;
4855 	}
4856 	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4857 
4858 	/*
4859 	 * reuse message block
4860 	 */
4861 	prim->type = T_ORDREL_IND;
4862 	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4863 	    "tl_ordrel: send ordrel_ind"));
4864 
4865 	/*
4866 	 * send data to connected peer
4867 	 */
4868 	putnext(peer_rq, mp);
4869 }
4870 
4871 
4872 /*
4873  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4874  */
4875 static void
4876 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4877 {
4878 	size_t			err_sz;
4879 	tl_endpt_t		*tep;
4880 	struct T_unitdata_req	*udreq;
4881 	mblk_t			*err_mp;
4882 	t_scalar_t		alen;
4883 	t_scalar_t		olen;
4884 	struct T_uderror_ind	*uderr;
4885 	uchar_t			*addr_startp;
4886 
4887 	err_sz = sizeof (struct T_uderror_ind);
4888 	tep = (tl_endpt_t *)wq->q_ptr;
4889 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4890 	alen = udreq->DEST_length;
4891 	olen = udreq->OPT_length;
4892 
4893 	if (alen > 0)
4894 		err_sz = T_ALIGN(err_sz + alen);
4895 	if (olen > 0)
4896 		err_sz += olen;
4897 
4898 	err_mp = allocb(err_sz, BPRI_MED);
4899 	if (! err_mp) {
4900 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4901 		    "tl_uderr:allocb failure"));
4902 		/*
4903 		 * Note: no rollback of state needed as it does
4904 		 * not change in connectionless transport
4905 		 */
4906 		tl_memrecover(wq, mp, err_sz);
4907 		return;
4908 	}
4909 
4910 	DB_TYPE(err_mp) = M_PROTO;
4911 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
4912 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4913 	uderr->PRIM_type = T_UDERROR_IND;
4914 	uderr->ERROR_type = err;
4915 	uderr->DEST_length = alen;
4916 	uderr->OPT_length = olen;
4917 	if (alen <= 0) {
4918 		uderr->DEST_offset = 0;
4919 	} else {
4920 		uderr->DEST_offset =
4921 		    (t_scalar_t)sizeof (struct T_uderror_ind);
4922 		addr_startp  = mp->b_rptr + udreq->DEST_offset;
4923 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4924 		    (size_t)alen);
4925 	}
4926 	if (olen <= 0) {
4927 		uderr->OPT_offset = 0;
4928 	} else {
4929 		uderr->OPT_offset =
4930 		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4931 		    uderr->DEST_length);
4932 		addr_startp  = mp->b_rptr + udreq->OPT_offset;
4933 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4934 		    (size_t)olen);
4935 	}
4936 	freemsg(mp);
4937 
4938 	/*
4939 	 * send indication message
4940 	 */
4941 	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4942 
4943 	qreply(wq, err_mp);
4944 }
4945 
4946 static void
4947 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4948 {
4949 	queue_t *wq = tep->te_wq;
4950 
4951 	if (!tep->te_closing && (wq->q_first != NULL)) {
4952 		TL_PUTQ(tep, mp);
4953 	} else if (tep->te_rq != NULL)
4954 		tl_unitdata(mp, tep);
4955 	else
4956 		freemsg(mp);
4957 
4958 	tl_serializer_exit(tep);
4959 	tl_refrele(tep);
4960 }
4961 
4962 /*
4963  * Handle T_unitdata_req.
4964  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4965  * If this is a socket pass through options unmodified.
4966  */
4967 static void
4968 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4969 {
4970 	queue_t			*wq = tep->te_wq;
4971 	soux_addr_t		ux_addr;
4972 	tl_addr_t		destaddr;
4973 	uchar_t			*addr_startp;
4974 	tl_endpt_t		*peer_tep;
4975 	struct T_unitdata_ind	*udind;
4976 	struct T_unitdata_req	*udreq;
4977 	ssize_t			msz, ui_sz;
4978 	t_scalar_t		alen, aoff, olen, ooff;
4979 	t_scalar_t		oldolen = 0;
4980 
4981 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4982 	msz = MBLKL(mp);
4983 
4984 	/*
4985 	 * validate the state
4986 	 */
4987 	if (tep->te_state != TS_IDLE) {
4988 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4989 		    SL_TRACE|SL_ERROR,
4990 		    "tl_wput:T_CONN_REQ:out of state"));
4991 		tl_merror(wq, mp, EPROTO);
4992 		return;
4993 	}
4994 	/*
4995 	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
4996 	 * (state does not change on this event)
4997 	 */
4998 
4999 	/*
5000 	 * validate the message
5001 	 * Note: dereference fields in struct inside message only
5002 	 * after validating the message length.
5003 	 */
5004 	if (msz < sizeof (struct T_unitdata_req)) {
5005 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5006 		    "tl_unitdata:invalid message length"));
5007 		tl_merror(wq, mp, EINVAL);
5008 		return;
5009 	}
5010 	alen = udreq->DEST_length;
5011 	aoff = udreq->DEST_offset;
5012 	oldolen = olen = udreq->OPT_length;
5013 	ooff = udreq->OPT_offset;
5014 	if (olen == 0)
5015 		ooff = 0;
5016 
5017 	if (IS_SOCKET(tep)) {
5018 		if ((alen != TL_SOUX_ADDRLEN) ||
5019 		    (aoff < 0) ||
5020 		    (aoff + alen > msz) ||
5021 		    (olen < 0) || (ooff < 0) ||
5022 		    ((olen > 0) && ((ooff + olen) > msz))) {
5023 			(void) (STRLOG(TL_ID, tep->te_minor,
5024 			    1, SL_TRACE|SL_ERROR,
5025 			    "tl_unitdata_req: invalid socket addr "
5026 			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5027 			    (int)msz, alen, aoff, olen, ooff));
5028 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5029 			return;
5030 		}
5031 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5032 
5033 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5034 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5035 			(void) (STRLOG(TL_ID, tep->te_minor,
5036 			    1, SL_TRACE|SL_ERROR,
5037 			    "tl_conn_req: invalid socket magic"));
5038 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5039 			return;
5040 		}
5041 	} else {
5042 		if ((alen < 0) ||
5043 		    (aoff < 0) ||
5044 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5045 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5046 		    ((aoff + alen) < 0) ||
5047 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5048 		    (olen < 0) ||
5049 		    (ooff < 0) ||
5050 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5051 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5052 				    SL_TRACE|SL_ERROR,
5053 				    "tl_unitdata:invalid unit data message"));
5054 			tl_merror(wq, mp, EINVAL);
5055 			return;
5056 		}
5057 	}
5058 
5059 	/* Options not supported unless it's a socket */
5060 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5061 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5062 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5063 		tl_uderr(wq, mp, EPROTO);
5064 		return;
5065 	}
5066 #ifdef DEBUG
5067 	/*
5068 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5069 	 * if (! assertion)
5070 	 *	log warning;
5071 	 */
5072 	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5073 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5074 		    "tl_unitdata:addr overlaps TPI message"));
5075 	}
5076 #endif
5077 	/*
5078 	 * get destination endpoint
5079 	 */
5080 	destaddr.ta_alen = alen;
5081 	destaddr.ta_abuf = mp->b_rptr + aoff;
5082 	destaddr.ta_zoneid = tep->te_zoneid;
5083 
5084 	/*
5085 	 * Check whether the destination is the same that was used previously
5086 	 * and the destination endpoint is in the right state. If something is
5087 	 * wrong, find destination again and cache it.
5088 	 */
5089 	peer_tep = tep->te_lastep;
5090 
5091 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5092 	    (peer_tep->te_state != TS_IDLE) ||
5093 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5094 		/*
5095 		 * Not the same as cached destination , need to find the right
5096 		 * destination.
5097 		 */
5098 		peer_tep = (IS_SOCKET(tep) ?
5099 		    tl_sock_find_peer(tep, &ux_addr) :
5100 		    tl_find_peer(tep, &destaddr));
5101 
5102 		if (peer_tep == NULL) {
5103 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5104 			    SL_TRACE|SL_ERROR,
5105 			    "tl_unitdata:no one at destination address"));
5106 			tl_uderr(wq, mp, ECONNRESET);
5107 			return;
5108 		}
5109 
5110 		/*
5111 		 * Cache the new peer.
5112 		 */
5113 		if (tep->te_lastep != NULL)
5114 			tl_refrele(tep->te_lastep);
5115 
5116 		tep->te_lastep = peer_tep;
5117 	}
5118 
5119 	if (peer_tep->te_state != TS_IDLE) {
5120 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5121 		    "tl_unitdata:provider in invalid state"));
5122 		tl_uderr(wq, mp, EPROTO);
5123 		return;
5124 	}
5125 
5126 	ASSERT(peer_tep->te_rq != NULL);
5127 
5128 	/*
5129 	 * Put it back if flow controlled except when we are closing.
5130 	 * Note: Messages already on queue when we are closing is bounded
5131 	 * so we can ignore flow control.
5132 	 */
5133 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5134 		/* record what we are flow controlled on */
5135 		if (tep->te_flowq != NULL) {
5136 			list_remove(&tep->te_flowq->te_flowlist, tep);
5137 		}
5138 		list_insert_head(&peer_tep->te_flowlist, tep);
5139 		tep->te_flowq = peer_tep;
5140 		TL_PUTBQ(tep, mp);
5141 		return;
5142 	}
5143 	/*
5144 	 * prepare indication message
5145 	 */
5146 
5147 	/*
5148 	 * calculate length of message
5149 	 */
5150 	if (peer_tep->te_flag & TL_SETCRED) {
5151 		ASSERT(olen == 0);
5152 		olen = (t_scalar_t)sizeof (struct opthdr) +
5153 		    OPTLEN(sizeof (tl_credopt_t));
5154 					/* 1 option only */
5155 	} else if (peer_tep->te_flag & TL_SETUCRED) {
5156 		ASSERT(olen == 0);
5157 		olen = (t_scalar_t)sizeof (struct opthdr) + OPTLEN(ucredsize);
5158 					/* 1 option only */
5159 	} else if (peer_tep->te_flag & TL_SOCKUCRED) {
5160 		/* Possibly more than one option */
5161 		olen += (t_scalar_t)sizeof (struct T_opthdr) +
5162 		    OPTLEN(ucredsize);
5163 	}
5164 
5165 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5166 	    olen;
5167 	/*
5168 	 * If the unitdata_ind fits and we are not adding options
5169 	 * reuse the udreq mblk.
5170 	 */
5171 	if (msz >= ui_sz && alen >= tep->te_alen &&
5172 	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5173 		/*
5174 		 * Reuse the original mblk. Leave options in place.
5175 		 */
5176 		udind =  (struct T_unitdata_ind *)mp->b_rptr;
5177 		udind->PRIM_type = T_UNITDATA_IND;
5178 		udind->SRC_length = tep->te_alen;
5179 		addr_startp = mp->b_rptr + udind->SRC_offset;
5180 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5181 	} else {
5182 		/* Allocate a new T_unidata_ind message */
5183 		mblk_t *ui_mp;
5184 
5185 		ui_mp = allocb(ui_sz, BPRI_MED);
5186 		if (! ui_mp) {
5187 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5188 			    "tl_unitdata:allocb failure:message queued"));
5189 			tl_memrecover(wq, mp, ui_sz);
5190 			return;
5191 		}
5192 
5193 		/*
5194 		 * fill in T_UNITDATA_IND contents
5195 		 */
5196 		DB_TYPE(ui_mp) = M_PROTO;
5197 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5198 		udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5199 		udind->PRIM_type = T_UNITDATA_IND;
5200 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5201 		udind->SRC_length = tep->te_alen;
5202 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5203 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5204 		udind->OPT_offset =
5205 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5206 		udind->OPT_length = olen;
5207 		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5208 			if (oldolen != 0) {
5209 				bcopy((void *)((uintptr_t)udreq + ooff),
5210 				    (void *)((uintptr_t)udind +
5211 				    udind->OPT_offset),
5212 				    oldolen);
5213 			}
5214 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5215 			    oldolen,
5216 			    DB_CREDDEF(mp, tep->te_credp), TLPID(mp, tep),
5217 			    peer_tep->te_flag, peer_tep->te_credp);
5218 		} else {
5219 			bcopy((void *)((uintptr_t)udreq + ooff),
5220 			    (void *)((uintptr_t)udind + udind->OPT_offset),
5221 			    olen);
5222 		}
5223 
5224 		/*
5225 		 * relink data blocks from mp to ui_mp
5226 		 */
5227 		ui_mp->b_cont = mp->b_cont;
5228 		freeb(mp);
5229 		mp = ui_mp;
5230 	}
5231 	/*
5232 	 * send indication message
5233 	 */
5234 	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5235 	putnext(peer_tep->te_rq, mp);
5236 }
5237 
5238 
5239 
5240 /*
5241  * Check if a given addr is in use.
5242  * Endpoint ptr returned or NULL if not found.
5243  * The name space is separate for each mode. This implies that
5244  * sockets get their own name space.
5245  */
5246 static tl_endpt_t *
5247 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5248 {
5249 	tl_endpt_t *peer_tep = NULL;
5250 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5251 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5252 
5253 	ASSERT(! IS_SOCKET(tep));
5254 
5255 	ASSERT(ap != NULL && ap->ta_alen > 0);
5256 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5257 	ASSERT(ap->ta_abuf != NULL);
5258 	ASSERT(EQUIV(rc == 0, peer_tep != NULL));
5259 	ASSERT(IMPLY(rc == 0,
5260 	    (tep->te_zoneid == peer_tep->te_zoneid) &&
5261 	    (tep->te_transport == peer_tep->te_transport)));
5262 
5263 	if ((rc == 0) && (peer_tep->te_closing)) {
5264 		tl_refrele(peer_tep);
5265 		peer_tep = NULL;
5266 	}
5267 
5268 	return (peer_tep);
5269 }
5270 
5271 /*
5272  * Find peer for a socket based on unix domain address.
5273  * For implicit addresses our peer can be found by minor number in ai hash. For
5274  * explicit binds we look vnode address at addr_hash.
5275  */
5276 static tl_endpt_t *
5277 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5278 {
5279 	tl_endpt_t *peer_tep = NULL;
5280 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5281 	    tep->te_aihash : tep->te_addrhash;
5282 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5283 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5284 
5285 	ASSERT(IS_SOCKET(tep));
5286 	ASSERT(EQUIV(rc == 0, peer_tep != NULL));
5287 	ASSERT(IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport)));
5288 
5289 	if (peer_tep != NULL) {
5290 		/* Don't attempt to use closing peer. */
5291 		if (peer_tep->te_closing)
5292 			goto errout;
5293 
5294 		/*
5295 		 * Cross-zone unix sockets are permitted, but for Trusted
5296 		 * Extensions only, the "server" for these must be in the
5297 		 * global zone.
5298 		 */
5299 		if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5300 		    is_system_labeled() &&
5301 		    (peer_tep->te_zoneid != GLOBAL_ZONEID))
5302 			goto errout;
5303 	}
5304 
5305 	return (peer_tep);
5306 
5307 errout:
5308 	tl_refrele(peer_tep);
5309 	return (NULL);
5310 }
5311 
5312 /*
5313  * Generate a free addr and return it in struct pointed by ap
5314  * but allocating space for address buffer.
5315  * The generated address will be at least 4 bytes long and, if req->ta_alen
5316  * exceeds 4 bytes, be req->ta_alen bytes long.
5317  *
5318  * If address is found it will be inserted in the hash.
5319  *
5320  * If req->ta_alen is larger than the default alen (4 bytes) the last
5321  * alen-4 bytes will always be the same as in req.
5322  *
5323  * Return 0 for failure.
5324  * Return non-zero for success.
5325  */
5326 static boolean_t
5327 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5328 {
5329 	t_scalar_t	alen;
5330 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5331 
5332 	ASSERT(tep->te_hash_hndl != NULL);
5333 	ASSERT(! IS_SOCKET(tep));
5334 
5335 	if (tep->te_hash_hndl == NULL)
5336 		return (B_FALSE);
5337 
5338 	/*
5339 	 * check if default addr is in use
5340 	 * if it is - bump it and try again
5341 	 */
5342 	if (req == NULL) {
5343 		alen = sizeof (uint32_t);
5344 	} else {
5345 		alen = max(req->ta_alen, sizeof (uint32_t));
5346 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5347 	}
5348 
5349 	if (tep->te_alen < alen) {
5350 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5351 
5352 		/*
5353 		 * Not enough space in tep->ta_ap to hold the address,
5354 		 * allocate a bigger space.
5355 		 */
5356 		if (abuf == NULL)
5357 			return (B_FALSE);
5358 
5359 		if (tep->te_alen > 0)
5360 			kmem_free(tep->te_abuf, tep->te_alen);
5361 
5362 		tep->te_alen = alen;
5363 		tep->te_abuf = abuf;
5364 	}
5365 
5366 	/* Copy in the address in req */
5367 	if (req != NULL) {
5368 		ASSERT(alen >= req->ta_alen);
5369 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5370 	}
5371 
5372 	/*
5373 	 * First try minor number then try default addresses.
5374 	 */
5375 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5376 
5377 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5378 		if (mod_hash_insert_reserve(tep->te_addrhash,
5379 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5380 		    tep->te_hash_hndl) == 0) {
5381 			/*
5382 			 * found free address
5383 			 */
5384 			tep->te_flag |= TL_ADDRHASHED;
5385 			tep->te_hash_hndl = NULL;
5386 
5387 			return (B_TRUE); /* successful return */
5388 		}
5389 		/*
5390 		 * Use default address.
5391 		 */
5392 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5393 		atomic_add_32(&tep->te_defaddr, 1);
5394 	}
5395 
5396 	/*
5397 	 * Failed to find anything.
5398 	 */
5399 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5400 	    "tl_get_any_addr:looped 2^32 times"));
5401 	return (B_FALSE);
5402 }
5403 
5404 /*
5405  * reallocb + set r/w ptrs to reflect size.
5406  */
5407 static mblk_t *
5408 tl_resizemp(mblk_t *mp, ssize_t new_size)
5409 {
5410 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5411 		return (NULL);
5412 
5413 	mp->b_rptr = DB_BASE(mp);
5414 	mp->b_wptr = mp->b_rptr + new_size;
5415 	return (mp);
5416 }
5417 
5418 static void
5419 tl_cl_backenable(tl_endpt_t *tep)
5420 {
5421 	list_t *l = &tep->te_flowlist;
5422 	tl_endpt_t *elp;
5423 
5424 	ASSERT(IS_CLTS(tep));
5425 
5426 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5427 		ASSERT(tep->te_ser == elp->te_ser);
5428 		ASSERT(elp->te_flowq == tep);
5429 		if (! elp->te_closing)
5430 			TL_QENABLE(elp);
5431 		elp->te_flowq = NULL;
5432 		list_remove(l, elp);
5433 	}
5434 }
5435 
5436 /*
5437  * Unconnect endpoints.
5438  */
5439 static void
5440 tl_co_unconnect(tl_endpt_t *tep)
5441 {
5442 	tl_endpt_t	*peer_tep = tep->te_conp;
5443 	tl_endpt_t	*srv_tep = tep->te_oconp;
5444 	list_t		*l;
5445 	tl_icon_t  	*tip;
5446 	tl_endpt_t	*cl_tep;
5447 	mblk_t		*d_mp;
5448 
5449 	ASSERT(IS_COTS(tep));
5450 	/*
5451 	 * If our peer is closing, don't use it.
5452 	 */
5453 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5454 		TL_UNCONNECT(tep->te_conp);
5455 		peer_tep = NULL;
5456 	}
5457 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5458 		TL_UNCONNECT(tep->te_oconp);
5459 		srv_tep = NULL;
5460 	}
5461 
5462 	if (tep->te_nicon > 0) {
5463 		l = &tep->te_iconp;
5464 		/*
5465 		 * If incoming requests pending, change state
5466 		 * of clients on disconnect ind event and send
5467 		 * discon_ind pdu to modules above them
5468 		 * for server: all clients get disconnect
5469 		 */
5470 
5471 		while (tep->te_nicon > 0) {
5472 			tip    = list_head(l);
5473 			cl_tep = tip->ti_tep;
5474 
5475 			if (cl_tep == NULL) {
5476 				tl_freetip(tep, tip);
5477 				continue;
5478 			}
5479 
5480 			if (cl_tep->te_oconp != NULL) {
5481 				ASSERT(cl_tep != cl_tep->te_oconp);
5482 				TL_UNCONNECT(cl_tep->te_oconp);
5483 			}
5484 
5485 			if (cl_tep->te_closing) {
5486 				tl_freetip(tep, tip);
5487 				continue;
5488 			}
5489 
5490 			enableok(cl_tep->te_wq);
5491 			TL_QENABLE(cl_tep);
5492 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5493 			if (d_mp != NULL) {
5494 				cl_tep->te_state = TS_IDLE;
5495 				putnext(cl_tep->te_rq, d_mp);
5496 			} else {
5497 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5498 				    SL_TRACE|SL_ERROR,
5499 				    "tl_co_unconnect:icmng: "
5500 				    "allocb failure"));
5501 			}
5502 			tl_freetip(tep, tip);
5503 		}
5504 	} else if (srv_tep != NULL) {
5505 		/*
5506 		 * If outgoing request pending, change state
5507 		 * of server on discon ind event
5508 		 */
5509 
5510 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5511 		    IS_COTSORD(srv_tep) &&
5512 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5513 			/*
5514 			 * Queue ordrel_ind for server to be picked up
5515 			 * when the connection is accepted.
5516 			 */
5517 			d_mp = tl_ordrel_ind_alloc();
5518 		} else {
5519 			/*
5520 			 * send discon_ind to server
5521 			 */
5522 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5523 		}
5524 		if (d_mp == NULL) {
5525 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5526 			    SL_TRACE|SL_ERROR,
5527 			    "tl_co_unconnect:outgoing:allocb failure"));
5528 			TL_UNCONNECT(tep->te_oconp);
5529 			goto discon_peer;
5530 		}
5531 
5532 		/*
5533 		 * If this is a socket the T_DISCON_IND is queued with
5534 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5535 		 * from the list of pending connections.
5536 		 * Note that when te_oconp is set the peer better have
5537 		 * a t_connind_t for the client.
5538 		 */
5539 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5540 			/*
5541 			 * Queue the disconnection message.
5542 			 */
5543 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5544 		} else {
5545 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5546 			if (tip == NULL) {
5547 				freemsg(d_mp);
5548 			} else {
5549 				ASSERT(tep == tip->ti_tep);
5550 				ASSERT(tep->te_ser == srv_tep->te_ser);
5551 				/*
5552 				 * Delete tip from the server list.
5553 				 */
5554 				if (srv_tep->te_nicon == 1) {
5555 					srv_tep->te_state =
5556 					    NEXTSTATE(TE_DISCON_IND2,
5557 					    srv_tep->te_state);
5558 				} else {
5559 					srv_tep->te_state =
5560 					    NEXTSTATE(TE_DISCON_IND3,
5561 					    srv_tep->te_state);
5562 				}
5563 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5564 				    T_DISCON_IND);
5565 				putnext(srv_tep->te_rq, d_mp);
5566 				tl_freetip(srv_tep, tip);
5567 			}
5568 			TL_UNCONNECT(tep->te_oconp);
5569 			srv_tep = NULL;
5570 		}
5571 	} else if (peer_tep != NULL) {
5572 		/*
5573 		 * unconnect existing connection
5574 		 * If connected, change state of peer on
5575 		 * discon ind event and send discon ind pdu
5576 		 * to module above it
5577 		 */
5578 
5579 		ASSERT(tep->te_ser == peer_tep->te_ser);
5580 		if (IS_COTSORD(peer_tep) &&
5581 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5582 		    peer_tep->te_state == TS_DATA_XFER)) {
5583 			/*
5584 			 * send ordrel ind
5585 			 */
5586 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5587 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5588 			    peer_tep->te_state,
5589 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5590 			d_mp = tl_ordrel_ind_alloc();
5591 			if (! d_mp) {
5592 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5593 				    SL_TRACE|SL_ERROR,
5594 				    "tl_co_unconnect:connected:"
5595 				    "allocb failure"));
5596 				/*
5597 				 * Continue with cleaning up peer as
5598 				 * this side may go away with the close
5599 				 */
5600 				TL_QENABLE(peer_tep);
5601 				goto discon_peer;
5602 			}
5603 			peer_tep->te_state =
5604 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5605 
5606 			putnext(peer_tep->te_rq, d_mp);
5607 			/*
5608 			 * Handle flow control case.  This will generate
5609 			 * a t_discon_ind message with reason 0 if there
5610 			 * is data queued on the write side.
5611 			 */
5612 			TL_QENABLE(peer_tep);
5613 		} else if (IS_COTSORD(peer_tep) &&
5614 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5615 			/*
5616 			 * Sent an ordrel_ind. We send a discon with
5617 			 * with error 0 to inform that the peer is gone.
5618 			 */
5619 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5620 			    SL_TRACE|SL_ERROR,
5621 			    "tl_co_unconnect: discon in state %d",
5622 			    tep->te_state));
5623 			tl_discon_ind(peer_tep, 0);
5624 		} else {
5625 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5626 			    SL_TRACE|SL_ERROR,
5627 			    "tl_co_unconnect: state %d", tep->te_state));
5628 			tl_discon_ind(peer_tep, ECONNRESET);
5629 		}
5630 
5631 discon_peer:
5632 		/*
5633 		 * Disconnect cross-pointers only for close
5634 		 */
5635 		if (tep->te_closing) {
5636 			peer_tep = tep->te_conp;
5637 			TL_REMOVE_PEER(peer_tep->te_conp);
5638 			TL_REMOVE_PEER(tep->te_conp);
5639 		}
5640 	}
5641 }
5642 
5643 /*
5644  * Note: The following routine does not recover from allocb()
5645  * failures
5646  * The reason should be from the <sys/errno.h> space.
5647  */
5648 static void
5649 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5650 {
5651 	mblk_t *d_mp;
5652 
5653 	if (tep->te_closing)
5654 		return;
5655 
5656 	/*
5657 	 * flush the queues.
5658 	 */
5659 	flushq(tep->te_rq, FLUSHDATA);
5660 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5661 
5662 	/*
5663 	 * send discon ind
5664 	 */
5665 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5666 	if (! d_mp) {
5667 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5668 		    "tl_discon_ind:allocb failure"));
5669 		return;
5670 	}
5671 	tep->te_state = TS_IDLE;
5672 	putnext(tep->te_rq, d_mp);
5673 }
5674 
5675 /*
5676  * Note: The following routine does not recover from allocb()
5677  * failures
5678  * The reason should be from the <sys/errno.h> space.
5679  */
5680 static mblk_t *
5681 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5682 {
5683 	mblk_t *mp;
5684 	struct T_discon_ind *tdi;
5685 
5686 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5687 		DB_TYPE(mp) = M_PROTO;
5688 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5689 		tdi = (struct T_discon_ind *)mp->b_rptr;
5690 		tdi->PRIM_type = T_DISCON_IND;
5691 		tdi->DISCON_reason = reason;
5692 		tdi->SEQ_number = seqnum;
5693 	}
5694 	return (mp);
5695 }
5696 
5697 
5698 /*
5699  * Note: The following routine does not recover from allocb()
5700  * failures
5701  */
5702 static mblk_t *
5703 tl_ordrel_ind_alloc(void)
5704 {
5705 	mblk_t *mp;
5706 	struct T_ordrel_ind *toi;
5707 
5708 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5709 		DB_TYPE(mp) = M_PROTO;
5710 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5711 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5712 		toi->PRIM_type = T_ORDREL_IND;
5713 	}
5714 	return (mp);
5715 }
5716 
5717 
5718 /*
5719  * Lookup the seqno in the list of queued connections.
5720  */
5721 static tl_icon_t *
5722 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5723 {
5724 	list_t *l = &tep->te_iconp;
5725 	tl_icon_t *tip = list_head(l);
5726 
5727 	ASSERT(seqno != 0);
5728 
5729 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5730 		;
5731 
5732 	return (tip);
5733 }
5734 
5735 /*
5736  * Queue data for a given T_CONN_IND while verifying that redundant
5737  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5738  * Used when the originator of the connection closes.
5739  */
5740 static void
5741 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5742 {
5743 	tl_icon_t		*tip;
5744 	mblk_t			**mpp, *mp;
5745 	int			prim, nprim;
5746 
5747 	if (nmp->b_datap->db_type == M_PROTO)
5748 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5749 	else
5750 		nprim = -1;	/* M_DATA */
5751 
5752 	tip = tl_icon_find(tep, seqno);
5753 	if (tip == NULL) {
5754 		freemsg(nmp);
5755 		return;
5756 	}
5757 
5758 	ASSERT(tip->ti_seqno != 0);
5759 	mpp = &tip->ti_mp;
5760 	while (*mpp != NULL) {
5761 		mp = *mpp;
5762 
5763 		if (mp->b_datap->db_type == M_PROTO)
5764 			prim = ((union T_primitives *)mp->b_rptr)->type;
5765 		else
5766 			prim = -1;	/* M_DATA */
5767 
5768 		/*
5769 		 * Allow nothing after a T_DISCON_IND
5770 		 */
5771 		if (prim == T_DISCON_IND) {
5772 			freemsg(nmp);
5773 			return;
5774 		}
5775 		/*
5776 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5777 		 */
5778 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5779 			freemsg(nmp);
5780 			return;
5781 		}
5782 		mpp = &(mp->b_next);
5783 	}
5784 	*mpp = nmp;
5785 }
5786 
5787 /*
5788  * Verify if a certain TPI primitive exists on the connind queue.
5789  * Use prim -1 for M_DATA.
5790  * Return non-zero if found.
5791  */
5792 static boolean_t
5793 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5794 {
5795 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5796 	boolean_t found = B_FALSE;
5797 
5798 	if (tip != NULL) {
5799 		mblk_t *mp;
5800 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5801 			found = (DB_TYPE(mp) == M_PROTO &&
5802 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5803 		}
5804 	}
5805 	return (found);
5806 }
5807 
5808 /*
5809  * Send the b_next mblk chain that has accumulated before the connection
5810  * was accepted. Perform the necessary state transitions.
5811  */
5812 static void
5813 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5814 {
5815 	mblk_t			*mp;
5816 	union T_primitives	*primp;
5817 
5818 	if (tep->te_closing) {
5819 		tl_icon_freemsgs(mpp);
5820 		return;
5821 	}
5822 
5823 	ASSERT(tep->te_state == TS_DATA_XFER);
5824 	ASSERT(tep->te_rq->q_first == NULL);
5825 
5826 	while ((mp = *mpp) != NULL) {
5827 		*mpp = mp->b_next;
5828 		mp->b_next = NULL;
5829 
5830 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5831 		switch (DB_TYPE(mp)) {
5832 		default:
5833 			freemsg(mp);
5834 			break;
5835 		case M_DATA:
5836 			putnext(tep->te_rq, mp);
5837 			break;
5838 		case M_PROTO:
5839 			primp = (union T_primitives *)mp->b_rptr;
5840 			switch (primp->type) {
5841 			case T_UNITDATA_IND:
5842 			case T_DATA_IND:
5843 			case T_OPTDATA_IND:
5844 			case T_EXDATA_IND:
5845 				putnext(tep->te_rq, mp);
5846 				break;
5847 			case T_ORDREL_IND:
5848 				tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5849 				    tep->te_state);
5850 				putnext(tep->te_rq, mp);
5851 				break;
5852 			case T_DISCON_IND:
5853 				tep->te_state = TS_IDLE;
5854 				putnext(tep->te_rq, mp);
5855 				break;
5856 			default:
5857 #ifdef DEBUG
5858 				cmn_err(CE_PANIC,
5859 				    "tl_icon_sendmsgs: unknown primitive");
5860 #endif /* DEBUG */
5861 				freemsg(mp);
5862 				break;
5863 			}
5864 			break;
5865 		}
5866 	}
5867 }
5868 
5869 /*
5870  * Free the b_next mblk chain that has accumulated before the connection
5871  * was accepted.
5872  */
5873 static void
5874 tl_icon_freemsgs(mblk_t **mpp)
5875 {
5876 	mblk_t *mp;
5877 
5878 	while ((mp = *mpp) != NULL) {
5879 		*mpp = mp->b_next;
5880 		mp->b_next = NULL;
5881 		freemsg(mp);
5882 	}
5883 }
5884 
5885 /*
5886  * Send M_ERROR
5887  * Note: assumes caller ensured enough space in mp or enough
5888  *	memory available. Does not attempt recovery from allocb()
5889  *	failures
5890  */
5891 
5892 static void
5893 tl_merror(queue_t *wq, mblk_t *mp, int error)
5894 {
5895 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5896 
5897 	if (tep->te_closing) {
5898 		freemsg(mp);
5899 		return;
5900 	}
5901 
5902 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
5903 	    SL_TRACE|SL_ERROR,
5904 	    "tl_merror: tep=%p, err=%d", (void *)tep, error));
5905 
5906 	/*
5907 	 * flush all messages on queue. we are shutting
5908 	 * the stream down on fatal error
5909 	 */
5910 	flushq(wq, FLUSHALL);
5911 	if (IS_COTS(tep)) {
5912 		/* connection oriented - unconnect endpoints */
5913 		tl_co_unconnect(tep);
5914 	}
5915 	if (mp->b_cont) {
5916 		freemsg(mp->b_cont);
5917 		mp->b_cont = NULL;
5918 	}
5919 
5920 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5921 		freemsg(mp);
5922 		mp = allocb(1, BPRI_HI);
5923 		if (!mp) {
5924 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5925 			    SL_TRACE|SL_ERROR,
5926 			    "tl_merror:M_PROTO: out of memory"));
5927 			return;
5928 		}
5929 	}
5930 	if (mp) {
5931 		DB_TYPE(mp) = M_ERROR;
5932 		mp->b_rptr = DB_BASE(mp);
5933 		*mp->b_rptr = (char)error;
5934 		mp->b_wptr = mp->b_rptr + sizeof (char);
5935 		qreply(wq, mp);
5936 	} else {
5937 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
5938 	}
5939 }
5940 
5941 static void
5942 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5943 {
5944 	if (flag & TL_SETCRED) {
5945 		struct opthdr *opt = (struct opthdr *)buf;
5946 		tl_credopt_t *tlcred;
5947 
5948 		opt->level = TL_PROT_LEVEL;
5949 		opt->name = TL_OPT_PEER_CRED;
5950 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5951 
5952 		tlcred = (tl_credopt_t *)(opt + 1);
5953 		tlcred->tc_uid = crgetuid(cr);
5954 		tlcred->tc_gid = crgetgid(cr);
5955 		tlcred->tc_ruid = crgetruid(cr);
5956 		tlcred->tc_rgid = crgetrgid(cr);
5957 		tlcred->tc_suid = crgetsuid(cr);
5958 		tlcred->tc_sgid = crgetsgid(cr);
5959 		tlcred->tc_ngroups = crgetngroups(cr);
5960 	} else if (flag & TL_SETUCRED) {
5961 		struct opthdr *opt = (struct opthdr *)buf;
5962 
5963 		opt->level = TL_PROT_LEVEL;
5964 		opt->name = TL_OPT_PEER_UCRED;
5965 		opt->len = (t_uscalar_t)OPTLEN(ucredsize);
5966 
5967 		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
5968 	} else {
5969 		struct T_opthdr *topt = (struct T_opthdr *)buf;
5970 		ASSERT(flag & TL_SOCKUCRED);
5971 
5972 		topt->level = SOL_SOCKET;
5973 		topt->name = SCM_UCRED;
5974 		topt->len = ucredsize + sizeof (*topt);
5975 		topt->status = 0;
5976 		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
5977 	}
5978 }
5979 
5980 /* ARGSUSED */
5981 static int
5982 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
5983 {
5984 	/* no default value processed in protocol specific code currently */
5985 	return (-1);
5986 }
5987 
5988 /* ARGSUSED */
5989 static int
5990 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
5991 {
5992 	int len;
5993 	tl_endpt_t *tep;
5994 	int *valp;
5995 
5996 	tep = (tl_endpt_t *)wq->q_ptr;
5997 
5998 	len = 0;
5999 
6000 	/*
6001 	 * Assumes: option level and name sanity check done elsewhere
6002 	 */
6003 
6004 	switch (level) {
6005 	case SOL_SOCKET:
6006 		if (! IS_SOCKET(tep))
6007 			break;
6008 		switch (name) {
6009 		case SO_RECVUCRED:
6010 			len = sizeof (int);
6011 			valp = (int *)ptr;
6012 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6013 			break;
6014 		default:
6015 			break;
6016 		}
6017 		break;
6018 	case TL_PROT_LEVEL:
6019 		switch (name) {
6020 		case TL_OPT_PEER_CRED:
6021 		case TL_OPT_PEER_UCRED:
6022 			/*
6023 			 * option not supposed to retrieved directly
6024 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6025 			 * when some internal flags set by other options
6026 			 * Direct retrieval always designed to fail(ignored)
6027 			 * for this option.
6028 			 */
6029 			break;
6030 		}
6031 	}
6032 	return (len);
6033 }
6034 
6035 /* ARGSUSED */
6036 static int
6037 tl_set_opt(
6038 	queue_t		*wq,
6039 	uint_t		mgmt_flags,
6040 	int		level,
6041 	int		name,
6042 	uint_t		inlen,
6043 	uchar_t		*invalp,
6044 	uint_t		*outlenp,
6045 	uchar_t		*outvalp,
6046 	void		*thisdg_attrs,
6047 	cred_t		*cr,
6048 	mblk_t		*mblk)
6049 {
6050 	int error;
6051 	tl_endpt_t *tep;
6052 
6053 	tep = (tl_endpt_t *)wq->q_ptr;
6054 
6055 	error = 0;		/* NOERROR */
6056 
6057 	/*
6058 	 * Assumes: option level and name sanity checks done elsewhere
6059 	 */
6060 
6061 	switch (level) {
6062 	case SOL_SOCKET:
6063 		if (! IS_SOCKET(tep)) {
6064 			error = EINVAL;
6065 			break;
6066 		}
6067 		/*
6068 		 * TBD: fill in other AF_UNIX socket options and then stop
6069 		 * returning error.
6070 		 */
6071 		switch (name) {
6072 		case SO_RECVUCRED:
6073 			/*
6074 			 * We only support this for datagram sockets;
6075 			 * getpeerucred handles the connection oriented
6076 			 * transports.
6077 			 */
6078 			if (! IS_CLTS(tep)) {
6079 				error = EINVAL;
6080 				break;
6081 			}
6082 			if (*(int *)invalp == 0)
6083 				tep->te_flag &= ~TL_SOCKUCRED;
6084 			else
6085 				tep->te_flag |= TL_SOCKUCRED;
6086 			break;
6087 		default:
6088 			error = EINVAL;
6089 			break;
6090 		}
6091 		break;
6092 	case TL_PROT_LEVEL:
6093 		switch (name) {
6094 		case TL_OPT_PEER_CRED:
6095 		case TL_OPT_PEER_UCRED:
6096 			/*
6097 			 * option not supposed to be set directly
6098 			 * Its value in initialized for each endpoint at
6099 			 * driver open time.
6100 			 * Direct setting always designed to fail for this
6101 			 * option.
6102 			 */
6103 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6104 			    SL_TRACE|SL_ERROR,
6105 			    "tl_set_opt: option is not supported"));
6106 			error = EPROTO;
6107 			break;
6108 		}
6109 	}
6110 	return (error);
6111 }
6112 
6113 
6114 static void
6115 tl_timer(void *arg)
6116 {
6117 	queue_t *wq = arg;
6118 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6119 
6120 	ASSERT(tep);
6121 
6122 	tep->te_timoutid = 0;
6123 
6124 	enableok(wq);
6125 	/*
6126 	 * Note: can call wsrv directly here and save context switch
6127 	 * Consider change when qtimeout (not timeout) is active
6128 	 */
6129 	qenable(wq);
6130 }
6131 
6132 static void
6133 tl_buffer(void *arg)
6134 {
6135 	queue_t *wq = arg;
6136 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6137 
6138 	ASSERT(tep);
6139 
6140 	tep->te_bufcid = 0;
6141 	tep->te_nowsrv = B_FALSE;
6142 
6143 	enableok(wq);
6144 	/*
6145 	 *  Note: can call wsrv directly here and save context switch
6146 	 * Consider change when qbufcall (not bufcall) is active
6147 	 */
6148 	qenable(wq);
6149 }
6150 
6151 static void
6152 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6153 {
6154 	tl_endpt_t *tep;
6155 
6156 	tep = (tl_endpt_t *)wq->q_ptr;
6157 
6158 	if (tep->te_closing) {
6159 		freemsg(mp);
6160 		return;
6161 	}
6162 	noenable(wq);
6163 
6164 	(void) insq(wq, wq->q_first, mp);
6165 
6166 	if (tep->te_bufcid || tep->te_timoutid) {
6167 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6168 		    "tl_memrecover:recover %p pending", (void *)wq));
6169 		return;
6170 	}
6171 
6172 	if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6173 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6174 		    drv_usectohz(TL_BUFWAIT));
6175 	}
6176 }
6177 
6178 static void
6179 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6180 {
6181 	ASSERT(tip->ti_seqno != 0);
6182 
6183 	if (tip->ti_mp != NULL) {
6184 		tl_icon_freemsgs(&tip->ti_mp);
6185 		tip->ti_mp = NULL;
6186 	}
6187 	if (tip->ti_tep != NULL) {
6188 		tl_refrele(tip->ti_tep);
6189 		tip->ti_tep = NULL;
6190 	}
6191 	list_remove(&tep->te_iconp, tip);
6192 	kmem_free(tip, sizeof (tl_icon_t));
6193 	tep->te_nicon--;
6194 }
6195 
6196 /*
6197  * Remove address from address hash.
6198  */
6199 static void
6200 tl_addr_unbind(tl_endpt_t *tep)
6201 {
6202 	tl_endpt_t *elp;
6203 
6204 	if (tep->te_flag & TL_ADDRHASHED) {
6205 		if (IS_SOCKET(tep)) {
6206 			(void) mod_hash_remove(tep->te_addrhash,
6207 			    (mod_hash_key_t)tep->te_vp,
6208 			    (mod_hash_val_t *)&elp);
6209 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6210 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6211 		} else {
6212 			(void) mod_hash_remove(tep->te_addrhash,
6213 			    (mod_hash_key_t)&tep->te_ap,
6214 			    (mod_hash_val_t *)&elp);
6215 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6216 			tep->te_alen = -1;
6217 			tep->te_abuf = NULL;
6218 		}
6219 		tep->te_flag &= ~TL_ADDRHASHED;
6220 	}
6221 }
6222