xref: /illumos-gate/usr/src/uts/common/io/tl.c (revision d6555420322a42c16b93414c29a62f8e841abc7b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Multithreaded STREAMS Local Transport Provider.
31  *
32  * OVERVIEW
33  * ========
34  *
35  * This driver provides TLI as well as socket semantics.  It provides
36  * connectionless, connection oriented, and connection oriented with orderly
37  * release transports for TLI and sockets. Each transport type has separate name
38  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
39  * this removes any name space conflicts when binding to socket style transport
40  * addresses.
41  *
42  * NOTE: There is one exception: Socket ticots and ticotsord transports share
43  * the same namespace. In fact, sockets always use ticotsord type transport.
44  *
45  * The driver mode is specified during open() by the minor number used for
46  * open.
47  *
48  *  The sockets in addition have the following semantic differences:
49  *  No support for passing up credentials (TL_SET[U]CRED).
50  *
51  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
52  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
53  *	T_OPTDATA_IND.
54  *
55  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
56  *	a T_CONN_RES is received from the acceptor. This means that a socket
57  *	connect will complete before the peer has called accept.
58  *
59  *
60  * MULTITHREADING
61  * ==============
62  *
63  * The driver does not use STREAMS protection mechanisms. Instead it uses a
64  * generic "serializer" abstraction. Most of the operations are executed behind
65  * the serializer and are, essentially single-threaded. All functions executed
66  * behind the same serializer are strictly serialized. So if one thread calls
67  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
68  * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
69  * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2)
70  * or bar(mp2, arg2); foo(mp1, arg1); But foo() and bar() will never run at the
71  * same time.
72  *
73  * Connectionless transports use a single serializer per transport type (one for
74  * TLI and one for sockets). Connection-oriented transports use finer-grained
75  * serializers.
76  *
77  * All COTS-type endpoints start their life with private serializers. During
78  * connection request processing the endpoint serializer is switched to the
79  * listener's serializer and the rest of T_CONN_REQ processing is done on the
80  * listener serializer. During T_CONN_RES processing the eager serializer is
81  * switched from listener to acceptor serializer and after that point all
82  * processing for eager and acceptor happens on this serializer. To avoid races
83  * with endpoint closes while its serializer may be changing closes are blocked
84  * while serializers are manipulated.
85  *
86  * References accounting
87  * ---------------------
88  *
89  * Endpoints are reference counted and freed when the last reference is
90  * dropped. Functions within the serializer may access an endpoint state even
91  * after an endpoint closed. The te_closing being set on the endpoint indicates
92  * that the endpoint entered its close routine.
93  *
94  * One reference is held for each opened endpoint instance. The reference
95  * counter is incremented when the endpoint is linked to another endpoint and
96  * decremented when the link disappears. It is also incremented when the
97  * endpoint is found by the hash table lookup. This increment is atomic with the
98  * lookup itself and happens while the hash table read lock is held.
99  *
100  * Close synchronization
101  * ---------------------
102  *
103  * During close the endpoint is marked as closing using te_closing flag. It is
104  * usually enough to check for te_closing flag since all other state changes
105  * happen after this flag is set and the close entered serializer. Immediately
106  * after setting te_closing flag tl_close() enters serializer and waits until
107  * the callback finishes. This allows all functions called within serializer to
108  * simply check te_closing without any locks.
109  *
110  * Serializer management.
111  * ---------------------
112  *
113  * For COTS transports serializers are created when the endpoint is constructed
114  * and destroyed when the endpoint is destructed. CLTS transports use global
115  * serializers - one for sockets and one for TLI.
116  *
117  * COTS serializers have separate reference counts to deal with several
118  * endpoints sharing the same serializer. There is a subtle problem related to
119  * the serializer destruction. The serializer should never be destroyed by any
120  * function executed inside serializer. This means that close has to wait till
121  * all serializer activity for this endpoint is finished before it can drop the
122  * last reference on the endpoint (which may as well free the serializer).  This
123  * is only relevant for COTS transports which manage serializers
124  * dynamically. For CLTS transports close may complete without waiting for all
125  * serializer activity to finish since serializer is only destroyed at driver
126  * detach time.
127  *
128  * COTS endpoints keep track of the number of outstanding requests on the
129  * serializer for the endpoint. The code handling accept() avoids changing
130  * client serializer if it has any pending messages on the serializer and
131  * instead moves acceptor to listener's serializer.
132  *
133  *
134  * Use of hash tables
135  * ------------------
136  *
137  * The driver uses modhash hash table implementation. Each transport uses two
138  * hash tables - one for finding endpoints by acceptor ID and another one for
139  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
140  * pair of hash tables since sockets only use TICOTSORD.
141  *
142  * All hash tables lookups increment a reference count for returned endpoints,
143  * so we may safely check the endpoint state even when the endpoint is removed
144  * from the hash by another thread immediately after it is found.
145  *
146  *
147  * CLOSE processing
148  * ================
149  *
150  * The driver enters serializer twice on close(). The close sequence is the
151  * following:
152  *
153  * 1) Wait until closing is safe (te_closewait becomes zero)
154  *	This step is needed to prevent close during serializer switches. In most
155  *	cases (close happening after connection establishment) te_closewait is
156  *	zero.
157  * 2) Set te_closing.
158  * 3) Call tl_close_ser() within serializer and wait for it to complete.
159  *
160  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
161  *	It also needs to clear write-side q_next pointers - this should be done
162  *	before qprocsoff().
163  *
164  *    This synchronous serializer entry during close is needed to ensure that
165  *    the queue is valid everywhere inside the serializer.
166  *
167  *    Note that in many cases close will execute tl_close_ser() synchronously,
168  *    so it will not wait at all.
169  *
170  * 4) Calls qprocsoff().
171  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
172  *	complete (for COTS transports). For CLTS transport there is no wait.
173  *
174  *	tl_close_finish_ser() Finishes the close process and wakes up waiting
175  *	close if there is any.
176  *
177  *    Note that in most cases close will enter tl_close_finish_ser()
178  *    synchronously and will not wait at all.
179  *
180  *
181  * Flow Control
182  * ============
183  *
184  * The driver implements both read and write side service routines. No one calls
185  * putq() on the read queue. The read side service routine tl_rsrv() is called
186  * when the read side stream is back-enabled. It enters serializer synchronously
187  * (waits till serializer processing is complete). Within serializer it
188  * back-enables all endpoints blocked by the queue for connection-less
189  * transports and enables write side service processing for the peer for
190  * connection-oriented transports.
191  *
192  * Read and write side service routines use special mblk_sized space in the
193  * endpoint structure to enter perimeter.
194  *
195  * Write-side flow control
196  * -----------------------
197  *
198  * Write side flow control is a bit tricky. The driver needs to deal with two
199  * message queues - the explicit STREAMS message queue maintained by
200  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
201  * queues should be synchronized to preserve message ordering and should
202  * maintain a single order determined by the order in which messages enter
203  * tl_wput(). In order to maintain the ordering between these two queues the
204  * STREAMS queue is only manipulated within the serializer, so the ordering is
205  * provided by the serializer.
206  *
207  * Functions called from the tl_wsrv() sometimes may call putbq(). To
208  * immediately stop any further processing of the STREAMS message queues the
209  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
210  * side service processing stops when the flag is set.
211  *
212  * The tl_wsrv() function enters serializer synchronously and waits for it to
213  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
214  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
215  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
216  * always bounded by the amount of messages on the STREAMS queue at the time
217  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
218  * queue from another serialized entry which can't happen in parallel. This
219  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
220  * of it draining forever while writer places new messages on the STREAMS
221  * queue).
222  *
223  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
224  *
225  *
226  * Unix Domain Sockets
227  * ===================
228  *
229  * The driver knows the structure of Unix Domain sockets addresses and treats
230  * them differently from generic TLI addresses. For sockets implicit binds are
231  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
232  * instead of using address length of zero. Explicit binds specify
233  * SOU_MAGIC_EXPLICIT as magic.
234  *
235  * For implicit binds we always use minor number as soua_vp part of the address
236  * and avoid any hash table lookups. This saves two hash tables lookups per
237  * anonymous bind.
238  *
239  * For explicit address we hash the vnode pointer instead of hashing the
240  * full-scale address+zone+length. Hashing by pointer is more efficient than
241  * hashing by the full address.
242  *
243  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
244  * tep structure, so it should never be freed.
245  *
246  * Also for sockets the driver always uses minor number as acceptor id.
247  *
248  * TPI VIOLATIONS
249  * --------------
250  *
251  * This driver violates TPI in several respects for Unix Domain Sockets:
252  *
253  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
254  *	is requested and the endpoint is already in use. There is no point in
255  *	generating an unused address since this address will be rejected by
256  *	sockfs anyway. For implicit binds it always generates a new address
257  *	(sets soua_vp to its minor number).
258  *
259  * 2) It always uses minor number as acceptor ID and never uses queue
260  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
261  *	message and they do not use the queue pointer.
262  *
263  * 3) For Listener sockets the usual sequence is to issue bind() zero backlog
264  *	followed by listen(). The listen() should be issued with non-zero
265  *	backlog, so sotpi_listen() issues unbind request followed by bind
266  *	request to the same address but with a non-zero qlen value. Both
267  *	tl_bind() and tl_unbind() require write lock on the hash table to
268  *	insert/remove the address. The driver does not remove the address from
269  *	the hash for endpoints that are bound to the explicit address and have
270  *	backlog of zero. During T_BIND_REQ processing if the address requested
271  *	is equal to the address the endpoint already has it updates the backlog
272  *	without reinserting the address in the hash table. This optimization
273  *	avoids two hash table updates for each listener created. It always
274  *	avoids the problem of a "stolen" address when another listener may use
275  *	the same address between the unbind and bind and suddenly listen() fails
276  *	because address is in use even though the bind() succeeded.
277  *
278  *
279  * CONNECTIONLESS TRANSPORTS
280  * =========================
281  *
282  * Connectionless transports all share the same serializer (one for TLI and one
283  * for Sockets). Functions executing behind serializer can check or modify state
284  * of any endpoint.
285  *
286  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
287  * te_lastep field. The next time X talks to some address A it checks whether A
288  * is the same as Y's address and if it is there is no need to lookup Y. If the
289  * address is different or the state of Y is not appropriate (e.g. closed or not
290  * idle) X does a lookup using tl_find_peer() and caches the new address.
291  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
292  * on the endpoint found.
293  *
294  * During close of endpoint Y it doesn't try to remove itself from other
295  * endpoints caches. They will detect that Y is gone and will search the peer
296  * endpoint again.
297  *
298  * Flow Control Handling.
299  * ----------------------
300  *
301  * Each connectionless endpoint keeps a list of endpoints which are
302  * flow-controlled by its queue. It also keeps a pointer to the queue which
303  * flow-controls itself.  Whenever flow control releases for endpoint X it
304  * enables all queues from the list. During close it also back-enables everyone
305  * in the list. If X is flow-controlled when it is closing it removes itself
306  * from the peer's list.
307  *
308  * DATA STRUCTURES
309  * ===============
310  *
311  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
312  * endpoint state. For connection-oriented transports it keeps a list
313  * of pending connections (tl_icon_t). For connectionless transports it keeps a
314  * list of endpoints flow controlled by this one.
315  *
316  * Each transport type is represented by a per-transport data structure
317  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
318  * endpoint address hash tables for each transport. It also contains pointer to
319  * transport serializer for connectionless transports.
320  *
321  * Each endpoint keeps a link to its transport structure, so the code can find
322  * all per-transport information quickly.
323  */
324 
325 #include	<sys/types.h>
326 #include	<sys/inttypes.h>
327 #include	<sys/stream.h>
328 #include	<sys/stropts.h>
329 #define	_SUN_TPI_VERSION 2
330 #include	<sys/tihdr.h>
331 #include	<sys/strlog.h>
332 #include	<sys/debug.h>
333 #include	<sys/cred.h>
334 #include	<sys/errno.h>
335 #include	<sys/kmem.h>
336 #include	<sys/id_space.h>
337 #include	<sys/modhash.h>
338 #include	<sys/mkdev.h>
339 #include	<sys/tl.h>
340 #include	<sys/stat.h>
341 #include	<sys/conf.h>
342 #include	<sys/modctl.h>
343 #include	<sys/strsun.h>
344 #include	<sys/socket.h>
345 #include	<sys/socketvar.h>
346 #include	<sys/sysmacros.h>
347 #include	<sys/xti_xtiopt.h>
348 #include	<sys/ddi.h>
349 #include	<sys/sunddi.h>
350 #include	<sys/zone.h>
351 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
352 #include	<inet/optcom.h>
353 #include	<sys/strsubr.h>
354 #include	<sys/ucred.h>
355 #include	<sys/suntpi.h>
356 #include	<sys/list.h>
357 #include	<sys/serializer.h>
358 
359 /*
360  * TBD List
361  * 14 Eliminate state changes through table
362  * 16. AF_UNIX socket options
363  * 17. connect() for ticlts
364  * 18. support for "netstat" to show AF_UNIX plus TLI local
365  *	transport connections
366  * 21. sanity check to flushing on sending M_ERROR
367  */
368 
369 /*
370  * CONSTANT DECLARATIONS
371  * --------------------
372  */
373 
374 /*
375  * Local declarations
     *
     * NEXTSTATE(EV, ST) reads ti_statetbl[EV][ST]: the table is indexed first
     * by the event and then by the state.  Neither argument is parenthesized
     * or range-checked by the macro.
     *
     * TL_HASH_SIZE is the initial value of the tl_hash_size variable.
376  */
377 #define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]
378 
379 #define	TL_MAXQLEN	128	/* Max conn indications allowed. */
380 #define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
381 #define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
382 #define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgz" is unlimited (0) */
383 /*
384  * Hash tables size.
385  */
386 #define	TL_HASH_SIZE 311
387 
388 /*
389  * Definitions for module_info
     *
     * These six values initialize tl_minfo, in the order id, name, minimum
     * packet size, maximum packet size, high water mark and low water mark.
     *
     * NOTE(review): the "ZZZ" marker on TL_MAXPSZ is not explained anywhere
     * in this part of the file; presumably it flags the unlimited packet
     * size for later review.
390  */
391 #define		TL_ID		(104)		/* module ID number */
392 #define		TL_NAME		"tl"		/* module name */
393 #define		TL_MINPSZ	(0)		/* min packet size */
394 #define		TL_MAXPSZ	INFPSZ 		/* max packet size ZZZ */
395 #define		TL_HIWAT	(16*1024)	/* hi water mark */
396 #define		TL_LOWAT	(256)		/* lo water mark */
397 /*
398  * Definition of minor numbers/modes for new transport provider modes.
399  * We view the socket use as a separate mode to get a separate name space.
     *
     * The three TLI modes take the values 0-2.  TL_SOCKET (4) is a separate
     * bit that is OR'ed with a TLI mode to form the TL_SOCK_* variants, which
     * therefore take the values 4-6.  TL_MINOR_MASK (0x7) covers every mode
     * value defined here.
     *
     * NOTE(review): TL_MINOR_START evaluates to 3, which is the same value as
     * TL_UNUSED and lies below the TL_SOCK_* values.  Confirm against the
     * code that uses it that this is intended.
400  */
401 #define		TL_TICOTS	0	/* connection oriented transport */
402 #define		TL_TICOTSORD 	1	/* COTS w/ orderly release */
403 #define		TL_TICLTS 	2	/* connectionless transport */
404 #define		TL_UNUSED	3
405 #define		TL_SOCKET	4	/* Socket */
406 #define		TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
407 #define		TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
408 #define		TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)
409 
410 #define		TL_MINOR_MASK	0x7
411 #define		TL_MINOR_START	(TL_TICLTS + 1)
412 
413 /*
414  * LOCAL MACROS
     *
     * T_ALIGN(p) passes p to P2ROUNDUP() with an alignment of
     * sizeof (t_scalar_t), i.e. it rounds p up to the next t_scalar_t
     * boundary.
415  */
416 #define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
417 
418 /*
419  * EXTERNAL VARIABLE DECLARATIONS
420  * -----------------------------
421  */
422 /*
423  * state table defined in the OS space.c
     *
     * The table is dimensioned [TE_NOEVENTS][TS_NOSTATES] and is read in this
     * file through the NEXTSTATE(event, state) macro.
424  */
425 extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
426 
427 /*
428  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
     *
     * tl_open(), tl_close() and tl_rsrv() are installed in tl_rinit; tl_wput()
     * and tl_wsrv() are installed in tl_winit.  The put and service routines
     * are declared void here and are cast to int (*)() where the qinit tables
     * are initialized.  tl_attach(), tl_detach() and tl_info() are handed to
     * DDI_DEFINE_STREAM_OPS().
429  */
430 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
431 static int tl_close(queue_t *, int, cred_t *);
432 static void tl_wput(queue_t *, mblk_t *);
433 static void tl_wsrv(queue_t *);
434 static void tl_rsrv(queue_t *);
435 
436 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
437 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
438 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
439 
440 
441 /*
442  * GLOBAL DATA STRUCTURES AND VARIABLES
443  * -----------------------------------
444  */
445 
446 /*
447  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
448  * For now, we only manage the SO_RECVUCRED option but we also have
449  * harmless dummy options to make things work with some common code we access.
     *
     * Each entry is a positional opdes_t initializer.  The first two values
     * are the option name and its level (both entries use SOL_SOCKET) and the
     * seventh is the size of the option data.  The number of entries is
     * exported as TL_OPT_ARR_CNT and also sizes TL_MAX_OPT_BUF_LEN.
     *
     * NOTE(review): the meaning of the OA_* and OP_* values and of the final
     * 0 is defined by opdes_t, which is not declared in this file; check the
     * option-management header before adding entries.
450  */
451 opdes_t	tl_opt_arr[] = {
452 	/* The SO_TYPE is needed for the hack below */
453 	{
454 		SO_TYPE,
455 		SOL_SOCKET,
456 		OA_R,
457 		OA_R,
458 		OP_NP,
459 		OP_PASSNEXT,
460 		sizeof (t_scalar_t),
461 		0
462 	},
463 	{
464 		SO_RECVUCRED,
465 		SOL_SOCKET,
466 		OA_RW,
467 		OA_RW,
468 		OP_NP,
469 		OP_PASSNEXT,
470 		sizeof (int),
471 		0
472 	}
473 };
474 
475 /*
476  * Table of all supported levels
477  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
478  * any supported options so we need this info separately.
479  *
480  * This is needed only for topmost tpi providers.
     *
     * Of the three levels listed, only SOL_SOCKET has entries in tl_opt_arr.
     * TL_VALID_LEVELS_CNT is the element count of this array.
481  */
482 optlevel_t	tl_valid_levels_arr[] = {
483 	XTI_GENERIC,
484 	SOL_SOCKET,
485 	TL_PROT_LEVEL
486 };
487 
488 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
489 /*
490  * Current upper bound on the amount of space needed to return all options.
     * The bound is, per tl_opt_arr entry, 4 bytes of option data (the "<< 2")
     * plus one struct opthdr, with 64 bytes of slack and one
     * struct T_optmgmt_ack added on top.
491  * Additional options with data size of sizeof(long) are handled automatically.
492  * Others need to be accounted for by hand.
     *
     * NOTE(review): the per-option data budget is 4 bytes, which matches
     * sizeof (long) only where long is 32 bits wide; larger option values
     * have to fit in the 64 bytes of slack.
     *
     * TL_OPT_ARR_CNT is the number of entries in tl_opt_arr.
493  */
494 #define	TL_MAX_OPT_BUF_LEN						\
495 		((A_CNT(tl_opt_arr) << 2) +				\
496 		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
497 		64 + sizeof (struct T_optmgmt_ack))
498 
499 #define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)
500 
501 /*
502  *	transport addr structure
     *
     *	An address is the buffer ta_abuf of ta_alen bytes, qualified by the
     *	zone ta_zoneid.  struct tl_endpt embeds one of these as te_ap.
503  */
504 typedef struct tl_addr {
505 	zoneid_t	ta_zoneid;		/* Zone scope of address */
506 	t_scalar_t	ta_alen;		/* length of abuf */
507 	void		*ta_abuf;		/* the addr itself */
508 } tl_addr_t;
509 
510 /*
511  * Refcounted version of serializer.
     *
     * Pairs a serializer_t pointer with a reference count.  The overview at
     * the top of the file explains that COTS serializers carry their own
     * reference counts because several endpoints may share one serializer.
512  */
513 typedef struct tl_serializer {
514 	uint_t		ts_refcnt;	/* references to this serializer */
515 	serializer_t	*ts_serializer;	/* the serializer itself */
516 } tl_serializer_t;
517 
518 /*
519  * Each transport type has a separate state.
520  * Per-transport state.
     *
     * Instances live in the tl_transports table, where tr_name, tr_minor and
     * tr_defaddr are given static values and the three pointers start out
     * NULL.  Per the overview at the top of the file, tr_ai_hash is the
     * acceptor ID hash, tr_addr_hash is the endpoint address hash, and
     * tr_serializer is the transport-wide serializer used by connectionless
     * transports.
     *
     * NOTE(review): tr_defaddr is presumably the seed for generated default
     * addresses; its use is not visible in this part of the file.
521  */
522 typedef struct tl_transport_state {
523 	char		*tr_name;
524 	minor_t		tr_minor;
525 	uint32_t	tr_defaddr;
526 	mod_hash_t	*tr_ai_hash;
527 	mod_hash_t	*tr_addr_hash;
528 	tl_serializer_t	*tr_serializer;
529 } tl_transport_state_t;
530 
531 #define	TL_DFADDR 0x1000
532 
    /*
     * Per-transport state table.  Rows are listed in the order of the TL_*
     * mode values 0 through 6, including a placeholder row for TL_UNUSED, so
     * the position of a row equals its tr_minor.  Every row starts with
     * TL_DFADDR as tr_defaddr and with NULL hash table and serializer
     * pointers; all six members are initialized explicitly in every row.
     *
     * TL_MAXTRANSPORT is the number of rows in the table.
     */
533 static tl_transport_state_t tl_transports[] = {
534 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
535 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
536 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
537 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
538 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
539 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
540 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
541 };
542 
543 #define	TL_MAXTRANSPORT A_CNT(tl_transports)
544 
545 struct tl_endpt;
546 typedef struct tl_endpt tl_endpt_t;
547 
    /*
     * Function type taking a message block and an endpoint and returning
     * nothing.  Many of the local handlers prototyped later in this file
     * have exactly this signature.
     * NOTE(review): presumably this is the callback type handed to the
     * serializer; confirm at the serializer entry call sites.
     */
548 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
549 
550 /*
551  * Data structure used to represent pending connects.
552  * Records enough information so that the connecting peer can close
553  * before the connection gets accepted.
     *
     * ti_tep is set to NULL once the connecting peer has closed, so it must
     * be checked before use.
     * NOTE(review): ti_node is presumably the linkage for the te_iconp list
     * of the endpoint the connect is pending on; confirm where the list is
     * created.
554  */
555 typedef struct tl_icon {
556 	list_node_t	ti_node;
557 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
558 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
559 	t_scalar_t	ti_seqno;	/* Sequence number */
560 } tl_icon_t;
561 
562 typedef struct so_ux_addr soux_addr_t;	/* short name for the socket address */
563 #define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)	/* its size in bytes */
564 
565 /*
566  *	transport endpoint structure
     *
     *	One instance holds all the state of an endpoint.  Many members are
     *	reached through the te_* accessor macros defined inline below, which
     *	expand to members of the embedded address, of the transport state,
     *	or of the transport-specific union.
     *
     *	The union overlays the connection-oriented and the connectionless
     *	state, so the te_nicon/te_qlen/te_oconp/te_conp/te_iconp accessors
     *	and the te_lastep/te_flowq/te_flows/te_flowlist accessors name the
     *	same storage and only one set is meaningful for a given endpoint.
     *
     *	NOTE(review): _te_pad is compiled in only when _ILP32 is not defined;
     *	presumably it keeps _te_iconp at the same offset as _te_flowlist so
     *	that both list heads occupy the same storage.  Confirm against the
     *	size of list_node_t.
567  */
568 struct tl_endpt {
569 	queue_t		*te_rq;		/* stream read queue */
570 	queue_t		*te_wq;		/* stream write queue */
571 	uint32_t	te_refcnt;	/* reference count on this endpoint */
572 	int32_t 	te_state;	/* TPI state of endpoint */
573 	minor_t		te_minor;	/* minor number */
574 #define	te_seqno	te_minor
575 	uint_t		te_flag;	/* flag field */
576 	boolean_t	te_nowsrv;	/* set by TL_PUTBQ, cleared on enable */
577 	tl_serializer_t	*te_ser;	/* Serializer to use */
578 #define	te_serializer	te_ser->ts_serializer
579 
580 	soux_addr_t	te_uxaddr;	/* Socket address */
581 #define	te_magic	te_uxaddr.soua_magic
582 #define	te_vp		te_uxaddr.soua_vp
583 	tl_addr_t	te_ap;		/* addr bound to this endpt */
584 #define	te_zoneid te_ap.ta_zoneid
585 #define	te_alen	te_ap.ta_alen
586 #define	te_abuf	te_ap.ta_abuf
587 
588 	tl_transport_state_t *te_transport;	/* per-transport state */
589 #define	te_addrhash	te_transport->tr_addr_hash
590 #define	te_aihash	te_transport->tr_ai_hash
591 #define	te_defaddr	te_transport->tr_defaddr
592 	cred_t		*te_credp;	/* endpoint user credentials */
593 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
594 
595 	/*
596 	 * State specific for connection-oriented and connectionless transports.
597 	 */
598 	union {
599 		/* Connection-oriented state. */
600 		struct {
601 			t_uscalar_t _te_nicon;	/* count of conn requests */
602 			t_uscalar_t _te_qlen;	/* max conn requests */
603 			tl_endpt_t  *_te_oconp;	/* conn request pending */
604 			tl_endpt_t  *_te_conp;	/* connected endpt */
605 #ifndef _ILP32
606 			void	    *_te_pad;
607 #endif
608 			list_t	_te_iconp;	/* list of conn ind. pending */
609 		} _te_cots_state;
610 		/* Connection-less state. */
611 		struct {
612 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
613 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
614 			list_node_t _te_flows;	/* lists of connections */
615 			list_t  _te_flowlist;	/* Who flowcontrols on me */
616 		} _te_clts_state;
617 	} _te_transport_state;
618 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
619 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
620 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
621 #define	te_conp		_te_transport_state._te_cots_state._te_conp
622 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
623 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
624 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
625 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
626 #define	te_flows	_te_transport_state._te_clts_state._te_flows
627 
628 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
629 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
630 	pid_t		te_cpid;	/* cached pid of endpoint */
631 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
632 	/*
633 	 * Pieces of the endpoint state needed for closing.
634 	 */
635 	kmutex_t	te_closelock;
636 	kcondvar_t	te_closecv;
637 	uint8_t		te_closing;	/* The endpoint started closing */
638 	uint8_t		te_closewait;	/* Wait in close until zero */
639 	mblk_t		te_closemp;	/* for entering serializer on close */
640 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
641 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
642 	kmutex_t	te_srv_lock;
643 	kcondvar_t	te_srv_cv;
644 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
645 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
646 	/*
647 	 * Pieces of the endpoint state needed for serializer transitions.
648 	 */
649 	kmutex_t	te_ser_lock;	/* Protects the count below */
650 	uint_t		te_ser_count;	/* Number of messages on serializer */
651 };
652 
653 /*
654  * Flag values. Lower 4 bits specify the transport used.
     * The transport mode values themselves fit in TL_MINOR_MASK (0x7); the
     * flags below start at 0x10 so they never overlap the mode bits.
655  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
656  * they allow to identify the endpoint more easily.
657  */
658 #define	TL_LISTENER	0x00010	/* the listener endpoint */
659 #define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
660 #define	TL_EAGER	0x00040	/* connecting endpoint */
661 #define	TL_ACCEPTED	0x00080	/* accepted connection */
662 #define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
663 #define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
664 #define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
665 #define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
666 #define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
667 /*
668  * Boolean checks for the endpoint type.
     *
     * All four test mode bits kept in te_flag: IS_CLTS() is true when the
     * TL_TICLTS bit (value 2) is set and IS_COTS() is its exact complement;
     * IS_COTSORD() tests the TL_TICOTSORD bit (value 1) and IS_SOCKET() tests
     * the TL_SOCKET bit (value 4).
     *
     * TLPID(mp, tep) yields DB_CPID(mp), falling back to the pid cached in
     * the endpoint (te_cpid) when DB_CPID(mp) is -1.  mp is evaluated twice.
669  */
670 #define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
671 #define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
672 #define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
673 #define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
674 
675 #define	TLPID(mp, tep)	(DB_CPID(mp) == -1 ? (tep)->te_cpid : DB_CPID(mp))
676 
677 /*
678  * Certain operations are always used together. These macros reduce the chance
679  * of missing a part of a combination.
     *
     * TL_UNCONNECT(x) releases the reference held through x with tl_refrele()
     * and then sets x to NULL; x is evaluated twice and assigned to, so it
     * must be a side-effect-free lvalue.  TL_REMOVE_PEER(x) does the same
     * only when x is not NULL.
     *
     * TL_PUTBQ(x, mp) asserts that TL_CLOSE_SER is not set on the endpoint,
     * sets te_nowsrv and puts mp back on the endpoint's write queue.
     * TL_QENABLE(x) clears te_nowsrv and enables the write queue.
     * TL_PUTQ(x, mp) clears te_nowsrv and queues mp on the write queue.
     *
     * All of these expand to a bare brace block rather than do/while (0), so
     * a use followed by ';' produces an extra empty statement.  Take care
     * when using them as the unbraced body of an if that has an else.
680  */
681 #define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
682 #define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
683 
684 #define	TL_PUTBQ(x, mp) {		\
685 	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
686 	(x)->te_nowsrv = B_TRUE;	\
687 	(void) putbq((x)->te_wq, mp);	\
688 }
689 
690 #define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
691 #define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
692 
693 /*
694  * STREAMS driver glue data structures.
     *
     * tl_minfo carries the module id, name, packet size limits and water
     * marks from the TL_* constants.  The same structure is referenced by
     * both the read-side and the write-side qinit.
695  */
696 static	struct	module_info	tl_minfo = {
697 	TL_ID,			/* mi_idnum */
698 	TL_NAME,		/* mi_idname */
699 	TL_MINPSZ,		/* mi_minpsz */
700 	TL_MAXPSZ,		/* mi_maxpsz */
701 	TL_HIWAT,		/* mi_hiwat */
702 	TL_LOWAT		/* mi_lowat */
703 };
704 
705 static	struct	qinit	tl_rinit = {
    	/*
    	 * Read-side queue initialization.  There is no read put procedure;
    	 * the read side supplies only the service routine plus the open and
    	 * close entry points.  tl_rsrv() returns void, hence the cast.
    	 */
706 	NULL,			/* qi_putp */
707 	(int (*)())tl_rsrv,	/* qi_srvp */
708 	tl_open,		/* qi_qopen */
709 	tl_close,		/* qi_qclose */
710 	NULL,			/* qi_qadmin */
711 	&tl_minfo,		/* qi_minfo */
712 	NULL			/* qi_mstat */
713 };
714 
715 static	struct	qinit	tl_winit = {
    	/*
    	 * Write-side queue initialization: put and service procedures only.
    	 * Both return void, hence the casts.  Open and close are supplied by
    	 * the read-side qinit.
    	 */
716 	(int (*)())tl_wput,	/* qi_putp */
717 	(int (*)())tl_wsrv,	/* qi_srvp */
718 	NULL,			/* qi_qopen */
719 	NULL,			/* qi_qclose */
720 	NULL,			/* qi_qadmin */
721 	&tl_minfo,		/* qi_minfo */
722 	NULL			/* qi_mstat */
723 };
724 
725 static	struct streamtab	tlinfo = {
    	/*
    	 * Ties the read and write qinit structures together.  Both
    	 * multiplexor slots are NULL.
    	 */
726 	&tl_rinit,		/* st_rdinit */
727 	&tl_winit,		/* st_wrinit */
728 	NULL,			/* st_muxrinit */
729 	NULL			/* st_muxwrinit */
730 };
731 
732 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    /*
     * Defines tl_devops, which modldrv points at.  The driver supplies
     * tl_attach, tl_detach and tl_info; the remaining entry point slots are
     * nulldev.  D_MP is the flag argument and tlinfo is the streamtab.
     */
733     nulldev, tl_info, D_MP, &tlinfo);
734 
735 static struct modldrv modldrv = {
    	/*
    	 * Driver linkage, referenced from modlinkage.
    	 * NOTE(review): "%I%" in the description string looks like an
    	 * unexpanded source-control keyword; it is left as written here.
    	 */
736 	&mod_driverops,		/* Type of module -- pseudo driver here */
737 	"TPI Local Transport (tl) %I%",
738 	&tl_devops,		/* driver ops */
739 };
740 
741 /*
742  * Module linkage information for the kernel.
     *
     * The linkage list holds the single modldrv entry and is NULL-terminated.
743  */
744 static struct modlinkage modlinkage = {
745 	MODREV_1,
746 	&modldrv,
747 	NULL
748 };
749 
750 /*
751  * Templates for response to info request
752  * Check sanity of unlimited connect data etc.
     *
     * The connectionless and connection-oriented templates currently use the
     * same PROVIDER_flag value, XPG4_1|SENDZERO.
753  */
754 
755 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
756 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
757 
758 static struct T_info_ack tl_cots_info_ack =
    	/*
    	 * T_INFO_ACK template for connection-oriented endpoints.  All size
    	 * limits are T_INFINITE except TIDU_size, which is 0 here and is
    	 * filled in at run time.
    	 * NOTE(review): CURRENT_state is -1 in the template; presumably it
    	 * is replaced with the endpoint state when a reply is built.
    	 */
759 	{
760 		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
761 		T_INFINITE,	/* TSDU size */
762 		T_INFINITE,	/* ETSDU size */
763 		T_INFINITE,	/* CDATA_size */
764 		T_INFINITE,	/* DDATA_size */
765 		T_INFINITE,	/* ADDR_size  */
766 		T_INFINITE,	/* OPT_size */
767 		0,		/* TIDU_size - fill at run time */
768 		T_COTS,		/* SERV_type */
769 		-1,		/* CURRENT_state */
770 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
771 	};
772 
773 static struct T_info_ack tl_clts_info_ack =
    	/*
    	 * T_INFO_ACK template for connectionless endpoints.  TSDU_size and
    	 * TIDU_size are 0 here and are filled in at run time.  Expedited,
    	 * connect and disconnect data are marked not supported (-2), and
    	 * address and option sizes are unlimited (-1).
    	 * NOTE(review): this template spells the limits as literal -1/-2
    	 * where the connection-oriented one uses T_INFINITE; presumably
    	 * CURRENT_state (-1) is replaced when a reply is built.
    	 */
774 	{
775 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
776 		0,		/* TSDU_size - fill at run time */
777 		-2,		/* ETSDU_size -2 => not supported */
778 		-2,		/* CDATA_size -2 => not supported */
779 		-2,		/* DDATA_size  -2 => not supported */
780 		-1,		/* ADDR_size -1 => unlimited */
781 		-1,		/* OPT_size */
782 		0,		/* TIDU_size - fill at run time */
783 		T_CLTS,		/* SERV_type */
784 		-1,		/* CURRENT_state */
785 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
786 	};
787 
788 /*
789  * private copy of devinfo pointer used in tl_info
     *
     * This and the following file-scope variables have static storage, so
     * those without an initializer start out as zero/NULL.
790  */
791 static dev_info_t *tl_dip;
792 
793 /*
794  * Endpoints cache.
795  */
796 static kmem_cache_t *tl_cache;
797 /*
798  * Minor number space.
799  */
800 static id_space_t *tl_minors;
801 
802 /*
803  * Default Data Unit size.
804  */
805 static t_scalar_t tl_tidusz;
806 
807 /*
808  * Size of hash tables.
     * Starts at TL_HASH_SIZE; kept in a variable rather than used as a
     * constant.
809  */
810 static size_t tl_hash_size = TL_HASH_SIZE;
811 
812 /*
813  * Debug and test variable ONLY. Turn off T_CONN_IND queueing
814  * for sockets.
     *
     * NOTE(review): the comment above describes tl_disable_early_connect.
     * tl_client_closing_when_accepting and tl_serializer_noswitch are not
     * documented here; presumably they are debug counters or switches as
     * well.  Confirm where they are read and written.
815  */
816 static int tl_disable_early_connect = 0;
817 static int tl_client_closing_when_accepting;
818 
819 static int tl_serializer_noswitch;
820 
821 /*
822  * LOCAL FUNCTION PROTOTYPES
823  * -------------------------
824  */
825 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
826 static void tl_do_proto(mblk_t *, tl_endpt_t *);
827 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
828 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
829 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
830 	t_scalar_t);
831 static void tl_bind(mblk_t *, tl_endpt_t *);
832 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
833 static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
834 static void tl_unbind(mblk_t *, tl_endpt_t *);
835 static void tl_optmgmt(queue_t *, mblk_t *);
836 static void tl_conn_req(queue_t *, mblk_t *);
837 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
838 static void tl_conn_res(mblk_t *, tl_endpt_t *);
839 static void tl_discon_req(mblk_t *, tl_endpt_t *);
840 static void tl_capability_req(mblk_t *, tl_endpt_t *);
841 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
842 static void tl_info_req(mblk_t *, tl_endpt_t *);
843 static void tl_addr_req(mblk_t *, tl_endpt_t *);
844 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
845 static void tl_data(mblk_t  *, tl_endpt_t *);
846 static void tl_exdata(mblk_t *, tl_endpt_t *);
847 static void tl_ordrel(mblk_t *, tl_endpt_t *);
848 static void tl_unitdata(mblk_t *, tl_endpt_t *);
849 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
850 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
851 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
852 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
853 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
854 static void tl_cl_backenable(tl_endpt_t *);
855 static void tl_co_unconnect(tl_endpt_t *);
856 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
857 static void tl_discon_ind(tl_endpt_t *, uint32_t);
858 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
859 static mblk_t *tl_ordrel_ind_alloc(void);
860 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
861 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
862 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
863 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
864 static void tl_icon_freemsgs(mblk_t **);
865 static void tl_merror(queue_t *, mblk_t *, int);
866 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int);
867 static int tl_default_opt(queue_t *, int, int, uchar_t *);
868 static int tl_get_opt(queue_t *, int, int, uchar_t *);
869 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
870     uchar_t *, void *, cred_t *, mblk_t *);
871 static void tl_memrecover(queue_t *, mblk_t *, size_t);
872 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
873 static void tl_free(tl_endpt_t *);
874 static int  tl_constructor(void *, void *, int);
875 static void tl_destructor(void *, void *);
876 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
877 static tl_serializer_t *tl_serializer_alloc(int);
878 static void tl_serializer_refhold(tl_serializer_t *);
879 static void tl_serializer_refrele(tl_serializer_t *);
880 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
881 static void tl_serializer_exit(tl_endpt_t *);
882 static boolean_t tl_noclose(tl_endpt_t *);
883 static void tl_closeok(tl_endpt_t *);
884 static void tl_refhold(tl_endpt_t *);
885 static void tl_refrele(tl_endpt_t *);
886 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
887 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
888 static void tl_close_ser(mblk_t *, tl_endpt_t *);
889 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
890 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
891 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
892 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
893 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
894 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
895 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
896 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
897 static void tl_addr_unbind(tl_endpt_t *);
898 
899 /*
900  * Intialize option database object for TL
901  */
902 
903 optdb_obj_t tl_opt_obj = {
904 	tl_default_opt,		/* TL default value function pointer */
905 	tl_get_opt,		/* TL get function pointer */
906 	tl_set_opt,		/* TL set function pointer */
907 	B_TRUE,			/* TL is tpi provider */
908 	TL_OPT_ARR_CNT,		/* TL option database count of entries */
909 	tl_opt_arr,		/* TL option database */
910 	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
911 	tl_valid_levels_arr	/* TL valid level array */
912 };
913 
/*
 * Logical operations.
 *
 * IMPLY(X, Y) means that X implies Y, i.e. when X is true, Y should also be
 * true.  Y is only evaluated when X is true.
 *
 * EQUIV(X, Y) is logical equivalence: X and Y should both be true or both be
 * false at the same time.  Each argument is evaluated exactly once, so the
 * macro gives the right answer even for arguments with side effects; the
 * operands are normalized with '!' so any two non-zero values compare equal.
 */
#define	IMPLY(X, Y)	(!(X) || (Y))
#define	EQUIV(X, Y)	(!(X) == !(Y))
925 
926 /*
927  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
928  * ---------------------------------------
929  */
930 
931 /*
932  * Loadable module routines
933  */
934 int
935 _init(void)
936 {
937 	return (mod_install(&modlinkage));
938 }
939 
940 int
941 _fini(void)
942 {
943 	return (mod_remove(&modlinkage));
944 }
945 
946 int
947 _info(struct modinfo *modinfop)
948 {
949 	return (mod_info(&modlinkage, modinfop));
950 }
951 
952 /*
953  * Driver Entry Points and Other routines
954  */
955 static int
956 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
957 {
958 	int i;
959 	char name[32];
960 
961 	/*
962 	 * Resume from a checkpoint state.
963 	 */
964 	if (cmd == DDI_RESUME)
965 		return (DDI_SUCCESS);
966 
967 	if (cmd != DDI_ATTACH)
968 		return (DDI_FAILURE);
969 
970 	/*
971 	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
972 	 * streams message sizes can be unlimited. We use a defined constant
973 	 * instead.
974 	 */
975 	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
976 
977 	/*
978 	 * Create subdevices for each transport.
979 	 */
980 	for (i = 0; i < TL_UNUSED; i++) {
981 		if (ddi_create_minor_node(devi,
982 			tl_transports[i].tr_name,
983 			S_IFCHR, tl_transports[i].tr_minor,
984 			DDI_PSEUDO, NULL) == DDI_FAILURE) {
985 			ddi_remove_minor_node(devi, NULL);
986 			return (DDI_FAILURE);
987 		}
988 	}
989 
990 	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
991 	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
992 
993 	if (tl_cache == NULL) {
994 		ddi_remove_minor_node(devi, NULL);
995 		return (DDI_FAILURE);
996 	}
997 
998 	tl_minors = id_space_create("tl_minor_space",
999 	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
1000 
1001 	/*
1002 	 * Create ID space for minor numbers
1003 	 */
1004 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1005 		tl_transport_state_t *t = &tl_transports[i];
1006 
1007 		if (i == TL_UNUSED)
1008 			continue;
1009 
1010 		/* Socket COTSORD shares namespace with COTS */
1011 		if (i == TL_SOCK_COTSORD) {
1012 			t->tr_ai_hash =
1013 			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
1014 			ASSERT(t->tr_ai_hash != NULL);
1015 			t->tr_addr_hash =
1016 			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
1017 			ASSERT(t->tr_addr_hash != NULL);
1018 			continue;
1019 		}
1020 
1021 		/*
1022 		 * Create hash tables.
1023 		 */
1024 		(void) snprintf(name, sizeof (name), "%s_ai_hash",
1025 		    t->tr_name);
1026 #ifdef _ILP32
1027 		if (i & TL_SOCKET)
1028 			t->tr_ai_hash =
1029 			    mod_hash_create_idhash(name, tl_hash_size - 1,
1030 				mod_hash_null_valdtor);
1031 		else
1032 			t->tr_ai_hash =
1033 			    mod_hash_create_ptrhash(name, tl_hash_size,
1034 				mod_hash_null_valdtor, sizeof (queue_t));
1035 #else
1036 		t->tr_ai_hash =
1037 		    mod_hash_create_idhash(name, tl_hash_size - 1,
1038 			mod_hash_null_valdtor);
1039 #endif /* _ILP32 */
1040 
1041 		if (i & TL_SOCKET) {
1042 			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1043 			    t->tr_name);
1044 			t->tr_addr_hash = mod_hash_create_ptrhash(name,
1045 			    tl_hash_size, mod_hash_null_valdtor,
1046 			    sizeof (uintptr_t));
1047 		} else {
1048 			(void) snprintf(name, sizeof (name), "%s_addr_hash",
1049 			    t->tr_name);
1050 			t->tr_addr_hash = mod_hash_create_extended(name,
1051 			    tl_hash_size, mod_hash_null_keydtor,
1052 			    mod_hash_null_valdtor,
1053 			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1054 		}
1055 
1056 		/* Create serializer for connectionless transports. */
1057 		if (i & TL_TICLTS)
1058 			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1059 	}
1060 
1061 	tl_dip = devi;
1062 
1063 	return (DDI_SUCCESS);
1064 }
1065 
1066 static int
1067 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1068 {
1069 	int i;
1070 
1071 	if (cmd == DDI_SUSPEND)
1072 		return (DDI_SUCCESS);
1073 
1074 	if (cmd != DDI_DETACH)
1075 		return (DDI_FAILURE);
1076 
1077 	/*
1078 	 * Destroy arenas and hash tables.
1079 	 */
1080 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1081 		tl_transport_state_t *t = &tl_transports[i];
1082 
1083 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1084 			continue;
1085 
1086 		ASSERT(EQUIV(i & TL_TICLTS, t->tr_serializer != NULL));
1087 		if (t->tr_serializer != NULL) {
1088 			tl_serializer_refrele(t->tr_serializer);
1089 			t->tr_serializer = NULL;
1090 		}
1091 
1092 #ifdef _ILP32
1093 		if (i & TL_SOCKET)
1094 			mod_hash_destroy_idhash(t->tr_ai_hash);
1095 		else
1096 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1097 #else
1098 		mod_hash_destroy_idhash(t->tr_ai_hash);
1099 #endif /* _ILP32 */
1100 		t->tr_ai_hash = NULL;
1101 		if (i & TL_SOCKET)
1102 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1103 		else
1104 			mod_hash_destroy_hash(t->tr_addr_hash);
1105 		t->tr_addr_hash = NULL;
1106 	}
1107 
1108 	kmem_cache_destroy(tl_cache);
1109 	tl_cache = NULL;
1110 	id_space_destroy(tl_minors);
1111 	tl_minors = NULL;
1112 	ddi_remove_minor_node(devi, NULL);
1113 	return (DDI_SUCCESS);
1114 }
1115 
1116 /* ARGSUSED */
1117 static int
1118 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1119 {
1120 
1121 	int retcode = DDI_FAILURE;
1122 
1123 	switch (infocmd) {
1124 
1125 	case DDI_INFO_DEVT2DEVINFO:
1126 		if (tl_dip != NULL) {
1127 			*result = (void *)tl_dip;
1128 			retcode = DDI_SUCCESS;
1129 		}
1130 		break;
1131 
1132 	case DDI_INFO_DEVT2INSTANCE:
1133 		*result = (void *)0;
1134 		retcode = DDI_SUCCESS;
1135 		break;
1136 
1137 	default:
1138 		break;
1139 	}
1140 	return (retcode);
1141 }
1142 
1143 /*
1144  * Endpoint reference management.
1145  */
1146 static void
1147 tl_refhold(tl_endpt_t *tep)
1148 {
1149 	atomic_add_32(&tep->te_refcnt, 1);
1150 }
1151 
1152 static void
1153 tl_refrele(tl_endpt_t *tep)
1154 {
1155 	ASSERT(tep->te_refcnt != 0);
1156 
1157 	if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1158 		tl_free(tep);
1159 }
1160 
1161 /*ARGSUSED*/
1162 static int
1163 tl_constructor(void *buf, void *cdrarg, int kmflags)
1164 {
1165 	tl_endpt_t *tep = buf;
1166 
1167 	bzero(tep, sizeof (tl_endpt_t));
1168 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1169 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1170 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1171 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1172 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1173 
1174 	return (0);
1175 }
1176 
1177 /*ARGSUSED*/
1178 static void
1179 tl_destructor(void *buf, void *cdrarg)
1180 {
1181 	tl_endpt_t *tep = buf;
1182 
1183 	mutex_destroy(&tep->te_closelock);
1184 	cv_destroy(&tep->te_closecv);
1185 	mutex_destroy(&tep->te_srv_lock);
1186 	cv_destroy(&tep->te_srv_cv);
1187 	mutex_destroy(&tep->te_ser_lock);
1188 }
1189 
1190 static void
1191 tl_free(tl_endpt_t *tep)
1192 {
1193 	ASSERT(tep->te_refcnt == 0);
1194 	ASSERT(tep->te_transport != NULL);
1195 	ASSERT(tep->te_rq == NULL);
1196 	ASSERT(tep->te_wq == NULL);
1197 	ASSERT(tep->te_ser != NULL);
1198 	ASSERT(tep->te_ser_count == 0);
1199 	ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1200 
1201 	if (IS_SOCKET(tep)) {
1202 		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1203 		ASSERT(tep->te_abuf == &tep->te_uxaddr);
1204 		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1205 		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1206 	} else if (tep->te_abuf != NULL) {
1207 		kmem_free(tep->te_abuf, tep->te_alen);
1208 		tep->te_alen = -1; /* uninitialized */
1209 		tep->te_abuf = NULL;
1210 	} else {
1211 		ASSERT(tep->te_alen == -1);
1212 	}
1213 
1214 	id_free(tl_minors, tep->te_minor);
1215 	ASSERT(tep->te_credp == NULL);
1216 
1217 	if (tep->te_hash_hndl != NULL)
1218 		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1219 
1220 	if (IS_COTS(tep)) {
1221 		TL_REMOVE_PEER(tep->te_conp);
1222 		TL_REMOVE_PEER(tep->te_oconp);
1223 		tl_serializer_refrele(tep->te_ser);
1224 		tep->te_ser = NULL;
1225 		ASSERT(tep->te_nicon == 0);
1226 		ASSERT(list_head(&tep->te_iconp) == NULL);
1227 	} else {
1228 		ASSERT(tep->te_lastep == NULL);
1229 		ASSERT(list_head(&tep->te_flowlist) == NULL);
1230 		ASSERT(tep->te_flowq == NULL);
1231 	}
1232 
1233 	ASSERT(tep->te_bufcid == 0);
1234 	ASSERT(tep->te_timoutid == 0);
1235 	bzero(&tep->te_ap, sizeof (tep->te_ap));
1236 	tep->te_acceptor_id = 0;
1237 
1238 	ASSERT(tep->te_closewait == 0);
1239 	ASSERT(!tep->te_rsrv_active);
1240 	ASSERT(!tep->te_wsrv_active);
1241 	tep->te_closing = 0;
1242 	tep->te_nowsrv = B_FALSE;
1243 	tep->te_flag = 0;
1244 
1245 	kmem_cache_free(tl_cache, tep);
1246 }
1247 
1248 /*
1249  * Allocate/free reference-counted wrappers for serializers.
1250  */
1251 static tl_serializer_t *
1252 tl_serializer_alloc(int flags)
1253 {
1254 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1255 	serializer_t *ser;
1256 
1257 	if (s == NULL)
1258 		return (NULL);
1259 
1260 	ser = serializer_create(flags);
1261 
1262 	if (ser == NULL) {
1263 		kmem_free(s, sizeof (tl_serializer_t));
1264 		return (NULL);
1265 	}
1266 
1267 	s->ts_refcnt = 1;
1268 	s->ts_serializer = ser;
1269 	return (s);
1270 }
1271 
1272 static void
1273 tl_serializer_refhold(tl_serializer_t *s)
1274 {
1275 	atomic_add_32(&s->ts_refcnt, 1);
1276 }
1277 
1278 static void
1279 tl_serializer_refrele(tl_serializer_t *s)
1280 {
1281 	if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1282 		serializer_destroy(s->ts_serializer);
1283 		kmem_free(s, sizeof (tl_serializer_t));
1284 	}
1285 }
1286 
1287 /*
1288  * Post a request on the endpoint serializer. For COTS transports keep track of
1289  * the number of pending requests.
1290  */
1291 static void
1292 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1293 {
1294 	if (IS_COTS(tep)) {
1295 		mutex_enter(&tep->te_ser_lock);
1296 		tep->te_ser_count++;
1297 		mutex_exit(&tep->te_ser_lock);
1298 	}
1299 	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1300 }
1301 
1302 /*
1303  * Complete processing the request on the serializer. Decrement the counter for
1304  * pending requests for COTS transports.
1305  */
1306 static void
1307 tl_serializer_exit(tl_endpt_t *tep)
1308 {
1309 	if (IS_COTS(tep)) {
1310 		mutex_enter(&tep->te_ser_lock);
1311 		ASSERT(tep->te_ser_count != 0);
1312 		tep->te_ser_count--;
1313 		mutex_exit(&tep->te_ser_lock);
1314 	}
1315 }
1316 
1317 /*
1318  * Hash management functions.
1319  */
1320 
1321 /*
1322  * Return TRUE if two addresses are equal, false otherwise.
1323  */
1324 static boolean_t
1325 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1326 {
1327 	return ((ap1->ta_alen > 0) &&
1328 	    (ap1->ta_alen == ap2->ta_alen) &&
1329 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1330 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1331 }
1332 
1333 /*
1334  * This function is called whenever an endpoint is found in the hash table.
1335  */
1336 /* ARGSUSED0 */
1337 static void
1338 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1339 {
1340 	tl_refhold((tl_endpt_t *)val);
1341 }
1342 
1343 /*
1344  * Address hash function.
1345  */
1346 /* ARGSUSED */
1347 static uint_t
1348 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1349 {
1350 	tl_addr_t *ap = (tl_addr_t *)key;
1351 	size_t	len = ap->ta_alen;
1352 	uchar_t *p = ap->ta_abuf;
1353 	uint_t i, g;
1354 
1355 	ASSERT((len > 0) && (p != NULL));
1356 
1357 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1358 		i = (i << 4) + (*p);
1359 		if ((g = (i & 0xf0000000U)) != 0) {
1360 			i ^= (g >> 24);
1361 			i ^= g;
1362 		}
1363 	}
1364 	return (i);
1365 }
1366 
1367 /*
1368  * This function is used by hash lookups. It compares two generic addresses.
1369  */
1370 static int
1371 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1372 {
1373 #ifdef 	DEBUG
1374 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1375 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1376 
1377 	ASSERT(key1 != NULL);
1378 	ASSERT(key2 != NULL);
1379 
1380 	ASSERT(ap1->ta_abuf != NULL);
1381 	ASSERT(ap2->ta_abuf != NULL);
1382 	ASSERT(ap1->ta_alen > 0);
1383 	ASSERT(ap2->ta_alen > 0);
1384 #endif
1385 
1386 	return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1387 }
1388 
1389 /*
1390  * Prevent endpoint from closing if possible.
1391  * Return B_TRUE on success, B_FALSE on failure.
1392  */
1393 static boolean_t
1394 tl_noclose(tl_endpt_t *tep)
1395 {
1396 	boolean_t rc = B_FALSE;
1397 
1398 	mutex_enter(&tep->te_closelock);
1399 	if (! tep->te_closing) {
1400 		ASSERT(tep->te_closewait == 0);
1401 		tep->te_closewait++;
1402 		rc = B_TRUE;
1403 	}
1404 	mutex_exit(&tep->te_closelock);
1405 	return (rc);
1406 }
1407 
1408 /*
1409  * Allow endpoint to close if needed.
1410  */
1411 static void
1412 tl_closeok(tl_endpt_t *tep)
1413 {
1414 	ASSERT(tep->te_closewait > 0);
1415 	mutex_enter(&tep->te_closelock);
1416 	ASSERT(tep->te_closewait == 1);
1417 	tep->te_closewait--;
1418 	cv_signal(&tep->te_closecv);
1419 	mutex_exit(&tep->te_closelock);
1420 }
1421 
1422 /*
1423  * STREAMS open entry point.
1424  */
1425 /* ARGSUSED */
1426 static int
1427 tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
1428 {
1429 	tl_endpt_t *tep;
1430 	minor_t	    minor = getminor(*devp);
1431 
1432 	/*
1433 	 * Driver is called directly. Both CLONEOPEN and MODOPEN
1434 	 * are illegal
1435 	 */
1436 	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1437 		return (ENXIO);
1438 
1439 	if (rq->q_ptr != NULL)
1440 		return (0);
1441 
1442 	/* Minor number should specify the mode used for the driver. */
1443 	if ((minor >= TL_UNUSED))
1444 		return (ENXIO);
1445 
1446 	if (oflag & SO_SOCKSTR) {
1447 		minor |= TL_SOCKET;
1448 	}
1449 
1450 	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1451 	tep->te_refcnt = 1;
1452 	tep->te_cpid = curproc->p_pid;
1453 	rq->q_ptr = WR(rq)->q_ptr = tep;
1454 	tep->te_state = TS_UNBND;
1455 	tep->te_credp = credp;
1456 	crhold(credp);
1457 	tep->te_zoneid = getzoneid();
1458 
1459 	tep->te_flag = minor & TL_MINOR_MASK;
1460 	tep->te_transport = &tl_transports[minor];
1461 
1462 	/* Allocate a unique minor number for this instance. */
1463 	tep->te_minor = (minor_t)id_alloc(tl_minors);
1464 
1465 	/* Reserve hash handle for bind(). */
1466 	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1467 
1468 	/* Transport-specific initialization */
1469 	if (IS_COTS(tep)) {
1470 		/* Use private serializer */
1471 		tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1472 
1473 		/* Create list for pending connections */
1474 		list_create(&tep->te_iconp, sizeof (tl_icon_t),
1475 		    offsetof(tl_icon_t, ti_node));
1476 		tep->te_qlen = 0;
1477 		tep->te_nicon = 0;
1478 		tep->te_oconp = NULL;
1479 		tep->te_conp = NULL;
1480 	} else {
1481 		/* Use shared serializer */
1482 		tep->te_ser = tep->te_transport->tr_serializer;
1483 		bzero(&tep->te_flows, sizeof (list_node_t));
1484 		/* Create list for flow control */
1485 		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1486 		    offsetof(tl_endpt_t, te_flows));
1487 		tep->te_flowq = NULL;
1488 		tep->te_lastep = NULL;
1489 
1490 	}
1491 
1492 	/* Initialize endpoint address */
1493 	if (IS_SOCKET(tep)) {
1494 		/* Socket-specific address handling. */
1495 		tep->te_alen = TL_SOUX_ADDRLEN;
1496 		tep->te_abuf = &tep->te_uxaddr;
1497 		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1498 		tep->te_magic = SOU_MAGIC_IMPLICIT;
1499 	} else {
1500 		tep->te_alen = -1;
1501 		tep->te_abuf = NULL;
1502 	}
1503 
1504 	/* clone the driver */
1505 	*devp = makedevice(getmajor(*devp), tep->te_minor);
1506 
1507 	tep->te_rq = rq;
1508 	tep->te_wq = WR(rq);
1509 
1510 #ifdef	_ILP32
1511 	if (IS_SOCKET(tep))
1512 		tep->te_acceptor_id = tep->te_minor;
1513 	else
1514 		tep->te_acceptor_id = (t_uscalar_t)rq;
1515 #else
1516 	tep->te_acceptor_id = tep->te_minor;
1517 #endif	/* _ILP32 */
1518 
1519 
1520 	qprocson(rq);
1521 
1522 	/*
1523 	 * Insert acceptor ID in the hash. The AI hash always sleeps on
1524 	 * insertion so insertion can't fail.
1525 	 */
1526 	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1527 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1528 	    (mod_hash_val_t)tep);
1529 
1530 	return (0);
1531 }
1532 
1533 /* ARGSUSED1 */
1534 static int
1535 tl_close(queue_t *rq, int flag,	cred_t *credp)
1536 {
1537 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1538 	tl_endpt_t *elp = NULL;
1539 	queue_t *wq = tep->te_wq;
1540 	int rc;
1541 
1542 	ASSERT(wq == WR(rq));
1543 
1544 	/*
1545 	 * Remove the endpoint from acceptor hash.
1546 	 */
1547 	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1548 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1549 	    (mod_hash_val_t *)&elp);
1550 	ASSERT(rc == 0 && tep == elp);
1551 	if ((rc != 0) || (tep != elp)) {
1552 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
1553 			    SL_TRACE|SL_ERROR,
1554 			    "tl_close:inconsistency in AI hash"));
1555 	}
1556 
1557 	/*
1558 	 * Wait till close is safe, then mark endpoint as closing.
1559 	 */
1560 	mutex_enter(&tep->te_closelock);
1561 	while (tep->te_closewait)
1562 		cv_wait(&tep->te_closecv, &tep->te_closelock);
1563 	tep->te_closing = B_TRUE;
1564 	/*
1565 	 * Will wait for the serializer part of the close to finish, so set
1566 	 * te_closewait now.
1567 	 */
1568 	tep->te_closewait = 1;
1569 	tep->te_nowsrv = B_FALSE;
1570 	mutex_exit(&tep->te_closelock);
1571 
1572 	/*
1573 	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1574 	 * It is safe because close will wait for tl_close_ser to finish.
1575 	 */
1576 	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1577 
1578 	/*
1579 	 * Wait for the first phase of close to complete before qprocsoff().
1580 	 */
1581 	mutex_enter(&tep->te_closelock);
1582 	while (tep->te_closewait)
1583 		cv_wait(&tep->te_closecv, &tep->te_closelock);
1584 	mutex_exit(&tep->te_closelock);
1585 
1586 	qprocsoff(rq);
1587 
1588 	if (tep->te_bufcid) {
1589 		qunbufcall(rq, tep->te_bufcid);
1590 		tep->te_bufcid = 0;
1591 	}
1592 	if (tep->te_timoutid) {
1593 		(void) quntimeout(rq, tep->te_timoutid);
1594 		tep->te_timoutid = 0;
1595 	}
1596 
1597 	/*
1598 	 * Finish close behind serializer.
1599 	 *
1600 	 * For a CLTS endpoint increase a refcount and continue close processing
1601 	 * with serializer protection. This processing may happen asynchronously
1602 	 * with the completion of tl_close().
1603 	 *
1604 	 * Fot a COTS endpoint wait before destroying tep since the serializer
1605 	 * may go away together with tep and we need to destroy serializer
1606 	 * outside of serializer context.
1607 	 */
1608 	ASSERT(tep->te_closewait == 0);
1609 	if (IS_COTS(tep))
1610 		tep->te_closewait = 1;
1611 	else
1612 		tl_refhold(tep);
1613 
1614 	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1615 
1616 	/*
1617 	 * For connection-oriented transports wait for all serializer activity
1618 	 * to settle down.
1619 	 */
1620 	if (IS_COTS(tep)) {
1621 		mutex_enter(&tep->te_closelock);
1622 		while (tep->te_closewait)
1623 			cv_wait(&tep->te_closecv, &tep->te_closelock);
1624 		mutex_exit(&tep->te_closelock);
1625 	}
1626 
1627 	crfree(tep->te_credp);
1628 	tep->te_credp = NULL;
1629 	tep->te_wq = NULL;
1630 	tl_refrele(tep);
1631 	/*
1632 	 * tep is likely to be destroyed now, so can't reference it any more.
1633 	 */
1634 
1635 	rq->q_ptr = wq->q_ptr = NULL;
1636 	return (0);
1637 }
1638 
1639 /*
1640  * First phase of close processing done behind the serializer.
1641  *
1642  * Do not drop the reference in the end - tl_close() wants this reference to
1643  * stay.
1644  */
1645 /* ARGSUSED0 */
1646 static void
1647 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1648 {
1649 	ASSERT(tep->te_closing);
1650 	ASSERT(tep->te_closewait == 1);
1651 	ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1652 
1653 	tep->te_flag |= TL_CLOSE_SER;
1654 
1655 	/*
1656 	 * Drain out all messages on queue except for TL_TICOTS where the
1657 	 * abortive release semantics permit discarding of data on close
1658 	 */
1659 	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1660 		tl_wsrv_ser(NULL, tep);
1661 	}
1662 
1663 	/* Remove address from hash table. */
1664 	tl_addr_unbind(tep);
1665 	/*
1666 	 * qprocsoff() gets confused when q->q_next is not NULL on the write
1667 	 * queue of the driver, so clear these before qprocsoff() is called.
1668 	 * Also clear q_next for the peer since this queue is going away.
1669 	 */
1670 	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1671 		tl_endpt_t *peer_tep = tep->te_conp;
1672 
1673 		tep->te_wq->q_next = NULL;
1674 		if ((peer_tep != NULL) && !peer_tep->te_closing)
1675 			peer_tep->te_wq->q_next = NULL;
1676 	}
1677 
1678 	tep->te_rq = NULL;
1679 
1680 	/* wake up tl_close() */
1681 	tl_closeok(tep);
1682 	tl_serializer_exit(tep);
1683 }
1684 
1685 /*
1686  * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1687  * the reference for CLTS.
1688  *
1689  * Called from serializer. Should drop reference count for CLTS only.
1690  */
1691 /* ARGSUSED0 */
1692 static void
1693 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1694 {
1695 	ASSERT(tep->te_closing);
1696 	ASSERT(IMPLY(IS_CLTS(tep), tep->te_closewait == 0));
1697 	ASSERT(IMPLY(IS_COTS(tep), tep->te_closewait == 1));
1698 
1699 	tep->te_state = -1;	/* Uninitialized */
1700 	if (IS_COTS(tep)) {
1701 		tl_co_unconnect(tep);
1702 	} else {
1703 		/* Connectionless specific cleanup */
1704 		TL_REMOVE_PEER(tep->te_lastep);
1705 		/*
1706 		 * Backenable anybody that is flow controlled waiting for
1707 		 * this endpoint.
1708 		 */
1709 		tl_cl_backenable(tep);
1710 		if (tep->te_flowq != NULL) {
1711 			list_remove(&(tep->te_flowq->te_flowlist), tep);
1712 			tep->te_flowq = NULL;
1713 		}
1714 	}
1715 
1716 	tl_serializer_exit(tep);
1717 	if (IS_COTS(tep))
1718 		tl_closeok(tep);
1719 	else
1720 		tl_refrele(tep);
1721 }
1722 
1723 /*
1724  * STREAMS write-side put procedure.
1725  * Enter serializer for most of the processing.
1726  *
1727  * The T_CONN_REQ is processed outside of serializer.
1728  */
1729 static void
1730 tl_wput(queue_t *wq, mblk_t *mp)
1731 {
1732 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
1733 	ssize_t			msz = MBLKL(mp);
1734 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
1735 	tlproc_t		*tl_proc = NULL;
1736 
1737 	switch (DB_TYPE(mp)) {
1738 	case M_DATA:
1739 		/* Only valid for connection-oriented transports */
1740 		if (IS_CLTS(tep)) {
1741 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1742 				SL_TRACE|SL_ERROR,
1743 				"tl_wput:M_DATA invalid for ticlts driver"));
1744 			tl_merror(wq, mp, EPROTO);
1745 			return;
1746 		}
1747 		tl_proc = tl_wput_data_ser;
1748 		break;
1749 
1750 	case M_IOCTL:
1751 		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1752 		case TL_IOC_CREDOPT:
1753 			/* FALLTHROUGH */
1754 		case TL_IOC_UCREDOPT:
1755 			/*
1756 			 * Serialize endpoint state change.
1757 			 */
1758 			tl_proc = tl_do_ioctl_ser;
1759 			break;
1760 
1761 		default:
1762 			miocnak(wq, mp, 0, EINVAL);
1763 			return;
1764 		}
1765 		break;
1766 
1767 	case M_FLUSH:
1768 		/*
1769 		 * do canonical M_FLUSH processing
1770 		 */
1771 		if (*mp->b_rptr & FLUSHW) {
1772 			flushq(wq, FLUSHALL);
1773 			*mp->b_rptr &= ~FLUSHW;
1774 		}
1775 		if (*mp->b_rptr & FLUSHR) {
1776 			flushq(RD(wq), FLUSHALL);
1777 			qreply(wq, mp);
1778 		} else {
1779 			freemsg(mp);
1780 		}
1781 		return;
1782 
1783 	case M_PROTO:
1784 		if (msz < sizeof (prim->type)) {
1785 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1786 				SL_TRACE|SL_ERROR,
1787 				"tl_wput:M_PROTO data too short"));
1788 			tl_merror(wq, mp, EPROTO);
1789 			return;
1790 		}
1791 		switch (prim->type) {
1792 		case T_OPTMGMT_REQ:
1793 		case T_SVR4_OPTMGMT_REQ:
1794 			/*
1795 			 * Process TPI option management requests immediately
1796 			 * in put procedure regardless of in-order processing
1797 			 * of already queued messages.
1798 			 * (Note: This driver supports AF_UNIX socket
1799 			 * implementation.  Unless we implement this processing,
1800 			 * setsockopt() on socket endpoint will block on flow
1801 			 * controlled endpoints which it should not. That is
1802 			 * required for successful execution of VSU socket tests
1803 			 * and is consistent with BSD socket behavior).
1804 			 */
1805 			tl_optmgmt(wq, mp);
1806 			return;
1807 		case O_T_BIND_REQ:
1808 		case T_BIND_REQ:
1809 			tl_proc = tl_bind_ser;
1810 			break;
1811 		case T_CONN_REQ:
1812 			if (IS_CLTS(tep)) {
1813 				tl_merror(wq, mp, EPROTO);
1814 				return;
1815 			}
1816 			tl_conn_req(wq, mp);
1817 			return;
1818 		case T_DATA_REQ:
1819 		case T_OPTDATA_REQ:
1820 		case T_EXDATA_REQ:
1821 		case T_ORDREL_REQ:
1822 			tl_proc = tl_putq_ser;
1823 			break;
1824 		case T_UNITDATA_REQ:
1825 			if (IS_COTS(tep) ||
1826 			    (msz < sizeof (struct T_unitdata_req))) {
1827 				tl_merror(wq, mp, EPROTO);
1828 				return;
1829 			}
1830 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1831 				tl_proc = tl_unitdata_ser;
1832 			} else {
1833 				tl_proc = tl_putq_ser;
1834 			}
1835 			break;
1836 		default:
1837 			/*
1838 			 * process in service procedure if message already
1839 			 * queued (maintain in-order processing)
1840 			 */
1841 			if (wq->q_first != NULL) {
1842 				tl_proc = tl_putq_ser;
1843 			} else {
1844 				tl_proc = tl_wput_ser;
1845 			}
1846 			break;
1847 		}
1848 		break;
1849 
1850 	case M_PCPROTO:
1851 		/*
1852 		 * Check that the message has enough data to figure out TPI
1853 		 * primitive.
1854 		 */
1855 		if (msz < sizeof (prim->type)) {
1856 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1857 				SL_TRACE|SL_ERROR,
1858 				"tl_wput:M_PCROTO data too short"));
1859 			tl_merror(wq, mp, EPROTO);
1860 			return;
1861 		}
1862 		switch (prim->type) {
1863 		case T_CAPABILITY_REQ:
1864 			tl_capability_req(mp, tep);
1865 			return;
1866 		case T_INFO_REQ:
1867 			tl_proc = tl_info_req_ser;
1868 			break;
1869 		default:
1870 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1871 				    SL_TRACE|SL_ERROR,
1872 				    "tl_wput:unknown TPI msg primitive"));
1873 			tl_merror(wq, mp, EPROTO);
1874 			return;
1875 		}
1876 		break;
1877 	default:
1878 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1879 			"tl_wput:default:unexpected Streams message"));
1880 		freemsg(mp);
1881 		return;
1882 	}
1883 
1884 	/*
1885 	 * Continue processing via serializer.
1886 	 */
1887 	ASSERT(tl_proc != NULL);
1888 	tl_refhold(tep);
1889 	tl_serializer_enter(tep, tl_proc, mp);
1890 }
1891 
1892 /*
1893  * Place message on the queue while preserving order.
1894  */
1895 static void
1896 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1897 {
1898 	if (tep->te_closing) {
1899 		tl_wput_ser(mp, tep);
1900 	} else {
1901 		TL_PUTQ(tep, mp);
1902 		tl_serializer_exit(tep);
1903 		tl_refrele(tep);
1904 	}
1905 
1906 }
1907 
1908 static void
1909 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1910 {
1911 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1912 
1913 	switch (DB_TYPE(mp)) {
1914 	case M_DATA:
1915 		tl_data(mp, tep);
1916 		break;
1917 	case M_PROTO:
1918 		tl_do_proto(mp, tep);
1919 		break;
1920 	default:
1921 		freemsg(mp);
1922 		break;
1923 	}
1924 }
1925 
1926 /*
1927  * Write side put procedure called from serializer.
1928  */
1929 static void
1930 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1931 {
1932 	tl_wput_common_ser(mp, tep);
1933 	tl_serializer_exit(tep);
1934 	tl_refrele(tep);
1935 }
1936 
1937 /*
1938  * M_DATA processing. Called from serializer.
1939  */
1940 static void
1941 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1942 {
1943 	tl_endpt_t	*peer_tep = tep->te_conp;
1944 	queue_t		*peer_rq;
1945 
1946 	ASSERT(DB_TYPE(mp) == M_DATA);
1947 	ASSERT(IS_COTS(tep));
1948 
1949 	ASSERT(IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer));
1950 
1951 	/*
1952 	 * fastpath for data. Ignore flow control if tep is closing.
1953 	 */
1954 	if ((peer_tep != NULL) &&
1955 	    !peer_tep->te_closing &&
1956 	    ((tep->te_state == TS_DATA_XFER) ||
1957 		(tep->te_state == TS_WREQ_ORDREL)) &&
1958 	    (tep->te_wq != NULL) &&
1959 	    (tep->te_wq->q_first == NULL) &&
1960 	    ((peer_tep->te_state == TS_DATA_XFER) ||
1961 		(peer_tep->te_state == TS_WREQ_ORDREL))	&&
1962 	    ((peer_rq = peer_tep->te_rq) != NULL) &&
1963 	    (canputnext(peer_rq) || tep->te_closing)) {
1964 		putnext(peer_rq, mp);
1965 	} else if (tep->te_closing) {
1966 		/*
1967 		 * It is possible that by the time we got here tep started to
1968 		 * close. If the write queue is not empty, and the state is
1969 		 * TS_DATA_XFER the data should be delivered in order, so we
1970 		 * call putq() instead of freeing the data.
1971 		 */
1972 		if ((tep->te_wq != NULL) &&
1973 		    ((tep->te_state == TS_DATA_XFER) ||
1974 			(tep->te_state == TS_WREQ_ORDREL))) {
1975 			TL_PUTQ(tep, mp);
1976 		} else {
1977 			freemsg(mp);
1978 		}
1979 	} else {
1980 		TL_PUTQ(tep, mp);
1981 	}
1982 
1983 	tl_serializer_exit(tep);
1984 	tl_refrele(tep);
1985 }
1986 
1987 /*
1988  * Write side service routine.
1989  *
1990  * All actual processing happens within serializer which is entered
1991  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1992  * messages that need processing may have arrived, so tl_wsrv repeats until
1993  * queue is empty or te_nowsrv is set.
1994  */
1995 static void
1996 tl_wsrv(queue_t *wq)
1997 {
1998 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1999 
2000 	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
2001 		mutex_enter(&tep->te_srv_lock);
2002 		ASSERT(tep->te_wsrv_active == B_FALSE);
2003 		tep->te_wsrv_active = B_TRUE;
2004 		mutex_exit(&tep->te_srv_lock);
2005 
2006 		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2007 
2008 		/*
2009 		 * Wait for serializer job to complete.
2010 		 */
2011 		mutex_enter(&tep->te_srv_lock);
2012 		while (tep->te_wsrv_active) {
2013 			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2014 		}
2015 		cv_signal(&tep->te_srv_cv);
2016 		mutex_exit(&tep->te_srv_lock);
2017 	}
2018 }
2019 
2020 /*
2021  * Serialized write side processing of the STREAMS queue.
2022  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2023  * is NULL.
2024  */
2025 static void
2026 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2027 {
2028 	mblk_t *mp;
2029 	queue_t *wq = tep->te_wq;
2030 
2031 	ASSERT(wq != NULL);
2032 	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2033 		tl_wput_common_ser(mp, tep);
2034 	}
2035 
2036 	/*
2037 	 * Wakeup service routine unless called from close.
2038 	 * If ser_mp is specified, the caller is tl_wsrv().
2039 	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2040 	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2041 	 * be no matching tl_serializer_exit() in this case.
2042 	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2043 	 * waiting on te_srv_cv.
2044 	 */
2045 	if (ser_mp != NULL) {
2046 		/*
2047 		 * We are called from tl_wsrv.
2048 		 */
2049 		mutex_enter(&tep->te_srv_lock);
2050 		ASSERT(tep->te_wsrv_active);
2051 		tep->te_wsrv_active = B_FALSE;
2052 		cv_signal(&tep->te_srv_cv);
2053 		mutex_exit(&tep->te_srv_lock);
2054 		tl_serializer_exit(tep);
2055 	}
2056 }
2057 
2058 /*
2059  * Called when the stream is backenabled. Enter serializer and qenable everyone
2060  * flow controlled by tep.
2061  *
2062  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2063  * is possible that two instances of tl_rsrv will be running reusing the same
2064  * rsrv mblk.
2065  */
2066 static void
2067 tl_rsrv(queue_t *rq)
2068 {
2069 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2070 
2071 	ASSERT(rq->q_first == NULL);
2072 	ASSERT(tep->te_rsrv_active == 0);
2073 
2074 	tep->te_rsrv_active = B_TRUE;
2075 	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2076 	/*
2077 	 * Wait for serializer job to complete.
2078 	 */
2079 	mutex_enter(&tep->te_srv_lock);
2080 	while (tep->te_rsrv_active) {
2081 		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2082 	}
2083 	cv_signal(&tep->te_srv_cv);
2084 	mutex_exit(&tep->te_srv_lock);
2085 }
2086 
2087 /* ARGSUSED */
2088 static void
2089 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2090 {
2091 	tl_endpt_t *peer_tep;
2092 
2093 	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2094 		tl_cl_backenable(tep);
2095 	} else if (
2096 		IS_COTS(tep) &&
2097 		    ((peer_tep = tep->te_conp) != NULL) &&
2098 		    !peer_tep->te_closing &&
2099 		    ((tep->te_state == TS_DATA_XFER) ||
2100 			(tep->te_state == TS_WIND_ORDREL)||
2101 			(tep->te_state == TS_WREQ_ORDREL))) {
2102 		TL_QENABLE(peer_tep);
2103 	}
2104 
2105 	/*
2106 	 * Wakeup read side service routine.
2107 	 */
2108 	mutex_enter(&tep->te_srv_lock);
2109 	ASSERT(tep->te_rsrv_active);
2110 	tep->te_rsrv_active = B_FALSE;
2111 	cv_signal(&tep->te_srv_cv);
2112 	mutex_exit(&tep->te_srv_lock);
2113 	tl_serializer_exit(tep);
2114 }
2115 
2116 /*
2117  * process M_PROTO messages. Always called from serializer.
2118  */
2119 static void
2120 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2121 {
2122 	ssize_t			msz = MBLKL(mp);
2123 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2124 
2125 	/* Message size was validated by tl_wput(). */
2126 	ASSERT(msz >= sizeof (prim->type));
2127 
2128 	switch (prim->type) {
2129 	case T_UNBIND_REQ:
2130 		tl_unbind(mp, tep);
2131 		break;
2132 
2133 	case T_ADDR_REQ:
2134 		tl_addr_req(mp, tep);
2135 		break;
2136 
2137 	case O_T_CONN_RES:
2138 	case T_CONN_RES:
2139 		if (IS_CLTS(tep)) {
2140 			tl_merror(tep->te_wq, mp, EPROTO);
2141 			break;
2142 		}
2143 		tl_conn_res(mp, tep);
2144 		break;
2145 
2146 	case T_DISCON_REQ:
2147 		if (IS_CLTS(tep)) {
2148 			tl_merror(tep->te_wq, mp, EPROTO);
2149 			break;
2150 		}
2151 		tl_discon_req(mp, tep);
2152 		break;
2153 
2154 	case T_DATA_REQ:
2155 		if (IS_CLTS(tep)) {
2156 			tl_merror(tep->te_wq, mp, EPROTO);
2157 			break;
2158 		}
2159 		tl_data(mp, tep);
2160 		break;
2161 
2162 	case T_OPTDATA_REQ:
2163 		if (IS_CLTS(tep)) {
2164 			tl_merror(tep->te_wq, mp, EPROTO);
2165 			break;
2166 		}
2167 		tl_data(mp, tep);
2168 		break;
2169 
2170 	case T_EXDATA_REQ:
2171 		if (IS_CLTS(tep)) {
2172 			tl_merror(tep->te_wq, mp, EPROTO);
2173 			break;
2174 		}
2175 		tl_exdata(mp, tep);
2176 		break;
2177 
2178 	case T_ORDREL_REQ:
2179 		if (! IS_COTSORD(tep)) {
2180 			tl_merror(tep->te_wq, mp, EPROTO);
2181 			break;
2182 		}
2183 		tl_ordrel(mp, tep);
2184 		break;
2185 
2186 	case T_UNITDATA_REQ:
2187 		if (IS_COTS(tep)) {
2188 			tl_merror(tep->te_wq, mp, EPROTO);
2189 			break;
2190 		}
2191 		tl_unitdata(mp, tep);
2192 		break;
2193 
2194 	default:
2195 		tl_merror(tep->te_wq, mp, EPROTO);
2196 		break;
2197 	}
2198 }
2199 
2200 /*
2201  * Process ioctl from serializer.
2202  * This is a wrapper around tl_do_ioctl().
2203  */
2204 static void
2205 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2206 {
2207 	if (! tep->te_closing)
2208 		tl_do_ioctl(mp, tep);
2209 	else
2210 		freemsg(mp);
2211 
2212 	tl_serializer_exit(tep);
2213 	tl_refrele(tep);
2214 }
2215 
2216 static void
2217 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2218 {
2219 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2220 	int cmd = iocbp->ioc_cmd;
2221 	queue_t *wq = tep->te_wq;
2222 	int error;
2223 	int thisopt, otheropt;
2224 
2225 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2226 
2227 	switch (cmd) {
2228 	case TL_IOC_CREDOPT:
2229 		if (cmd == TL_IOC_CREDOPT) {
2230 			thisopt = TL_SETCRED;
2231 			otheropt = TL_SETUCRED;
2232 		} else {
2233 			/* FALLTHROUGH */
2234 	case TL_IOC_UCREDOPT:
2235 			thisopt = TL_SETUCRED;
2236 			otheropt = TL_SETCRED;
2237 		}
2238 		/*
2239 		 * The credentials passing does not apply to sockets.
2240 		 * Only one of the cred options can be set at a given time.
2241 		 */
2242 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2243 			miocnak(wq, mp, 0, EINVAL);
2244 			return;
2245 		}
2246 
2247 		/*
2248 		 * Turn on generation of credential options for
2249 		 * T_conn_req, T_conn_con, T_unidata_ind.
2250 		 */
2251 		error = miocpullup(mp, sizeof (uint32_t));
2252 		if (error != 0) {
2253 			miocnak(wq, mp, 0, error);
2254 			return;
2255 		}
2256 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2257 			miocnak(wq, mp, 0, EINVAL);
2258 			return;
2259 		}
2260 
2261 		if (*(uint32_t *)mp->b_cont->b_rptr)
2262 			tep->te_flag |= thisopt;
2263 		else
2264 			tep->te_flag &= ~thisopt;
2265 
2266 		miocack(wq, mp, 0, 0);
2267 		break;
2268 
2269 	default:
2270 		/* Should not be here */
2271 		miocnak(wq, mp, 0, EINVAL);
2272 		break;
2273 	}
2274 }
2275 
2276 
2277 /*
2278  * send T_ERROR_ACK
2279  * Note: assumes enough memory or caller passed big enough mp
2280  *	- no recovery from allocb failures
2281  */
2282 
2283 static void
2284 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2285     t_scalar_t unix_err, t_scalar_t type)
2286 {
2287 	struct T_error_ack *err_ack;
2288 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2289 	    M_PCPROTO, T_ERROR_ACK);
2290 
2291 	if (ackmp == NULL) {
2292 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2293 			    "tl_error_ack:out of mblk memory"));
2294 		tl_merror(wq, NULL, ENOSR);
2295 		return;
2296 	}
2297 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2298 	err_ack->ERROR_prim = type;
2299 	err_ack->TLI_error = tli_err;
2300 	err_ack->UNIX_error = unix_err;
2301 
2302 	/*
2303 	 * send error ack message
2304 	 */
2305 	qreply(wq, ackmp);
2306 }
2307 
2308 
2309 
2310 /*
2311  * send T_OK_ACK
2312  * Note: assumes enough memory or caller passed big enough mp
2313  *	- no recovery from allocb failures
2314  */
2315 static void
2316 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2317 {
2318 	struct T_ok_ack *ok_ack;
2319 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2320 	    M_PCPROTO, T_OK_ACK);
2321 
2322 	if (ackmp == NULL) {
2323 		tl_merror(wq, NULL, ENOMEM);
2324 		return;
2325 	}
2326 
2327 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2328 	ok_ack->CORRECT_prim = type;
2329 
2330 	(void) qreply(wq, ackmp);
2331 }
2332 
2333 /*
2334  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2335  * This is a wrapper around tl_bind().
2336  */
2337 static void
2338 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2339 {
2340 	if (! tep->te_closing)
2341 		tl_bind(mp, tep);
2342 	else
2343 		freemsg(mp);
2344 
2345 	tl_serializer_exit(tep);
2346 	tl_refrele(tep);
2347 }
2348 
2349 /*
2350  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2351  * Assumes that the endpoint is in the unbound.
2352  */
2353 static void
2354 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2355 {
2356 	queue_t			*wq = tep->te_wq;
2357 	struct T_bind_ack	*b_ack;
2358 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2359 	mblk_t			*ackmp, *bamp;
2360 	soux_addr_t		ux_addr;
2361 	t_uscalar_t		qlen = 0;
2362 	t_scalar_t		alen, aoff;
2363 	tl_addr_t		addr_req;
2364 	void			*addr_startp;
2365 	ssize_t			msz = MBLKL(mp), basize;
2366 	t_scalar_t		tli_err = 0, unix_err = 0;
2367 	t_scalar_t		save_prim_type = bind->PRIM_type;
2368 	t_scalar_t		save_state = tep->te_state;
2369 
2370 	if (tep->te_state != TS_UNBND) {
2371 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2372 			SL_TRACE|SL_ERROR,
2373 			"tl_wput:bind_request:out of state, state=%d",
2374 			tep->te_state));
2375 		tli_err = TOUTSTATE;
2376 		goto error;
2377 	}
2378 
2379 	if (msz < sizeof (struct T_bind_req)) {
2380 		tli_err = TSYSERR; unix_err = EINVAL;
2381 		goto error;
2382 	}
2383 
2384 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2385 
2386 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2387 	    (bind->PRIM_type == T_BIND_REQ));
2388 
2389 	alen = bind->ADDR_length;
2390 	aoff = bind->ADDR_offset;
2391 
2392 	/* negotiate max conn req pending */
2393 	if (IS_COTS(tep)) {
2394 		qlen = bind->CONIND_number;
2395 		if (qlen > TL_MAXQLEN)
2396 			qlen = TL_MAXQLEN;
2397 	}
2398 
2399 	/*
2400 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2401 	 * and bound again.
2402 	 */
2403 	if ((tep->te_hash_hndl == NULL) &&
2404 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2405 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2406 		&tep->te_hash_hndl) != 0) {
2407 		tli_err = TSYSERR; unix_err = ENOSR;
2408 		goto error;
2409 	}
2410 
2411 	/*
2412 	 * Verify address correctness.
2413 	 */
2414 	if (IS_SOCKET(tep)) {
2415 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2416 
2417 		if ((alen != TL_SOUX_ADDRLEN) ||
2418 		    (aoff < 0) ||
2419 		    (aoff + alen > msz)) {
2420 			(void) (STRLOG(TL_ID, tep->te_minor,
2421 				    1, SL_TRACE|SL_ERROR,
2422 				    "tl_bind: invalid socket addr"));
2423 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2424 			tli_err = TSYSERR; unix_err = EINVAL;
2425 			goto error;
2426 		}
2427 		/* Copy address from message to local buffer. */
2428 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2429 		/*
2430 		 * Check that we got correct address from sockets
2431 		 */
2432 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2433 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2434 			(void) (STRLOG(TL_ID, tep->te_minor,
2435 				    1, SL_TRACE|SL_ERROR,
2436 				    "tl_bind: invalid socket magic"));
2437 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2438 			tli_err = TSYSERR; unix_err = EINVAL;
2439 			goto error;
2440 		}
2441 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2442 		    (ux_addr.soua_vp != NULL)) {
2443 			(void) (STRLOG(TL_ID, tep->te_minor,
2444 				    1, SL_TRACE|SL_ERROR,
2445 				    "tl_bind: implicit addr non-empty"));
2446 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2447 			tli_err = TSYSERR; unix_err = EINVAL;
2448 			goto error;
2449 		}
2450 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2451 		    (ux_addr.soua_vp == NULL)) {
2452 			(void) (STRLOG(TL_ID, tep->te_minor,
2453 				    1, SL_TRACE|SL_ERROR,
2454 				    "tl_bind: explicit addr empty"));
2455 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2456 			tli_err = TSYSERR; unix_err = EINVAL;
2457 			goto error;
2458 		}
2459 	} else {
2460 		if ((alen > 0) && ((aoff < 0) ||
2461 			((ssize_t)(aoff + alen) > msz) ||
2462 			((aoff + alen) < 0))) {
2463 			(void) (STRLOG(TL_ID, tep->te_minor,
2464 				    1, SL_TRACE|SL_ERROR,
2465 				    "tl_bind: invalid message"));
2466 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2467 			tli_err = TSYSERR; unix_err = EINVAL;
2468 			goto error;
2469 		}
2470 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2471 			(void) (STRLOG(TL_ID, tep->te_minor,
2472 				    1, SL_TRACE|SL_ERROR,
2473 				    "tl_bind: bad addr in  message"));
2474 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2475 			tli_err = TBADADDR;
2476 			goto error;
2477 		}
2478 #ifdef DEBUG
2479 		/*
2480 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2481 		 * if (! assertion)
2482 		 *	log warning;
2483 		 */
2484 		if (! ((alen == 0 && aoff == 0) ||
2485 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2486 			(void) (STRLOG(TL_ID, tep->te_minor,
2487 				    3, SL_TRACE|SL_ERROR,
2488 				    "tl_bind: addr overlaps TPI message"));
2489 		}
2490 #endif
2491 	}
2492 
2493 	/*
2494 	 * Bind the address provided or allocate one if requested.
2495 	 * Allow rebinds with a new qlen value.
2496 	 */
2497 	if (IS_SOCKET(tep)) {
2498 		/*
2499 		 * For anonymous requests the te_ap is already set up properly
2500 		 * so use minor number as an address.
2501 		 * For explicit requests need to check whether the address is
2502 		 * already in use.
2503 		 */
2504 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2505 			int rc;
2506 
2507 			if (tep->te_flag & TL_ADDRHASHED) {
2508 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2509 				if (tep->te_vp == ux_addr.soua_vp)
2510 					goto skip_addr_bind;
2511 				else /* Rebind to a new address. */
2512 					tl_addr_unbind(tep);
2513 			}
2514 			/*
2515 			 * Insert address in the hash if it is not already
2516 			 * there.  Since we use preallocated handle, the insert
2517 			 * can fail only if the key is already present.
2518 			 */
2519 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2520 			    (mod_hash_key_t)ux_addr.soua_vp,
2521 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2522 
2523 			if (rc != 0) {
2524 				ASSERT(rc == MH_ERR_DUPLICATE);
2525 				/*
2526 				 * Violate O_T_BIND_REQ semantics and fail with
2527 				 * TADDRBUSY - sockets will not use any address
2528 				 * other than supplied one for explicit binds.
2529 				 */
2530 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2531 					SL_TRACE|SL_ERROR,
2532 					"tl_bind:requested addr %p is busy",
2533 					    ux_addr.soua_vp));
2534 				tli_err = TADDRBUSY; unix_err = 0;
2535 				goto error;
2536 			}
2537 			tep->te_uxaddr = ux_addr;
2538 			tep->te_flag |= TL_ADDRHASHED;
2539 			tep->te_hash_hndl = NULL;
2540 		}
2541 	} else if (alen == 0) {
2542 		/*
2543 		 * assign any free address
2544 		 */
2545 		if (! tl_get_any_addr(tep, NULL)) {
2546 			(void) (STRLOG(TL_ID, tep->te_minor,
2547 				1, SL_TRACE|SL_ERROR,
2548 				"tl_bind:failed to get buffer for any "
2549 				"address"));
2550 			tli_err = TSYSERR; unix_err = ENOSR;
2551 			goto error;
2552 		}
2553 	} else {
2554 		addr_req.ta_alen = alen;
2555 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2556 		addr_req.ta_zoneid = tep->te_zoneid;
2557 
2558 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2559 		if (tep->te_abuf == NULL) {
2560 			tli_err = TSYSERR; unix_err = ENOSR;
2561 			goto error;
2562 		}
2563 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2564 		tep->te_alen = alen;
2565 
2566 		if (mod_hash_insert_reserve(tep->te_addrhash,
2567 			(mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2568 			tep->te_hash_hndl) != 0) {
2569 			if (save_prim_type == T_BIND_REQ) {
2570 				/*
2571 				 * The bind semantics for this primitive
2572 				 * require a failure if the exact address
2573 				 * requested is busy
2574 				 */
2575 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2576 					SL_TRACE|SL_ERROR,
2577 					"tl_bind:requested addr is busy"));
2578 				tli_err = TADDRBUSY; unix_err = 0;
2579 				goto error;
2580 			}
2581 
2582 			/*
2583 			 * O_T_BIND_REQ semantics say if address if requested
2584 			 * address is busy, bind to any available free address
2585 			 */
2586 			if (! tl_get_any_addr(tep, &addr_req)) {
2587 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2588 					SL_TRACE|SL_ERROR,
2589 					"tl_bind:unable to get any addr buf"));
2590 				tli_err = TSYSERR; unix_err = ENOMEM;
2591 				goto error;
2592 			}
2593 		} else {
2594 			tep->te_flag |= TL_ADDRHASHED;
2595 			tep->te_hash_hndl = NULL;
2596 		}
2597 	}
2598 
2599 	ASSERT(tep->te_alen >= 0);
2600 
2601 skip_addr_bind:
2602 	/*
2603 	 * prepare T_BIND_ACK TPI message
2604 	 */
2605 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2606 	bamp = reallocb(mp, basize, 0);
2607 	if (bamp == NULL) {
2608 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2609 			"tl_wput:tl_bind: allocb failed"));
2610 		/*
2611 		 * roll back state changes
2612 		 */
2613 		tl_addr_unbind(tep);
2614 		tep->te_state = TS_UNBND;
2615 		tl_memrecover(wq, mp, basize);
2616 		return;
2617 	}
2618 
2619 	DB_TYPE(bamp) = M_PCPROTO;
2620 	bamp->b_wptr = bamp->b_rptr + basize;
2621 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2622 	b_ack->PRIM_type = T_BIND_ACK;
2623 	b_ack->CONIND_number = qlen;
2624 	b_ack->ADDR_length = tep->te_alen;
2625 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2626 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2627 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2628 
2629 	if (IS_COTS(tep)) {
2630 		tep->te_qlen = qlen;
2631 		if (qlen > 0)
2632 			tep->te_flag |= TL_LISTENER;
2633 	}
2634 
2635 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2636 	/*
2637 	 * send T_BIND_ACK message
2638 	 */
2639 	(void) qreply(wq, bamp);
2640 	return;
2641 
2642 error:
2643 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2644 	if (ackmp == NULL) {
2645 		/*
2646 		 * roll back state changes
2647 		 */
2648 		tep->te_state = save_state;
2649 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2650 		return;
2651 	}
2652 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2653 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2654 }
2655 
2656 /*
2657  * Process T_UNBIND_REQ.
2658  * Called from serializer.
2659  */
2660 static void
2661 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2662 {
2663 	queue_t *wq;
2664 	mblk_t *ackmp;
2665 
2666 	if (tep->te_closing) {
2667 		freemsg(mp);
2668 		return;
2669 	}
2670 
2671 	wq = tep->te_wq;
2672 
2673 	/*
2674 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2675 	 * ==> allocate for T_ERROR_ACK (known max)
2676 	 */
2677 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2678 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2679 		return;
2680 	}
2681 	/*
2682 	 * memory resources committed
2683 	 * Note: no message validation. T_UNBIND_REQ message is
2684 	 * same size as PRIM_type field so already verified earlier.
2685 	 */
2686 
2687 	/*
2688 	 * validate state
2689 	 */
2690 	if (tep->te_state != TS_IDLE) {
2691 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2692 			SL_TRACE|SL_ERROR,
2693 			"tl_wput:T_UNBIND_REQ:out of state, state=%d",
2694 			tep->te_state));
2695 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2696 		return;
2697 	}
2698 	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2699 
2700 	/*
2701 	 * TPI says on T_UNBIND_REQ:
2702 	 *    send up a M_FLUSH to flush both
2703 	 *    read and write queues
2704 	 */
2705 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2706 
2707 	if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2708 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2709 
2710 		/*
2711 		 * Sockets use bind with qlen==0 followed by bind() to
2712 		 * the same address with qlen > 0 for listeners.
2713 		 * We allow rebind with a new qlen value.
2714 		 */
2715 		tl_addr_unbind(tep);
2716 	}
2717 
2718 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2719 	/*
2720 	 * send  T_OK_ACK
2721 	 */
2722 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2723 }
2724 
2725 
2726 /*
2727  * Option management code from drv/ip is used here
2728  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2729  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2730  *	However, that is what we want as that option is 'unorthodox'
2731  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2732  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2733  * Note2: use of optcom_req means this routine is an exception to
2734  *	 recovery from allocb() failures.
2735  */
2736 
2737 static void
2738 tl_optmgmt(queue_t *wq, mblk_t *mp)
2739 {
2740 	tl_endpt_t *tep;
2741 	mblk_t *ackmp;
2742 	union T_primitives *prim;
2743 
2744 	tep = (tl_endpt_t *)wq->q_ptr;
2745 	prim = (union T_primitives *)mp->b_rptr;
2746 
2747 	/*  all states OK for AF_UNIX options ? */
2748 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2749 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2750 		/*
2751 		 * Broken TLI semantics that options can only be managed
2752 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2753 		 * tests this TLI (mis)feature using this device driver.
2754 		 */
2755 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2756 			SL_TRACE|SL_ERROR,
2757 			"tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2758 			tep->te_state));
2759 		/*
2760 		 * preallocate memory for T_ERROR_ACK
2761 		 */
2762 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2763 		if (! ackmp) {
2764 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2765 			return;
2766 		}
2767 
2768 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2769 		freemsg(mp);
2770 		return;
2771 	}
2772 
2773 	/*
2774 	 * call common option management routine from drv/ip
2775 	 */
2776 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2777 		(void) svr4_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj);
2778 	} else {
2779 		ASSERT(prim->type == T_OPTMGMT_REQ);
2780 		(void) tpi_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj);
2781 	}
2782 }
2783 
2784 /*
2785  * Handle T_conn_req - the driver part of accept().
2786  * If TL_SET[U]CRED generate the credentials options.
2787  * If this is a socket pass through options unmodified.
2788  * For sockets generate the T_CONN_CON here instead of
2789  * waiting for the T_CONN_RES.
2790  */
2791 static void
2792 tl_conn_req(queue_t *wq, mblk_t *mp)
2793 {
2794 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2795 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2796 	ssize_t			msz = MBLKL(mp);
2797 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2798 	tl_endpt_t		*peer_tep = NULL;
2799 	mblk_t			*ackmp;
2800 	mblk_t			*dimp;
2801 	struct T_discon_ind	*di;
2802 	soux_addr_t		ux_addr;
2803 	tl_addr_t		dst;
2804 
2805 	ASSERT(IS_COTS(tep));
2806 
2807 	if (tep->te_closing) {
2808 		freemsg(mp);
2809 		return;
2810 	}
2811 
2812 	/*
2813 	 * preallocate memory for:
2814 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2815 	 *	==> known max T_ERROR_ACK
2816 	 * 2. max of T_DISCON_IND and T_CONN_IND
2817 	 */
2818 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2819 	if (! ackmp) {
2820 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2821 		return;
2822 	}
2823 	/*
2824 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2825 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2826 	 */
2827 
2828 	if (tep->te_state != TS_IDLE) {
2829 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2830 			SL_TRACE|SL_ERROR,
2831 			"tl_wput:T_CONN_REQ:out of state, state=%d",
2832 			tep->te_state));
2833 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2834 		freemsg(mp);
2835 		return;
2836 	}
2837 
2838 	/*
2839 	 * validate the message
2840 	 * Note: dereference fields in struct inside message only
2841 	 * after validating the message length.
2842 	 */
2843 	if (msz < sizeof (struct T_conn_req)) {
2844 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2845 			"tl_conn_req:invalid message length"));
2846 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2847 		freemsg(mp);
2848 		return;
2849 	}
2850 	alen = creq->DEST_length;
2851 	aoff = creq->DEST_offset;
2852 	olen = creq->OPT_length;
2853 	ooff = creq->OPT_offset;
2854 	if (olen == 0)
2855 		ooff = 0;
2856 
2857 	if (IS_SOCKET(tep)) {
2858 		if ((alen != TL_SOUX_ADDRLEN) ||
2859 		    (aoff < 0) ||
2860 		    (aoff + alen > msz) ||
2861 		    (alen > msz - sizeof (struct T_conn_req))) {
2862 			(void) (STRLOG(TL_ID, tep->te_minor,
2863 				    1, SL_TRACE|SL_ERROR,
2864 				    "tl_conn_req: invalid socket addr"));
2865 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2866 			freemsg(mp);
2867 			return;
2868 		}
2869 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2870 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2871 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2872 			(void) (STRLOG(TL_ID, tep->te_minor,
2873 				    1, SL_TRACE|SL_ERROR,
2874 				    "tl_conn_req: invalid socket magic"));
2875 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2876 			freemsg(mp);
2877 			return;
2878 		}
2879 	} else {
2880 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2881 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2882 			ooff + olen < 0)) ||
2883 		    olen < 0 || ooff < 0) {
2884 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2885 				    SL_TRACE|SL_ERROR,
2886 				    "tl_conn_req:invalid message"));
2887 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2888 			freemsg(mp);
2889 			return;
2890 		}
2891 
2892 		if (alen <= 0 || aoff < 0 ||
2893 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2894 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2895 				    SL_TRACE|SL_ERROR,
2896 				    "tl_conn_req:bad addr in message, "
2897 				    "alen=%d, msz=%ld",
2898 				    alen, msz));
2899 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2900 			freemsg(mp);
2901 			return;
2902 		}
2903 #ifdef DEBUG
2904 		/*
2905 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2906 		 * if (! assertion)
2907 		 *	log warning;
2908 		 */
2909 		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2910 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
2911 				    SL_TRACE|SL_ERROR,
2912 				    "tl_conn_req: addr overlaps TPI message"));
2913 		}
2914 #endif
2915 		if (olen) {
2916 			/*
2917 			 * no opts in connect req
2918 			 * supported in this provider except for sockets.
2919 			 */
2920 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2921 				    SL_TRACE|SL_ERROR,
2922 				    "tl_conn_req:options not supported "
2923 				    "in message"));
2924 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2925 			freemsg(mp);
2926 			return;
2927 		}
2928 	}
2929 
2930 	/*
2931 	 * Prevent tep from closing on us.
2932 	 */
2933 	if (! tl_noclose(tep)) {
2934 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2935 			"tl_conn_req:endpoint is closing"));
2936 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2937 		freemsg(mp);
2938 		return;
2939 	}
2940 
2941 	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2942 	/*
2943 	 * get endpoint to connect to
2944 	 * check that peer with DEST addr is bound to addr
2945 	 * and has CONIND_number > 0
2946 	 */
2947 	dst.ta_alen = alen;
2948 	dst.ta_abuf = mp->b_rptr + aoff;
2949 	dst.ta_zoneid = tep->te_zoneid;
2950 
2951 	/*
2952 	 * Verify if remote addr is in use
2953 	 */
2954 	peer_tep = (IS_SOCKET(tep) ?
2955 	    tl_sock_find_peer(tep, &ux_addr) :
2956 	    tl_find_peer(tep, &dst));
2957 
2958 	if (peer_tep == NULL) {
2959 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2960 			"tl_conn_req:no one at connect address"));
2961 		err = ECONNREFUSED;
2962 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2963 		/*
2964 		 * validate that number of incoming connection is
2965 		 * not to capacity on destination endpoint
2966 		 */
2967 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2968 			"tl_conn_req: qlen overflow connection refused"));
2969 			err = ECONNREFUSED;
2970 	} else if (!((peer_tep->te_state == TS_IDLE) ||
2971 			(peer_tep->te_state == TS_WRES_CIND))) {
2972 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2973 			"tl_conn_req:peer in bad state"));
2974 		err = ECONNREFUSED;
2975 	}
2976 
2977 	/*
2978 	 * preallocate now for T_DISCON_IND or T_CONN_IND
2979 	 */
2980 	if (err != 0) {
2981 		if (peer_tep != NULL)
2982 			tl_refrele(peer_tep);
2983 		/* We are still expected to send T_OK_ACK */
2984 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2985 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2986 		tl_closeok(tep);
2987 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2988 		    M_PROTO, T_DISCON_IND);
2989 		if (dimp == NULL) {
2990 			tl_merror(wq, NULL, ENOSR);
2991 			return;
2992 		}
2993 		di = (struct T_discon_ind *)dimp->b_rptr;
2994 		di->DISCON_reason = err;
2995 		di->SEQ_number = BADSEQNUM;
2996 
2997 		tep->te_state = TS_IDLE;
2998 		/*
2999 		 * send T_DISCON_IND message
3000 		 */
3001 		putnext(tep->te_rq, dimp);
3002 		return;
3003 	}
3004 
3005 	ASSERT(IS_COTS(peer_tep));
3006 
3007 	/*
3008 	 * Found the listener. At this point processing will continue on
3009 	 * listener serializer. Close of the endpoint should be blocked while we
3010 	 * switch serializers.
3011 	 */
3012 	tl_serializer_refhold(peer_tep->te_ser);
3013 	tl_serializer_refrele(tep->te_ser);
3014 	tep->te_ser = peer_tep->te_ser;
3015 	ASSERT(tep->te_oconp == NULL);
3016 	tep->te_oconp = peer_tep;
3017 
3018 	/*
3019 	 * It is safe to close now. Close may continue on listener serializer.
3020 	 */
3021 	tl_closeok(tep);
3022 
3023 	/*
3024 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3025 	 * data, so we link mp to ackmp.
3026 	 */
3027 	ackmp->b_cont = mp;
3028 	mp = ackmp;
3029 
3030 	tl_refhold(tep);
3031 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3032 }
3033 
3034 /*
3035  * Finish T_CONN_REQ processing on listener serializer.
      *
      * mp is the preallocated ack message with the original T_CONN_REQ linked
      * on its b_cont; tep is the connecting endpoint and tep->te_oconp is the
      * listener it is connecting to.
      *
      * On success a T_OK_ACK is sent up on tep, a T_CONN_IND is sent up on the
      * listener and a tl_icon_t describing tep is appended to the listener's
      * te_iconp list. For sockets (unless tl_disable_early_connect is set) a
      * T_CONN_CON is also sent up on tep right away.
      *
      * Every failure path undoes the te_oconp linkage with TL_UNCONNECT(),
      * exits the serializer and drops a tep reference with tl_refrele().
3036  */
3037 static void
3038 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3039 {
3040 	queue_t		*wq;
3041 	tl_endpt_t	*peer_tep = tep->te_oconp;
3042 	mblk_t		*confmp, *cimp, *indmp;
3043 	void		*opts = NULL;
3044 	mblk_t		*ackmp = mp;
3045 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3046 	struct T_conn_ind	*ci;
3047 	tl_icon_t	*tip;
3048 	void		*addr_startp;
3049 	t_scalar_t	olen = creq->OPT_length;
3050 	t_scalar_t	ooff = creq->OPT_offset;
3051 	size_t 		ci_msz;
3052 	size_t		size;
3053 
     	/*
     	 * The connecting endpoint is closing: abandon the request. mp still
     	 * has the T_CONN_REQ chained on b_cont here, so one freemsg() covers
     	 * both the ack block and the request.
     	 */
3054 	if (tep->te_closing) {
3055 		TL_UNCONNECT(tep->te_oconp);
3056 		tl_serializer_exit(tep);
3057 		tl_refrele(tep);
3058 		freemsg(mp);
3059 		return;
3060 	}
3061 
3062 	wq = tep->te_wq;
3063 	tep->te_flag |= TL_EAGER;
3064 
3065 	/*
3066 	 * Extract preallocated ackmp from mp.
     	 * After this mp is the T_CONN_REQ and ackmp is a separate message.
3067 	 */
3068 	mp = mp->b_cont;
3069 	ackmp->b_cont = NULL;
3070 
     	/*
     	 * ooff != 0 is used below as the "options present in the request"
     	 * test, so force it to zero when there are no options.
     	 */
3071 	if (olen == 0)
3072 		ooff = 0;
3073 
     	/*
     	 * Check the listener again now that we run on its serializer: it must
     	 * not be closing and must be in TS_IDLE or TS_WRES_CIND.
     	 * mp is handed to tl_error_ack() and not freed here; presumably
     	 * tl_error_ack() consumes it - TODO confirm.
     	 */
3074 	if (peer_tep->te_closing ||
3075 	    !((peer_tep->te_state == TS_IDLE) ||
3076 		(peer_tep->te_state == TS_WRES_CIND))) {
3077 		TL_UNCONNECT(tep->te_oconp);
3078 		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3079 		freemsg(ackmp);
3080 		tl_serializer_exit(tep);
3081 		tl_refrele(tep);
3082 		return;
3083 	}
3084 
3085 	/*
3086 	 * preallocate now for T_DISCON_IND or T_CONN_IND
3087 	 */
3088 	/*
3089 	 * calculate length of T_CONN_IND message
     	 *
     	 * If the listener has TL_SETCRED or TL_SETUCRED set, the T_CONN_IND
     	 * carries exactly one provider-generated option. Clearing ooff in
     	 * that case means the options from the T_CONN_REQ are neither saved
     	 * nor copied below.
3090 	 */
3091 	if (peer_tep->te_flag & TL_SETCRED) {
3092 		ooff = 0;
3093 		olen = (t_scalar_t) sizeof (struct opthdr) +
3094 		    OPTLEN(sizeof (tl_credopt_t));
3095 		/* 1 option only */
3096 	} else if (peer_tep->te_flag & TL_SETUCRED) {
3097 		ooff = 0;
3098 		olen = (t_scalar_t)sizeof (struct opthdr) +
3099 		    OPTLEN(ucredsize);
3100 		/* 1 option only */
3101 	}
     	/*
     	 * T_CONN_IND layout: header, source address (tep's bound address),
     	 * then the options at the next T_ALIGN boundary. size is large enough
     	 * for either that or a T_DISCON_IND.
     	 */
3102 	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3103 	ci_msz = T_ALIGN(ci_msz) + olen;
3104 	size = max(ci_msz, sizeof (struct T_discon_ind));
3105 
3106 	/*
3107 	 * Save options from mp - we'll need them for T_CONN_IND.
     	 * (mp is resized into the T_CONN_IND below, so the options are copied
     	 * out first.)
3108 	 */
3109 	if (ooff != 0) {
3110 		opts = kmem_alloc(olen, KM_NOSLEEP);
3111 		if (opts == NULL) {
3112 			/*
3113 			 * roll back state changes
     			 * mp is handed to tl_memrecover(); presumably it
     			 * arranges for the request to be retried later -
     			 * TODO confirm.
3114 			 */
3115 			tep->te_state = TS_IDLE;
3116 			tl_memrecover(wq, mp, size);
3117 			freemsg(ackmp);
3118 			TL_UNCONNECT(tep->te_oconp);
3119 			tl_serializer_exit(tep);
3120 			tl_refrele(tep);
3121 			return;
3122 		}
3123 		/* Copy options to a temp buffer */
3124 		bcopy(mp->b_rptr + ooff, opts, olen);
3125 	}
3126 
3127 	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3128 		/*
3129 		 * Generate a T_CONN_CON that has the identical address
3130 		 * (and options) as the T_CONN_REQ.
3131 		 * NOTE: assumes that the T_conn_req and T_conn_con structures
3132 		 * are isomorphic.
3133 		 */
3134 		confmp = copyb(mp);
3135 		if (! confmp) {
3136 			/*
3137 			 * roll back state changes
3138 			 */
3139 			tep->te_state = TS_IDLE;
3140 			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3141 			freemsg(ackmp);
3142 			if (opts != NULL)
3143 				kmem_free(opts, olen);
3144 			TL_UNCONNECT(tep->te_oconp);
3145 			tl_serializer_exit(tep);
3146 			tl_refrele(tep);
3147 			return;
3148 		}
3149 		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3150 			T_CONN_CON;
3151 	} else {
3152 		confmp = NULL;
3153 	}
     	/*
     	 * Make sure the request message can hold `size' bytes; it is turned
     	 * into the T_CONN_IND further down.
     	 */
3154 	if ((indmp = reallocb(mp, size, 0)) == NULL) {
3155 		/*
3156 		 * roll back state changes
3157 		 */
3158 		tep->te_state = TS_IDLE;
3159 		tl_memrecover(wq, mp, size);
3160 		freemsg(ackmp);
3161 		if (opts != NULL)
3162 			kmem_free(opts, olen);
3163 		freemsg(confmp);
3164 		TL_UNCONNECT(tep->te_oconp);
3165 		tl_serializer_exit(tep);
3166 		tl_refrele(tep);
3167 		return;
3168 	}
3169 
     	/*
     	 * Allocate the cell that will represent this connection request on
     	 * the listener's list of incoming connections.
     	 */
3170 	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3171 	if (tip == NULL) {
3172 		/*
3173 		 * roll back state changes
3174 		 */
3175 		tep->te_state = TS_IDLE;
3176 		tl_memrecover(wq, indmp, sizeof (*tip));
3177 		freemsg(ackmp);
3178 		if (opts != NULL)
3179 			kmem_free(opts, olen);
3180 		freemsg(confmp);
3181 		TL_UNCONNECT(tep->te_oconp);
3182 		tl_serializer_exit(tep);
3183 		tl_refrele(tep);
3184 		return;
3185 	}
3186 	tip->ti_mp = NULL;
3187 
3188 	/*
3189 	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3190 	 * and tl_icon_t cell.
3191 	 */
3192 
3193 	/*
3194 	 * ack validity of request and send the peer credential in the ACK.
3195 	 */
3196 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3197 
     	/*
     	 * NOTE(review): peer_tep has already been dereferenced above, so the
     	 * peer_tep != NULL test below is not what protects those accesses.
     	 */
3198 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3199 	    confmp != NULL) {
3200 		mblk_setcred(confmp, peer_tep->te_credp);
3201 		DB_CPID(confmp) = peer_tep->te_cpid;
3202 	}
3203 
3204 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3205 
3206 	/*
3207 	 * prepare message to send T_CONN_IND
3208 	 */
3209 	/*
3210 	 * allocate the message - original data blocks retained
3211 	 * in the returned mblk
3212 	 */
3213 	cimp = tl_resizemp(indmp, size);
3214 	if (! cimp) {
3215 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3216 			"tl_conn_req:con_ind:allocb failure"));
3217 		tl_merror(wq, indmp, ENOMEM);
3218 		TL_UNCONNECT(tep->te_oconp);
3219 		tl_serializer_exit(tep);
3220 		tl_refrele(tep);
3221 		if (opts != NULL)
3222 			kmem_free(opts, olen);
3223 		freemsg(confmp);
3224 		ASSERT(tip->ti_mp == NULL);
3225 		kmem_free(tip, sizeof (*tip));
3226 		return;
3227 	}
3228 
     	/*
     	 * Fill in the T_CONN_IND: the source address is tep's bound address
     	 * and the sequence number is tep's te_seqno.
     	 */
3229 	DB_TYPE(cimp) = M_PROTO;
3230 	ci = (struct T_conn_ind *)cimp->b_rptr;
3231 	ci->PRIM_type  = T_CONN_IND;
3232 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3233 	ci->SRC_length = tep->te_alen;
3234 	ci->SEQ_number = tep->te_seqno;
3235 
3236 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3237 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3238 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
     		/* Listener wants credentials: generate the single option. */
3239 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3240 					ci->SRC_length);
3241 		ci->OPT_length = olen; /* because only 1 option */
3242 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3243 			DB_CREDDEF(cimp, tep->te_credp),
3244 			TLPID(cimp, tep),
3245 			peer_tep->te_flag);
3246 	} else if (ooff != 0) {
3247 		/* Copy option from T_CONN_REQ */
3248 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3249 					ci->SRC_length);
3250 		ci->OPT_length = olen;
3251 		ASSERT(opts != NULL);
3252 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3253 	} else {
3254 		ci->OPT_offset = 0;
3255 		ci->OPT_length = 0;
3256 	}
     	/* The temporary copy of the request options is no longer needed. */
3257 	if (opts != NULL)
3258 		kmem_free(opts, olen);
3259 
3260 	/*
3261 	 * register connection request with server peer
3262 	 * append to list of incoming connections
3263 	 * increment references for both peer_tep and tep: peer_tep is placed on
3264 	 * te_oconp and tep is placed on listeners queue.
3265 	 */
3266 	tip->ti_tep = tep;
3267 	tip->ti_seqno = tep->te_seqno;
3268 	list_insert_tail(&peer_tep->te_iconp, tip);
3269 	peer_tep->te_nicon++;
3270 
3271 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3272 	/*
3273 	 * send the T_CONN_IND message
3274 	 */
3275 	putnext(peer_tep->te_rq, cimp);
3276 
3277 	/*
3278 	 * Send a T_CONN_CON message for sockets.
3279 	 * Disable the queues until we have reached the correct state!
     	 * (tl_conn_res and tl_discon_req call enableok() on this write queue
     	 * again.)
3280 	 */
3281 	if (confmp != NULL) {
3282 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3283 		noenable(wq);
3284 		putnext(tep->te_rq, confmp);
3285 	}
3286 	/*
3287 	 * Now we need to increment tep reference because tep is referenced by
3288 	 * server list of pending connections. We also need to decrement
3289 	 * reference before exiting serializer. Two operations void each other
3290 	 * so we don't modify reference at all.
3291 	 */
3292 	ASSERT(tep->te_refcnt >= 2);
3293 	ASSERT(peer_tep->te_refcnt >= 2);
3294 	tl_serializer_exit(tep);
3295 }
3296 
3297 
3298 
3299 /*
3300  * Handle T_CONN_RES (or O_T_CONN_RES) on the listener stream.
3301  * Called on the listener serializer; tep is the listener and mp is the
3302  * response message.
3303  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3304  * Switch eager serializer to acceptor's when that is possible; otherwise the
      * acceptor is moved to the eager's (i.e. the listener's) serializer.
3305  *
3306  * If TL_SET[U]CRED generate the credentials options.
3307  * For sockets (unless tl_disable_early_connect is set) tl_conn_req_ser has
      * already generated the T_CONN_CON, so none is sent from here.
3308  */
3309 static void
3310 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3311 {
3312 	queue_t			*wq;
3313 	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
3314 	ssize_t			msz = MBLKL(mp);
3315 	t_scalar_t		olen, ooff, err = 0;
3316 	t_scalar_t		prim = cres->PRIM_type;
3317 	uchar_t			*addr_startp;
3318 	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
3319 	tl_icon_t		*tip;
3320 	size_t			size;
3321 	mblk_t			*ackmp, *respmp;
3322 	mblk_t			*dimp, *ccmp = NULL;
3323 	struct T_discon_ind	*di;
3324 	struct T_conn_con	*cc;
     	/* B_TRUE once tl_noclose(cl_ep) succeeded and must be undone. */
3325 	boolean_t		client_noclose_set = B_FALSE;
     	/* B_FALSE when the acceptor has to adopt the client's serializer. */
3326 	boolean_t		switch_client_serializer = B_TRUE;
3327 
3328 	ASSERT(IS_COTS(tep));
3329 
3330 	if (tep->te_closing) {
3331 		freemsg(mp);
3332 		return;
3333 	}
3334 
3335 	wq = tep->te_wq;
3336 
3337 	/*
3338 	 * preallocate memory for:
3339 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3340 	 *	==> known max T_ERROR_ACK
3341 	 * 2. max of T_DISCON_IND and T_CONN_CON
3342 	 */
3343 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3344 	if (! ackmp) {
3345 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3346 		return;
3347 	}
3348 	/*
3349 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3350 	 * will be committed for T_DISCON_IND/T_CONN_CON later
3351 	 */
3352 
3353 
3354 	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3355 
3356 	/*
3357 	 * validate state
3358 	 */
3359 	if (tep->te_state != TS_WRES_CIND) {
3360 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3361 			SL_TRACE|SL_ERROR,
3362 			"tl_wput:T_CONN_RES:out of state, state=%d",
3363 			tep->te_state));
3364 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3365 		freemsg(mp);
3366 		return;
3367 	}
3368 
3369 	/*
3370 	 * validate the message
3371 	 * Note: dereference fields in struct inside message only
3372 	 * after validating the message length.
     	 * (prim was already read from cres->PRIM_type in the declarations
     	 * above; presumably the caller has checked that much of the message -
     	 * TODO confirm.)
3373 	 */
3374 	if (msz < sizeof (struct T_conn_res)) {
3375 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3376 			"tl_conn_res:invalid message length"));
3377 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3378 		freemsg(mp);
3379 		return;
3380 	}
3381 	olen = cres->OPT_length;
3382 	ooff = cres->OPT_offset;
3383 	if (((olen > 0) && ((ooff + olen) > msz))) {
3384 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3385 			"tl_conn_res:invalid message"));
3386 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3387 		freemsg(mp);
3388 		return;
3389 	}
3390 	if (olen) {
3391 		/*
3392 		 * no opts in connect res
3393 		 * supported in this provider
3394 		 */
3395 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3396 			"tl_conn_res:options not supported in message"));
3397 		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3398 		freemsg(mp);
3399 		return;
3400 	}
3401 
     	/*
     	 * The request is now in progress. Every error return from here on
     	 * moves the listener state forward with TE_ERROR_ACK first.
     	 */
3402 	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3403 	ASSERT(tep->te_state == TS_WACK_CRES);
3404 
     	/*
     	 * Reject sequence numbers that are both below TL_MINOR_START and at
     	 * or above BADSEQNUM.
     	 */
3405 	if (cres->SEQ_number < TL_MINOR_START &&
3406 		cres->SEQ_number >= BADSEQNUM) {
3407 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3408 			"tl_conn_res:remote endpoint sequence number bad"));
3409 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3410 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3411 		freemsg(mp);
3412 		return;
3413 	}
3414 
3415 	/*
3416 	 * find accepting endpoint. Will have extra reference if found.
     	 * That reference is dropped with tl_refrele(acc_ep) on every return
     	 * path below except the successful one, where it is kept for the
     	 * te_conp linkage.
3417 	 */
3418 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3419 		(mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3420 		(mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3421 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3422 			"tl_conn_res:bad accepting endpoint"));
3423 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3424 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3425 		freemsg(mp);
3426 		return;
3427 	}
3428 
3429 	/*
3430 	 * Prevent acceptor from closing.
     	 * Undone with tl_closeok(acc_ep) on every later return path.
3431 	 */
3432 	if (! tl_noclose(acc_ep)) {
3433 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3434 			"tl_conn_res:bad accepting endpoint"));
3435 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3436 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3437 		tl_refrele(acc_ep);
3438 		freemsg(mp);
3439 		return;
3440 	}
3441 
3442 	acc_ep->te_flag |= TL_ACCEPTOR;
3443 
3444 	/*
3445 	 * validate that accepting endpoint, if different from listening
3446 	 * has address bound => state is TS_IDLE
3447 	 * TROUBLE in XPG4 !!?
3448 	 */
3449 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3450 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3451 			"tl_conn_res:accepting endpoint has no address bound,"
3452 			"state=%d", acc_ep->te_state));
3453 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3454 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3455 		freemsg(mp);
3456 		tl_closeok(acc_ep);
3457 		tl_refrele(acc_ep);
3458 		return;
3459 	}
3460 
3461 	/*
3462 	 * validate if accepting endpt same as listening, then
3463 	 * no other incoming connection should be on the queue
3464 	 */
3465 
3466 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3467 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3468 			"tl_conn_res: > 1 conn_ind on listener-acceptor"));
3469 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3470 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3471 		freemsg(mp);
3472 		tl_closeok(acc_ep);
3473 		tl_refrele(acc_ep);
3474 		return;
3475 	}
3476 
3477 	/*
3478 	 * Look up, by sequence number, the entry for the client on the
3479 	 * listener's list of pending connections. The entry is removed
3480 	 * from the list (tl_freetip) further down, once the response has
3481 	 * been accepted.
3482 	 */
3483 	tip = tl_icon_find(tep, cres->SEQ_number);
3484 	if (tip == NULL) {
3485 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3486 			"tl_conn_res:no client in listener list"));
3487 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3488 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3489 		freemsg(mp);
3490 		tl_closeok(acc_ep);
3491 		tl_refrele(acc_ep);
3492 		return;
3493 	}
3494 
3495 	/*
3496 	 * If ti_tep is NULL the client has already closed. In this case
3497 	 * the code below will avoid any action on the client side
3498 	 * but complete the server and acceptor state transitions.
3499 	 */
3500 	ASSERT(tip->ti_tep == NULL ||
3501 		tip->ti_tep->te_seqno == cres->SEQ_number);
3502 	cl_ep = tip->ti_tep;
3503 
3504 	/*
3505 	 * If the client is present it is switched from listener's to acceptor's
3506 	 * serializer. We should block client closes while serializers are
3507 	 * being switched.
3508 	 *
3509 	 * It is possible that the client is present but is currently being
3510 	 * closed. There are two possible cases:
3511 	 *
3512 	 * 1) The client has already entered tl_close_finish_ser() and sent
3513 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3514 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3515 	 *
3516 	 * 2) The client started the close but has not entered
3517 	 *    tl_close_finish_ser() yet. In this case, the client is already
3518 	 *    proceeding asynchronously on the listener's serializer, so we're
3519 	 *    forced to change the acceptor to use the listener's serializer to
3520 	 *    ensure that any operations on the acceptor are serialized with
3521 	 *    respect to the close that's in-progress.
3522 	 */
3523 	if (cl_ep != NULL) {
3524 		if (tl_noclose(cl_ep)) {
3525 			client_noclose_set = B_TRUE;
3526 		} else {
3527 			/*
3528 			 * Client is closing. If it has sent the
3529 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3530 			 * we have to let the client continue until it is
3531 			 * sent.
3532 			 *
3533 			 * If we do continue using the client, acceptor will
3534 			 * switch to client's serializer which is used by client
3535 			 * for its close.
     			 *
     			 * The client is dropped (cl_ep = NULL) unless it is a
     			 * socket with early connect enabled whose te_state is
     			 * not -1.
3536 			 */
3537 			tl_client_closing_when_accepting++;
3538 			switch_client_serializer = B_FALSE;
3539 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3540 			    cl_ep->te_state == -1)
3541 				cl_ep = NULL;
3542 		}
3543 	}
3544 
3545 	if (cl_ep != NULL) {
3546 		/*
3547 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3548 		 * (latter for sockets only)
     		 *
     		 * NOTE(review): as written, err is set only when cl_ep is a
     		 * socket whose state is neither TS_WCON_CREQ nor
     		 * TS_DATA_XFER. A non-socket client in any other state is not
     		 * rejected here, which does not match the sentence above -
     		 * confirm which behavior is intended.
3549 		 */
3550 		if (cl_ep->te_state != TS_WCON_CREQ &&
3551 		    (cl_ep->te_state != TS_DATA_XFER &&
3552 		    IS_SOCKET(cl_ep))) {
3553 			err = ECONNREFUSED;
3554 			/*
3555 			 * T_DISCON_IND sent later after committing memory
3556 			 * and acking validity of request
3557 			 */
3558 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3559 				"tl_conn_res:peer in bad state"));
3560 		}
3561 
3562 		/*
3563 		 * preallocate now for T_DISCON_IND or T_CONN_CON
3564 		 * ack validity of request (T_OK_ACK) after memory committed
3565 		 */
3566 
3567 		if (err)
3568 			size = sizeof (struct T_discon_ind);
3569 		else {
3570 			/*
3571 			 * calculate length of T_CONN_CON message
     			 * olen is reused here for the length of the single
     			 * credentials option (it was zero on entry to this
     			 * point, since non-empty options were rejected above).
3572 			 */
3573 			olen = 0;
3574 			if (cl_ep->te_flag & TL_SETCRED) {
3575 				olen = (t_scalar_t)sizeof (struct opthdr) +
3576 					OPTLEN(sizeof (tl_credopt_t));
3577 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3578 				olen = (t_scalar_t)sizeof (struct opthdr) +
3579 					OPTLEN(ucredsize);
3580 			}
3581 			size = T_ALIGN(sizeof (struct T_conn_con) +
3582 					acc_ep->te_alen) + olen;
3583 		}
3584 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3585 			/*
3586 			 * roll back state changes
3587 			 */
3588 			tep->te_state = TS_WRES_CIND;
3589 			tl_memrecover(wq, mp, size);
3590 			freemsg(ackmp);
3591 			if (client_noclose_set)
3592 				tl_closeok(cl_ep);
3593 			tl_closeok(acc_ep);
3594 			tl_refrele(acc_ep);
3595 			return;
3596 		}
     		/* From here on the message is referenced only through respmp. */
3597 		mp = NULL;
3598 	}
3599 
3600 	/*
3601 	 * Now ack validity of request
     	 * The listener's next state depends on whether this is the only
     	 * pending connection and whether the listener is also the acceptor.
3602 	 */
3603 	if (tep->te_nicon == 1) {
3604 		if (tep == acc_ep)
3605 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3606 		else
3607 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3608 	} else
3609 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3610 
3611 	/*
3612 	 * send T_DISCON_IND now if client state validation failed earlier
     	 * (err can only be non-zero when cl_ep != NULL, so respmp and size
     	 * have been set.)
     	 *
     	 * NOTE(review): this path returns without calling tl_freetip() for
     	 * tip and overrides the listener state computed just above with
     	 * TS_IDLE - confirm that both are intended.
3613 	 */
3614 	if (err) {
3615 		tl_ok_ack(wq, ackmp, prim);
3616 		/*
3617 		 * flush the queues - why always ?
3618 		 */
3619 		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3620 
3621 		dimp = tl_resizemp(respmp, size);
3622 		if (! dimp) {
3623 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3624 				SL_TRACE|SL_ERROR,
3625 				"tl_conn_res:con_ind:allocb failure"));
3626 			tl_merror(wq, respmp, ENOMEM);
3627 			tl_closeok(acc_ep);
3628 			if (client_noclose_set)
3629 				tl_closeok(cl_ep);
3630 			tl_refrele(acc_ep);
3631 			return;
3632 		}
3633 		if (dimp->b_cont) {
3634 			/* no user data in provider generated discon ind */
3635 			freemsg(dimp->b_cont);
3636 			dimp->b_cont = NULL;
3637 		}
3638 
3639 		DB_TYPE(dimp) = M_PROTO;
3640 		di = (struct T_discon_ind *)dimp->b_rptr;
3641 		di->PRIM_type  = T_DISCON_IND;
3642 		di->DISCON_reason = err;
3643 		di->SEQ_number = BADSEQNUM;
3644 
3645 		tep->te_state = TS_IDLE;
3646 		/*
3647 		 * send T_DISCON_IND message
3648 		 */
3649 		putnext(acc_ep->te_rq, dimp);
3650 		if (client_noclose_set)
3651 			tl_closeok(cl_ep);
3652 		tl_closeok(acc_ep);
3653 		tl_refrele(acc_ep);
3654 		return;
3655 	}
3656 
3657 	/*
3658 	 * now start connecting the accepting endpoint
3659 	 */
3660 	if (tep != acc_ep)
3661 		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3662 
3663 	if (cl_ep == NULL) {
3664 		/*
3665 		 * The client has already closed. Send up any queued messages
3666 		 * and change the state accordingly.
     		 * mp is still the original response message here, because the
     		 * reallocb above is only done when cl_ep is not NULL.
3667 		 */
3668 		tl_ok_ack(wq, ackmp, prim);
3669 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3670 
3671 		/*
3672 		 * remove endpoint from incoming connection
3673 		 * delete client from list of incoming connections
3674 		 */
3675 		tl_freetip(tep, tip);
3676 		freemsg(mp);
3677 		tl_closeok(acc_ep);
3678 		tl_refrele(acc_ep);
3679 		return;
3680 	} else if (tip->ti_mp != NULL) {
3681 		/*
3682 		 * The client could have queued a T_DISCON_IND which needs
3683 		 * to be sent up.
3684 		 * Note that t_discon_req can not operate the same as
3685 		 * t_data_req since it is not possible for it to putbq
3686 		 * the message and return -1 due to the use of qwriter.
3687 		 */
3688 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3689 	}
3690 
3691 	/*
3692 	 * prepare connect confirm T_CONN_CON message
3693 	 */
3694 
3695 	/*
3696 	 * allocate the message - original data blocks
3697 	 * retained in the returned mblk
     	 *
     	 * A T_CONN_CON is built only for non-sockets, or when early connect
     	 * is disabled; otherwise respmp is not needed and is freed.
3698 	 */
3699 	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3700 		ccmp = tl_resizemp(respmp, size);
3701 		if (ccmp == NULL) {
3702 			tl_ok_ack(wq, ackmp, prim);
3703 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3704 				    SL_TRACE|SL_ERROR,
3705 				    "tl_conn_res:conn_con:allocb failure"));
3706 			tl_merror(wq, respmp, ENOMEM);
3707 			tl_closeok(acc_ep);
3708 			if (client_noclose_set)
3709 				tl_closeok(cl_ep);
3710 			tl_refrele(acc_ep);
3711 			return;
3712 		}
3713 
     		/*
     		 * T_CONN_CON layout: header, responding address (the
     		 * acceptor's bound address), then the optional credentials
     		 * option at the next T_ALIGN boundary.
     		 */
3714 		DB_TYPE(ccmp) = M_PROTO;
3715 		cc = (struct T_conn_con *)ccmp->b_rptr;
3716 		cc->PRIM_type  = T_CONN_CON;
3717 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3718 		cc->RES_length = acc_ep->te_alen;
3719 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3720 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3721 		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3722 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3723 			    cc->RES_length);
3724 			cc->OPT_length = olen;
3725 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3726 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag);
3727 		} else {
3728 			cc->OPT_offset = 0;
3729 			cc->OPT_length = 0;
3730 		}
3731 		/*
3732 		 * Forward the credential in the packet so it can be picked up
3733 		 * at the higher layers for more complete credential processing
3734 		 */
3735 		mblk_setcred(ccmp, acc_ep->te_credp);
3736 		DB_CPID(ccmp) = acc_ep->te_cpid;
3737 	} else {
3738 		freemsg(respmp);
3739 		respmp = NULL;
3740 	}
3741 
3742 	/*
3743 	 * make connection linking
3744 	 * accepting and client endpoints
3745 	 * No need to increment references:
3746 	 *	on client: it should already have one from tip->ti_tep linkage.
3747 	 *	on acceptor it should already have one from the table lookup.
3748 	 *
3749 	 * At this point both client and acceptor can't close. Set client
3750 	 * serializer to acceptor's.
3751 	 */
3752 	ASSERT(cl_ep->te_refcnt >= 2);
3753 	ASSERT(acc_ep->te_refcnt >= 2);
3754 	ASSERT(cl_ep->te_conp == NULL);
3755 	ASSERT(acc_ep->te_conp == NULL);
3756 	cl_ep->te_conp = acc_ep;
3757 	acc_ep->te_conp = cl_ep;
3758 	ASSERT(cl_ep->te_ser == tep->te_ser);
3759 	if (switch_client_serializer) {
     		/*
     		 * The client is moved only while its te_ser_count is zero
     		 * (checked under te_ser_lock); otherwise fall through to
     		 * moving the acceptor instead.
     		 */
3760 		mutex_enter(&cl_ep->te_ser_lock);
3761 		if (cl_ep->te_ser_count > 0) {
3762 			switch_client_serializer = B_FALSE;
3763 			tl_serializer_noswitch++;
3764 		} else {
3765 			/*
3766 			 * Move client to the acceptor's serializer.
3767 			 */
3768 			tl_serializer_refhold(acc_ep->te_ser);
3769 			tl_serializer_refrele(cl_ep->te_ser);
3770 			cl_ep->te_ser = acc_ep->te_ser;
3771 		}
3772 		mutex_exit(&cl_ep->te_ser_lock);
3773 	}
3774 	if (!switch_client_serializer) {
3775 		/*
3776 		 * It is not possible to switch client to use acceptor's.
3777 		 * Move acceptor to client's serializer (which is the same as
3778 		 * listener's).
3779 		 */
3780 		tl_serializer_refhold(cl_ep->te_ser);
3781 		tl_serializer_refrele(acc_ep->te_ser);
3782 		acc_ep->te_ser = cl_ep->te_ser;
3783 	}
3784 
     	/*
     	 * The endpoints are now linked through te_conp; drop any te_oconp
     	 * (pending outgoing connection) linkage on both.
     	 */
3785 	TL_REMOVE_PEER(cl_ep->te_oconp);
3786 	TL_REMOVE_PEER(acc_ep->te_oconp);
3787 
3788 	/*
3789 	 * remove endpoint from incoming connection
3790 	 * delete client from list of incoming connections
     	 * ti_tep is cleared first; the client stays referenced through the
     	 * te_conp linkage set up above.
3791 	 */
3792 	tip->ti_tep = NULL;
3793 	tl_freetip(tep, tip);
3794 	tl_ok_ack(wq, ackmp, prim);
3795 
3796 	/*
3797 	 * data blocks already linked in reallocb()
3798 	 */
3799 
3800 	/*
3801 	 * link queues so that I_SENDFD will work
3802 	 */
3803 	if (! IS_SOCKET(tep)) {
3804 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3805 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3806 	}
3807 
3808 	/*
3809 	 * send T_CONN_CON up on client side unless it was already
3810 	 * done (for a socket). In cases any data or ordrel req has been
3811 	 * queued make sure that the service procedure runs.
     	 * ccmp is expected to be NULL in the socket branch, since it is only
     	 * assigned under the opposite condition above.
3812 	 */
3813 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3814 		enableok(cl_ep->te_wq);
3815 		TL_QENABLE(cl_ep);
3816 		if (ccmp != NULL)
3817 			freemsg(ccmp);
3818 	} else {
3819 		/*
3820 		 * change client state on TE_CONN_CON event
3821 		 */
3822 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3823 		putnext(cl_ep->te_rq, ccmp);
3824 	}
3825 
3826 	/* Mark both endpoints as accepted */
3827 	cl_ep->te_flag |= TL_ACCEPTED;
3828 	acc_ep->te_flag |= TL_ACCEPTED;
3829 
3830 	/*
3831 	 * Allow client and acceptor to close.
3832 	 */
3833 	tl_closeok(acc_ep);
3834 	if (client_noclose_set)
3835 		tl_closeok(cl_ep);
3836 }
3837 
3838 
3839 
3840 
3841 static void
3842 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3843 {
3844 	queue_t			*wq;
3845 	struct T_discon_req	*dr;
3846 	ssize_t			msz;
3847 	tl_endpt_t		*peer_tep = tep->te_conp;
3848 	tl_endpt_t		*srv_tep = tep->te_oconp;
3849 	tl_icon_t		*tip;
3850 	size_t			size;
3851 	mblk_t			*ackmp, *dimp, *respmp;
3852 	struct T_discon_ind	*di;
3853 	t_scalar_t		save_state, new_state;
3854 
3855 	if (tep->te_closing) {
3856 		freemsg(mp);
3857 		return;
3858 	}
3859 
3860 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3861 		TL_UNCONNECT(tep->te_conp);
3862 		peer_tep = NULL;
3863 	}
3864 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3865 		TL_UNCONNECT(tep->te_oconp);
3866 		srv_tep = NULL;
3867 	}
3868 
3869 	wq = tep->te_wq;
3870 
3871 	/*
3872 	 * preallocate memory for:
3873 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3874 	 *	==> known max T_ERROR_ACK
3875 	 * 2. for  T_DISCON_IND
3876 	 */
3877 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3878 	if (! ackmp) {
3879 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3880 		return;
3881 	}
3882 	/*
3883 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3884 	 * will be committed for T_DISCON_IND  later
3885 	 */
3886 
3887 	dr = (struct T_discon_req *)mp->b_rptr;
3888 	msz = MBLKL(mp);
3889 
3890 	/*
3891 	 * validate the state
3892 	 */
3893 	save_state = new_state = tep->te_state;
3894 	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3895 	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3896 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3897 			SL_TRACE|SL_ERROR,
3898 			"tl_wput:T_DISCON_REQ:out of state, state=%d",
3899 			tep->te_state));
3900 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3901 		freemsg(mp);
3902 		return;
3903 	}
3904 	/*
3905 	 * Defer committing the state change until it is determined if
3906 	 * the message will be queued with the tl_icon or not.
3907 	 */
3908 	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3909 
3910 	/* validate the message */
3911 	if (msz < sizeof (struct T_discon_req)) {
3912 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3913 			"tl_discon_req:invalid message"));
3914 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3915 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3916 		freemsg(mp);
3917 		return;
3918 	}
3919 
3920 	/*
3921 	 * if server, then validate that client exists
3922 	 * by connection sequence number etc.
3923 	 */
3924 	if (tep->te_nicon > 0) { /* server */
3925 
3926 		/*
3927 		 * search server list for disconnect client
3928 		 */
3929 		tip = tl_icon_find(tep, dr->SEQ_number);
3930 		if (tip == NULL) {
3931 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
3932 				SL_TRACE|SL_ERROR,
3933 				"tl_discon_req:no disconnect endpoint"));
3934 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3935 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3936 			freemsg(mp);
3937 			return;
3938 		}
3939 		/*
3940 		 * If ti_tep is NULL the client has already closed. In this case
3941 		 * the code below will avoid any action on the client side.
3942 		 */
3943 
3944 		ASSERT(IMPLY(tip->ti_tep != NULL,
3945 			tip->ti_tep->te_seqno == dr->SEQ_number));
3946 		peer_tep = tip->ti_tep;
3947 	}
3948 
3949 	/*
3950 	 * preallocate now for T_DISCON_IND
3951 	 * ack validity of request (T_OK_ACK) after memory committed
3952 	 */
3953 	size = sizeof (struct T_discon_ind);
3954 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
3955 		tl_memrecover(wq, mp, size);
3956 		freemsg(ackmp);
3957 		return;
3958 	}
3959 
3960 	/*
3961 	 * prepare message to ack validity of request
3962 	 */
3963 	if (tep->te_nicon == 0)
3964 		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3965 	else
3966 		if (tep->te_nicon == 1)
3967 			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3968 		else
3969 			new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3970 
3971 	/*
3972 	 * Flushing queues according to TPI. Using the old state.
3973 	 */
3974 	if ((tep->te_nicon <= 1) &&
3975 	    ((save_state == TS_DATA_XFER) ||
3976 	    (save_state == TS_WIND_ORDREL) ||
3977 	    (save_state == TS_WREQ_ORDREL)))
3978 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3979 
3980 	/* send T_OK_ACK up  */
3981 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3982 
3983 	/*
3984 	 * now do disconnect business
3985 	 */
3986 	if (tep->te_nicon > 0) { /* listener */
3987 		if (peer_tep != NULL && !peer_tep->te_closing) {
3988 			/*
3989 			 * disconnect incoming connect request pending to tep
3990 			 */
3991 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
3992 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
3993 					SL_TRACE|SL_ERROR,
3994 					"tl_discon_req: reallocb failed"));
3995 				tep->te_state = new_state;
3996 				tl_merror(wq, respmp, ENOMEM);
3997 				return;
3998 			}
3999 			di = (struct T_discon_ind *)dimp->b_rptr;
4000 			di->SEQ_number = BADSEQNUM;
4001 			save_state = peer_tep->te_state;
4002 			peer_tep->te_state = TS_IDLE;
4003 
4004 			TL_REMOVE_PEER(peer_tep->te_oconp);
4005 			enableok(peer_tep->te_wq);
4006 			TL_QENABLE(peer_tep);
4007 		} else {
4008 			freemsg(respmp);
4009 			dimp = NULL;
4010 		}
4011 
4012 		/*
4013 		 * remove endpoint from incoming connection list
4014 		 * - remove disconnect client from list on server
4015 		 */
4016 		tl_freetip(tep, tip);
4017 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4018 		/*
4019 		 * disconnect an outgoing request pending from tep
4020 		 */
4021 
4022 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4023 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4024 				SL_TRACE|SL_ERROR,
4025 				"tl_discon_req: reallocb failed"));
4026 			tep->te_state = new_state;
4027 			tl_merror(wq, respmp, ENOMEM);
4028 			return;
4029 		}
4030 		di = (struct T_discon_ind *)dimp->b_rptr;
4031 		DB_TYPE(dimp) = M_PROTO;
4032 		di->PRIM_type  = T_DISCON_IND;
4033 		di->DISCON_reason = ECONNRESET;
4034 		di->SEQ_number = tep->te_seqno;
4035 
4036 		/*
4037 		 * If this is a socket the T_DISCON_IND is queued with
4038 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4039 		 * from the list of pending connections.
4040 		 * Note that when te_oconp is set the peer better have
4041 		 * a t_connind_t for the client.
4042 		 */
4043 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4044 			/*
4045 			 * No need to check that
4046 			 * ti_tep == NULL since the T_DISCON_IND
4047 			 * takes precedence over other queued
4048 			 * messages.
4049 			 */
4050 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4051 			peer_tep = NULL;
4052 			dimp = NULL;
4053 			/*
4054 			 * Can't clear te_oconp since tl_co_unconnect needs
4055 			 * it as a hint not to free the tep.
4056 			 * Keep the state unchanged since tl_conn_res inspects
4057 			 * it.
4058 			 */
4059 			new_state = tep->te_state;
4060 		} else {
4061 			/* Found - delete it */
4062 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4063 			if (tip != NULL) {
4064 				ASSERT(tep == tip->ti_tep);
4065 				save_state = peer_tep->te_state;
4066 				if (peer_tep->te_nicon == 1)
4067 					peer_tep->te_state =
4068 					    NEXTSTATE(TE_DISCON_IND2,
4069 						peer_tep->te_state);
4070 				else
4071 					peer_tep->te_state =
4072 					    NEXTSTATE(TE_DISCON_IND3,
4073 						peer_tep->te_state);
4074 				tl_freetip(peer_tep, tip);
4075 			}
4076 			ASSERT(tep->te_oconp != NULL);
4077 			TL_UNCONNECT(tep->te_oconp);
4078 		}
4079 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4080 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4081 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4082 				SL_TRACE|SL_ERROR,
4083 				"tl_discon_req: reallocb failed"));
4084 			tep->te_state = new_state;
4085 			tl_merror(wq, respmp, ENOMEM);
4086 			return;
4087 		}
4088 		di = (struct T_discon_ind *)dimp->b_rptr;
4089 		di->SEQ_number = BADSEQNUM;
4090 
4091 		save_state = peer_tep->te_state;
4092 		peer_tep->te_state = TS_IDLE;
4093 	} else {
4094 		/* Not connected */
4095 		tep->te_state = new_state;
4096 		freemsg(respmp);
4097 		return;
4098 	}
4099 
4100 	/* Commit state changes */
4101 	tep->te_state = new_state;
4102 
4103 	if (peer_tep == NULL) {
4104 		ASSERT(dimp == NULL);
4105 		goto done;
4106 	}
4107 	/*
4108 	 * Flush queues on peer before sending up
4109 	 * T_DISCON_IND according to TPI
4110 	 */
4111 
4112 	if ((save_state == TS_DATA_XFER) ||
4113 	    (save_state == TS_WIND_ORDREL) ||
4114 	    (save_state == TS_WREQ_ORDREL))
4115 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4116 
4117 	DB_TYPE(dimp) = M_PROTO;
4118 	di->PRIM_type  = T_DISCON_IND;
4119 	di->DISCON_reason = ECONNRESET;
4120 
4121 	/*
4122 	 * data blocks already linked into dimp by reallocb()
4123 	 */
4124 	/*
4125 	 * send indication message to peer user module
4126 	 */
4127 	ASSERT(dimp != NULL);
4128 	putnext(peer_tep->te_rq, dimp);
4129 done:
4130 	if (tep->te_conp) {	/* disconnect pointers if connected */
4131 		ASSERT(! peer_tep->te_closing);
4132 
4133 		/*
4134 		 * Messages may be queued on peer's write queue
4135 		 * waiting to be processed by its write service
4136 		 * procedure. Before the pointer to the peer transport
4137 		 * structure is set to NULL, qenable the peer's write
4138 		 * queue so that the queued up messages are processed.
4139 		 */
4140 		if ((save_state == TS_DATA_XFER) ||
4141 		    (save_state == TS_WIND_ORDREL) ||
4142 		    (save_state == TS_WREQ_ORDREL))
4143 			TL_QENABLE(peer_tep);
4144 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4145 		TL_UNCONNECT(peer_tep->te_conp);
4146 		if (! IS_SOCKET(tep)) {
4147 			/*
4148 			 * unlink the streams
4149 			 */
4150 			tep->te_wq->q_next = NULL;
4151 			peer_tep->te_wq->q_next = NULL;
4152 		}
4153 		TL_UNCONNECT(tep->te_conp);
4154 	}
4155 }
4156 
4157 
4158 static void
4159 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4160 {
4161 	queue_t			*wq;
4162 	size_t			ack_sz;
4163 	mblk_t			*ackmp;
4164 	struct T_addr_ack	*taa;
4165 
4166 	if (tep->te_closing) {
4167 		freemsg(mp);
4168 		return;
4169 	}
4170 
4171 	wq = tep->te_wq;
4172 
4173 	/*
4174 	 * Note: T_ADDR_REQ message has only PRIM_type field
4175 	 * so it is already validated earlier.
4176 	 */
4177 
4178 	if (IS_CLTS(tep) ||
4179 	    (tep->te_state > TS_WREQ_ORDREL) ||
4180 	    (tep->te_state < TS_DATA_XFER)) {
4181 		/*
4182 		 * Either connectionless or connection oriented but not
4183 		 * in connected data transfer state or half-closed states.
4184 		 */
4185 		ack_sz = sizeof (struct T_addr_ack);
4186 		if (tep->te_state >= TS_IDLE)
4187 			/* is bound */
4188 			ack_sz += tep->te_alen;
4189 		ackmp = reallocb(mp, ack_sz, 0);
4190 		if (ackmp == NULL) {
4191 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4192 				SL_TRACE|SL_ERROR,
4193 				"tl_addr_req: reallocb failed"));
4194 			tl_memrecover(wq, mp, ack_sz);
4195 			return;
4196 		}
4197 
4198 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4199 
4200 		bzero(taa, sizeof (struct T_addr_ack));
4201 
4202 		taa->PRIM_type = T_ADDR_ACK;
4203 		ackmp->b_datap->db_type = M_PCPROTO;
4204 		ackmp->b_wptr = (uchar_t *)&taa[1];
4205 
4206 		if (tep->te_state >= TS_IDLE) {
4207 			/* endpoint is bound */
4208 			taa->LOCADDR_length = tep->te_alen;
4209 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4210 
4211 			bcopy(tep->te_abuf, ackmp->b_wptr,
4212 				tep->te_alen);
4213 			ackmp->b_wptr += tep->te_alen;
4214 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4215 		}
4216 
4217 		(void) qreply(wq, ackmp);
4218 	} else {
4219 		ASSERT(tep->te_state == TS_DATA_XFER ||
4220 			tep->te_state == TS_WIND_ORDREL ||
4221 			tep->te_state == TS_WREQ_ORDREL);
4222 		/* connection oriented in data transfer */
4223 		tl_connected_cots_addr_req(mp, tep);
4224 	}
4225 }
4226 
4227 
4228 static void
4229 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4230 {
4231 	tl_endpt_t		*peer_tep;
4232 	size_t			ack_sz;
4233 	mblk_t			*ackmp;
4234 	struct T_addr_ack	*taa;
4235 	uchar_t			*addr_startp;
4236 
4237 	if (tep->te_closing) {
4238 		freemsg(mp);
4239 		return;
4240 	}
4241 
4242 	ASSERT(tep->te_state >= TS_IDLE);
4243 
4244 	ack_sz = sizeof (struct T_addr_ack);
4245 	ack_sz += T_ALIGN(tep->te_alen);
4246 	peer_tep = tep->te_conp;
4247 	ack_sz += peer_tep->te_alen;
4248 
4249 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4250 	if (ackmp == NULL) {
4251 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4252 			"tl_connected_cots_addr_req: reallocb failed"));
4253 		tl_memrecover(tep->te_wq, mp, ack_sz);
4254 		return;
4255 	}
4256 
4257 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4258 
4259 	/* endpoint is bound */
4260 	taa->LOCADDR_length = tep->te_alen;
4261 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4262 
4263 	addr_startp = (uchar_t *)&taa[1];
4264 
4265 	bcopy(tep->te_abuf, addr_startp,
4266 	    tep->te_alen);
4267 
4268 	taa->REMADDR_length = peer_tep->te_alen;
4269 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4270 				    taa->LOCADDR_length);
4271 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4272 	bcopy(peer_tep->te_abuf, addr_startp,
4273 	    peer_tep->te_alen);
4274 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4275 	    taa->REMADDR_offset + peer_tep->te_alen;
4276 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4277 
4278 	putnext(tep->te_rq, ackmp);
4279 }
4280 
4281 static void
4282 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4283 {
4284 	if (IS_CLTS(tep)) {
4285 		*ia = tl_clts_info_ack;
4286 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4287 	} else {
4288 		*ia = tl_cots_info_ack;
4289 		if (IS_COTSORD(tep))
4290 			ia->SERV_type = T_COTS_ORD;
4291 	}
4292 	ia->TIDU_size = tl_tidusz;
4293 	ia->CURRENT_state = tep->te_state;
4294 }
4295 
4296 /*
4297  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4298  * tl_wput.
4299  */
4300 static void
4301 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4302 {
4303 	mblk_t			*ackmp;
4304 	t_uscalar_t		cap_bits1;
4305 	struct T_capability_ack	*tcap;
4306 
4307 	if (tep->te_closing) {
4308 		freemsg(mp);
4309 		return;
4310 	}
4311 
4312 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4313 
4314 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4315 	    M_PCPROTO, T_CAPABILITY_ACK);
4316 	if (ackmp == NULL) {
4317 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4318 			"tl_capability_req: reallocb failed"));
4319 		tl_memrecover(tep->te_wq, mp,
4320 		    sizeof (struct T_capability_ack));
4321 		return;
4322 	}
4323 
4324 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4325 	tcap->CAP_bits1 = 0;
4326 
4327 	if (cap_bits1 & TC1_INFO) {
4328 		tl_copy_info(&tcap->INFO_ack, tep);
4329 		tcap->CAP_bits1 |= TC1_INFO;
4330 	}
4331 
4332 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4333 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4334 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4335 	}
4336 
4337 	putnext(tep->te_rq, ackmp);
4338 }
4339 
4340 static void
4341 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4342 {
4343 	if (! tep->te_closing)
4344 		tl_info_req(mp, tep);
4345 	else
4346 		freemsg(mp);
4347 
4348 	tl_serializer_exit(tep);
4349 	tl_refrele(tep);
4350 }
4351 
4352 static void
4353 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4354 {
4355 	mblk_t *ackmp;
4356 
4357 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4358 	    M_PCPROTO, T_INFO_ACK);
4359 	if (ackmp == NULL) {
4360 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4361 			"tl_info_req: reallocb failed"));
4362 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4363 		return;
4364 	}
4365 
4366 	/*
4367 	 * fill in T_INFO_ACK contents
4368 	 */
4369 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4370 
4371 	/*
4372 	 * send ack message
4373 	 */
4374 	putnext(tep->te_rq, ackmp);
4375 }
4376 
4377 /*
4378  * Handle M_DATA, T_data_req and T_optdata_req.
4379  * If this is a socket pass through T_optdata_req options unmodified.
4380  */
4381 static void
4382 tl_data(mblk_t *mp, tl_endpt_t *tep)
4383 {
4384 	queue_t			*wq = tep->te_wq;
4385 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4386 	ssize_t			msz = MBLKL(mp);
4387 	tl_endpt_t		*peer_tep;
4388 	queue_t			*peer_rq;
4389 	boolean_t		closing = tep->te_closing;
4390 
4391 	if (IS_CLTS(tep)) {
4392 		(void) (STRLOG(TL_ID, tep->te_minor, 2,
4393 			    SL_TRACE|SL_ERROR,
4394 			    "tl_wput:clts:unattached M_DATA"));
4395 		if (!closing) {
4396 			tl_merror(wq, mp, EPROTO);
4397 		} else {
4398 			freemsg(mp);
4399 		}
4400 		return;
4401 	}
4402 
4403 	/*
4404 	 * If the endpoint is closing it should still forward any data to the
4405 	 * peer (if it has one). If it is not allowed to forward it can just
4406 	 * free the message.
4407 	 */
4408 	if (closing &&
4409 	    (tep->te_state != TS_DATA_XFER) &&
4410 	    (tep->te_state != TS_WREQ_ORDREL)) {
4411 		freemsg(mp);
4412 		return;
4413 	}
4414 
4415 	if (DB_TYPE(mp) == M_PROTO) {
4416 		if (prim->type == T_DATA_REQ &&
4417 		    msz < sizeof (struct T_data_req)) {
4418 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4419 				SL_TRACE|SL_ERROR,
4420 				"tl_data:T_DATA_REQ:invalid message"));
4421 			if (!closing) {
4422 				tl_merror(wq, mp, EPROTO);
4423 			} else {
4424 				freemsg(mp);
4425 			}
4426 			return;
4427 		} else if (prim->type == T_OPTDATA_REQ &&
4428 			    (msz < sizeof (struct T_optdata_req) ||
4429 			    !IS_SOCKET(tep))) {
4430 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4431 				SL_TRACE|SL_ERROR,
4432 				"tl_data:T_OPTDATA_REQ:invalid message"));
4433 			if (!closing) {
4434 				tl_merror(wq, mp, EPROTO);
4435 			} else {
4436 				freemsg(mp);
4437 			}
4438 			return;
4439 		}
4440 	}
4441 
4442 	/*
4443 	 * connection oriented provider
4444 	 */
4445 	switch (tep->te_state) {
4446 	case TS_IDLE:
4447 		/*
4448 		 * Other end not here - do nothing.
4449 		 */
4450 		freemsg(mp);
4451 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4452 			"tl_data:cots with endpoint idle"));
4453 		return;
4454 
4455 	case TS_DATA_XFER:
4456 		/* valid states */
4457 		if (tep->te_conp != NULL)
4458 			break;
4459 
4460 		if (tep->te_oconp == NULL) {
4461 			if (!closing) {
4462 				tl_merror(wq, mp, EPROTO);
4463 			} else {
4464 				freemsg(mp);
4465 			}
4466 			return;
4467 		}
4468 		/*
4469 		 * For a socket the T_CONN_CON is sent early thus
4470 		 * the peer might not yet have accepted the connection.
4471 		 * If we are closing queue the packet with the T_CONN_IND.
4472 		 * Otherwise defer processing the packet until the peer
4473 		 * accepts the connection.
4474 		 * Note that the queue is noenabled when we go into this
4475 		 * state.
4476 		 */
4477 		if (!closing) {
4478 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4479 				    SL_TRACE|SL_ERROR,
4480 				    "tl_data: ocon"));
4481 			TL_PUTBQ(tep, mp);
4482 			return;
4483 		}
4484 		if (DB_TYPE(mp) == M_PROTO) {
4485 			if (msz < sizeof (t_scalar_t)) {
4486 				freemsg(mp);
4487 				return;
4488 			}
4489 			/* reuse message block - just change REQ to IND */
4490 			if (prim->type == T_DATA_REQ)
4491 				prim->type = T_DATA_IND;
4492 			else
4493 				prim->type = T_OPTDATA_IND;
4494 		}
4495 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4496 		return;
4497 
4498 	case TS_WREQ_ORDREL:
4499 		if (tep->te_conp == NULL) {
4500 			/*
4501 			 * Other end closed - generate discon_ind
4502 			 * with reason 0 to cause an EPIPE but no
4503 			 * read side error on AF_UNIX sockets.
4504 			 */
4505 			freemsg(mp);
4506 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4507 				SL_TRACE|SL_ERROR,
4508 				"tl_data: WREQ_ORDREL and no peer"));
4509 			tl_discon_ind(tep, 0);
4510 			return;
4511 		}
4512 		break;
4513 
4514 	default:
4515 		/* invalid state for event TE_DATA_REQ */
4516 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4517 			"tl_data:cots:out of state"));
4518 		tl_merror(wq, mp, EPROTO);
4519 		return;
4520 	}
4521 	/*
4522 	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4523 	 * (State stays same on this event)
4524 	 */
4525 
4526 	/*
4527 	 * get connected endpoint
4528 	 */
4529 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4530 		freemsg(mp);
4531 		/* Peer closed */
4532 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4533 			"tl_data: peer gone"));
4534 		return;
4535 	}
4536 
4537 	ASSERT(tep->te_serializer == peer_tep->te_serializer);
4538 	peer_rq = peer_tep->te_rq;
4539 
4540 	/*
4541 	 * Put it back if flow controlled
4542 	 * Note: Messages already on queue when we are closing is bounded
4543 	 * so we can ignore flow control.
4544 	 */
4545 	if (!canputnext(peer_rq) && !closing) {
4546 		TL_PUTBQ(tep, mp);
4547 		return;
4548 	}
4549 
4550 	/*
4551 	 * validate peer state
4552 	 */
4553 	switch (peer_tep->te_state) {
4554 	case TS_DATA_XFER:
4555 	case TS_WIND_ORDREL:
4556 		/* valid states */
4557 		break;
4558 	default:
4559 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4560 			"tl_data:rx side:invalid state"));
4561 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4562 		return;
4563 	}
4564 	if (DB_TYPE(mp) == M_PROTO) {
4565 		/* reuse message block - just change REQ to IND */
4566 		if (prim->type == T_DATA_REQ)
4567 			prim->type = T_DATA_IND;
4568 		else
4569 			prim->type = T_OPTDATA_IND;
4570 	}
4571 	/*
4572 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4573 	 * (peer state stays same on this event)
4574 	 */
4575 	/*
4576 	 * send data to connected peer
4577 	 */
4578 	putnext(peer_rq, mp);
4579 }
4580 
4581 
4582 
4583 static void
4584 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4585 {
4586 	queue_t			*wq = tep->te_wq;
4587 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4588 	ssize_t			msz = MBLKL(mp);
4589 	tl_endpt_t		*peer_tep;
4590 	queue_t			*peer_rq;
4591 	boolean_t		closing = tep->te_closing;
4592 
4593 	if (msz < sizeof (struct T_exdata_req)) {
4594 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4595 			"tl_exdata:invalid message"));
4596 		if (!closing) {
4597 			tl_merror(wq, mp, EPROTO);
4598 		} else {
4599 			freemsg(mp);
4600 		}
4601 		return;
4602 	}
4603 
4604 	/*
4605 	 * If the endpoint is closing it should still forward any data to the
4606 	 * peer (if it has one). If it is not allowed to forward it can just
4607 	 * free the message.
4608 	 */
4609 	if (closing &&
4610 	    (tep->te_state != TS_DATA_XFER) &&
4611 	    (tep->te_state != TS_WREQ_ORDREL)) {
4612 		freemsg(mp);
4613 		return;
4614 	}
4615 
4616 	/*
4617 	 * validate state
4618 	 */
4619 	switch (tep->te_state) {
4620 	case TS_IDLE:
4621 		/*
4622 		 * Other end not here - do nothing.
4623 		 */
4624 		freemsg(mp);
4625 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4626 			"tl_exdata:cots with endpoint idle"));
4627 		return;
4628 
4629 	case TS_DATA_XFER:
4630 		/* valid states */
4631 		if (tep->te_conp != NULL)
4632 			break;
4633 
4634 		if (tep->te_oconp == NULL) {
4635 			if (!closing) {
4636 				tl_merror(wq, mp, EPROTO);
4637 			} else {
4638 				freemsg(mp);
4639 			}
4640 			return;
4641 		}
4642 		/*
4643 		 * For a socket the T_CONN_CON is sent early thus
4644 		 * the peer might not yet have accepted the connection.
4645 		 * If we are closing queue the packet with the T_CONN_IND.
4646 		 * Otherwise defer processing the packet until the peer
4647 		 * accepts the connection.
4648 		 * Note that the queue is noenabled when we go into this
4649 		 * state.
4650 		 */
4651 		if (!closing) {
4652 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4653 				    SL_TRACE|SL_ERROR,
4654 				    "tl_exdata: ocon"));
4655 			TL_PUTBQ(tep, mp);
4656 			return;
4657 		}
4658 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4659 			    "tl_exdata: closing socket ocon"));
4660 		prim->type = T_EXDATA_IND;
4661 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4662 		return;
4663 
4664 	case TS_WREQ_ORDREL:
4665 		if (tep->te_conp == NULL) {
4666 			/*
4667 			 * Other end closed - generate discon_ind
4668 			 * with reason 0 to cause an EPIPE but no
4669 			 * read side error on AF_UNIX sockets.
4670 			 */
4671 			freemsg(mp);
4672 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4673 				SL_TRACE|SL_ERROR,
4674 				"tl_exdata: WREQ_ORDREL and no peer"));
4675 			tl_discon_ind(tep, 0);
4676 			return;
4677 		}
4678 		break;
4679 
4680 	default:
4681 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4682 			SL_TRACE|SL_ERROR,
4683 			"tl_wput:T_EXDATA_REQ:out of state, state=%d",
4684 			tep->te_state));
4685 		tl_merror(wq, mp, EPROTO);
4686 		return;
4687 	}
4688 	/*
4689 	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4690 	 * (state stays same on this event)
4691 	 */
4692 
4693 	/*
4694 	 * get connected endpoint
4695 	 */
4696 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4697 		freemsg(mp);
4698 		/* Peer closed */
4699 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4700 			"tl_exdata: peer gone"));
4701 		return;
4702 	}
4703 
4704 	peer_rq = peer_tep->te_rq;
4705 
4706 	/*
4707 	 * Put it back if flow controlled
4708 	 * Note: Messages already on queue when we are closing is bounded
4709 	 * so we can ignore flow control.
4710 	 */
4711 	if (!canputnext(peer_rq) && !closing) {
4712 		TL_PUTBQ(tep, mp);
4713 		return;
4714 	}
4715 
4716 	/*
4717 	 * validate state on peer
4718 	 */
4719 	switch (peer_tep->te_state) {
4720 	case TS_DATA_XFER:
4721 	case TS_WIND_ORDREL:
4722 		/* valid states */
4723 		break;
4724 	default:
4725 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4726 			"tl_exdata:rx side:invalid state"));
4727 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4728 		return;
4729 	}
4730 	/*
4731 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4732 	 * (peer state stays same on this event)
4733 	 */
4734 	/*
4735 	 * reuse message block
4736 	 */
4737 	prim->type = T_EXDATA_IND;
4738 
4739 	/*
4740 	 * send data to connected peer
4741 	 */
4742 	putnext(peer_rq, mp);
4743 }
4744 
4745 
4746 
4747 static void
4748 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4749 {
4750 	queue_t			*wq =  tep->te_wq;
4751 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4752 	ssize_t			msz = MBLKL(mp);
4753 	tl_endpt_t		*peer_tep;
4754 	queue_t			*peer_rq;
4755 	boolean_t		closing = tep->te_closing;
4756 
4757 	if (msz < sizeof (struct T_ordrel_req)) {
4758 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4759 			"tl_ordrel:invalid message"));
4760 		if (!closing) {
4761 			tl_merror(wq, mp, EPROTO);
4762 		} else {
4763 			freemsg(mp);
4764 		}
4765 		return;
4766 	}
4767 
4768 	/*
4769 	 * validate state
4770 	 */
4771 	switch (tep->te_state) {
4772 	case TS_DATA_XFER:
4773 	case TS_WREQ_ORDREL:
4774 		/* valid states */
4775 		if (tep->te_conp != NULL)
4776 			break;
4777 
4778 		if (tep->te_oconp == NULL)
4779 			break;
4780 
4781 		/*
4782 		 * For a socket the T_CONN_CON is sent early thus
4783 		 * the peer might not yet have accepted the connection.
4784 		 * If we are closing queue the packet with the T_CONN_IND.
4785 		 * Otherwise defer processing the packet until the peer
4786 		 * accepts the connection.
4787 		 * Note that the queue is noenabled when we go into this
4788 		 * state.
4789 		 */
4790 		if (!closing) {
4791 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4792 				SL_TRACE|SL_ERROR,
4793 				"tl_ordlrel: ocon"));
4794 			TL_PUTBQ(tep, mp);
4795 			return;
4796 		}
4797 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4798 			"tl_ordlrel: closing socket ocon"));
4799 		prim->type = T_ORDREL_IND;
4800 		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4801 		return;
4802 
4803 	default:
4804 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4805 			SL_TRACE|SL_ERROR,
4806 			"tl_wput:T_ORDREL_REQ:out of state, state=%d",
4807 			tep->te_state));
4808 		if (!closing) {
4809 			tl_merror(wq, mp, EPROTO);
4810 		} else {
4811 			freemsg(mp);
4812 		}
4813 		return;
4814 	}
4815 	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4816 
4817 	/*
4818 	 * get connected endpoint
4819 	 */
4820 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4821 		/* Peer closed */
4822 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4823 			"tl_ordrel: peer gone"));
4824 		freemsg(mp);
4825 		return;
4826 	}
4827 
4828 	peer_rq = peer_tep->te_rq;
4829 
4830 	/*
4831 	 * Put it back if flow controlled except when we are closing.
4832 	 * Note: Messages already on queue when we are closing is bounded
4833 	 * so we can ignore flow control.
4834 	 */
4835 	if (! canputnext(peer_rq) && !closing) {
4836 		TL_PUTBQ(tep, mp);
4837 		return;
4838 	}
4839 
4840 	/*
4841 	 * validate state on peer
4842 	 */
4843 	switch (peer_tep->te_state) {
4844 	case TS_DATA_XFER:
4845 	case TS_WIND_ORDREL:
4846 		/* valid states */
4847 		break;
4848 	default:
4849 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4850 			"tl_ordrel:rx side:invalid state"));
4851 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4852 		return;
4853 	}
4854 	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4855 
4856 	/*
4857 	 * reuse message block
4858 	 */
4859 	prim->type = T_ORDREL_IND;
4860 	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4861 		"tl_ordrel: send ordrel_ind"));
4862 
4863 	/*
4864 	 * send data to connected peer
4865 	 */
4866 	putnext(peer_rq, mp);
4867 }
4868 
4869 
4870 /*
4871  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4872  */
4873 static void
4874 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4875 {
4876 	size_t			err_sz;
4877 	tl_endpt_t		*tep;
4878 	struct T_unitdata_req	*udreq;
4879 	mblk_t			*err_mp;
4880 	t_scalar_t		alen;
4881 	t_scalar_t		olen;
4882 	struct T_uderror_ind	*uderr;
4883 	uchar_t			*addr_startp;
4884 
4885 	err_sz = sizeof (struct T_uderror_ind);
4886 	tep = (tl_endpt_t *)wq->q_ptr;
4887 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4888 	alen = udreq->DEST_length;
4889 	olen = udreq->OPT_length;
4890 
4891 	if (alen > 0)
4892 		err_sz = T_ALIGN(err_sz + alen);
4893 	if (olen > 0)
4894 		err_sz += olen;
4895 
4896 	err_mp = allocb(err_sz, BPRI_MED);
4897 	if (! err_mp) {
4898 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4899 			"tl_uderr:allocb failure"));
4900 		/*
4901 		 * Note: no rollback of state needed as it does
4902 		 * not change in connectionless transport
4903 		 */
4904 		tl_memrecover(wq, mp, err_sz);
4905 		return;
4906 	}
4907 
4908 	DB_TYPE(err_mp) = M_PROTO;
4909 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
4910 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4911 	uderr->PRIM_type = T_UDERROR_IND;
4912 	uderr->ERROR_type = err;
4913 	uderr->DEST_length = alen;
4914 	uderr->OPT_length = olen;
4915 	if (alen <= 0) {
4916 		uderr->DEST_offset = 0;
4917 	} else {
4918 		uderr->DEST_offset =
4919 			(t_scalar_t)sizeof (struct T_uderror_ind);
4920 		addr_startp  = mp->b_rptr + udreq->DEST_offset;
4921 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4922 			(size_t)alen);
4923 	}
4924 	if (olen <= 0) {
4925 		uderr->OPT_offset = 0;
4926 	} else {
4927 		uderr->OPT_offset =
4928 			(t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4929 						uderr->DEST_length);
4930 		addr_startp  = mp->b_rptr + udreq->OPT_offset;
4931 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4932 			(size_t)olen);
4933 	}
4934 	freemsg(mp);
4935 
4936 	/*
4937 	 * send indication message
4938 	 */
4939 	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4940 
4941 	qreply(wq, err_mp);
4942 }
4943 
4944 static void
4945 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4946 {
4947 	queue_t *wq = tep->te_wq;
4948 
4949 	if (!tep->te_closing && (wq->q_first != NULL)) {
4950 		TL_PUTQ(tep, mp);
4951 	} else if (tep->te_rq != NULL)
4952 		tl_unitdata(mp, tep);
4953 	else
4954 		freemsg(mp);
4955 
4956 	tl_serializer_exit(tep);
4957 	tl_refrele(tep);
4958 }
4959 
4960 /*
4961  * Handle T_unitdata_req.
4962  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4963  * If this is a socket pass through options unmodified.
4964  */
4965 static void
4966 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4967 {
4968 	queue_t			*wq = tep->te_wq;
4969 	soux_addr_t		ux_addr;
4970 	tl_addr_t		destaddr;
4971 	uchar_t			*addr_startp;
4972 	tl_endpt_t		*peer_tep;
4973 	struct T_unitdata_ind	*udind;
4974 	struct T_unitdata_req	*udreq;
4975 	ssize_t			msz, ui_sz;
4976 	t_scalar_t		alen, aoff, olen, ooff;
4977 	t_scalar_t		oldolen = 0;
4978 
4979 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4980 	msz = MBLKL(mp);
4981 
4982 	/*
4983 	 * validate the state
4984 	 */
4985 	if (tep->te_state != TS_IDLE) {
4986 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4987 			SL_TRACE|SL_ERROR,
4988 			"tl_wput:T_CONN_REQ:out of state"));
4989 		tl_merror(wq, mp, EPROTO);
4990 		return;
4991 	}
4992 	/*
4993 	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
4994 	 * (state does not change on this event)
4995 	 */
4996 
4997 	/*
4998 	 * validate the message
4999 	 * Note: dereference fields in struct inside message only
5000 	 * after validating the message length.
5001 	 */
5002 	if (msz < sizeof (struct T_unitdata_req)) {
5003 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5004 			"tl_unitdata:invalid message length"));
5005 		tl_merror(wq, mp, EINVAL);
5006 		return;
5007 	}
5008 	alen = udreq->DEST_length;
5009 	aoff = udreq->DEST_offset;
5010 	oldolen = olen = udreq->OPT_length;
5011 	ooff = udreq->OPT_offset;
5012 	if (olen == 0)
5013 		ooff = 0;
5014 
5015 	if (IS_SOCKET(tep)) {
5016 		if ((alen != TL_SOUX_ADDRLEN) ||
5017 		    (aoff < 0) ||
5018 		    (aoff + alen > msz) ||
5019 		    (olen < 0) || (ooff < 0) ||
5020 		    ((olen > 0) && ((ooff + olen) > msz))) {
5021 			(void) (STRLOG(TL_ID, tep->te_minor,
5022 				    1, SL_TRACE|SL_ERROR,
5023 				    "tl_unitdata_req: invalid socket addr "
5024 				    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5025 				    (int)msz, alen, aoff, olen, ooff));
5026 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5027 			return;
5028 		}
5029 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5030 
5031 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5032 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5033 			(void) (STRLOG(TL_ID, tep->te_minor,
5034 				    1, SL_TRACE|SL_ERROR,
5035 				    "tl_conn_req: invalid socket magic"));
5036 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5037 			return;
5038 		}
5039 	} else {
5040 		if ((alen < 0) ||
5041 		    (aoff < 0) ||
5042 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5043 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5044 		    ((aoff + alen) < 0) ||
5045 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5046 		    (olen < 0) ||
5047 		    (ooff < 0) ||
5048 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5049 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5050 				    SL_TRACE|SL_ERROR,
5051 				    "tl_unitdata:invalid unit data message"));
5052 			tl_merror(wq, mp, EINVAL);
5053 			return;
5054 		}
5055 	}
5056 
5057 	/* Options not supported unless it's a socket */
5058 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5059 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5060 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5061 		tl_uderr(wq, mp, EPROTO);
5062 		return;
5063 	}
5064 #ifdef DEBUG
5065 	/*
5066 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5067 	 * if (! assertion)
5068 	 *	log warning;
5069 	 */
5070 	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5071 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5072 			"tl_unitdata:addr overlaps TPI message"));
5073 	}
5074 #endif
5075 	/*
5076 	 * get destination endpoint
5077 	 */
5078 	destaddr.ta_alen = alen;
5079 	destaddr.ta_abuf = mp->b_rptr + aoff;
5080 	destaddr.ta_zoneid = tep->te_zoneid;
5081 
5082 	/*
5083 	 * Check whether the destination is the same that was used previously
5084 	 * and the destination endpoint is in the right state. If something is
5085 	 * wrong, find destination again and cache it.
5086 	 */
5087 	peer_tep = tep->te_lastep;
5088 
5089 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5090 	    (peer_tep->te_state != TS_IDLE) ||
5091 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5092 		/*
5093 		 * Not the same as cached destination , need to find the right
5094 		 * destination.
5095 		 */
5096 		peer_tep = (IS_SOCKET(tep) ?
5097 		    tl_sock_find_peer(tep, &ux_addr) :
5098 		    tl_find_peer(tep, &destaddr));
5099 
5100 		if (peer_tep == NULL) {
5101 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5102 				SL_TRACE|SL_ERROR,
5103 				"tl_unitdata:no one at destination address"));
5104 			tl_uderr(wq, mp, ECONNRESET);
5105 			return;
5106 		}
5107 
5108 		/*
5109 		 * Cache the new peer.
5110 		 */
5111 		if (tep->te_lastep != NULL)
5112 			tl_refrele(tep->te_lastep);
5113 
5114 		tep->te_lastep = peer_tep;
5115 	}
5116 
5117 	if (peer_tep->te_state != TS_IDLE) {
5118 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5119 			"tl_unitdata:provider in invalid state"));
5120 		tl_uderr(wq, mp, EPROTO);
5121 		return;
5122 	}
5123 
5124 	ASSERT(peer_tep->te_rq != NULL);
5125 
5126 	/*
5127 	 * Put it back if flow controlled except when we are closing.
5128 	 * Note: Messages already on queue when we are closing is bounded
5129 	 * so we can ignore flow control.
5130 	 */
5131 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5132 		/* record what we are flow controlled on */
5133 		if (tep->te_flowq != NULL) {
5134 			list_remove(&tep->te_flowq->te_flowlist, tep);
5135 		}
5136 		list_insert_head(&peer_tep->te_flowlist, tep);
5137 		tep->te_flowq = peer_tep;
5138 		TL_PUTBQ(tep, mp);
5139 		return;
5140 	}
5141 	/*
5142 	 * prepare indication message
5143 	 */
5144 
5145 	/*
5146 	 * calculate length of message
5147 	 */
5148 	if (peer_tep->te_flag & TL_SETCRED) {
5149 		ASSERT(olen == 0);
5150 		olen = (t_scalar_t)sizeof (struct opthdr) +
5151 				OPTLEN(sizeof (tl_credopt_t));
5152 					/* 1 option only */
5153 	} else if (peer_tep->te_flag & TL_SETUCRED) {
5154 		ASSERT(olen == 0);
5155 		olen = (t_scalar_t)sizeof (struct opthdr) + OPTLEN(ucredsize);
5156 					/* 1 option only */
5157 	} else if (peer_tep->te_flag & TL_SOCKUCRED) {
5158 		/* Possibly more than one option */
5159 		olen += (t_scalar_t)sizeof (struct T_opthdr) +
5160 		    OPTLEN(ucredsize);
5161 	}
5162 
5163 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5164 		olen;
5165 	/*
5166 	 * If the unitdata_ind fits and we are not adding options
5167 	 * reuse the udreq mblk.
5168 	 */
5169 	if (msz >= ui_sz && alen >= tep->te_alen &&
5170 	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5171 		/*
5172 		 * Reuse the original mblk. Leave options in place.
5173 		 */
5174 		udind =  (struct T_unitdata_ind *)mp->b_rptr;
5175 		udind->PRIM_type = T_UNITDATA_IND;
5176 		udind->SRC_length = tep->te_alen;
5177 		addr_startp = mp->b_rptr + udind->SRC_offset;
5178 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5179 	} else {
5180 		/* Allocate a new T_unidata_ind message */
5181 		mblk_t *ui_mp;
5182 
5183 		ui_mp = allocb(ui_sz, BPRI_MED);
5184 		if (! ui_mp) {
5185 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5186 				"tl_unitdata:allocb failure:message queued"));
5187 			tl_memrecover(wq, mp, ui_sz);
5188 			return;
5189 		}
5190 
5191 		/*
5192 		 * fill in T_UNITDATA_IND contents
5193 		 */
5194 		DB_TYPE(ui_mp) = M_PROTO;
5195 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5196 		udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5197 		udind->PRIM_type = T_UNITDATA_IND;
5198 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5199 		udind->SRC_length = tep->te_alen;
5200 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5201 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5202 		udind->OPT_offset =
5203 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5204 		udind->OPT_length = olen;
5205 		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5206 			if (oldolen != 0) {
5207 				bcopy((void *)((uintptr_t)udreq + ooff),
5208 				    (void *)((uintptr_t)udind +
5209 				    udind->OPT_offset),
5210 				    oldolen);
5211 			}
5212 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5213 			    oldolen,
5214 			    DB_CREDDEF(mp, tep->te_credp), TLPID(mp, tep),
5215 			    peer_tep->te_flag);
5216 		} else {
5217 			bcopy((void *)((uintptr_t)udreq + ooff),
5218 				(void *)((uintptr_t)udind + udind->OPT_offset),
5219 				olen);
5220 		}
5221 
5222 		/*
5223 		 * relink data blocks from mp to ui_mp
5224 		 */
5225 		ui_mp->b_cont = mp->b_cont;
5226 		freeb(mp);
5227 		mp = ui_mp;
5228 	}
5229 	/*
5230 	 * send indication message
5231 	 */
5232 	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5233 	putnext(peer_tep->te_rq, mp);
5234 }
5235 
5236 
5237 
5238 /*
5239  * Check if a given addr is in use.
5240  * Endpoint ptr returned or NULL if not found.
5241  * The name space is separate for each mode. This implies that
5242  * sockets get their own name space.
5243  */
5244 static tl_endpt_t *
5245 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5246 {
5247 	tl_endpt_t *peer_tep = NULL;
5248 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5249 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5250 
5251 	ASSERT(! IS_SOCKET(tep));
5252 
5253 	ASSERT(ap != NULL && ap->ta_alen > 0);
5254 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5255 	ASSERT(ap->ta_abuf != NULL);
5256 	ASSERT(EQUIV(rc == 0, peer_tep != NULL));
5257 	ASSERT(IMPLY(rc == 0,
5258 		(tep->te_zoneid == peer_tep->te_zoneid) &&
5259 		(tep->te_transport == peer_tep->te_transport)));
5260 
5261 	if ((rc == 0) && (peer_tep->te_closing)) {
5262 		tl_refrele(peer_tep);
5263 		peer_tep = NULL;
5264 	}
5265 
5266 	return (peer_tep);
5267 }
5268 
5269 /*
5270  * Find peer for a socket based on unix domain address.
5271  * For implicit addresses our peer can be found by minor number in ai hash. For
5272  * explici binds we look vnode address at addr_hash.
5273  */
5274 static tl_endpt_t *
5275 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5276 {
5277 	tl_endpt_t *peer_tep = NULL;
5278 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5279 	    tep->te_aihash : tep->te_addrhash;
5280 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5281 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5282 
5283 	ASSERT(IS_SOCKET(tep));
5284 	ASSERT(EQUIV(rc == 0, peer_tep != NULL));
5285 	ASSERT(IMPLY(rc == 0,
5286 		(tep->te_zoneid == peer_tep->te_zoneid) &&
5287 		(tep->te_transport == peer_tep->te_transport)));
5288 	/*
5289 	 * Don't attempt to use closing peer.
5290 	 */
5291 	if ((peer_tep != NULL) &&
5292 	    (peer_tep->te_closing ||
5293 		(peer_tep->te_zoneid != tep->te_zoneid))) {
5294 		tl_refrele(peer_tep);
5295 		peer_tep = NULL;
5296 	}
5297 
5298 	return (peer_tep);
5299 }
5300 
5301 /*
5302  * Generate a free addr and return it in struct pointed by ap
5303  * but allocating space for address buffer.
5304  * The generated address will be at least 4 bytes long and, if req->ta_alen
5305  * exceeds 4 bytes, be req->ta_alen bytes long.
5306  *
5307  * If address is found it will be inserted in the hash.
5308  *
5309  * If req->ta_alen is larger than the default alen (4 bytes) the last
5310  * alen-4 bytes will always be the same as in req.
5311  *
5312  * Return 0 for failure.
5313  * Return non-zero for success.
5314  */
5315 static boolean_t
5316 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5317 {
5318 	t_scalar_t	alen;
5319 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5320 
5321 	ASSERT(tep->te_hash_hndl != NULL);
5322 	ASSERT(! IS_SOCKET(tep));
5323 
5324 	if (tep->te_hash_hndl == NULL)
5325 		return (B_FALSE);
5326 
5327 	/*
5328 	 * check if default addr is in use
5329 	 * if it is - bump it and try again
5330 	 */
5331 	if (req == NULL) {
5332 		alen = sizeof (uint32_t);
5333 	} else {
5334 		alen = max(req->ta_alen, sizeof (uint32_t));
5335 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5336 	}
5337 
5338 	if (tep->te_alen < alen) {
5339 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5340 
5341 		/*
5342 		 * Not enough space in tep->ta_ap to hold the address,
5343 		 * allocate a bigger space.
5344 		 */
5345 		if (abuf == NULL)
5346 			return (B_FALSE);
5347 
5348 		if (tep->te_alen > 0)
5349 			kmem_free(tep->te_abuf, tep->te_alen);
5350 
5351 		tep->te_alen = alen;
5352 		tep->te_abuf = abuf;
5353 	}
5354 
5355 	/* Copy in the address in req */
5356 	if (req != NULL) {
5357 		ASSERT(alen >= req->ta_alen);
5358 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5359 	}
5360 
5361 	/*
5362 	 * First try minor number then try default addresses.
5363 	 */
5364 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5365 
5366 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5367 		if (mod_hash_insert_reserve(tep->te_addrhash,
5368 			(mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5369 			tep->te_hash_hndl) == 0) {
5370 			/*
5371 			 * found free address
5372 			 */
5373 			tep->te_flag |= TL_ADDRHASHED;
5374 			tep->te_hash_hndl = NULL;
5375 
5376 			return (B_TRUE); /* successful return */
5377 		}
5378 		/*
5379 		 * Use default address.
5380 		 */
5381 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5382 		atomic_add_32(&tep->te_defaddr, 1);
5383 	}
5384 
5385 	/*
5386 	 * Failed to find anything.
5387 	 */
5388 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5389 		"tl_get_any_addr:looped 2^32 times"));
5390 	return (B_FALSE);
5391 }
5392 
5393 /*
5394  * reallocb + set r/w ptrs to reflect size.
5395  */
5396 static mblk_t *
5397 tl_resizemp(mblk_t *mp, ssize_t new_size)
5398 {
5399 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5400 		return (NULL);
5401 
5402 	mp->b_rptr = DB_BASE(mp);
5403 	mp->b_wptr = mp->b_rptr + new_size;
5404 	return (mp);
5405 }
5406 
5407 static void
5408 tl_cl_backenable(tl_endpt_t *tep)
5409 {
5410 	list_t *l = &tep->te_flowlist;
5411 	tl_endpt_t *elp;
5412 
5413 	ASSERT(IS_CLTS(tep));
5414 
5415 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5416 		ASSERT(tep->te_ser == elp->te_ser);
5417 		ASSERT(elp->te_flowq == tep);
5418 		if (! elp->te_closing)
5419 			TL_QENABLE(elp);
5420 		elp->te_flowq = NULL;
5421 		list_remove(l, elp);
5422 	}
5423 }
5424 
5425 /*
5426  * Unconnect endpoints.
5427  */
5428 static void
5429 tl_co_unconnect(tl_endpt_t *tep)
5430 {
5431 	tl_endpt_t	*peer_tep = tep->te_conp;
5432 	tl_endpt_t	*srv_tep = tep->te_oconp;
5433 	list_t		*l;
5434 	tl_icon_t  	*tip;
5435 	tl_endpt_t	*cl_tep;
5436 	mblk_t		*d_mp;
5437 
5438 	ASSERT(IS_COTS(tep));
5439 	/*
5440 	 * If our peer is closing, don't use it.
5441 	 */
5442 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5443 		TL_UNCONNECT(tep->te_conp);
5444 		peer_tep = NULL;
5445 	}
5446 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5447 		TL_UNCONNECT(tep->te_oconp);
5448 		srv_tep = NULL;
5449 	}
5450 
5451 	if (tep->te_nicon > 0) {
5452 		l = &tep->te_iconp;
5453 		/*
5454 		 * If incoming requests pending, change state
5455 		 * of clients on disconnect ind event and send
5456 		 * discon_ind pdu to modules above them
5457 		 * for server: all clients get disconnect
5458 		 */
5459 
5460 		while (tep->te_nicon > 0) {
5461 			tip    = list_head(l);
5462 			cl_tep = tip->ti_tep;
5463 
5464 			if (cl_tep == NULL) {
5465 				tl_freetip(tep, tip);
5466 				continue;
5467 			}
5468 
5469 			if (cl_tep->te_oconp != NULL) {
5470 				ASSERT(cl_tep != cl_tep->te_oconp);
5471 				TL_UNCONNECT(cl_tep->te_oconp);
5472 			}
5473 
5474 			if (cl_tep->te_closing) {
5475 				tl_freetip(tep, tip);
5476 				continue;
5477 			}
5478 
5479 			enableok(cl_tep->te_wq);
5480 			TL_QENABLE(cl_tep);
5481 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5482 			if (d_mp != NULL) {
5483 				cl_tep->te_state = TS_IDLE;
5484 				putnext(cl_tep->te_rq, d_mp);
5485 			} else {
5486 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5487 					    SL_TRACE|SL_ERROR,
5488 					    "tl_co_unconnect:icmng: "
5489 					    "allocb failure"));
5490 			}
5491 			tl_freetip(tep, tip);
5492 		}
5493 	} else if (srv_tep != NULL) {
5494 		/*
5495 		 * If outgoing request pending, change state
5496 		 * of server on discon ind event
5497 		 */
5498 
5499 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5500 		    IS_COTSORD(srv_tep) &&
5501 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5502 			/*
5503 			 * Queue ordrel_ind for server to be picked up
5504 			 * when the connection is accepted.
5505 			 */
5506 			d_mp = tl_ordrel_ind_alloc();
5507 		} else {
5508 			/*
5509 			 * send discon_ind to server
5510 			 */
5511 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5512 		}
5513 		if (d_mp == NULL) {
5514 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5515 				SL_TRACE|SL_ERROR,
5516 				"tl_co_unconnect:outgoing:allocb failure"));
5517 			TL_UNCONNECT(tep->te_oconp);
5518 			goto discon_peer;
5519 		}
5520 
5521 		/*
5522 		 * If this is a socket the T_DISCON_IND is queued with
5523 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5524 		 * from the list of pending connections.
5525 		 * Note that when te_oconp is set the peer better have
5526 		 * a t_connind_t for the client.
5527 		 */
5528 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5529 			/*
5530 			 * Queue the disconnection message.
5531 			 */
5532 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5533 		} else {
5534 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5535 			if (tip == NULL) {
5536 				freemsg(d_mp);
5537 			} else {
5538 				ASSERT(tep == tip->ti_tep);
5539 				ASSERT(tep->te_ser == srv_tep->te_ser);
5540 				/*
5541 				 * Delete tip from the server list.
5542 				 */
5543 				if (srv_tep->te_nicon == 1) {
5544 					srv_tep->te_state =
5545 					    NEXTSTATE(TE_DISCON_IND2,
5546 						srv_tep->te_state);
5547 				} else {
5548 					srv_tep->te_state =
5549 					    NEXTSTATE(TE_DISCON_IND3,
5550 						srv_tep->te_state);
5551 				}
5552 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5553 				    T_DISCON_IND);
5554 				putnext(srv_tep->te_rq, d_mp);
5555 				tl_freetip(srv_tep, tip);
5556 			}
5557 			TL_UNCONNECT(tep->te_oconp);
5558 			srv_tep = NULL;
5559 		}
5560 	} else if (peer_tep != NULL) {
5561 		/*
5562 		 * unconnect existing connection
5563 		 * If connected, change state of peer on
5564 		 * discon ind event and send discon ind pdu
5565 		 * to module above it
5566 		 */
5567 
5568 		ASSERT(tep->te_ser == peer_tep->te_ser);
5569 		if (IS_COTSORD(peer_tep) &&
5570 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5571 		    peer_tep->te_state == TS_DATA_XFER)) {
5572 			/*
5573 			 * send ordrel ind
5574 			 */
5575 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5576 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5577 				peer_tep->te_state,
5578 				NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5579 			d_mp = tl_ordrel_ind_alloc();
5580 			if (! d_mp) {
5581 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5582 				    SL_TRACE|SL_ERROR,
5583 				    "tl_co_unconnect:connected:"
5584 				    "allocb failure"));
5585 				/*
5586 				 * Continue with cleaning up peer as
5587 				 * this side may go away with the close
5588 				 */
5589 				TL_QENABLE(peer_tep);
5590 				goto discon_peer;
5591 			}
5592 			peer_tep->te_state =
5593 				NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5594 
5595 			putnext(peer_tep->te_rq, d_mp);
5596 			/*
5597 			 * Handle flow control case.  This will generate
5598 			 * a t_discon_ind message with reason 0 if there
5599 			 * is data queued on the write side.
5600 			 */
5601 			TL_QENABLE(peer_tep);
5602 		} else if (IS_COTSORD(peer_tep) &&
5603 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5604 			/*
5605 			 * Sent an ordrel_ind. We send a discon with
5606 			 * with error 0 to inform that the peer is gone.
5607 			 */
5608 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5609 				SL_TRACE|SL_ERROR,
5610 				"tl_co_unconnect: discon in state %d",
5611 				tep->te_state));
5612 			tl_discon_ind(peer_tep, 0);
5613 		} else {
5614 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5615 				SL_TRACE|SL_ERROR,
5616 				"tl_co_unconnect: state %d", tep->te_state));
5617 			tl_discon_ind(peer_tep, ECONNRESET);
5618 		}
5619 
5620 discon_peer:
5621 		/*
5622 		 * Disconnect cross-pointers only for close
5623 		 */
5624 		if (tep->te_closing) {
5625 			peer_tep = tep->te_conp;
5626 			TL_REMOVE_PEER(peer_tep->te_conp);
5627 			TL_REMOVE_PEER(tep->te_conp);
5628 		}
5629 	}
5630 }
5631 
5632 /*
5633  * Note: The following routine does not recover from allocb()
5634  * failures
5635  * The reason should be from the <sys/errno.h> space.
5636  */
5637 static void
5638 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5639 {
5640 	mblk_t *d_mp;
5641 
5642 	if (tep->te_closing)
5643 		return;
5644 
5645 	/*
5646 	 * flush the queues.
5647 	 */
5648 	flushq(tep->te_rq, FLUSHDATA);
5649 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5650 
5651 	/*
5652 	 * send discon ind
5653 	 */
5654 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5655 	if (! d_mp) {
5656 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5657 			"tl_discon_ind:allocb failure"));
5658 		return;
5659 	}
5660 	tep->te_state = TS_IDLE;
5661 	putnext(tep->te_rq, d_mp);
5662 }
5663 
5664 /*
5665  * Note: The following routine does not recover from allocb()
5666  * failures
5667  * The reason should be from the <sys/errno.h> space.
5668  */
5669 static mblk_t *
5670 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5671 {
5672 	mblk_t *mp;
5673 	struct T_discon_ind *tdi;
5674 
5675 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5676 		DB_TYPE(mp) = M_PROTO;
5677 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5678 		tdi = (struct T_discon_ind *)mp->b_rptr;
5679 		tdi->PRIM_type = T_DISCON_IND;
5680 		tdi->DISCON_reason = reason;
5681 		tdi->SEQ_number = seqnum;
5682 	}
5683 	return (mp);
5684 }
5685 
5686 
5687 /*
5688  * Note: The following routine does not recover from allocb()
5689  * failures
5690  */
5691 static mblk_t *
5692 tl_ordrel_ind_alloc(void)
5693 {
5694 	mblk_t *mp;
5695 	struct T_ordrel_ind *toi;
5696 
5697 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5698 		DB_TYPE(mp) = M_PROTO;
5699 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5700 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5701 		toi->PRIM_type = T_ORDREL_IND;
5702 	}
5703 	return (mp);
5704 }
5705 
5706 
5707 /*
5708  * Lookup the seqno in the list of queued connections.
5709  */
5710 static tl_icon_t *
5711 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5712 {
5713 	list_t *l = &tep->te_iconp;
5714 	tl_icon_t *tip = list_head(l);
5715 
5716 	ASSERT(seqno != 0);
5717 
5718 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5719 		;
5720 
5721 	return (tip);
5722 }
5723 
5724 /*
5725  * Queue data for a given T_CONN_IND while verifying that redundant
5726  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5727  * Used when the originator of the connection closes.
5728  */
5729 static void
5730 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5731 {
5732 	tl_icon_t		*tip;
5733 	mblk_t			**mpp, *mp;
5734 	int			prim, nprim;
5735 
5736 	if (nmp->b_datap->db_type == M_PROTO)
5737 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5738 	else
5739 		nprim = -1;	/* M_DATA */
5740 
5741 	tip = tl_icon_find(tep, seqno);
5742 	if (tip == NULL) {
5743 		freemsg(nmp);
5744 		return;
5745 	}
5746 
5747 	ASSERT(tip->ti_seqno != 0);
5748 	mpp = &tip->ti_mp;
5749 	while (*mpp != NULL) {
5750 		mp = *mpp;
5751 
5752 		if (mp->b_datap->db_type == M_PROTO)
5753 			prim = ((union T_primitives *)mp->b_rptr)->type;
5754 		else
5755 			prim = -1;	/* M_DATA */
5756 
5757 		/*
5758 		 * Allow nothing after a T_DISCON_IND
5759 		 */
5760 		if (prim == T_DISCON_IND) {
5761 			freemsg(nmp);
5762 			return;
5763 		}
5764 		/*
5765 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5766 		 */
5767 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5768 			freemsg(nmp);
5769 			return;
5770 		}
5771 		mpp = &(mp->b_next);
5772 	}
5773 	*mpp = nmp;
5774 }
5775 
5776 /*
5777  * Verify if a certain TPI primitive exists on the connind queue.
5778  * Use prim -1 for M_DATA.
5779  * Return non-zero if found.
5780  */
5781 static boolean_t
5782 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5783 {
5784 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5785 	boolean_t found = B_FALSE;
5786 
5787 	if (tip != NULL) {
5788 		mblk_t *mp;
5789 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5790 			found = (DB_TYPE(mp) == M_PROTO &&
5791 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5792 		}
5793 	}
5794 	return (found);
5795 }
5796 
5797 /*
5798  * Send the b_next mblk chain that has accumulated before the connection
5799  * was accepted. Perform the necessary state transitions.
5800  */
5801 static void
5802 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5803 {
5804 	mblk_t			*mp;
5805 	union T_primitives	*primp;
5806 
5807 	if (tep->te_closing) {
5808 		tl_icon_freemsgs(mpp);
5809 		return;
5810 	}
5811 
5812 	ASSERT(tep->te_state == TS_DATA_XFER);
5813 	ASSERT(tep->te_rq->q_first == NULL);
5814 
5815 	while ((mp = *mpp) != NULL) {
5816 		*mpp = mp->b_next;
5817 		mp->b_next = NULL;
5818 
5819 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5820 		switch (DB_TYPE(mp)) {
5821 		default:
5822 			freemsg(mp);
5823 			break;
5824 		case M_DATA:
5825 			putnext(tep->te_rq, mp);
5826 			break;
5827 		case M_PROTO:
5828 			primp = (union T_primitives *)mp->b_rptr;
5829 			switch (primp->type) {
5830 			case T_UNITDATA_IND:
5831 			case T_DATA_IND:
5832 			case T_OPTDATA_IND:
5833 			case T_EXDATA_IND:
5834 				putnext(tep->te_rq, mp);
5835 				break;
5836 			case T_ORDREL_IND:
5837 				tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5838 							tep->te_state);
5839 				putnext(tep->te_rq, mp);
5840 				break;
5841 			case T_DISCON_IND:
5842 				tep->te_state = TS_IDLE;
5843 				putnext(tep->te_rq, mp);
5844 				break;
5845 			default:
5846 #ifdef DEBUG
5847 				cmn_err(CE_PANIC,
5848 					"tl_icon_sendmsgs: unknown primitive");
5849 #endif /* DEBUG */
5850 				freemsg(mp);
5851 				break;
5852 			}
5853 			break;
5854 		}
5855 	}
5856 }
5857 
5858 /*
5859  * Free the b_next mblk chain that has accumulated before the connection
5860  * was accepted.
5861  */
5862 static void
5863 tl_icon_freemsgs(mblk_t **mpp)
5864 {
5865 	mblk_t *mp;
5866 
5867 	while ((mp = *mpp) != NULL) {
5868 		*mpp = mp->b_next;
5869 		mp->b_next = NULL;
5870 		freemsg(mp);
5871 	}
5872 }
5873 
5874 /*
5875  * Send M_ERROR
5876  * Note: assumes caller ensured enough space in mp or enough
5877  *	memory available. Does not attempt recovery from allocb()
5878  *	failures
5879  */
5880 
5881 static void
5882 tl_merror(queue_t *wq, mblk_t *mp, int error)
5883 {
5884 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5885 
5886 	if (tep->te_closing) {
5887 		freemsg(mp);
5888 		return;
5889 	}
5890 
5891 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
5892 		    SL_TRACE|SL_ERROR,
5893 		    "tl_merror: tep=%p, err=%d", tep, error));
5894 
5895 	/*
5896 	 * flush all messages on queue. we are shutting
5897 	 * the stream down on fatal error
5898 	 */
5899 	flushq(wq, FLUSHALL);
5900 	if (IS_COTS(tep)) {
5901 		/* connection oriented - unconnect endpoints */
5902 		tl_co_unconnect(tep);
5903 	}
5904 	if (mp->b_cont) {
5905 		freemsg(mp->b_cont);
5906 		mp->b_cont = NULL;
5907 	}
5908 
5909 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5910 		freemsg(mp);
5911 		mp = allocb(1, BPRI_HI);
5912 		if (!mp) {
5913 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5914 				SL_TRACE|SL_ERROR,
5915 				"tl_merror:M_PROTO: out of memory"));
5916 			return;
5917 		}
5918 	}
5919 	if (mp) {
5920 		DB_TYPE(mp) = M_ERROR;
5921 		mp->b_rptr = DB_BASE(mp);
5922 		*mp->b_rptr = (char)error;
5923 		mp->b_wptr = mp->b_rptr + sizeof (char);
5924 		qreply(wq, mp);
5925 	} else {
5926 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
5927 	}
5928 }
5929 
5930 static void
5931 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag)
5932 {
5933 	if (flag & TL_SETCRED) {
5934 		struct opthdr *opt = (struct opthdr *)buf;
5935 		tl_credopt_t *tlcred;
5936 
5937 		opt->level = TL_PROT_LEVEL;
5938 		opt->name = TL_OPT_PEER_CRED;
5939 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5940 
5941 		tlcred = (tl_credopt_t *)(opt + 1);
5942 		tlcred->tc_uid = crgetuid(cr);
5943 		tlcred->tc_gid = crgetgid(cr);
5944 		tlcred->tc_ruid = crgetruid(cr);
5945 		tlcred->tc_rgid = crgetrgid(cr);
5946 		tlcred->tc_suid = crgetsuid(cr);
5947 		tlcred->tc_sgid = crgetsgid(cr);
5948 		tlcred->tc_ngroups = crgetngroups(cr);
5949 	} else if (flag & TL_SETUCRED) {
5950 		struct opthdr *opt = (struct opthdr *)buf;
5951 
5952 		opt->level = TL_PROT_LEVEL;
5953 		opt->name = TL_OPT_PEER_UCRED;
5954 		opt->len = (t_uscalar_t)OPTLEN(ucredsize);
5955 
5956 		(void) cred2ucred(cr, cpid, (void *)(opt + 1));
5957 	} else {
5958 		struct T_opthdr *topt = (struct T_opthdr *)buf;
5959 		ASSERT(flag & TL_SOCKUCRED);
5960 
5961 		topt->level = SOL_SOCKET;
5962 		topt->name = SCM_UCRED;
5963 		topt->len = ucredsize + sizeof (*topt);
5964 		topt->status = 0;
5965 		(void) cred2ucred(cr, cpid, (void *)(topt + 1));
5966 	}
5967 }
5968 
5969 /* ARGSUSED */
5970 static int
5971 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
5972 {
5973 	/* no default value processed in protocol specific code currently */
5974 	return (-1);
5975 }
5976 
5977 /* ARGSUSED */
5978 static int
5979 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
5980 {
5981 	int len;
5982 	tl_endpt_t *tep;
5983 	int *valp;
5984 
5985 	tep = (tl_endpt_t *)wq->q_ptr;
5986 
5987 	len = 0;
5988 
5989 	/*
5990 	 * Assumes: option level and name sanity check done elsewhere
5991 	 */
5992 
5993 	switch (level) {
5994 	case SOL_SOCKET:
5995 		if (! IS_SOCKET(tep))
5996 			break;
5997 		switch (name) {
5998 		case SO_RECVUCRED:
5999 			len = sizeof (int);
6000 			valp = (int *)ptr;
6001 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6002 			break;
6003 		default:
6004 			break;
6005 		}
6006 		break;
6007 	case TL_PROT_LEVEL:
6008 		switch (name) {
6009 		case TL_OPT_PEER_CRED:
6010 		case TL_OPT_PEER_UCRED:
6011 			/*
6012 			 * option not supposed to retrieved directly
6013 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6014 			 * when some internal flags set by other options
6015 			 * Direct retrieval always designed to fail(ignored)
6016 			 * for this option.
6017 			 */
6018 			break;
6019 		}
6020 	}
6021 	return (len);
6022 }
6023 
6024 /* ARGSUSED */
6025 static int
6026 tl_set_opt(
6027 	queue_t		*wq,
6028 	uint_t		mgmt_flags,
6029 	int		level,
6030 	int		name,
6031 	uint_t		inlen,
6032 	uchar_t		*invalp,
6033 	uint_t		*outlenp,
6034 	uchar_t		*outvalp,
6035 	void		*thisdg_attrs,
6036 	cred_t		*cr,
6037 	mblk_t		*mblk)
6038 {
6039 	int error;
6040 	tl_endpt_t *tep;
6041 
6042 	tep = (tl_endpt_t *)wq->q_ptr;
6043 
6044 	error = 0;		/* NOERROR */
6045 
6046 	/*
6047 	 * Assumes: option level and name sanity checks done elsewhere
6048 	 */
6049 
6050 	switch (level) {
6051 	case SOL_SOCKET:
6052 		if (! IS_SOCKET(tep)) {
6053 			error = EINVAL;
6054 			break;
6055 		}
6056 		/*
6057 		 * TBD: fill in other AF_UNIX socket options and then stop
6058 		 * returning error.
6059 		 */
6060 		switch (name) {
6061 		case SO_RECVUCRED:
6062 			/*
6063 			 * We only support this for datagram sockets;
6064 			 * getpeerucred handles the connection oriented
6065 			 * transports.
6066 			 */
6067 			if (! IS_CLTS(tep)) {
6068 				error = EINVAL;
6069 				break;
6070 			}
6071 			if (*(int *)invalp == 0)
6072 				tep->te_flag &= ~TL_SOCKUCRED;
6073 			else
6074 				tep->te_flag |= TL_SOCKUCRED;
6075 			break;
6076 		default:
6077 			error = EINVAL;
6078 			break;
6079 		}
6080 		break;
6081 	case TL_PROT_LEVEL:
6082 		switch (name) {
6083 		case TL_OPT_PEER_CRED:
6084 		case TL_OPT_PEER_UCRED:
6085 			/*
6086 			 * option not supposed to be set directly
6087 			 * Its value in initialized for each endpoint at
6088 			 * driver open time.
6089 			 * Direct setting always designed to fail for this
6090 			 * option.
6091 			 */
6092 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6093 				    SL_TRACE|SL_ERROR,
6094 				    "tl_set_opt: option is not supported"));
6095 			error = EPROTO;
6096 			break;
6097 		}
6098 	}
6099 	return (error);
6100 }
6101 
6102 
6103 static void
6104 tl_timer(void *arg)
6105 {
6106 	queue_t *wq = arg;
6107 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6108 
6109 	ASSERT(tep);
6110 
6111 	tep->te_timoutid = 0;
6112 
6113 	enableok(wq);
6114 	/*
6115 	 * Note: can call wsrv directly here and save context switch
6116 	 * Consider change when qtimeout (not timeout) is active
6117 	 */
6118 	qenable(wq);
6119 }
6120 
6121 static void
6122 tl_buffer(void *arg)
6123 {
6124 	queue_t *wq = arg;
6125 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6126 
6127 	ASSERT(tep);
6128 
6129 	tep->te_bufcid = 0;
6130 	tep->te_nowsrv = B_FALSE;
6131 
6132 	enableok(wq);
6133 	/*
6134 	 *  Note: can call wsrv directly here and save context switch
6135 	 * Consider change when qbufcall (not bufcall) is active
6136 	 */
6137 	qenable(wq);
6138 }
6139 
6140 static void
6141 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6142 {
6143 	tl_endpt_t *tep;
6144 
6145 	tep = (tl_endpt_t *)wq->q_ptr;
6146 
6147 	if (tep->te_closing) {
6148 		freemsg(mp);
6149 		return;
6150 	}
6151 	noenable(wq);
6152 
6153 	(void) insq(wq, wq->q_first, mp);
6154 
6155 	if (tep->te_bufcid || tep->te_timoutid) {
6156 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6157 			"tl_memrecover:recover %p pending", (void *)wq));
6158 		return;
6159 	}
6160 
6161 	if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6162 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6163 		    drv_usectohz(TL_BUFWAIT));
6164 	}
6165 }
6166 
6167 static void
6168 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6169 {
6170 	ASSERT(tip->ti_seqno != 0);
6171 
6172 	if (tip->ti_mp != NULL) {
6173 		tl_icon_freemsgs(&tip->ti_mp);
6174 		tip->ti_mp = NULL;
6175 	}
6176 	if (tip->ti_tep != NULL) {
6177 		tl_refrele(tip->ti_tep);
6178 		tip->ti_tep = NULL;
6179 	}
6180 	list_remove(&tep->te_iconp, tip);
6181 	kmem_free(tip, sizeof (tl_icon_t));
6182 	tep->te_nicon--;
6183 }
6184 
6185 /*
6186  * Remove address from address hash.
6187  */
6188 static void
6189 tl_addr_unbind(tl_endpt_t *tep)
6190 {
6191 	tl_endpt_t *elp;
6192 
6193 	if (tep->te_flag & TL_ADDRHASHED) {
6194 		if (IS_SOCKET(tep)) {
6195 			(void) mod_hash_remove(tep->te_addrhash,
6196 			    (mod_hash_key_t)tep->te_vp,
6197 			    (mod_hash_val_t *)&elp);
6198 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6199 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6200 		} else {
6201 			(void) mod_hash_remove(tep->te_addrhash,
6202 			    (mod_hash_key_t)&tep->te_ap,
6203 			    (mod_hash_val_t *)&elp);
6204 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6205 			tep->te_alen = -1;
6206 			tep->te_abuf = NULL;
6207 		}
6208 		tep->te_flag &= ~TL_ADDRHASHED;
6209 	}
6210 }
6211