xref: /titanic_51/usr/src/uts/common/io/tl.c (revision f0b62587229842fad8c5df20795bf9bca17327bd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Multithreaded STREAMS Local Transport Provider.
28  *
29  * OVERVIEW
30  * ========
31  *
32  * This driver provides TLI as well as socket semantics.  It provides
33  * connectionless, connection oriented, and connection oriented with orderly
34  * release transports for TLI and sockets. Each transport type has separate name
35  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
36  * this removes any name space conflicts when binding to socket style transport
37  * addresses.
38  *
39  * NOTE: There is one exception: Socket ticots and ticotsord transports share
40  * the same namespace. In fact, sockets always use ticotsord type transport.
41  *
42  * The driver mode is specified during open() by the minor number used for
43  * open.
44  *
45  *  The sockets in addition have the following semantic differences:
46  *  No support for passing up credentials (TL_SET[U]CRED).
47  *
48  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
49  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
50  *	T_OPTDATA_IND.
51  *
52  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
53  *	a T_CONN_RES is received from the acceptor. This means that a socket
54  *	connect will complete before the peer has called accept.
55  *
56  *
57  * MULTITHREADING
58  * ==============
59  *
60  * The driver does not use STREAMS protection mechanisms. Instead it uses a
61  * generic "serializer" abstraction. Most of the operations are executed behind
62  * the serializer and are, essentially single-threaded. All functions executed
63  * behind the same serializer are strictly serialized. So if one thread calls
64  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
65  * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
66  * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2)
67  * or bar(mp2, arg2); foo(mp1, arg1); but foo() and bar() will never run at the
68  * same time.
69  *
70  * Connectionless transports use a single serializer per transport type (one
71  * for TLI and one for sockets). Connection-oriented transports use finer-grained
72  * serializers.
73  *
74  * All COTS-type endpoints start their life with private serializers. During
75  * connection request processing the endpoint serializer is switched to the
76  * listener's serializer and the rest of T_CONN_REQ processing is done on the
77  * listener serializer. During T_CONN_RES processing the eager serializer is
78  * switched from listener to acceptor serializer and after that point all
79  * processing for eager and acceptor happens on this serializer. To avoid races
80  * with endpoint closes while its serializer may be changing closes are blocked
81  * while serializers are manipulated.
82  *
83  * References accounting
84  * ---------------------
85  *
86  * Endpoints are reference counted and freed when the last reference is
87  * dropped. Functions within the serializer may access an endpoint state even
88  * after an endpoint closed. The te_closing being set on the endpoint indicates
89  * that the endpoint entered its close routine.
90  *
91  * One reference is held for each opened endpoint instance. The reference
92  * counter is incremented when the endpoint is linked to another endpoint and
93  * decremented when the link disappears. It is also incremented when the
94  * endpoint is found by the hash table lookup. This increment is atomic with the
95  * lookup itself and happens while the hash table read lock is held.
96  *
97  * Close synchronization
98  * ---------------------
99  *
100  * During close the endpoint is marked as closing using te_closing flag. It is
101  * usually enough to check for te_closing flag since all other state changes
102  * happen after this flag is set and the close entered serializer. Immediately
103  * after setting te_closing flag tl_close() enters serializer and waits until
104  * the callback finishes. This allows all functions called within serializer to
105  * simply check te_closing without any locks.
106  *
107  * Serializer management.
108  * ---------------------
109  *
110  * For COTS transports serializers are created when the endpoint is constructed
111  * and destroyed when the endpoint is destructed. CLTS transports use global
112  * serializers - one for sockets and one for TLI.
113  *
114  * COTS serializers have separate reference counts to deal with several
115  * endpoints sharing the same serializer. There is a subtle problem related to
116  * the serializer destruction. The serializer should never be destroyed by any
117  * function executed inside serializer. This means that close has to wait till
118  * all serializer activity for this endpoint is finished before it can drop the
119  * last reference on the endpoint (which may as well free the serializer).  This
120  * is only relevant for COTS transports which manage serializers
121  * dynamically. For CLTS transports close may complete without waiting for all
122  * serializer activity to finish since serializer is only destroyed at driver
123  * detach time.
124  *
125  * COTS endpoints keep track of the number of outstanding requests on the
126  * serializer for the endpoint. The code handling accept() avoids changing
127  * client serializer if it has any pending messages on the serializer and
128  * instead moves acceptor to listener's serializer.
129  *
130  *
131  * Use of hash tables
132  * ------------------
133  *
134  * The driver uses modhash hash table implementation. Each transport uses two
135  * hash tables - one for finding endpoints by acceptor ID and another one for
136  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
137  * pair of hash tables since sockets only use TICOTSORD.
138  *
139  * All hash tables lookups increment a reference count for returned endpoints,
140  * so we may safely check the endpoint state even when the endpoint is removed
141  * from the hash by another thread immediately after it is found.
142  *
143  *
144  * CLOSE processing
145  * ================
146  *
147  * The driver enters serializer twice on close(). The close sequence is the
148  * following:
149  *
150  * 1) Wait until closing is safe (te_closewait becomes zero)
151  *	This step is needed to prevent close during serializer switches. In most
152  *	cases (close happening after connection establishment) te_closewait is
153  *	zero.
154  * 2) Set te_closing.
155  * 3) Call tl_close_ser() within serializer and wait for it to complete.
156  *
157  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
158  *	It also needs to clear write-side q_next pointers - this should be done
159  *	before qprocsoff().
160  *
161  *    This synchronous serializer entry during close is needed to ensure that
162  *    the queue is valid everywhere inside the serializer.
163  *
164  *    Note that in many cases close will execute tl_close_ser() synchronously,
165  *    so it will not wait at all.
166  *
167  * 4) Calls qprocsoff().
168  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
169  *	complete (for COTS transports). For CLTS transport there is no wait.
170  *
171  *	tl_close_finish_ser() finishes the close process and wakes up waiting
172  *	close if there is any.
173  *
174  *    Note that in most cases close will enter tl_close_finish_ser()
175  *    synchronously and will not wait at all.
176  *
177  *
178  * Flow Control
179  * ============
180  *
181  * The driver implements both read and write side service routines. No one calls
182  * putq() on the read queue. The read side service routine tl_rsrv() is called
183  * when the read side stream is back-enabled. It enters serializer synchronously
184  * (waits till serializer processing is complete). Within serializer it
185  * back-enables all endpoints blocked by the queue for connection-less
186  * transports and enables write side service processing for the peer for
187  * connection-oriented transports.
188  *
189  * Read and write side service routines use special mblk_sized space in the
190  * endpoint structure to enter perimeter.
191  *
192  * Write-side flow control
193  * -----------------------
194  *
195  * Write side flow control is a bit tricky. The driver needs to deal with two
196  * message queues - the explicit STREAMS message queue maintained by
197  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
198  * queues should be synchronized to preserve message ordering and should
199  * maintain a single order determined by the order in which messages enter
200  * tl_wput(). In order to maintain the ordering between these two queues the
201  * STREAMS queue is only manipulated within the serializer, so the ordering is
202  * provided by the serializer.
203  *
204  * Functions called from the tl_wsrv() sometimes may call putbq(). To
205  * immediately stop any further processing of the STREAMS message queues the
206  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
207  * side service processing stops when the flag is set.
208  *
209  * The tl_wsrv() function enters serializer synchronously and waits for it to
210  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
211  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
212  * set. Note that the maximum number of messages processed by tl_wsrv_ser() is
213  * always bounded by the number of messages on the STREAMS queue at the time
214  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
215  * queue from another serialized entry which can't happen in parallel. This
216  * guarantees that tl_wsrv_ser() completes in bounded time (there is no risk
217  * of it draining forever while writer places new messages on the STREAMS
218  * queue).
219  *
220  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
221  *
222  *
223  * Unix Domain Sockets
224  * ===================
225  *
226  * The driver knows the structure of Unix Domain sockets addresses and treats
227  * them differently from generic TLI addresses. For sockets implicit binds are
228  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
229  * instead of using address length of zero. Explicit binds specify
230  * SOU_MAGIC_EXPLICIT as magic.
231  *
232  * For implicit binds we always use minor number as soua_vp part of the address
233  * and avoid any hash table lookups. This saves two hash tables lookups per
234  * anonymous bind.
235  *
236  * For explicit address we hash the vnode pointer instead of hashing the
237  * full-scale address+zone+length. Hashing by pointer is more efficient than
238  * hashing by the full address.
239  *
240  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
241  * tep structure, so it should never be freed.
242  *
243  * Also for sockets the driver always uses minor number as acceptor id.
244  *
245  * TPI VIOLATIONS
246  * --------------
247  *
248  * This driver violates TPI in several respects for Unix Domain Sockets:
249  *
250  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
251  *	is requested and the endpoint is already in use. There is no point in
252  *	generating an unused address since this address will be rejected by
253  *	sockfs anyway. For implicit binds it always generates a new address
254  *	(sets soua_vp to its minor number).
255  *
256  * 2) It always uses minor number as acceptor ID and never uses queue
257  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
258  *	message and they do not use the queue pointer.
259  *
260  * 3) For Listener sockets the usual sequence is to issue bind() with zero
261  *	backlog followed by listen(). The listen() should be issued with non-zero
262  *	backlog, so sotpi_listen() issues unbind request followed by bind
263  *	request to the same address but with a non-zero qlen value. Both
264  *	tl_bind() and tl_unbind() require write lock on the hash table to
265  *	insert/remove the address. The driver does not remove the address from
266  *	the hash for endpoints that are bound to the explicit address and have
267  *	backlog of zero. During T_BIND_REQ processing if the address requested
268  *	is equal to the address the endpoint already has it updates the backlog
269  *	without reinserting the address in the hash table. This optimization
270  *	avoids two hash table updates for each listener created. It also
271  *	avoids the problem of a "stolen" address when another listener may use
272  *	the same address between the unbind and bind and suddenly listen() fails
273  *	because address is in use even though the bind() succeeded.
274  *
275  *
276  * CONNECTIONLESS TRANSPORTS
277  * =========================
278  *
279  * Connectionless transports all share the same serializer (one for TLI and one
280  * for Sockets). Functions executing behind serializer can check or modify state
281  * of any endpoint.
282  *
283  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
284  * te_lastep field. The next time X talks to some address A it checks whether A
285  * is the same as Y's address and if it is there is no need to lookup Y. If the
286  * address is different or the state of Y is not appropriate (e.g. closed or not
287  * idle) X does a lookup using tl_find_peer() and caches the new address.
288  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
289  * on the endpoint found.
290  *
291  * During close of endpoint Y it doesn't try to remove itself from other
292  * endpoints caches. They will detect that Y is gone and will search the peer
293  * endpoint again.
294  *
295  * Flow Control Handling.
296  * ----------------------
297  *
298  * Each connectionless endpoint keeps a list of endpoints which are
299  * flow-controlled by its queue. It also keeps a pointer to the queue which
300  * flow-controls itself.  Whenever flow control releases for endpoint X it
301  * enables all queues from the list. During close it also back-enables everyone
302  * in the list. If X is flow-controlled when it is closing it removes itself
303  * from the peer's list.
304  *
305  * DATA STRUCTURES
306  * ===============
307  *
308  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
309  * endpoint state. For connection-oriented transports it keeps a list
310  * of pending connections (tl_icon_t). For connectionless transports it keeps a
311  * list of endpoints flow controlled by this one.
312  *
313  * Each transport type is represented by a per-transport data structure
314  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
315  * endpoint address hash tables for each transport. It also contains pointer to
316  * transport serializer for connectionless transports.
317  *
318  * Each endpoint keeps a link to its transport structure, so the code can find
319  * all per-transport information quickly.
320  */
321 
322 #include	<sys/types.h>
323 #include	<sys/inttypes.h>
324 #include	<sys/stream.h>
325 #include	<sys/stropts.h>
326 #define	_SUN_TPI_VERSION 2
327 #include	<sys/tihdr.h>
328 #include	<sys/strlog.h>
329 #include	<sys/debug.h>
330 #include	<sys/cred.h>
331 #include	<sys/errno.h>
332 #include	<sys/kmem.h>
333 #include	<sys/id_space.h>
334 #include	<sys/modhash.h>
335 #include	<sys/mkdev.h>
336 #include	<sys/tl.h>
337 #include	<sys/stat.h>
338 #include	<sys/conf.h>
339 #include	<sys/modctl.h>
340 #include	<sys/strsun.h>
341 #include	<sys/socket.h>
342 #include	<sys/socketvar.h>
343 #include	<sys/sysmacros.h>
344 #include	<sys/xti_xtiopt.h>
345 #include	<sys/ddi.h>
346 #include	<sys/sunddi.h>
347 #include	<sys/zone.h>
348 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
349 #include	<inet/optcom.h>
350 #include	<sys/strsubr.h>
351 #include	<sys/ucred.h>
352 #include	<sys/suntpi.h>
353 #include	<sys/list.h>
354 #include	<sys/serializer.h>
355 
356 /*
357  * TBD List
358  * 14 Eliminate state changes through table
359  * 16. AF_UNIX socket options
360  * 17. connect() for ticlts
361  * 18. support for "netstat" to show AF_UNIX plus TLI local
362  *	transport connections
363  * 21. sanity check to flushing on sending M_ERROR
364  */
365 
366 /*
367  * CONSTANT DECLARATIONS
368  * --------------------
369  */
370 
371 /*
372  * Local declarations
373  */
374 #define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]
375 
376 #define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
377 #define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
378 #define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgz" is unlimited (0) */
379 /*
380  * Hash tables size.
381  */
382 #define	TL_HASH_SIZE 311
383 
384 /*
385  * Definitions for module_info
386  */
387 #define		TL_ID		(104)		/* module ID number */
388 #define		TL_NAME		"tl"		/* module name */
389 #define		TL_MINPSZ	(0)		/* min packet size */
390 #define		TL_MAXPSZ	INFPSZ 		/* max packet size ZZZ */
391 #define		TL_HIWAT	(16*1024)	/* hi water mark */
392 #define		TL_LOWAT	(256)		/* lo water mark */
393 /*
394  * Definition of minor numbers/modes for new transport provider modes.
395  * We view the socket use as a separate mode to get a separate name space.
396  */
397 #define		TL_TICOTS	0	/* connection oriented transport */
398 #define		TL_TICOTSORD 	1	/* COTS w/ orderly release */
399 #define		TL_TICLTS 	2	/* connectionless transport */
400 #define		TL_UNUSED	3
401 #define		TL_SOCKET	4	/* Socket */
402 #define		TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
403 #define		TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
404 #define		TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)
405 
406 #define		TL_MINOR_MASK	0x7
407 #define		TL_MINOR_START	(TL_TICLTS + 1)
408 
409 /*
410  * LOCAL MACROS
411  */
412 #define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
413 
414 /*
415  * EXTERNAL VARIABLE DECLARATIONS
416  * -----------------------------
417  */
418 /*
419  * state table defined in the OS space.c
420  */
421 extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
422 
423 /*
424  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
425  */
426 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
427 static int tl_close(queue_t *, int, cred_t *);
428 static void tl_wput(queue_t *, mblk_t *);
429 static void tl_wsrv(queue_t *);
430 static void tl_rsrv(queue_t *);
431 
432 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
433 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
434 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
435 
436 
437 /*
438  * GLOBAL DATA STRUCTURES AND VARIABLES
439  * -----------------------------------
440  */
441 
442 /*
443  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
444  * For now, we only manage the SO_RECVUCRED option but we also have
445  * harmless dummy options to make things work with some common code we access.
446  */
447 opdes_t	tl_opt_arr[] = {	/* field roles: see opdes_t (inet/optcom.h) */
448 	/* The SO_TYPE is needed for the hack below */
449 	{
450 		SO_TYPE,		/* option name */
451 		SOL_SOCKET,		/* option level */
452 		OA_R,			/* access flags - TODO confirm role */
453 		OA_R,			/* access flags - TODO confirm role */
454 		OP_NP,			/* option properties - TODO confirm */
455 		0,
456 		sizeof (t_scalar_t),	/* presumably option value size */
457 		0
458 	},
459 	{
460 		SO_RECVUCRED,		/* option name */
461 		SOL_SOCKET,		/* option level */
462 		OA_RW,			/* access flags - TODO confirm role */
463 		OA_RW,			/* access flags - TODO confirm role */
464 		OP_NP,			/* option properties - TODO confirm */
465 		0,
466 		sizeof (int),		/* presumably option value size */
467 		0
468 	}
469 };
470 
471 /*
472  * Table of all supported levels
473  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
474  * any supported options so we need this info separately.
475  *
476  * This is needed only for topmost tpi providers.
477  */
478 optlevel_t	tl_valid_levels_arr[] = {
479 	XTI_GENERIC,
480 	SOL_SOCKET,
481 	TL_PROT_LEVEL
482 };
483 
484 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
485 /*
486  * Current upper bound on the amount of space needed to return all options.
487  * Additional options with data size of sizeof(long) are handled automatically.
488  * Others must be accounted for by hand (the constant slack term below).
489  */
490 #define	TL_MAX_OPT_BUF_LEN						\
491 		((A_CNT(tl_opt_arr) << 2) +				\
492 		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
493 		64 + sizeof (struct T_optmgmt_ack))
494 
495 #define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)
496 
497 /*
498  *	transport addr structure
499  */
500 typedef struct tl_addr {
501 	zoneid_t	ta_zoneid;		/* Zone scope of address */
502 	t_scalar_t	ta_alen;		/* length of abuf */
503 	void		*ta_abuf;		/* the addr itself */
504 } tl_addr_t;
505 
506 /*
507  * Refcounted version of serializer; lets several endpoints share one.
508  */
509 typedef struct tl_serializer {
510 	uint_t		ts_refcnt;	/* reference count */
511 	serializer_t	*ts_serializer;	/* the wrapped serializer */
512 } tl_serializer_t;
513 
514 /*
515  * Each transport type has a separate state.
516  * Per-transport state (one entry per mode in tl_transports[]).
517  */
518 typedef struct tl_transport_state {
519 	char		*tr_name;	/* transport name, e.g. "ticots" */
520 	minor_t		tr_minor;	/* minor number/mode (TL_TICOTS, ...) */
521 	uint32_t	tr_defaddr;	/* starts as TL_DFADDR; use - TODO confirm */
522 	mod_hash_t	*tr_ai_hash;	/* acceptor ID hash */
523 	mod_hash_t	*tr_addr_hash;	/* endpoint address hash */
524 	tl_serializer_t	*tr_serializer;	/* serializer for CLTS transports */
525 } tl_transport_state_t;
526 
527 #define	TL_DFADDR 0x1000
528 
529 static tl_transport_state_t tl_transports[] = {	/* index == tr_minor */
530 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
531 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
532 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
533 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
534 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
535 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
536 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
537 };
538 
539 #define	TL_MAXTRANSPORT A_CNT(tl_transports)
540 
541 struct tl_endpt;
542 typedef struct tl_endpt tl_endpt_t;
543 
544 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
545 
546 /*
547  * Data structure used to represent pending connects.
548  * Records enough information so that the connecting peer can close
549  * before the connection gets accepted.
550  */
551 typedef struct tl_icon {
552 	list_node_t	ti_node;	/* list linkage */
553 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
554 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
555 	t_scalar_t	ti_seqno;	/* Sequence number */
556 } tl_icon_t;
557 
558 typedef struct so_ux_addr soux_addr_t;
559 #define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)
560 
561 /*
562  * Maximum number of unaccepted connection indications allowed per listener.
563  */
564 #define	TL_MAXQLEN	4096
565 int tl_maxqlen = TL_MAXQLEN;
566 
567 /*
568  *	transport endpoint structure
569  */
570 struct tl_endpt {
571 	queue_t		*te_rq;		/* stream read queue */
572 	queue_t		*te_wq;		/* stream write queue */
573 	uint32_t	te_refcnt;	/* reference count */
574 	int32_t 	te_state;	/* TPI state of endpoint */
575 	minor_t		te_minor;	/* minor number */
576 #define	te_seqno	te_minor	/* seqno is an alias for the minor */
577 	uint_t		te_flag;	/* flag field */
578 	boolean_t	te_nowsrv;	/* set by TL_PUTBQ; stops wsrv work */
579 	tl_serializer_t	*te_ser;	/* Serializer to use */
580 #define	te_serializer	te_ser->ts_serializer
581 
582 	soux_addr_t	te_uxaddr;	/* Socket address */
583 #define	te_magic	te_uxaddr.soua_magic
584 #define	te_vp		te_uxaddr.soua_vp
585 	tl_addr_t	te_ap;		/* addr bound to this endpt */
586 #define	te_zoneid te_ap.ta_zoneid
587 #define	te_alen	te_ap.ta_alen
588 #define	te_abuf	te_ap.ta_abuf
589 
590 	tl_transport_state_t *te_transport;	/* per-transport state */
591 #define	te_addrhash	te_transport->tr_addr_hash
592 #define	te_aihash	te_transport->tr_ai_hash
593 #define	te_defaddr	te_transport->tr_defaddr
594 	cred_t		*te_credp;	/* endpoint user credentials */
595 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
596 
597 	/*
598 	 * State specific for connection-oriented and connectionless transports.
599 	 */
600 	union {
601 		/* Connection-oriented state. */
602 		struct {
603 			t_uscalar_t _te_nicon;	/* count of conn requests */
604 			t_uscalar_t _te_qlen;	/* max conn requests */
605 			tl_endpt_t  *_te_oconp;	/* conn request pending */
606 			tl_endpt_t  *_te_conp;	/* connected endpt */
607 #ifndef _ILP32
608 			void	    *_te_pad;	/* padding, non-ILP32 only */
609 #endif
610 			list_t	_te_iconp;	/* list of conn ind. pending */
611 		} _te_cots_state;
612 		/* Connection-less state. */
613 		struct {
614 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
615 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
616 			list_node_t _te_flows;	/* lists of connections */
617 			list_t  _te_flowlist;	/* Who flowcontrols on me */
618 		} _te_clts_state;
619 	} _te_transport_state;
620 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
621 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
622 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
623 #define	te_conp		_te_transport_state._te_cots_state._te_conp
624 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
625 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
626 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
627 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
628 #define	te_flows	_te_transport_state._te_clts_state._te_flows
629 
630 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
631 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
632 	pid_t		te_cpid;	/* cached pid of endpoint */
633 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
634 	/*
635 	 * Pieces of the endpoint state needed for closing.
636 	 */
637 	kmutex_t	te_closelock;	/* presumably guards close state */
638 	kcondvar_t	te_closecv;	/* presumably paired w/ te_closelock */
639 	uint8_t		te_closing;	/* The endpoint started closing */
640 	uint8_t		te_closewait;	/* Wait in close until zero */
641 	mblk_t		te_closemp;	/* for entering serializer on close */
642 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
643 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
644 	kmutex_t	te_srv_lock;	/* presumably guards *_active below */
645 	kcondvar_t	te_srv_cv;	/* presumably paired w/ te_srv_lock */
646 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
647 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
648 	/*
649 	 * Pieces of the endpoint state needed for serializer transitions.
650 	 */
651 	kmutex_t	te_ser_lock;	/* Protects the count below */
652 	uint_t		te_ser_count;	/* Number of messages on serializer */
653 };
654 
655 /*
656  * Flag values. Lower 4 bits specify the transport used.
657  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only;
658  * they make it easier to identify the endpoint.
659  */
660 #define	TL_LISTENER	0x00010	/* the listener endpoint */
661 #define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
662 #define	TL_EAGER	0x00040	/* connecting endpoint */
663 #define	TL_ACCEPTED	0x00080	/* accepted connection */
664 #define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
665 #define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
666 #define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
667 #define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
668 #define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
669 /*
670  * Boolean checks for the endpoint type, tested on the mode bits of te_flag.
671  */
672 #define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0) /* TL_TICLTS set */
673 #define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0) /* TL_TICLTS clear */
674 #define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0) /* ordrel bit */
675 #define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0) /* socket bit */
676 
677 /*
678  * Certain operations are always used together. These macros reduce the chance
679  * of missing a part of a combination. Each one expands to a brace block.
680  */
681 #define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } /* x must be an lvalue */
682 #define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } /* NULL-safe */
683 
684 #define	TL_PUTBQ(x, mp) {		/* put mp back, flag te_nowsrv */ \
685 	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	/* not after close entry */ \
686 	(x)->te_nowsrv = B_TRUE;	/* write service processing stops */ \
687 	(void) putbq((x)->te_wq, mp);	\
688 }
689 
690 #define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } /* clears te_nowsrv first */
691 #define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } /* clears te_nowsrv first */
692 
693 /*
694  * STREAMS driver glue data structures.
695  */
696 static	struct	module_info	tl_minfo = {
697 	TL_ID,			/* mi_idnum */
698 	TL_NAME,		/* mi_idname */
699 	TL_MINPSZ,		/* mi_minpsz */
700 	TL_MAXPSZ,		/* mi_maxpsz */
701 	TL_HIWAT,		/* mi_hiwat */
702 	TL_LOWAT		/* mi_lowat */
703 };
704 
705 static	struct	qinit	tl_rinit = {
706 	NULL,			/* qi_putp */
707 	(int (*)())tl_rsrv,	/* qi_srvp */
708 	tl_open,		/* qi_qopen */
709 	tl_close,		/* qi_qclose */
710 	NULL,			/* qi_qadmin */
711 	&tl_minfo,		/* qi_minfo */
712 	NULL			/* qi_mstat */
713 };
714 
715 static	struct	qinit	tl_winit = {
716 	(int (*)())tl_wput,	/* qi_putp */
717 	(int (*)())tl_wsrv,	/* qi_srvp */
718 	NULL,			/* qi_qopen */
719 	NULL,			/* qi_qclose */
720 	NULL,			/* qi_qadmin */
721 	&tl_minfo,		/* qi_minfo */
722 	NULL			/* qi_mstat */
723 };
724 
725 static	struct streamtab	tlinfo = {
726 	&tl_rinit,		/* st_rdinit */
727 	&tl_winit,		/* st_wrinit */
728 	NULL,			/* st_muxrinit */
729 	NULL			/* st_muxwrinit */
730 };
731 
732 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
733     nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
734 
735 static struct modldrv modldrv = {
736 	&mod_driverops,		/* Type of module -- pseudo driver here */
737 	"TPI Local Transport (tl)",
738 	&tl_devops,		/* driver ops */
739 };
740 
741 /*
742  * Module linkage information for the kernel.
743  */
744 static struct modlinkage modlinkage = {
745 	MODREV_1,
746 	&modldrv,
747 	NULL
748 };
749 
750 /*
751  * Templates for response to info request
752  * Check sanity of unlimited connect data etc.
753  */
754 
755 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
756 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
757 
758 static struct T_info_ack tl_cots_info_ack =
759 	{
760 		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
761 		T_INFINITE,	/* TSDU size */
762 		T_INFINITE,	/* ETSDU size */
763 		T_INFINITE,	/* CDATA_size */
764 		T_INFINITE,	/* DDATA_size */
765 		T_INFINITE,	/* ADDR_size  */
766 		T_INFINITE,	/* OPT_size */
767 		0,		/* TIDU_size - fill at run time */
768 		T_COTS,		/* SERV_type */
769 		-1,		/* CURRENT_state */
770 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
771 	};
772 
773 static struct T_info_ack tl_clts_info_ack =
774 	{
775 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
776 		0,		/* TSDU_size - fill at run time */
777 		-2,		/* ETSDU_size -2 => not supported */
778 		-2,		/* CDATA_size -2 => not supported */
779 		-2,		/* DDATA_size  -2 => not supported */
780 		-1,		/* ADDR_size -1 => unlimited */
781 		-1,		/* OPT_size */
782 		0,		/* TIDU_size - fill at run time */
783 		T_CLTS,		/* SERV_type */
784 		-1,		/* CURRENT_state */
785 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
786 	};
787 
788 /*
789  * private copy of devinfo pointer used in tl_info
790  */
791 static dev_info_t *tl_dip;
792 
793 /*
794  * Endpoints cache.
795  */
796 static kmem_cache_t *tl_cache;
797 /*
798  * Minor number space.
799  */
800 static id_space_t *tl_minors;
801 
802 /*
803  * Default Data Unit size.
804  */
805 static t_scalar_t tl_tidusz;
806 
807 /*
808  * Size of hash tables.
809  */
810 static size_t tl_hash_size = TL_HASH_SIZE;
811 
812 /*
813  * Debug and test variable ONLY. Turn off T_CONN_IND queueing
814  * for sockets.
815  */
816 static int tl_disable_early_connect = 0;
817 static int tl_client_closing_when_accepting;
818 
819 static int tl_serializer_noswitch;
820 
821 /*
822  * LOCAL FUNCTION PROTOTYPES
823  * -------------------------
824  */
825 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
826 static void tl_do_proto(mblk_t *, tl_endpt_t *);
827 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
828 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
829 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
830 	t_scalar_t);
831 static void tl_bind(mblk_t *, tl_endpt_t *);
832 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
833 static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
834 static void tl_unbind(mblk_t *, tl_endpt_t *);
835 static void tl_optmgmt(queue_t *, mblk_t *);
836 static void tl_conn_req(queue_t *, mblk_t *);
837 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
838 static void tl_conn_res(mblk_t *, tl_endpt_t *);
839 static void tl_discon_req(mblk_t *, tl_endpt_t *);
840 static void tl_capability_req(mblk_t *, tl_endpt_t *);
841 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
842 static void tl_info_req(mblk_t *, tl_endpt_t *);
843 static void tl_addr_req(mblk_t *, tl_endpt_t *);
844 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
845 static void tl_data(mblk_t  *, tl_endpt_t *);
846 static void tl_exdata(mblk_t *, tl_endpt_t *);
847 static void tl_ordrel(mblk_t *, tl_endpt_t *);
848 static void tl_unitdata(mblk_t *, tl_endpt_t *);
849 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
850 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
851 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
852 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
853 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
854 static void tl_cl_backenable(tl_endpt_t *);
855 static void tl_co_unconnect(tl_endpt_t *);
856 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
857 static void tl_discon_ind(tl_endpt_t *, uint32_t);
858 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
859 static mblk_t *tl_ordrel_ind_alloc(void);
860 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
861 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
862 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
863 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
864 static void tl_icon_freemsgs(mblk_t **);
865 static void tl_merror(queue_t *, mblk_t *, int);
866 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
867 static int tl_default_opt(queue_t *, int, int, uchar_t *);
868 static int tl_get_opt(queue_t *, int, int, uchar_t *);
869 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
870     uchar_t *, void *, cred_t *);
871 static void tl_memrecover(queue_t *, mblk_t *, size_t);
872 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
873 static void tl_free(tl_endpt_t *);
874 static int  tl_constructor(void *, void *, int);
875 static void tl_destructor(void *, void *);
876 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
877 static tl_serializer_t *tl_serializer_alloc(int);
878 static void tl_serializer_refhold(tl_serializer_t *);
879 static void tl_serializer_refrele(tl_serializer_t *);
880 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
881 static void tl_serializer_exit(tl_endpt_t *);
882 static boolean_t tl_noclose(tl_endpt_t *);
883 static void tl_closeok(tl_endpt_t *);
884 static void tl_refhold(tl_endpt_t *);
885 static void tl_refrele(tl_endpt_t *);
886 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
887 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
888 static void tl_close_ser(mblk_t *, tl_endpt_t *);
889 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
890 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
891 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
892 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
893 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
894 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
895 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
896 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
897 static void tl_addr_unbind(tl_endpt_t *);
898 
899 /*
900  * Intialize option database object for TL
901  */
902 
903 optdb_obj_t tl_opt_obj = {
904 	tl_default_opt,		/* TL default value function pointer */
905 	tl_get_opt,		/* TL get function pointer */
906 	tl_set_opt,		/* TL set function pointer */
907 	TL_OPT_ARR_CNT,		/* TL option database count of entries */
908 	tl_opt_arr,		/* TL option database */
909 	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
910 	tl_valid_levels_arr	/* TL valid level array */
911 };
912 
/*
 * Logical operations.
 *
 * IMPLY(X, Y) means that X implies Y i.e. when X is true, Y
 * should also be true.  Y is only evaluated when X is true.
 *
 * EQUIV(X, Y) is logical equivalence. Both X and Y should be true or false at
 * the same time.  Each operand is normalized to 0/1 with "!" and the results
 * are compared, so X and Y are each evaluated exactly once and arguments with
 * side effects are safe.
 */
#define	IMPLY(X, Y)	(!(X) || (Y))
#define	EQUIV(X, Y)	(!(X) == !(Y))
924 
925 /*
926  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
927  * ---------------------------------------
928  */
929 
930 /*
931  * Loadable module routines
932  */
933 int
934 _init(void)
935 {
936 	return (mod_install(&modlinkage));
937 }
938 
939 int
940 _fini(void)
941 {
942 	return (mod_remove(&modlinkage));
943 }
944 
945 int
946 _info(struct modinfo *modinfop)
947 {
948 	return (mod_info(&modlinkage, modinfop));
949 }
950 
951 /*
952  * Driver Entry Points and Other routines
953  */
954 static int
955 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
956 {
957 	int i;
958 	char name[32];
959 
960 	/*
961 	 * Resume from a checkpoint state.
962 	 */
963 	if (cmd == DDI_RESUME)
964 		return (DDI_SUCCESS);
965 
966 	if (cmd != DDI_ATTACH)
967 		return (DDI_FAILURE);
968 
969 	/*
970 	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
971 	 * streams message sizes can be unlimited. We use a defined constant
972 	 * instead.
973 	 */
974 	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
975 
976 	/*
977 	 * Create subdevices for each transport.
978 	 */
979 	for (i = 0; i < TL_UNUSED; i++) {
980 		if (ddi_create_minor_node(devi,
981 		    tl_transports[i].tr_name,
982 		    S_IFCHR, tl_transports[i].tr_minor,
983 		    DDI_PSEUDO, NULL) == DDI_FAILURE) {
984 			ddi_remove_minor_node(devi, NULL);
985 			return (DDI_FAILURE);
986 		}
987 	}
988 
989 	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
990 	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
991 
992 	if (tl_cache == NULL) {
993 		ddi_remove_minor_node(devi, NULL);
994 		return (DDI_FAILURE);
995 	}
996 
997 	tl_minors = id_space_create("tl_minor_space",
998 	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
999 
1000 	/*
1001 	 * Create ID space for minor numbers
1002 	 */
1003 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1004 		tl_transport_state_t *t = &tl_transports[i];
1005 
1006 		if (i == TL_UNUSED)
1007 			continue;
1008 
1009 		/* Socket COTSORD shares namespace with COTS */
1010 		if (i == TL_SOCK_COTSORD) {
1011 			t->tr_ai_hash =
1012 			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
1013 			ASSERT(t->tr_ai_hash != NULL);
1014 			t->tr_addr_hash =
1015 			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
1016 			ASSERT(t->tr_addr_hash != NULL);
1017 			continue;
1018 		}
1019 
1020 		/*
1021 		 * Create hash tables.
1022 		 */
1023 		(void) snprintf(name, sizeof (name), "%s_ai_hash",
1024 		    t->tr_name);
1025 #ifdef _ILP32
1026 		if (i & TL_SOCKET)
1027 			t->tr_ai_hash =
1028 			    mod_hash_create_idhash(name, tl_hash_size - 1,
1029 			    mod_hash_null_valdtor);
1030 		else
1031 			t->tr_ai_hash =
1032 			    mod_hash_create_ptrhash(name, tl_hash_size,
1033 			    mod_hash_null_valdtor, sizeof (queue_t));
1034 #else
1035 		t->tr_ai_hash =
1036 		    mod_hash_create_idhash(name, tl_hash_size - 1,
1037 		    mod_hash_null_valdtor);
1038 #endif /* _ILP32 */
1039 
1040 		if (i & TL_SOCKET) {
1041 			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1042 			    t->tr_name);
1043 			t->tr_addr_hash = mod_hash_create_ptrhash(name,
1044 			    tl_hash_size, mod_hash_null_valdtor,
1045 			    sizeof (uintptr_t));
1046 		} else {
1047 			(void) snprintf(name, sizeof (name), "%s_addr_hash",
1048 			    t->tr_name);
1049 			t->tr_addr_hash = mod_hash_create_extended(name,
1050 			    tl_hash_size, mod_hash_null_keydtor,
1051 			    mod_hash_null_valdtor,
1052 			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1053 		}
1054 
1055 		/* Create serializer for connectionless transports. */
1056 		if (i & TL_TICLTS)
1057 			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1058 	}
1059 
1060 	tl_dip = devi;
1061 
1062 	return (DDI_SUCCESS);
1063 }
1064 
1065 static int
1066 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1067 {
1068 	int i;
1069 
1070 	if (cmd == DDI_SUSPEND)
1071 		return (DDI_SUCCESS);
1072 
1073 	if (cmd != DDI_DETACH)
1074 		return (DDI_FAILURE);
1075 
1076 	/*
1077 	 * Destroy arenas and hash tables.
1078 	 */
1079 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1080 		tl_transport_state_t *t = &tl_transports[i];
1081 
1082 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1083 			continue;
1084 
1085 		ASSERT(EQUIV(i & TL_TICLTS, t->tr_serializer != NULL));
1086 		if (t->tr_serializer != NULL) {
1087 			tl_serializer_refrele(t->tr_serializer);
1088 			t->tr_serializer = NULL;
1089 		}
1090 
1091 #ifdef _ILP32
1092 		if (i & TL_SOCKET)
1093 			mod_hash_destroy_idhash(t->tr_ai_hash);
1094 		else
1095 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1096 #else
1097 		mod_hash_destroy_idhash(t->tr_ai_hash);
1098 #endif /* _ILP32 */
1099 		t->tr_ai_hash = NULL;
1100 		if (i & TL_SOCKET)
1101 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1102 		else
1103 			mod_hash_destroy_hash(t->tr_addr_hash);
1104 		t->tr_addr_hash = NULL;
1105 	}
1106 
1107 	kmem_cache_destroy(tl_cache);
1108 	tl_cache = NULL;
1109 	id_space_destroy(tl_minors);
1110 	tl_minors = NULL;
1111 	ddi_remove_minor_node(devi, NULL);
1112 	return (DDI_SUCCESS);
1113 }
1114 
1115 /* ARGSUSED */
1116 static int
1117 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1118 {
1119 
1120 	int retcode = DDI_FAILURE;
1121 
1122 	switch (infocmd) {
1123 
1124 	case DDI_INFO_DEVT2DEVINFO:
1125 		if (tl_dip != NULL) {
1126 			*result = (void *)tl_dip;
1127 			retcode = DDI_SUCCESS;
1128 		}
1129 		break;
1130 
1131 	case DDI_INFO_DEVT2INSTANCE:
1132 		*result = (void *)0;
1133 		retcode = DDI_SUCCESS;
1134 		break;
1135 
1136 	default:
1137 		break;
1138 	}
1139 	return (retcode);
1140 }
1141 
1142 /*
1143  * Endpoint reference management.
1144  */
1145 static void
1146 tl_refhold(tl_endpt_t *tep)
1147 {
1148 	atomic_add_32(&tep->te_refcnt, 1);
1149 }
1150 
1151 static void
1152 tl_refrele(tl_endpt_t *tep)
1153 {
1154 	ASSERT(tep->te_refcnt != 0);
1155 
1156 	if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1157 		tl_free(tep);
1158 }
1159 
1160 /*ARGSUSED*/
1161 static int
1162 tl_constructor(void *buf, void *cdrarg, int kmflags)
1163 {
1164 	tl_endpt_t *tep = buf;
1165 
1166 	bzero(tep, sizeof (tl_endpt_t));
1167 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1168 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1169 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1170 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1171 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1172 
1173 	return (0);
1174 }
1175 
1176 /*ARGSUSED*/
1177 static void
1178 tl_destructor(void *buf, void *cdrarg)
1179 {
1180 	tl_endpt_t *tep = buf;
1181 
1182 	mutex_destroy(&tep->te_closelock);
1183 	cv_destroy(&tep->te_closecv);
1184 	mutex_destroy(&tep->te_srv_lock);
1185 	cv_destroy(&tep->te_srv_cv);
1186 	mutex_destroy(&tep->te_ser_lock);
1187 }
1188 
1189 static void
1190 tl_free(tl_endpt_t *tep)
1191 {
1192 	ASSERT(tep->te_refcnt == 0);
1193 	ASSERT(tep->te_transport != NULL);
1194 	ASSERT(tep->te_rq == NULL);
1195 	ASSERT(tep->te_wq == NULL);
1196 	ASSERT(tep->te_ser != NULL);
1197 	ASSERT(tep->te_ser_count == 0);
1198 	ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1199 
1200 	if (IS_SOCKET(tep)) {
1201 		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1202 		ASSERT(tep->te_abuf == &tep->te_uxaddr);
1203 		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1204 		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1205 	} else if (tep->te_abuf != NULL) {
1206 		kmem_free(tep->te_abuf, tep->te_alen);
1207 		tep->te_alen = -1; /* uninitialized */
1208 		tep->te_abuf = NULL;
1209 	} else {
1210 		ASSERT(tep->te_alen == -1);
1211 	}
1212 
1213 	id_free(tl_minors, tep->te_minor);
1214 	ASSERT(tep->te_credp == NULL);
1215 
1216 	if (tep->te_hash_hndl != NULL)
1217 		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1218 
1219 	if (IS_COTS(tep)) {
1220 		TL_REMOVE_PEER(tep->te_conp);
1221 		TL_REMOVE_PEER(tep->te_oconp);
1222 		tl_serializer_refrele(tep->te_ser);
1223 		tep->te_ser = NULL;
1224 		ASSERT(tep->te_nicon == 0);
1225 		ASSERT(list_head(&tep->te_iconp) == NULL);
1226 	} else {
1227 		ASSERT(tep->te_lastep == NULL);
1228 		ASSERT(list_head(&tep->te_flowlist) == NULL);
1229 		ASSERT(tep->te_flowq == NULL);
1230 	}
1231 
1232 	ASSERT(tep->te_bufcid == 0);
1233 	ASSERT(tep->te_timoutid == 0);
1234 	bzero(&tep->te_ap, sizeof (tep->te_ap));
1235 	tep->te_acceptor_id = 0;
1236 
1237 	ASSERT(tep->te_closewait == 0);
1238 	ASSERT(!tep->te_rsrv_active);
1239 	ASSERT(!tep->te_wsrv_active);
1240 	tep->te_closing = 0;
1241 	tep->te_nowsrv = B_FALSE;
1242 	tep->te_flag = 0;
1243 
1244 	kmem_cache_free(tl_cache, tep);
1245 }
1246 
1247 /*
1248  * Allocate/free reference-counted wrappers for serializers.
1249  */
1250 static tl_serializer_t *
1251 tl_serializer_alloc(int flags)
1252 {
1253 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1254 	serializer_t *ser;
1255 
1256 	if (s == NULL)
1257 		return (NULL);
1258 
1259 	ser = serializer_create(flags);
1260 
1261 	if (ser == NULL) {
1262 		kmem_free(s, sizeof (tl_serializer_t));
1263 		return (NULL);
1264 	}
1265 
1266 	s->ts_refcnt = 1;
1267 	s->ts_serializer = ser;
1268 	return (s);
1269 }
1270 
1271 static void
1272 tl_serializer_refhold(tl_serializer_t *s)
1273 {
1274 	atomic_add_32(&s->ts_refcnt, 1);
1275 }
1276 
1277 static void
1278 tl_serializer_refrele(tl_serializer_t *s)
1279 {
1280 	if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1281 		serializer_destroy(s->ts_serializer);
1282 		kmem_free(s, sizeof (tl_serializer_t));
1283 	}
1284 }
1285 
1286 /*
1287  * Post a request on the endpoint serializer. For COTS transports keep track of
1288  * the number of pending requests.
1289  */
1290 static void
1291 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1292 {
1293 	if (IS_COTS(tep)) {
1294 		mutex_enter(&tep->te_ser_lock);
1295 		tep->te_ser_count++;
1296 		mutex_exit(&tep->te_ser_lock);
1297 	}
1298 	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1299 }
1300 
1301 /*
1302  * Complete processing the request on the serializer. Decrement the counter for
1303  * pending requests for COTS transports.
1304  */
1305 static void
1306 tl_serializer_exit(tl_endpt_t *tep)
1307 {
1308 	if (IS_COTS(tep)) {
1309 		mutex_enter(&tep->te_ser_lock);
1310 		ASSERT(tep->te_ser_count != 0);
1311 		tep->te_ser_count--;
1312 		mutex_exit(&tep->te_ser_lock);
1313 	}
1314 }
1315 
1316 /*
1317  * Hash management functions.
1318  */
1319 
1320 /*
1321  * Return TRUE if two addresses are equal, false otherwise.
1322  */
1323 static boolean_t
1324 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1325 {
1326 	return ((ap1->ta_alen > 0) &&
1327 	    (ap1->ta_alen == ap2->ta_alen) &&
1328 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1329 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1330 }
1331 
1332 /*
1333  * This function is called whenever an endpoint is found in the hash table.
1334  */
1335 /* ARGSUSED0 */
1336 static void
1337 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1338 {
1339 	tl_refhold((tl_endpt_t *)val);
1340 }
1341 
1342 /*
1343  * Address hash function.
1344  */
1345 /* ARGSUSED */
1346 static uint_t
1347 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1348 {
1349 	tl_addr_t *ap = (tl_addr_t *)key;
1350 	size_t	len = ap->ta_alen;
1351 	uchar_t *p = ap->ta_abuf;
1352 	uint_t i, g;
1353 
1354 	ASSERT((len > 0) && (p != NULL));
1355 
1356 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1357 		i = (i << 4) + (*p);
1358 		if ((g = (i & 0xf0000000U)) != 0) {
1359 			i ^= (g >> 24);
1360 			i ^= g;
1361 		}
1362 	}
1363 	return (i);
1364 }
1365 
1366 /*
1367  * This function is used by hash lookups. It compares two generic addresses.
1368  */
1369 static int
1370 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1371 {
1372 #ifdef 	DEBUG
1373 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1374 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1375 
1376 	ASSERT(key1 != NULL);
1377 	ASSERT(key2 != NULL);
1378 
1379 	ASSERT(ap1->ta_abuf != NULL);
1380 	ASSERT(ap2->ta_abuf != NULL);
1381 	ASSERT(ap1->ta_alen > 0);
1382 	ASSERT(ap2->ta_alen > 0);
1383 #endif
1384 
1385 	return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1386 }
1387 
1388 /*
1389  * Prevent endpoint from closing if possible.
1390  * Return B_TRUE on success, B_FALSE on failure.
1391  */
1392 static boolean_t
1393 tl_noclose(tl_endpt_t *tep)
1394 {
1395 	boolean_t rc = B_FALSE;
1396 
1397 	mutex_enter(&tep->te_closelock);
1398 	if (! tep->te_closing) {
1399 		ASSERT(tep->te_closewait == 0);
1400 		tep->te_closewait++;
1401 		rc = B_TRUE;
1402 	}
1403 	mutex_exit(&tep->te_closelock);
1404 	return (rc);
1405 }
1406 
1407 /*
1408  * Allow endpoint to close if needed.
1409  */
1410 static void
1411 tl_closeok(tl_endpt_t *tep)
1412 {
1413 	ASSERT(tep->te_closewait > 0);
1414 	mutex_enter(&tep->te_closelock);
1415 	ASSERT(tep->te_closewait == 1);
1416 	tep->te_closewait--;
1417 	cv_signal(&tep->te_closecv);
1418 	mutex_exit(&tep->te_closelock);
1419 }
1420 
1421 /*
1422  * STREAMS open entry point.
1423  */
1424 /* ARGSUSED */
1425 static int
1426 tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
1427 {
1428 	tl_endpt_t *tep;
1429 	minor_t	    minor = getminor(*devp);
1430 
1431 	/*
1432 	 * Driver is called directly. Both CLONEOPEN and MODOPEN
1433 	 * are illegal
1434 	 */
1435 	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1436 		return (ENXIO);
1437 
1438 	if (rq->q_ptr != NULL)
1439 		return (0);
1440 
1441 	/* Minor number should specify the mode used for the driver. */
1442 	if ((minor >= TL_UNUSED))
1443 		return (ENXIO);
1444 
1445 	if (oflag & SO_SOCKSTR) {
1446 		minor |= TL_SOCKET;
1447 	}
1448 
1449 	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1450 	tep->te_refcnt = 1;
1451 	tep->te_cpid = curproc->p_pid;
1452 	rq->q_ptr = WR(rq)->q_ptr = tep;
1453 	tep->te_state = TS_UNBND;
1454 	tep->te_credp = credp;
1455 	crhold(credp);
1456 	tep->te_zoneid = getzoneid();
1457 
1458 	tep->te_flag = minor & TL_MINOR_MASK;
1459 	tep->te_transport = &tl_transports[minor];
1460 
1461 	/* Allocate a unique minor number for this instance. */
1462 	tep->te_minor = (minor_t)id_alloc(tl_minors);
1463 
1464 	/* Reserve hash handle for bind(). */
1465 	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1466 
1467 	/* Transport-specific initialization */
1468 	if (IS_COTS(tep)) {
1469 		/* Use private serializer */
1470 		tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1471 
1472 		/* Create list for pending connections */
1473 		list_create(&tep->te_iconp, sizeof (tl_icon_t),
1474 		    offsetof(tl_icon_t, ti_node));
1475 		tep->te_qlen = 0;
1476 		tep->te_nicon = 0;
1477 		tep->te_oconp = NULL;
1478 		tep->te_conp = NULL;
1479 	} else {
1480 		/* Use shared serializer */
1481 		tep->te_ser = tep->te_transport->tr_serializer;
1482 		bzero(&tep->te_flows, sizeof (list_node_t));
1483 		/* Create list for flow control */
1484 		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1485 		    offsetof(tl_endpt_t, te_flows));
1486 		tep->te_flowq = NULL;
1487 		tep->te_lastep = NULL;
1488 
1489 	}
1490 
1491 	/* Initialize endpoint address */
1492 	if (IS_SOCKET(tep)) {
1493 		/* Socket-specific address handling. */
1494 		tep->te_alen = TL_SOUX_ADDRLEN;
1495 		tep->te_abuf = &tep->te_uxaddr;
1496 		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1497 		tep->te_magic = SOU_MAGIC_IMPLICIT;
1498 	} else {
1499 		tep->te_alen = -1;
1500 		tep->te_abuf = NULL;
1501 	}
1502 
1503 	/* clone the driver */
1504 	*devp = makedevice(getmajor(*devp), tep->te_minor);
1505 
1506 	tep->te_rq = rq;
1507 	tep->te_wq = WR(rq);
1508 
1509 #ifdef	_ILP32
1510 	if (IS_SOCKET(tep))
1511 		tep->te_acceptor_id = tep->te_minor;
1512 	else
1513 		tep->te_acceptor_id = (t_uscalar_t)rq;
1514 #else
1515 	tep->te_acceptor_id = tep->te_minor;
1516 #endif	/* _ILP32 */
1517 
1518 
1519 	qprocson(rq);
1520 
1521 	/*
1522 	 * Insert acceptor ID in the hash. The AI hash always sleeps on
1523 	 * insertion so insertion can't fail.
1524 	 */
1525 	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1526 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1527 	    (mod_hash_val_t)tep);
1528 
1529 	return (0);
1530 }
1531 
1532 /* ARGSUSED1 */
1533 static int
1534 tl_close(queue_t *rq, int flag,	cred_t *credp)
1535 {
1536 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1537 	tl_endpt_t *elp = NULL;
1538 	queue_t *wq = tep->te_wq;
1539 	int rc;
1540 
1541 	ASSERT(wq == WR(rq));
1542 
1543 	/*
1544 	 * Remove the endpoint from acceptor hash.
1545 	 */
1546 	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1547 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1548 	    (mod_hash_val_t *)&elp);
1549 	ASSERT(rc == 0 && tep == elp);
1550 	if ((rc != 0) || (tep != elp)) {
1551 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
1552 		    SL_TRACE|SL_ERROR,
1553 		    "tl_close:inconsistency in AI hash"));
1554 	}
1555 
1556 	/*
1557 	 * Wait till close is safe, then mark endpoint as closing.
1558 	 */
1559 	mutex_enter(&tep->te_closelock);
1560 	while (tep->te_closewait)
1561 		cv_wait(&tep->te_closecv, &tep->te_closelock);
1562 	tep->te_closing = B_TRUE;
1563 	/*
1564 	 * Will wait for the serializer part of the close to finish, so set
1565 	 * te_closewait now.
1566 	 */
1567 	tep->te_closewait = 1;
1568 	tep->te_nowsrv = B_FALSE;
1569 	mutex_exit(&tep->te_closelock);
1570 
1571 	/*
1572 	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1573 	 * It is safe because close will wait for tl_close_ser to finish.
1574 	 */
1575 	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1576 
1577 	/*
1578 	 * Wait for the first phase of close to complete before qprocsoff().
1579 	 */
1580 	mutex_enter(&tep->te_closelock);
1581 	while (tep->te_closewait)
1582 		cv_wait(&tep->te_closecv, &tep->te_closelock);
1583 	mutex_exit(&tep->te_closelock);
1584 
1585 	qprocsoff(rq);
1586 
1587 	if (tep->te_bufcid) {
1588 		qunbufcall(rq, tep->te_bufcid);
1589 		tep->te_bufcid = 0;
1590 	}
1591 	if (tep->te_timoutid) {
1592 		(void) quntimeout(rq, tep->te_timoutid);
1593 		tep->te_timoutid = 0;
1594 	}
1595 
1596 	/*
1597 	 * Finish close behind serializer.
1598 	 *
1599 	 * For a CLTS endpoint increase a refcount and continue close processing
1600 	 * with serializer protection. This processing may happen asynchronously
1601 	 * with the completion of tl_close().
1602 	 *
1603 	 * Fot a COTS endpoint wait before destroying tep since the serializer
1604 	 * may go away together with tep and we need to destroy serializer
1605 	 * outside of serializer context.
1606 	 */
1607 	ASSERT(tep->te_closewait == 0);
1608 	if (IS_COTS(tep))
1609 		tep->te_closewait = 1;
1610 	else
1611 		tl_refhold(tep);
1612 
1613 	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1614 
1615 	/*
1616 	 * For connection-oriented transports wait for all serializer activity
1617 	 * to settle down.
1618 	 */
1619 	if (IS_COTS(tep)) {
1620 		mutex_enter(&tep->te_closelock);
1621 		while (tep->te_closewait)
1622 			cv_wait(&tep->te_closecv, &tep->te_closelock);
1623 		mutex_exit(&tep->te_closelock);
1624 	}
1625 
1626 	crfree(tep->te_credp);
1627 	tep->te_credp = NULL;
1628 	tep->te_wq = NULL;
1629 	tl_refrele(tep);
1630 	/*
1631 	 * tep is likely to be destroyed now, so can't reference it any more.
1632 	 */
1633 
1634 	rq->q_ptr = wq->q_ptr = NULL;
1635 	return (0);
1636 }
1637 
1638 /*
1639  * First phase of close processing done behind the serializer.
1640  *
1641  * Do not drop the reference in the end - tl_close() wants this reference to
1642  * stay.
1643  */
1644 /* ARGSUSED0 */
1645 static void
1646 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1647 {
1648 	ASSERT(tep->te_closing);
1649 	ASSERT(tep->te_closewait == 1);
1650 	ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1651 
1652 	tep->te_flag |= TL_CLOSE_SER;
1653 
1654 	/*
1655 	 * Drain out all messages on queue except for TL_TICOTS where the
1656 	 * abortive release semantics permit discarding of data on close
1657 	 */
1658 	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1659 		tl_wsrv_ser(NULL, tep);
1660 	}
1661 
1662 	/* Remove address from hash table. */
1663 	tl_addr_unbind(tep);
1664 	/*
1665 	 * qprocsoff() gets confused when q->q_next is not NULL on the write
1666 	 * queue of the driver, so clear these before qprocsoff() is called.
1667 	 * Also clear q_next for the peer since this queue is going away.
1668 	 */
1669 	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1670 		tl_endpt_t *peer_tep = tep->te_conp;
1671 
1672 		tep->te_wq->q_next = NULL;
1673 		if ((peer_tep != NULL) && !peer_tep->te_closing)
1674 			peer_tep->te_wq->q_next = NULL;
1675 	}
1676 
1677 	tep->te_rq = NULL;
1678 
1679 	/* wake up tl_close() */
1680 	tl_closeok(tep);
1681 	tl_serializer_exit(tep);
1682 }
1683 
1684 /*
1685  * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1686  * the reference for CLTS.
1687  *
1688  * Called from serializer. Should drop reference count for CLTS only.
1689  */
1690 /* ARGSUSED0 */
1691 static void
1692 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1693 {
1694 	ASSERT(tep->te_closing);
1695 	ASSERT(IMPLY(IS_CLTS(tep), tep->te_closewait == 0));
1696 	ASSERT(IMPLY(IS_COTS(tep), tep->te_closewait == 1));
1697 
1698 	tep->te_state = -1;	/* Uninitialized */
1699 	if (IS_COTS(tep)) {
1700 		tl_co_unconnect(tep);
1701 	} else {
1702 		/* Connectionless specific cleanup */
1703 		TL_REMOVE_PEER(tep->te_lastep);
1704 		/*
1705 		 * Backenable anybody that is flow controlled waiting for
1706 		 * this endpoint.
1707 		 */
1708 		tl_cl_backenable(tep);
1709 		if (tep->te_flowq != NULL) {
1710 			list_remove(&(tep->te_flowq->te_flowlist), tep);
1711 			tep->te_flowq = NULL;
1712 		}
1713 	}
1714 
1715 	tl_serializer_exit(tep);
1716 	if (IS_COTS(tep))
1717 		tl_closeok(tep);
1718 	else
1719 		tl_refrele(tep);
1720 }
1721 
1722 /*
1723  * STREAMS write-side put procedure.
1724  * Enter serializer for most of the processing.
1725  *
1726  * The T_CONN_REQ is processed outside of serializer.
1727  */
1728 static void
1729 tl_wput(queue_t *wq, mblk_t *mp)
1730 {
1731 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
1732 	ssize_t			msz = MBLKL(mp);
1733 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
1734 	tlproc_t		*tl_proc = NULL;
1735 
1736 	switch (DB_TYPE(mp)) {
1737 	case M_DATA:
1738 		/* Only valid for connection-oriented transports */
1739 		if (IS_CLTS(tep)) {
1740 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1741 			    SL_TRACE|SL_ERROR,
1742 			    "tl_wput:M_DATA invalid for ticlts driver"));
1743 			tl_merror(wq, mp, EPROTO);
1744 			return;
1745 		}
1746 		tl_proc = tl_wput_data_ser;
1747 		break;
1748 
1749 	case M_IOCTL:
1750 		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1751 		case TL_IOC_CREDOPT:
1752 			/* FALLTHROUGH */
1753 		case TL_IOC_UCREDOPT:
1754 			/*
1755 			 * Serialize endpoint state change.
1756 			 */
1757 			tl_proc = tl_do_ioctl_ser;
1758 			break;
1759 
1760 		default:
1761 			miocnak(wq, mp, 0, EINVAL);
1762 			return;
1763 		}
1764 		break;
1765 
1766 	case M_FLUSH:
1767 		/*
1768 		 * do canonical M_FLUSH processing
1769 		 */
1770 		if (*mp->b_rptr & FLUSHW) {
1771 			flushq(wq, FLUSHALL);
1772 			*mp->b_rptr &= ~FLUSHW;
1773 		}
1774 		if (*mp->b_rptr & FLUSHR) {
1775 			flushq(RD(wq), FLUSHALL);
1776 			qreply(wq, mp);
1777 		} else {
1778 			freemsg(mp);
1779 		}
1780 		return;
1781 
1782 	case M_PROTO:
1783 		if (msz < sizeof (prim->type)) {
1784 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1785 			    SL_TRACE|SL_ERROR,
1786 			    "tl_wput:M_PROTO data too short"));
1787 			tl_merror(wq, mp, EPROTO);
1788 			return;
1789 		}
1790 		switch (prim->type) {
1791 		case T_OPTMGMT_REQ:
1792 		case T_SVR4_OPTMGMT_REQ:
1793 			/*
1794 			 * Process TPI option management requests immediately
1795 			 * in put procedure regardless of in-order processing
1796 			 * of already queued messages.
1797 			 * (Note: This driver supports AF_UNIX socket
1798 			 * implementation.  Unless we implement this processing,
1799 			 * setsockopt() on socket endpoint will block on flow
1800 			 * controlled endpoints which it should not. That is
1801 			 * required for successful execution of VSU socket tests
1802 			 * and is consistent with BSD socket behavior).
1803 			 */
1804 			tl_optmgmt(wq, mp);
1805 			return;
1806 		case O_T_BIND_REQ:
1807 		case T_BIND_REQ:
1808 			tl_proc = tl_bind_ser;
1809 			break;
1810 		case T_CONN_REQ:
1811 			if (IS_CLTS(tep)) {
1812 				tl_merror(wq, mp, EPROTO);
1813 				return;
1814 			}
1815 			tl_conn_req(wq, mp);
1816 			return;
1817 		case T_DATA_REQ:
1818 		case T_OPTDATA_REQ:
1819 		case T_EXDATA_REQ:
1820 		case T_ORDREL_REQ:
1821 			tl_proc = tl_putq_ser;
1822 			break;
1823 		case T_UNITDATA_REQ:
1824 			if (IS_COTS(tep) ||
1825 			    (msz < sizeof (struct T_unitdata_req))) {
1826 				tl_merror(wq, mp, EPROTO);
1827 				return;
1828 			}
1829 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1830 				tl_proc = tl_unitdata_ser;
1831 			} else {
1832 				tl_proc = tl_putq_ser;
1833 			}
1834 			break;
1835 		default:
1836 			/*
1837 			 * process in service procedure if message already
1838 			 * queued (maintain in-order processing)
1839 			 */
1840 			if (wq->q_first != NULL) {
1841 				tl_proc = tl_putq_ser;
1842 			} else {
1843 				tl_proc = tl_wput_ser;
1844 			}
1845 			break;
1846 		}
1847 		break;
1848 
1849 	case M_PCPROTO:
1850 		/*
1851 		 * Check that the message has enough data to figure out TPI
1852 		 * primitive.
1853 		 */
1854 		if (msz < sizeof (prim->type)) {
1855 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1856 			    SL_TRACE|SL_ERROR,
1857 			    "tl_wput:M_PCROTO data too short"));
1858 			tl_merror(wq, mp, EPROTO);
1859 			return;
1860 		}
1861 		switch (prim->type) {
1862 		case T_CAPABILITY_REQ:
1863 			tl_capability_req(mp, tep);
1864 			return;
1865 		case T_INFO_REQ:
1866 			tl_proc = tl_info_req_ser;
1867 			break;
1868 		default:
1869 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1870 			    SL_TRACE|SL_ERROR,
1871 			    "tl_wput:unknown TPI msg primitive"));
1872 			tl_merror(wq, mp, EPROTO);
1873 			return;
1874 		}
1875 		break;
1876 	default:
1877 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1878 		    "tl_wput:default:unexpected Streams message"));
1879 		freemsg(mp);
1880 		return;
1881 	}
1882 
1883 	/*
1884 	 * Continue processing via serializer.
1885 	 */
1886 	ASSERT(tl_proc != NULL);
1887 	tl_refhold(tep);
1888 	tl_serializer_enter(tep, tl_proc, mp);
1889 }
1890 
1891 /*
1892  * Place message on the queue while preserving order.
1893  */
1894 static void
1895 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1896 {
1897 	if (tep->te_closing) {
1898 		tl_wput_ser(mp, tep);
1899 	} else {
1900 		TL_PUTQ(tep, mp);
1901 		tl_serializer_exit(tep);
1902 		tl_refrele(tep);
1903 	}
1904 
1905 }
1906 
1907 static void
1908 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1909 {
1910 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1911 
1912 	switch (DB_TYPE(mp)) {
1913 	case M_DATA:
1914 		tl_data(mp, tep);
1915 		break;
1916 	case M_PROTO:
1917 		tl_do_proto(mp, tep);
1918 		break;
1919 	default:
1920 		freemsg(mp);
1921 		break;
1922 	}
1923 }
1924 
1925 /*
1926  * Write side put procedure called from serializer.
1927  */
1928 static void
1929 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1930 {
1931 	tl_wput_common_ser(mp, tep);
1932 	tl_serializer_exit(tep);
1933 	tl_refrele(tep);
1934 }
1935 
1936 /*
1937  * M_DATA processing. Called from serializer.
1938  */
1939 static void
1940 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1941 {
1942 	tl_endpt_t	*peer_tep = tep->te_conp;
1943 	queue_t		*peer_rq;
1944 
1945 	ASSERT(DB_TYPE(mp) == M_DATA);
1946 	ASSERT(IS_COTS(tep));
1947 
1948 	ASSERT(IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer));
1949 
1950 	/*
1951 	 * fastpath for data. Ignore flow control if tep is closing.
1952 	 */
1953 	if ((peer_tep != NULL) &&
1954 	    !peer_tep->te_closing &&
1955 	    ((tep->te_state == TS_DATA_XFER) ||
1956 	    (tep->te_state == TS_WREQ_ORDREL)) &&
1957 	    (tep->te_wq != NULL) &&
1958 	    (tep->te_wq->q_first == NULL) &&
1959 	    ((peer_tep->te_state == TS_DATA_XFER) ||
1960 	    (peer_tep->te_state == TS_WREQ_ORDREL))	&&
1961 	    ((peer_rq = peer_tep->te_rq) != NULL) &&
1962 	    (canputnext(peer_rq) || tep->te_closing)) {
1963 		putnext(peer_rq, mp);
1964 	} else if (tep->te_closing) {
1965 		/*
1966 		 * It is possible that by the time we got here tep started to
1967 		 * close. If the write queue is not empty, and the state is
1968 		 * TS_DATA_XFER the data should be delivered in order, so we
1969 		 * call putq() instead of freeing the data.
1970 		 */
1971 		if ((tep->te_wq != NULL) &&
1972 		    ((tep->te_state == TS_DATA_XFER) ||
1973 		    (tep->te_state == TS_WREQ_ORDREL))) {
1974 			TL_PUTQ(tep, mp);
1975 		} else {
1976 			freemsg(mp);
1977 		}
1978 	} else {
1979 		TL_PUTQ(tep, mp);
1980 	}
1981 
1982 	tl_serializer_exit(tep);
1983 	tl_refrele(tep);
1984 }
1985 
1986 /*
1987  * Write side service routine.
1988  *
1989  * All actual processing happens within serializer which is entered
1990  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1991  * messages that need processing may have arrived, so tl_wsrv repeats until
1992  * queue is empty or te_nowsrv is set.
1993  */
1994 static void
1995 tl_wsrv(queue_t *wq)
1996 {
1997 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1998 
1999 	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
2000 		mutex_enter(&tep->te_srv_lock);
2001 		ASSERT(tep->te_wsrv_active == B_FALSE);
2002 		tep->te_wsrv_active = B_TRUE;
2003 		mutex_exit(&tep->te_srv_lock);
2004 
2005 		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2006 
2007 		/*
2008 		 * Wait for serializer job to complete.
2009 		 */
2010 		mutex_enter(&tep->te_srv_lock);
2011 		while (tep->te_wsrv_active) {
2012 			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2013 		}
2014 		cv_signal(&tep->te_srv_cv);
2015 		mutex_exit(&tep->te_srv_lock);
2016 	}
2017 }
2018 
2019 /*
2020  * Serialized write side processing of the STREAMS queue.
2021  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2022  * is NULL.
2023  */
2024 static void
2025 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2026 {
2027 	mblk_t *mp;
2028 	queue_t *wq = tep->te_wq;
2029 
2030 	ASSERT(wq != NULL);
2031 	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2032 		tl_wput_common_ser(mp, tep);
2033 	}
2034 
2035 	/*
2036 	 * Wakeup service routine unless called from close.
2037 	 * If ser_mp is specified, the caller is tl_wsrv().
2038 	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2039 	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2040 	 * be no matching tl_serializer_exit() in this case.
2041 	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2042 	 * waiting on te_srv_cv.
2043 	 */
2044 	if (ser_mp != NULL) {
2045 		/*
2046 		 * We are called from tl_wsrv.
2047 		 */
2048 		mutex_enter(&tep->te_srv_lock);
2049 		ASSERT(tep->te_wsrv_active);
2050 		tep->te_wsrv_active = B_FALSE;
2051 		cv_signal(&tep->te_srv_cv);
2052 		mutex_exit(&tep->te_srv_lock);
2053 		tl_serializer_exit(tep);
2054 	}
2055 }
2056 
2057 /*
2058  * Called when the stream is backenabled. Enter serializer and qenable everyone
2059  * flow controlled by tep.
2060  *
2061  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2062  * is possible that two instances of tl_rsrv will be running reusing the same
2063  * rsrv mblk.
2064  */
2065 static void
2066 tl_rsrv(queue_t *rq)
2067 {
2068 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2069 
2070 	ASSERT(rq->q_first == NULL);
2071 	ASSERT(tep->te_rsrv_active == 0);
2072 
2073 	tep->te_rsrv_active = B_TRUE;
2074 	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2075 	/*
2076 	 * Wait for serializer job to complete.
2077 	 */
2078 	mutex_enter(&tep->te_srv_lock);
2079 	while (tep->te_rsrv_active) {
2080 		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2081 	}
2082 	cv_signal(&tep->te_srv_cv);
2083 	mutex_exit(&tep->te_srv_lock);
2084 }
2085 
2086 /* ARGSUSED */
2087 static void
2088 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2089 {
2090 	tl_endpt_t *peer_tep;
2091 
2092 	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2093 		tl_cl_backenable(tep);
2094 	} else if (
2095 	    IS_COTS(tep) &&
2096 	    ((peer_tep = tep->te_conp) != NULL) &&
2097 	    !peer_tep->te_closing &&
2098 	    ((tep->te_state == TS_DATA_XFER) ||
2099 	    (tep->te_state == TS_WIND_ORDREL)||
2100 	    (tep->te_state == TS_WREQ_ORDREL))) {
2101 		TL_QENABLE(peer_tep);
2102 	}
2103 
2104 	/*
2105 	 * Wakeup read side service routine.
2106 	 */
2107 	mutex_enter(&tep->te_srv_lock);
2108 	ASSERT(tep->te_rsrv_active);
2109 	tep->te_rsrv_active = B_FALSE;
2110 	cv_signal(&tep->te_srv_cv);
2111 	mutex_exit(&tep->te_srv_lock);
2112 	tl_serializer_exit(tep);
2113 }
2114 
2115 /*
2116  * process M_PROTO messages. Always called from serializer.
2117  */
2118 static void
2119 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2120 {
2121 	ssize_t			msz = MBLKL(mp);
2122 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2123 
2124 	/* Message size was validated by tl_wput(). */
2125 	ASSERT(msz >= sizeof (prim->type));
2126 
2127 	switch (prim->type) {
2128 	case T_UNBIND_REQ:
2129 		tl_unbind(mp, tep);
2130 		break;
2131 
2132 	case T_ADDR_REQ:
2133 		tl_addr_req(mp, tep);
2134 		break;
2135 
2136 	case O_T_CONN_RES:
2137 	case T_CONN_RES:
2138 		if (IS_CLTS(tep)) {
2139 			tl_merror(tep->te_wq, mp, EPROTO);
2140 			break;
2141 		}
2142 		tl_conn_res(mp, tep);
2143 		break;
2144 
2145 	case T_DISCON_REQ:
2146 		if (IS_CLTS(tep)) {
2147 			tl_merror(tep->te_wq, mp, EPROTO);
2148 			break;
2149 		}
2150 		tl_discon_req(mp, tep);
2151 		break;
2152 
2153 	case T_DATA_REQ:
2154 		if (IS_CLTS(tep)) {
2155 			tl_merror(tep->te_wq, mp, EPROTO);
2156 			break;
2157 		}
2158 		tl_data(mp, tep);
2159 		break;
2160 
2161 	case T_OPTDATA_REQ:
2162 		if (IS_CLTS(tep)) {
2163 			tl_merror(tep->te_wq, mp, EPROTO);
2164 			break;
2165 		}
2166 		tl_data(mp, tep);
2167 		break;
2168 
2169 	case T_EXDATA_REQ:
2170 		if (IS_CLTS(tep)) {
2171 			tl_merror(tep->te_wq, mp, EPROTO);
2172 			break;
2173 		}
2174 		tl_exdata(mp, tep);
2175 		break;
2176 
2177 	case T_ORDREL_REQ:
2178 		if (! IS_COTSORD(tep)) {
2179 			tl_merror(tep->te_wq, mp, EPROTO);
2180 			break;
2181 		}
2182 		tl_ordrel(mp, tep);
2183 		break;
2184 
2185 	case T_UNITDATA_REQ:
2186 		if (IS_COTS(tep)) {
2187 			tl_merror(tep->te_wq, mp, EPROTO);
2188 			break;
2189 		}
2190 		tl_unitdata(mp, tep);
2191 		break;
2192 
2193 	default:
2194 		tl_merror(tep->te_wq, mp, EPROTO);
2195 		break;
2196 	}
2197 }
2198 
2199 /*
2200  * Process ioctl from serializer.
2201  * This is a wrapper around tl_do_ioctl().
2202  */
2203 static void
2204 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2205 {
2206 	if (! tep->te_closing)
2207 		tl_do_ioctl(mp, tep);
2208 	else
2209 		freemsg(mp);
2210 
2211 	tl_serializer_exit(tep);
2212 	tl_refrele(tep);
2213 }
2214 
2215 static void
2216 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2217 {
2218 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2219 	int cmd = iocbp->ioc_cmd;
2220 	queue_t *wq = tep->te_wq;
2221 	int error;
2222 	int thisopt, otheropt;
2223 
2224 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2225 
2226 	switch (cmd) {
2227 	case TL_IOC_CREDOPT:
2228 		if (cmd == TL_IOC_CREDOPT) {
2229 			thisopt = TL_SETCRED;
2230 			otheropt = TL_SETUCRED;
2231 		} else {
2232 			/* FALLTHROUGH */
2233 	case TL_IOC_UCREDOPT:
2234 			thisopt = TL_SETUCRED;
2235 			otheropt = TL_SETCRED;
2236 		}
2237 		/*
2238 		 * The credentials passing does not apply to sockets.
2239 		 * Only one of the cred options can be set at a given time.
2240 		 */
2241 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2242 			miocnak(wq, mp, 0, EINVAL);
2243 			return;
2244 		}
2245 
2246 		/*
2247 		 * Turn on generation of credential options for
2248 		 * T_conn_req, T_conn_con, T_unidata_ind.
2249 		 */
2250 		error = miocpullup(mp, sizeof (uint32_t));
2251 		if (error != 0) {
2252 			miocnak(wq, mp, 0, error);
2253 			return;
2254 		}
2255 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2256 			miocnak(wq, mp, 0, EINVAL);
2257 			return;
2258 		}
2259 
2260 		if (*(uint32_t *)mp->b_cont->b_rptr)
2261 			tep->te_flag |= thisopt;
2262 		else
2263 			tep->te_flag &= ~thisopt;
2264 
2265 		miocack(wq, mp, 0, 0);
2266 		break;
2267 
2268 	default:
2269 		/* Should not be here */
2270 		miocnak(wq, mp, 0, EINVAL);
2271 		break;
2272 	}
2273 }
2274 
2275 
2276 /*
2277  * send T_ERROR_ACK
2278  * Note: assumes enough memory or caller passed big enough mp
2279  *	- no recovery from allocb failures
2280  */
2281 
2282 static void
2283 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2284     t_scalar_t unix_err, t_scalar_t type)
2285 {
2286 	struct T_error_ack *err_ack;
2287 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2288 	    M_PCPROTO, T_ERROR_ACK);
2289 
2290 	if (ackmp == NULL) {
2291 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2292 		    "tl_error_ack:out of mblk memory"));
2293 		tl_merror(wq, NULL, ENOSR);
2294 		return;
2295 	}
2296 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2297 	err_ack->ERROR_prim = type;
2298 	err_ack->TLI_error = tli_err;
2299 	err_ack->UNIX_error = unix_err;
2300 
2301 	/*
2302 	 * send error ack message
2303 	 */
2304 	qreply(wq, ackmp);
2305 }
2306 
2307 
2308 
2309 /*
2310  * send T_OK_ACK
2311  * Note: assumes enough memory or caller passed big enough mp
2312  *	- no recovery from allocb failures
2313  */
2314 static void
2315 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2316 {
2317 	struct T_ok_ack *ok_ack;
2318 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2319 	    M_PCPROTO, T_OK_ACK);
2320 
2321 	if (ackmp == NULL) {
2322 		tl_merror(wq, NULL, ENOMEM);
2323 		return;
2324 	}
2325 
2326 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2327 	ok_ack->CORRECT_prim = type;
2328 
2329 	(void) qreply(wq, ackmp);
2330 }
2331 
2332 /*
2333  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2334  * This is a wrapper around tl_bind().
2335  */
2336 static void
2337 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2338 {
2339 	if (! tep->te_closing)
2340 		tl_bind(mp, tep);
2341 	else
2342 		freemsg(mp);
2343 
2344 	tl_serializer_exit(tep);
2345 	tl_refrele(tep);
2346 }
2347 
2348 /*
2349  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2350  * Assumes that the endpoint is in the unbound.
2351  */
2352 static void
2353 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2354 {
2355 	queue_t			*wq = tep->te_wq;
2356 	struct T_bind_ack	*b_ack;
2357 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2358 	mblk_t			*ackmp, *bamp;
2359 	soux_addr_t		ux_addr;
2360 	t_uscalar_t		qlen = 0;
2361 	t_scalar_t		alen, aoff;
2362 	tl_addr_t		addr_req;
2363 	void			*addr_startp;
2364 	ssize_t			msz = MBLKL(mp), basize;
2365 	t_scalar_t		tli_err = 0, unix_err = 0;
2366 	t_scalar_t		save_prim_type = bind->PRIM_type;
2367 	t_scalar_t		save_state = tep->te_state;
2368 
2369 	if (tep->te_state != TS_UNBND) {
2370 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2371 		    SL_TRACE|SL_ERROR,
2372 		    "tl_wput:bind_request:out of state, state=%d",
2373 		    tep->te_state));
2374 		tli_err = TOUTSTATE;
2375 		goto error;
2376 	}
2377 
2378 	if (msz < sizeof (struct T_bind_req)) {
2379 		tli_err = TSYSERR; unix_err = EINVAL;
2380 		goto error;
2381 	}
2382 
2383 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2384 
2385 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2386 	    (bind->PRIM_type == T_BIND_REQ));
2387 
2388 	alen = bind->ADDR_length;
2389 	aoff = bind->ADDR_offset;
2390 
2391 	/* negotiate max conn req pending */
2392 	if (IS_COTS(tep)) {
2393 		qlen = bind->CONIND_number;
2394 		if (qlen > tl_maxqlen)
2395 			qlen = tl_maxqlen;
2396 	}
2397 
2398 	/*
2399 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2400 	 * and bound again.
2401 	 */
2402 	if ((tep->te_hash_hndl == NULL) &&
2403 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2404 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2405 	    &tep->te_hash_hndl) != 0) {
2406 		tli_err = TSYSERR; unix_err = ENOSR;
2407 		goto error;
2408 	}
2409 
2410 	/*
2411 	 * Verify address correctness.
2412 	 */
2413 	if (IS_SOCKET(tep)) {
2414 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2415 
2416 		if ((alen != TL_SOUX_ADDRLEN) ||
2417 		    (aoff < 0) ||
2418 		    (aoff + alen > msz)) {
2419 			(void) (STRLOG(TL_ID, tep->te_minor,
2420 			    1, SL_TRACE|SL_ERROR,
2421 			    "tl_bind: invalid socket addr"));
2422 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2423 			tli_err = TSYSERR; unix_err = EINVAL;
2424 			goto error;
2425 		}
2426 		/* Copy address from message to local buffer. */
2427 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2428 		/*
2429 		 * Check that we got correct address from sockets
2430 		 */
2431 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2432 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2433 			(void) (STRLOG(TL_ID, tep->te_minor,
2434 			    1, SL_TRACE|SL_ERROR,
2435 			    "tl_bind: invalid socket magic"));
2436 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2437 			tli_err = TSYSERR; unix_err = EINVAL;
2438 			goto error;
2439 		}
2440 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2441 		    (ux_addr.soua_vp != NULL)) {
2442 			(void) (STRLOG(TL_ID, tep->te_minor,
2443 			    1, SL_TRACE|SL_ERROR,
2444 			    "tl_bind: implicit addr non-empty"));
2445 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2446 			tli_err = TSYSERR; unix_err = EINVAL;
2447 			goto error;
2448 		}
2449 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2450 		    (ux_addr.soua_vp == NULL)) {
2451 			(void) (STRLOG(TL_ID, tep->te_minor,
2452 			    1, SL_TRACE|SL_ERROR,
2453 			    "tl_bind: explicit addr empty"));
2454 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2455 			tli_err = TSYSERR; unix_err = EINVAL;
2456 			goto error;
2457 		}
2458 	} else {
2459 		if ((alen > 0) && ((aoff < 0) ||
2460 		    ((ssize_t)(aoff + alen) > msz) ||
2461 		    ((aoff + alen) < 0))) {
2462 			(void) (STRLOG(TL_ID, tep->te_minor,
2463 			    1, SL_TRACE|SL_ERROR,
2464 			    "tl_bind: invalid message"));
2465 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2466 			tli_err = TSYSERR; unix_err = EINVAL;
2467 			goto error;
2468 		}
2469 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2470 			(void) (STRLOG(TL_ID, tep->te_minor,
2471 			    1, SL_TRACE|SL_ERROR,
2472 			    "tl_bind: bad addr in  message"));
2473 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2474 			tli_err = TBADADDR;
2475 			goto error;
2476 		}
2477 #ifdef DEBUG
2478 		/*
2479 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2480 		 * if (! assertion)
2481 		 *	log warning;
2482 		 */
2483 		if (! ((alen == 0 && aoff == 0) ||
2484 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2485 			(void) (STRLOG(TL_ID, tep->te_minor,
2486 				    3, SL_TRACE|SL_ERROR,
2487 				    "tl_bind: addr overlaps TPI message"));
2488 		}
2489 #endif
2490 	}
2491 
2492 	/*
2493 	 * Bind the address provided or allocate one if requested.
2494 	 * Allow rebinds with a new qlen value.
2495 	 */
2496 	if (IS_SOCKET(tep)) {
2497 		/*
2498 		 * For anonymous requests the te_ap is already set up properly
2499 		 * so use minor number as an address.
2500 		 * For explicit requests need to check whether the address is
2501 		 * already in use.
2502 		 */
2503 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2504 			int rc;
2505 
2506 			if (tep->te_flag & TL_ADDRHASHED) {
2507 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2508 				if (tep->te_vp == ux_addr.soua_vp)
2509 					goto skip_addr_bind;
2510 				else /* Rebind to a new address. */
2511 					tl_addr_unbind(tep);
2512 			}
2513 			/*
2514 			 * Insert address in the hash if it is not already
2515 			 * there.  Since we use preallocated handle, the insert
2516 			 * can fail only if the key is already present.
2517 			 */
2518 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2519 			    (mod_hash_key_t)ux_addr.soua_vp,
2520 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2521 
2522 			if (rc != 0) {
2523 				ASSERT(rc == MH_ERR_DUPLICATE);
2524 				/*
2525 				 * Violate O_T_BIND_REQ semantics and fail with
2526 				 * TADDRBUSY - sockets will not use any address
2527 				 * other than supplied one for explicit binds.
2528 				 */
2529 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2530 				    SL_TRACE|SL_ERROR,
2531 				    "tl_bind:requested addr %p is busy",
2532 				    ux_addr.soua_vp));
2533 				tli_err = TADDRBUSY; unix_err = 0;
2534 				goto error;
2535 			}
2536 			tep->te_uxaddr = ux_addr;
2537 			tep->te_flag |= TL_ADDRHASHED;
2538 			tep->te_hash_hndl = NULL;
2539 		}
2540 	} else if (alen == 0) {
2541 		/*
2542 		 * assign any free address
2543 		 */
2544 		if (! tl_get_any_addr(tep, NULL)) {
2545 			(void) (STRLOG(TL_ID, tep->te_minor,
2546 			    1, SL_TRACE|SL_ERROR,
2547 			    "tl_bind:failed to get buffer for any "
2548 			    "address"));
2549 			tli_err = TSYSERR; unix_err = ENOSR;
2550 			goto error;
2551 		}
2552 	} else {
2553 		addr_req.ta_alen = alen;
2554 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2555 		addr_req.ta_zoneid = tep->te_zoneid;
2556 
2557 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2558 		if (tep->te_abuf == NULL) {
2559 			tli_err = TSYSERR; unix_err = ENOSR;
2560 			goto error;
2561 		}
2562 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2563 		tep->te_alen = alen;
2564 
2565 		if (mod_hash_insert_reserve(tep->te_addrhash,
2566 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2567 		    tep->te_hash_hndl) != 0) {
2568 			if (save_prim_type == T_BIND_REQ) {
2569 				/*
2570 				 * The bind semantics for this primitive
2571 				 * require a failure if the exact address
2572 				 * requested is busy
2573 				 */
2574 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2575 				    SL_TRACE|SL_ERROR,
2576 				    "tl_bind:requested addr is busy"));
2577 				tli_err = TADDRBUSY; unix_err = 0;
2578 				goto error;
2579 			}
2580 
2581 			/*
2582 			 * O_T_BIND_REQ semantics say if address if requested
2583 			 * address is busy, bind to any available free address
2584 			 */
2585 			if (! tl_get_any_addr(tep, &addr_req)) {
2586 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2587 				    SL_TRACE|SL_ERROR,
2588 				    "tl_bind:unable to get any addr buf"));
2589 				tli_err = TSYSERR; unix_err = ENOMEM;
2590 				goto error;
2591 			}
2592 		} else {
2593 			tep->te_flag |= TL_ADDRHASHED;
2594 			tep->te_hash_hndl = NULL;
2595 		}
2596 	}
2597 
2598 	ASSERT(tep->te_alen >= 0);
2599 
2600 skip_addr_bind:
2601 	/*
2602 	 * prepare T_BIND_ACK TPI message
2603 	 */
2604 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2605 	bamp = reallocb(mp, basize, 0);
2606 	if (bamp == NULL) {
2607 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2608 		    "tl_wput:tl_bind: allocb failed"));
2609 		/*
2610 		 * roll back state changes
2611 		 */
2612 		tl_addr_unbind(tep);
2613 		tep->te_state = TS_UNBND;
2614 		tl_memrecover(wq, mp, basize);
2615 		return;
2616 	}
2617 
2618 	DB_TYPE(bamp) = M_PCPROTO;
2619 	bamp->b_wptr = bamp->b_rptr + basize;
2620 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2621 	b_ack->PRIM_type = T_BIND_ACK;
2622 	b_ack->CONIND_number = qlen;
2623 	b_ack->ADDR_length = tep->te_alen;
2624 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2625 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2626 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2627 
2628 	if (IS_COTS(tep)) {
2629 		tep->te_qlen = qlen;
2630 		if (qlen > 0)
2631 			tep->te_flag |= TL_LISTENER;
2632 	}
2633 
2634 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2635 	/*
2636 	 * send T_BIND_ACK message
2637 	 */
2638 	(void) qreply(wq, bamp);
2639 	return;
2640 
2641 error:
2642 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2643 	if (ackmp == NULL) {
2644 		/*
2645 		 * roll back state changes
2646 		 */
2647 		tep->te_state = save_state;
2648 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2649 		return;
2650 	}
2651 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2652 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2653 }
2654 
2655 /*
2656  * Process T_UNBIND_REQ.
2657  * Called from serializer.
2658  */
2659 static void
2660 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2661 {
2662 	queue_t *wq;
2663 	mblk_t *ackmp;
2664 
2665 	if (tep->te_closing) {
2666 		freemsg(mp);
2667 		return;
2668 	}
2669 
2670 	wq = tep->te_wq;
2671 
2672 	/*
2673 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2674 	 * ==> allocate for T_ERROR_ACK (known max)
2675 	 */
2676 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2677 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2678 		return;
2679 	}
2680 	/*
2681 	 * memory resources committed
2682 	 * Note: no message validation. T_UNBIND_REQ message is
2683 	 * same size as PRIM_type field so already verified earlier.
2684 	 */
2685 
2686 	/*
2687 	 * validate state
2688 	 */
2689 	if (tep->te_state != TS_IDLE) {
2690 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2691 		    SL_TRACE|SL_ERROR,
2692 		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2693 		    tep->te_state));
2694 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2695 		return;
2696 	}
2697 	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2698 
2699 	/*
2700 	 * TPI says on T_UNBIND_REQ:
2701 	 *    send up a M_FLUSH to flush both
2702 	 *    read and write queues
2703 	 */
2704 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2705 
2706 	if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2707 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2708 
2709 		/*
2710 		 * Sockets use bind with qlen==0 followed by bind() to
2711 		 * the same address with qlen > 0 for listeners.
2712 		 * We allow rebind with a new qlen value.
2713 		 */
2714 		tl_addr_unbind(tep);
2715 	}
2716 
2717 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2718 	/*
2719 	 * send  T_OK_ACK
2720 	 */
2721 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2722 }
2723 
2724 
2725 /*
2726  * Option management code from drv/ip is used here
2727  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2728  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2729  *	However, that is what we want as that option is 'unorthodox'
2730  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2731  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2732  * Note2: use of optcom_req means this routine is an exception to
2733  *	 recovery from allocb() failures.
2734  */
2735 
2736 static void
2737 tl_optmgmt(queue_t *wq, mblk_t *mp)
2738 {
2739 	tl_endpt_t *tep;
2740 	mblk_t *ackmp;
2741 	union T_primitives *prim;
2742 	cred_t *cr;
2743 
2744 	tep = (tl_endpt_t *)wq->q_ptr;
2745 	prim = (union T_primitives *)mp->b_rptr;
2746 
2747 	/*
2748 	 * All Solaris components should pass a db_credp
2749 	 * for this TPI message, hence we ASSERT.
2750 	 * But in case there is some other M_PROTO that looks
2751 	 * like a TPI message sent by some other kernel
2752 	 * component, we check and return an error.
2753 	 */
2754 	cr = msg_getcred(mp, NULL);
2755 	ASSERT(cr != NULL);
2756 	if (cr == NULL) {
2757 		tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2758 		return;
2759 	}
2760 
2761 	/*  all states OK for AF_UNIX options ? */
2762 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2763 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2764 		/*
2765 		 * Broken TLI semantics that options can only be managed
2766 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2767 		 * tests this TLI (mis)feature using this device driver.
2768 		 */
2769 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2770 		    SL_TRACE|SL_ERROR,
2771 		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2772 		    tep->te_state));
2773 		/*
2774 		 * preallocate memory for T_ERROR_ACK
2775 		 */
2776 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2777 		if (! ackmp) {
2778 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2779 			return;
2780 		}
2781 
2782 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2783 		freemsg(mp);
2784 		return;
2785 	}
2786 
2787 	/*
2788 	 * call common option management routine from drv/ip
2789 	 */
2790 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2791 		svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2792 	} else {
2793 		ASSERT(prim->type == T_OPTMGMT_REQ);
2794 		tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2795 	}
2796 }
2797 
2798 /*
2799  * Handle T_conn_req - the driver part of accept().
2800  * If TL_SET[U]CRED generate the credentials options.
2801  * If this is a socket pass through options unmodified.
2802  * For sockets generate the T_CONN_CON here instead of
2803  * waiting for the T_CONN_RES.
2804  */
2805 static void
2806 tl_conn_req(queue_t *wq, mblk_t *mp)
2807 {
2808 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2809 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2810 	ssize_t			msz = MBLKL(mp);
2811 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2812 	tl_endpt_t		*peer_tep = NULL;
2813 	mblk_t			*ackmp;
2814 	mblk_t			*dimp;
2815 	struct T_discon_ind	*di;
2816 	soux_addr_t		ux_addr;
2817 	tl_addr_t		dst;
2818 
2819 	ASSERT(IS_COTS(tep));
2820 
2821 	if (tep->te_closing) {
2822 		freemsg(mp);
2823 		return;
2824 	}
2825 
2826 	/*
2827 	 * preallocate memory for:
2828 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2829 	 *	==> known max T_ERROR_ACK
2830 	 * 2. max of T_DISCON_IND and T_CONN_IND
2831 	 */
2832 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2833 	if (! ackmp) {
2834 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2835 		return;
2836 	}
2837 	/*
2838 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2839 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2840 	 */
2841 
2842 	if (tep->te_state != TS_IDLE) {
2843 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2844 		    SL_TRACE|SL_ERROR,
2845 		    "tl_wput:T_CONN_REQ:out of state, state=%d",
2846 		    tep->te_state));
2847 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2848 		freemsg(mp);
2849 		return;
2850 	}
2851 
2852 	/*
2853 	 * validate the message
2854 	 * Note: dereference fields in struct inside message only
2855 	 * after validating the message length.
2856 	 */
2857 	if (msz < sizeof (struct T_conn_req)) {
2858 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2859 		    "tl_conn_req:invalid message length"));
2860 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2861 		freemsg(mp);
2862 		return;
2863 	}
2864 	alen = creq->DEST_length;
2865 	aoff = creq->DEST_offset;
2866 	olen = creq->OPT_length;
2867 	ooff = creq->OPT_offset;
2868 	if (olen == 0)
2869 		ooff = 0;
2870 
2871 	if (IS_SOCKET(tep)) {
2872 		if ((alen != TL_SOUX_ADDRLEN) ||
2873 		    (aoff < 0) ||
2874 		    (aoff + alen > msz) ||
2875 		    (alen > msz - sizeof (struct T_conn_req))) {
2876 			(void) (STRLOG(TL_ID, tep->te_minor,
2877 				    1, SL_TRACE|SL_ERROR,
2878 				    "tl_conn_req: invalid socket addr"));
2879 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2880 			freemsg(mp);
2881 			return;
2882 		}
2883 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2884 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2885 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2886 			(void) (STRLOG(TL_ID, tep->te_minor,
2887 			    1, SL_TRACE|SL_ERROR,
2888 			    "tl_conn_req: invalid socket magic"));
2889 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2890 			freemsg(mp);
2891 			return;
2892 		}
2893 	} else {
2894 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2895 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2896 		    ooff + olen < 0)) ||
2897 		    olen < 0 || ooff < 0) {
2898 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2899 			    SL_TRACE|SL_ERROR,
2900 			    "tl_conn_req:invalid message"));
2901 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2902 			freemsg(mp);
2903 			return;
2904 		}
2905 
2906 		if (alen <= 0 || aoff < 0 ||
2907 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2908 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2909 				    SL_TRACE|SL_ERROR,
2910 				    "tl_conn_req:bad addr in message, "
2911 				    "alen=%d, msz=%ld",
2912 				    alen, msz));
2913 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2914 			freemsg(mp);
2915 			return;
2916 		}
2917 #ifdef DEBUG
2918 		/*
2919 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2920 		 * if (! assertion)
2921 		 *	log warning;
2922 		 */
2923 		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2924 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
2925 			    SL_TRACE|SL_ERROR,
2926 			    "tl_conn_req: addr overlaps TPI message"));
2927 		}
2928 #endif
2929 		if (olen) {
2930 			/*
2931 			 * no opts in connect req
2932 			 * supported in this provider except for sockets.
2933 			 */
2934 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2935 			    SL_TRACE|SL_ERROR,
2936 			    "tl_conn_req:options not supported "
2937 			    "in message"));
2938 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2939 			freemsg(mp);
2940 			return;
2941 		}
2942 	}
2943 
2944 	/*
2945 	 * Prevent tep from closing on us.
2946 	 */
2947 	if (! tl_noclose(tep)) {
2948 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2949 		    "tl_conn_req:endpoint is closing"));
2950 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2951 		freemsg(mp);
2952 		return;
2953 	}
2954 
2955 	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2956 	/*
2957 	 * get endpoint to connect to
2958 	 * check that peer with DEST addr is bound to addr
2959 	 * and has CONIND_number > 0
2960 	 */
2961 	dst.ta_alen = alen;
2962 	dst.ta_abuf = mp->b_rptr + aoff;
2963 	dst.ta_zoneid = tep->te_zoneid;
2964 
2965 	/*
2966 	 * Verify if remote addr is in use
2967 	 */
2968 	peer_tep = (IS_SOCKET(tep) ?
2969 	    tl_sock_find_peer(tep, &ux_addr) :
2970 	    tl_find_peer(tep, &dst));
2971 
2972 	if (peer_tep == NULL) {
2973 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2974 		    "tl_conn_req:no one at connect address"));
2975 		err = ECONNREFUSED;
2976 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2977 		/*
2978 		 * validate that number of incoming connection is
2979 		 * not to capacity on destination endpoint
2980 		 */
2981 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2982 		    "tl_conn_req: qlen overflow connection refused"));
2983 			err = ECONNREFUSED;
2984 	}
2985 
2986 	/*
2987 	 * Send T_DISCON_IND in case of error
2988 	 */
2989 	if (err != 0) {
2990 		if (peer_tep != NULL)
2991 			tl_refrele(peer_tep);
2992 		/* We are still expected to send T_OK_ACK */
2993 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2994 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2995 		tl_closeok(tep);
2996 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2997 		    M_PROTO, T_DISCON_IND);
2998 		if (dimp == NULL) {
2999 			tl_merror(wq, NULL, ENOSR);
3000 			return;
3001 		}
3002 		di = (struct T_discon_ind *)dimp->b_rptr;
3003 		di->DISCON_reason = err;
3004 		di->SEQ_number = BADSEQNUM;
3005 
3006 		tep->te_state = TS_IDLE;
3007 		/*
3008 		 * send T_DISCON_IND message
3009 		 */
3010 		putnext(tep->te_rq, dimp);
3011 		return;
3012 	}
3013 
3014 	ASSERT(IS_COTS(peer_tep));
3015 
3016 	/*
3017 	 * Found the listener. At this point processing will continue on
3018 	 * listener serializer. Close of the endpoint should be blocked while we
3019 	 * switch serializers.
3020 	 */
3021 	tl_serializer_refhold(peer_tep->te_ser);
3022 	tl_serializer_refrele(tep->te_ser);
3023 	tep->te_ser = peer_tep->te_ser;
3024 	ASSERT(tep->te_oconp == NULL);
3025 	tep->te_oconp = peer_tep;
3026 
3027 	/*
3028 	 * It is safe to close now. Close may continue on listener serializer.
3029 	 */
3030 	tl_closeok(tep);
3031 
3032 	/*
3033 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3034 	 * data, so we link mp to ackmp.
3035 	 */
3036 	ackmp->b_cont = mp;
3037 	mp = ackmp;
3038 
3039 	tl_refhold(tep);
3040 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3041 }
3042 
3043 /*
3044  * Finish T_CONN_REQ processing on listener serializer.
 *
 * mp is the preallocated ack message with the original T_CONN_REQ linked on
 * its b_cont. tep is the connecting endpoint; the listener it is connecting
 * to is taken from tep->te_oconp.
 *
 * On success the T_OK_ACK is sent to the connecting endpoint, a tl_icon_t
 * for it is appended to the listener's te_iconp list, and a T_CONN_IND is
 * sent up the listener's read queue. For sockets (unless
 * tl_disable_early_connect is set) a T_CONN_CON is also sent up the
 * connecting endpoint's read queue.
 *
 * Every return path calls tl_serializer_exit(tep). Every failure path also
 * calls TL_UNCONNECT() on tep->te_oconp and tl_refrele() on tep; the
 * success path does not call tl_refrele() (see the comment at the end of
 * the function).
3045  */
3046 static void
3047 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3048 {
3049 	queue_t		*wq;
3050 	tl_endpt_t	*peer_tep = tep->te_oconp;
3051 	mblk_t		*confmp, *cimp, *indmp;
3052 	void		*opts = NULL;
3053 	mblk_t		*ackmp = mp;
3054 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3055 	struct T_conn_ind	*ci;
3056 	tl_icon_t	*tip;
3057 	void		*addr_startp;
3058 	t_scalar_t	olen = creq->OPT_length;
3059 	t_scalar_t	ooff = creq->OPT_offset;
3060 	size_t 		ci_msz;
3061 	size_t		size;
3062 	cred_t		*cr = NULL;
3063 	pid_t		cpid;
3064 
	/*
	 * The connecting endpoint is already closing: drop the request.
	 * mp is still the ackmp with the T_CONN_REQ on b_cont here, so the
	 * single freemsg() covers both.
	 */
3065 	if (tep->te_closing) {
3066 		TL_UNCONNECT(tep->te_oconp);
3067 		tl_serializer_exit(tep);
3068 		tl_refrele(tep);
3069 		freemsg(mp);
3070 		return;
3071 	}
3072 
3073 	wq = tep->te_wq;
3074 	tep->te_flag |= TL_EAGER;
3075 
3076 	/*
3077 	 * Extract preallocated ackmp from mp.
	 * From here on mp is the T_CONN_REQ and ackmp stands alone.
3078 	 */
3079 	mp = mp->b_cont;
3080 	ackmp->b_cont = NULL;
3081 
	/*
	 * "ooff != 0" is the test used below for "options must be copied from
	 * the T_CONN_REQ", so force it to 0 when there are no options.
	 */
3082 	if (olen == 0)
3083 		ooff = 0;
3084 
	/*
	 * The listener must not be closing and must be in TS_IDLE or
	 * TS_WRES_CIND. Otherwise the request is failed with
	 * TSYSERR/ECONNREFUSED; the T_CONN_REQ mblk is passed to
	 * tl_error_ack() and the preallocated ackmp is freed.
	 */
3085 	if (peer_tep->te_closing ||
3086 	    !((peer_tep->te_state == TS_IDLE) ||
3087 	    (peer_tep->te_state == TS_WRES_CIND))) {
3088 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3089 		    "tl_conn_req:peer in bad state (%d)",
3090 		    peer_tep->te_state));
3091 		TL_UNCONNECT(tep->te_oconp);
3092 		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3093 		freemsg(ackmp);
3094 		tl_serializer_exit(tep);
3095 		tl_refrele(tep);
3096 		return;
3097 	}
3098 
3099 	/*
3100 	 * preallocate now for T_DISCON_IND or T_CONN_IND
3101 	 */
3102 	/*
3103 	 * calculate length of T_CONN_IND message
	 *
	 * If the listener has TL_SETCRED or TL_SETUCRED set, the T_CONN_IND
	 * carries a single credential option whose length replaces olen.
	 * Setting ooff to 0 in that case also means that options from the
	 * T_CONN_REQ are neither saved nor copied below.
3104 	 */
3105 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3106 		cr = msg_getcred(mp, &cpid);
3107 		ASSERT(cr != NULL);
3108 		if (peer_tep->te_flag & TL_SETCRED) {
3109 			ooff = 0;
3110 			olen = (t_scalar_t) sizeof (struct opthdr) +
3111 			    OPTLEN(sizeof (tl_credopt_t));
3112 			/* 1 option only */
3113 		} else {
3114 			ooff = 0;
3115 			olen = (t_scalar_t)sizeof (struct opthdr) +
3116 			    OPTLEN(ucredminsize(cr));
3117 			/* 1 option only */
3118 		}
3119 	}
	/*
	 * T_CONN_IND layout: header, source address (tep's address), then the
	 * options starting at the next T_ALIGN boundary. The message must also
	 * be big enough to hold a T_DISCON_IND.
	 */
3120 	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3121 	ci_msz = T_ALIGN(ci_msz) + olen;
3122 	size = max(ci_msz, sizeof (struct T_discon_ind));
3123 
3124 	/*
3125 	 * Save options from mp - we'll need them for T_CONN_IND.
	 * NOTE(review): presumably a private copy is needed because the
	 * T_CONN_REQ mblk is reused for the T_CONN_IND below and writing the
	 * source address could overwrite the options in place - confirm.
3126 	 */
3127 	if (ooff != 0) {
3128 		opts = kmem_alloc(olen, KM_NOSLEEP);
3129 		if (opts == NULL) {
3130 			/*
3131 			 * roll back state changes
3132 			 */
3133 			tep->te_state = TS_IDLE;
3134 			tl_memrecover(wq, mp, size);
3135 			freemsg(ackmp);
3136 			TL_UNCONNECT(tep->te_oconp);
3137 			tl_serializer_exit(tep);
3138 			tl_refrele(tep);
3139 			return;
3140 		}
3141 		/* Copy options to a temp buffer */
3142 		bcopy(mp->b_rptr + ooff, opts, olen);
3143 	}
3144 
3145 	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3146 		/*
3147 		 * Generate a T_CONN_CON that has the identical address
3148 		 * (and options) as the T_CONN_REQ.
3149 		 * NOTE: assumes that the T_conn_req and T_conn_con structures
3150 		 * are isomorphic.
3151 		 */
3152 		confmp = copyb(mp);
3153 		if (! confmp) {
3154 			/*
3155 			 * roll back state changes
3156 			 */
3157 			tep->te_state = TS_IDLE;
3158 			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3159 			freemsg(ackmp);
3160 			if (opts != NULL)
3161 				kmem_free(opts, olen);
3162 			TL_UNCONNECT(tep->te_oconp);
3163 			tl_serializer_exit(tep);
3164 			tl_refrele(tep);
3165 			return;
3166 		}
3167 		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3168 		    T_CONN_CON;
3169 	} else {
		/* No early T_CONN_CON; confmp == NULL is tested further down. */
3170 		confmp = NULL;
3171 	}
	/*
	 * Obtain the message that will become the T_CONN_IND. After this
	 * succeeds only indmp is used; mp is not referenced again.
	 */
3172 	if ((indmp = reallocb(mp, size, 0)) == NULL) {
3173 		/*
3174 		 * roll back state changes
3175 		 */
3176 		tep->te_state = TS_IDLE;
3177 		tl_memrecover(wq, mp, size);
3178 		freemsg(ackmp);
3179 		if (opts != NULL)
3180 			kmem_free(opts, olen);
3181 		freemsg(confmp);
3182 		TL_UNCONNECT(tep->te_oconp);
3183 		tl_serializer_exit(tep);
3184 		tl_refrele(tep);
3185 		return;
3186 	}
3187 
	/* Cell that will represent this request on the listener's list. */
3188 	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3189 	if (tip == NULL) {
3190 		/*
3191 		 * roll back state changes
3192 		 */
3193 		tep->te_state = TS_IDLE;
3194 		tl_memrecover(wq, indmp, sizeof (*tip));
3195 		freemsg(ackmp);
3196 		if (opts != NULL)
3197 			kmem_free(opts, olen);
3198 		freemsg(confmp);
3199 		TL_UNCONNECT(tep->te_oconp);
3200 		tl_serializer_exit(tep);
3201 		tl_refrele(tep);
3202 		return;
3203 	}
	/* Already zero from kmem_zalloc(); set explicitly for the reader. */
3204 	tip->ti_mp = NULL;
3205 
3206 	/*
3207 	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3208 	 * and tl_icon_t cell.
3209 	 */
3210 
3211 	/*
3212 	 * ack validity of request and send the peer credential in the ACK.
3213 	 */
3214 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3215 
	/*
	 * Attach the listener's credential to the early T_CONN_CON, if any.
	 * NOTE(review): peer_tep has already been dereferenced above, so the
	 * peer_tep != NULL test cannot fail here.
	 */
3216 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3217 	    confmp != NULL) {
3218 		mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3219 	}
3220 
3221 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3222 
3223 	/*
3224 	 * prepare message to send T_CONN_IND
	 *
	 * The T_OK_ACK has been sent, so a failure from here on is reported
	 * with tl_merror() and te_state is not rolled back.
3225 	 */
3226 	/*
3227 	 * allocate the message - original data blocks retained
3228 	 * in the returned mblk
3229 	 */
3230 	cimp = tl_resizemp(indmp, size);
3231 	if (! cimp) {
3232 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3233 		    "tl_conn_req:con_ind:allocb failure"));
3234 		tl_merror(wq, indmp, ENOMEM);
3235 		TL_UNCONNECT(tep->te_oconp);
3236 		tl_serializer_exit(tep);
3237 		tl_refrele(tep);
3238 		if (opts != NULL)
3239 			kmem_free(opts, olen);
3240 		freemsg(confmp);
3241 		ASSERT(tip->ti_mp == NULL);
3242 		kmem_free(tip, sizeof (*tip));
3243 		return;
3244 	}
3245 
	/*
	 * Fill in the T_CONN_IND header and copy the connecting endpoint's
	 * address in right after it.
	 */
3246 	DB_TYPE(cimp) = M_PROTO;
3247 	ci = (struct T_conn_ind *)cimp->b_rptr;
3248 	ci->PRIM_type  = T_CONN_IND;
3249 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3250 	ci->SRC_length = tep->te_alen;
3251 	ci->SEQ_number = tep->te_seqno;
3252 
3253 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3254 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	/*
	 * Options: a generated credential option takes precedence; otherwise
	 * the options saved from the T_CONN_REQ are copied; otherwise none.
	 */
3255 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3256 
3257 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3258 		    ci->SRC_length);
3259 		ci->OPT_length = olen; /* because only 1 option */
3260 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3261 		    cr, cpid,
3262 		    peer_tep->te_flag, peer_tep->te_credp);
3263 	} else if (ooff != 0) {
3264 		/* Copy option from T_CONN_REQ */
3265 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3266 		    ci->SRC_length);
3267 		ci->OPT_length = olen;
3268 		ASSERT(opts != NULL);
3269 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3270 	} else {
3271 		ci->OPT_offset = 0;
3272 		ci->OPT_length = 0;
3273 	}
	/* The temporary copy of the options is no longer needed. */
3274 	if (opts != NULL)
3275 		kmem_free(opts, olen);
3276 
3277 	/*
3278 	 * register connection request with server peer
3279 	 * append to list of incoming connections
3280 	 * increment references for both peer_tep and tep: peer_tep is placed on
3281 	 * te_oconp and tep is placed on listeners queue.
3282 	 */
3283 	tip->ti_tep = tep;
3284 	tip->ti_seqno = tep->te_seqno;
3285 	list_insert_tail(&peer_tep->te_iconp, tip);
3286 	peer_tep->te_nicon++;
3287 
3288 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3289 	/*
3290 	 * send the T_CONN_IND message
3291 	 */
3292 	putnext(peer_tep->te_rq, cimp);
3293 
3294 	/*
3295 	 * Send a T_CONN_CON message for sockets.
3296 	 * Disable the queues until we have reached the correct state!
	 * (tl_conn_res() calls enableok() on the client's write queue once
	 * the connection has been accepted.)
3297 	 */
3298 	if (confmp != NULL) {
3299 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3300 		noenable(wq);
3301 		putnext(tep->te_rq, confmp);
3302 	}
3303 	/*
3304 	 * Now we need to increment tep reference because tep is referenced by
3305 	 * server list of pending connections. We also need to decrement
3306 	 * reference before exiting serializer. Two operations void each other
3307 	 * so we don't modify reference at all.
3308 	 */
3309 	ASSERT(tep->te_refcnt >= 2);
3310 	ASSERT(peer_tep->te_refcnt >= 2);
3311 	tl_serializer_exit(tep);
3312 }
3313 
3314 
3315 
3316 /*
3317  * Handle T_conn_res on listener stream. Called on listener serializer.
3318  * For sockets tl_conn_req has already generated the T_CONN_CON (unless
3319  * tl_disable_early_connect is set); otherwise it is built and sent here.
3320  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3321  * Switch eager serializer to acceptor's.
3322  *
3323  * If TL_SET[U]CRED generate the credentials options.
3324  * For sockets tl_conn_req has already generated the T_CONN_CON.
 *
 * mp is the T_CONN_RES (or O_T_CONN_RES) message and tep is the listener.
 * The acceptor is looked up by ACCEPTOR_id and the client by SEQ_number on
 * the listener's list of pending connections.
 *
 * Outline:
 *  1. Preallocate the ack and validate state, message length, options,
 *     sequence number, acceptor and pending-connection entry. Each failure
 *     sends a T_ERROR_ACK and frees mp.
 *  2. If a usable client exists, preallocate the T_DISCON_IND/T_CONN_CON
 *     and validate the client state.
 *  3. Send T_OK_ACK. On a client state error send T_DISCON_IND up the
 *     acceptor; if the client is gone just deliver any queued messages to
 *     the acceptor; otherwise link client and acceptor through te_conp,
 *     put both on one serializer and (for non-early-connect) send the
 *     T_CONN_CON to the client.
 *
 * Every path that obtained the acceptor calls tl_refrele(acc_ep) before
 * returning, except the final success path (see the "No need to increment
 * references" comment below). tl_closeok() is called for each endpoint on
 * which tl_noclose() succeeded.
3325  */
3326 static void
3327 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3328 {
3329 	queue_t			*wq;
3330 	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
3331 	ssize_t			msz = MBLKL(mp);
3332 	t_scalar_t		olen, ooff, err = 0;
3333 	t_scalar_t		prim = cres->PRIM_type;
3334 	uchar_t			*addr_startp;
3335 	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
3336 	tl_icon_t		*tip;
3337 	size_t			size;
3338 	mblk_t			*ackmp, *respmp;
3339 	mblk_t			*dimp, *ccmp = NULL;
3340 	struct T_discon_ind	*di;
3341 	struct T_conn_con	*cc;
3342 	boolean_t		client_noclose_set = B_FALSE;
3343 	boolean_t		switch_client_serializer = B_TRUE;
3344 
3345 	ASSERT(IS_COTS(tep));
3346 
	/* Listener is closing: silently drop the response. */
3347 	if (tep->te_closing) {
3348 		freemsg(mp);
3349 		return;
3350 	}
3351 
3352 	wq = tep->te_wq;
3353 
3354 	/*
3355 	 * preallocate memory for:
3356 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3357 	 *	==> known max T_ERROR_ACK
3358 	 * 2. max of T_DISCON_IND and T_CONN_CON
3359 	 */
3360 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3361 	if (! ackmp) {
3362 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3363 		return;
3364 	}
3365 	/*
3366 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3367 	 * will be committed for T_DISCON_IND/T_CONN_CON later
3368 	 */
3369 
3370 
3371 	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3372 
3373 	/*
3374 	 * validate state
3375 	 */
3376 	if (tep->te_state != TS_WRES_CIND) {
3377 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3378 		    SL_TRACE|SL_ERROR,
3379 		    "tl_wput:T_CONN_RES:out of state, state=%d",
3380 		    tep->te_state));
3381 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3382 		freemsg(mp);
3383 		return;
3384 	}
3385 
3386 	/*
3387 	 * validate the message
3388 	 * Note: dereference fields in struct inside message only
3389 	 * after validating the message length.
	 * NOTE(review): cres->PRIM_type is read in the declarations above,
	 * before this length check.
3390 	 */
3391 	if (msz < sizeof (struct T_conn_res)) {
3392 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3393 		    "tl_conn_res:invalid message length"));
3394 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3395 		freemsg(mp);
3396 		return;
3397 	}
3398 	olen = cres->OPT_length;
3399 	ooff = cres->OPT_offset;
3400 	if (((olen > 0) && ((ooff + olen) > msz))) {
3401 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3402 		    "tl_conn_res:invalid message"));
3403 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3404 		freemsg(mp);
3405 		return;
3406 	}
3407 	if (olen) {
3408 		/*
3409 		 * no opts in connect res
3410 		 * supported in this provider
3411 		 */
3412 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3413 		    "tl_conn_res:options not supported in message"));
3414 		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3415 		freemsg(mp);
3416 		return;
3417 	}
3418 
	/*
	 * The request is now in progress (TS_WACK_CRES). Every error return
	 * below moves the state on with TE_ERROR_ACK before acking.
	 */
3419 	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3420 	ASSERT(tep->te_state == TS_WACK_CRES);
3421 
	/*
	 * NOTE(review): the sequence number is rejected only when BOTH
	 * comparisons hold. Confirm against the definitions of
	 * TL_MINOR_START and BADSEQNUM (not visible here) that this is the
	 * intended range test.
	 */
3422 	if (cres->SEQ_number < TL_MINOR_START &&
3423 	    cres->SEQ_number >= BADSEQNUM) {
3424 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3425 		    "tl_conn_res:remote endpoint sequence number bad"));
3426 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3427 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3428 		freemsg(mp);
3429 		return;
3430 	}
3431 
3432 	/*
3433 	 * find accepting endpoint. Will have extra reference if found.
3434 	 */
3435 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3436 	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3437 	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3438 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3439 		    "tl_conn_res:bad accepting endpoint"));
3440 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3441 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3442 		freemsg(mp);
3443 		return;
3444 	}
3445 
3446 	/*
3447 	 * Prevent acceptor from closing.
	 * If this fails only the reference from the lookup has to be dropped;
	 * on all later returns tl_closeok(acc_ep) is called as well.
3448 	 */
3449 	if (! tl_noclose(acc_ep)) {
3450 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3451 		    "tl_conn_res:bad accepting endpoint"));
3452 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3453 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3454 		tl_refrele(acc_ep);
3455 		freemsg(mp);
3456 		return;
3457 	}
3458 
3459 	acc_ep->te_flag |= TL_ACCEPTOR;
3460 
3461 	/*
3462 	 * validate that accepting endpoint, if different from listening
3463 	 * has address bound => state is TS_IDLE
3464 	 * TROUBLE in XPG4 !!?
3465 	 */
3466 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3467 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3468 		    "tl_conn_res:accepting endpoint has no address bound,"
3469 		    "state=%d", acc_ep->te_state));
3470 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3471 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3472 		freemsg(mp);
3473 		tl_closeok(acc_ep);
3474 		tl_refrele(acc_ep);
3475 		return;
3476 	}
3477 
3478 	/*
3479 	 * validate if accepting endpt same as listening, then
3480 	 * no other incoming connection should be on the queue
3481 	 */
3482 
3483 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3484 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3485 		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3486 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3487 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3488 		freemsg(mp);
3489 		tl_closeok(acc_ep);
3490 		tl_refrele(acc_ep);
3491 		return;
3492 	}
3493 
3494 	/*
3495 	 * Find the entry corresponding to the client on the listener's
3496 	 * list of pending connections, searching by the sequence
3497 	 * number from the T_CONN_RES. The entry is removed
3498 	 * (tl_freetip) further down, once the response has been
3499 	 * processed.
3500 	 */
3501 	tip = tl_icon_find(tep, cres->SEQ_number);
3502 	if (tip == NULL) {
3503 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3504 		    "tl_conn_res:no client in listener list"));
3505 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3506 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3507 		freemsg(mp);
3508 		tl_closeok(acc_ep);
3509 		tl_refrele(acc_ep);
3510 		return;
3511 	}
3512 
3513 	/*
3514 	 * If ti_tep is NULL the client has already closed. In this case
3515 	 * the code below will avoid any action on the client side
3516 	 * but complete the server and acceptor state transitions.
3517 	 */
3518 	ASSERT(tip->ti_tep == NULL ||
3519 	    tip->ti_tep->te_seqno == cres->SEQ_number);
3520 	cl_ep = tip->ti_tep;
3521 
3522 	/*
3523 	 * If the client is present it is switched from listener's to acceptor's
3524 	 * serializer. We should block client closes while serializers are
3525 	 * being switched.
3526 	 *
3527 	 * It is possible that the client is present but is currently being
3528 	 * closed. There are two possible cases:
3529 	 *
3530 	 * 1) The client has already entered tl_close_finish_ser() and sent
3531 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3532 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3533 	 *
3534 	 * 2) The client started the close but has not entered
3535 	 *    tl_close_finish_ser() yet. In this case, the client is already
3536 	 *    proceeding asynchronously on the listener's serializer, so we're
3537 	 *    forced to change the acceptor to use the listener's serializer to
3538 	 *    ensure that any operations on the acceptor are serialized with
3539 	 *    respect to the close that's in-progress.
3540 	 */
3541 	if (cl_ep != NULL) {
3542 		if (tl_noclose(cl_ep)) {
			/* Remember to call tl_closeok(cl_ep) on the way out. */
3543 			client_noclose_set = B_TRUE;
3544 		} else {
3545 			/*
3546 			 * Client is closing. If it has sent the
3547 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3548 			 * we have to let the client continue until it is
3549 			 * sent.
3550 			 *
3551 			 * If we do continue using the client, acceptor will
3552 			 * switch to client's serializer which is used by client
3553 			 * for its close.
			 *
			 * The client is ignored (cl_ep set to NULL) unless
			 * it is an early-connect socket whose te_state is
			 * not -1.
			 * NOTE(review): te_state == -1 presumably marks a
			 * client that has already sent T_ORDREL_IND; confirm
			 * in tl_close_finish_ser().
3554 			 */
3555 			tl_client_closing_when_accepting++;
3556 			switch_client_serializer = B_FALSE;
3557 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3558 			    cl_ep->te_state == -1)
3559 				cl_ep = NULL;
3560 		}
3561 	}
3562 
3563 	if (cl_ep != NULL) {
3564 		/*
3565 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3566 		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, err is set only when the client
		 * is a socket whose state is neither TS_WCON_CREQ nor
		 * TS_DATA_XFER. A non-socket client in an unexpected state
		 * is not rejected by this test - confirm this is intended.
3567 		 */
3568 		if (cl_ep->te_state != TS_WCON_CREQ &&
3569 		    (cl_ep->te_state != TS_DATA_XFER &&
3570 		    IS_SOCKET(cl_ep))) {
3571 			err = ECONNREFUSED;
3572 			/*
3573 			 * T_DISCON_IND sent later after committing memory
3574 			 * and acking validity of request
3575 			 */
3576 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3577 			    "tl_conn_res:peer in bad state"));
3578 		}
3579 
3580 		/*
3581 		 * preallocate now for T_DISCON_IND or T_CONN_CON
3582 		 * ack validity of request (T_OK_ACK) after memory committed
3583 		 */
3584 
3585 		if (err)
3586 			size = sizeof (struct T_discon_ind);
3587 		else {
3588 			/*
3589 			 * calculate length of T_CONN_CON message
			 *
			 * olen is reused here as the length of the
			 * credential option generated for the client
			 * (it is known to be 0 after the validation
			 * above).
3590 			 */
3591 			olen = 0;
3592 			if (cl_ep->te_flag & TL_SETCRED) {
3593 				olen = (t_scalar_t)sizeof (struct opthdr) +
3594 				    OPTLEN(sizeof (tl_credopt_t));
3595 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3596 				olen = (t_scalar_t)sizeof (struct opthdr) +
3597 				    OPTLEN(ucredminsize(acc_ep->te_credp));
3598 			}
3599 			size = T_ALIGN(sizeof (struct T_conn_con) +
3600 			    acc_ep->te_alen) + olen;
3601 		}
3602 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3603 			/*
3604 			 * roll back state changes
3605 			 */
3606 			tep->te_state = TS_WRES_CIND;
3607 			tl_memrecover(wq, mp, size);
3608 			freemsg(ackmp);
3609 			if (client_noclose_set)
3610 				tl_closeok(cl_ep);
3611 			tl_closeok(acc_ep);
3612 			tl_refrele(acc_ep);
3613 			return;
3614 		}
		/*
		 * The message is now referenced through respmp only. Note
		 * that respmp is assigned only on this (cl_ep != NULL) path;
		 * all later uses of respmp are likewise reached only with
		 * cl_ep != NULL.
		 */
3615 		mp = NULL;
3616 	}
3617 
3618 	/*
3619 	 * Now ack validity of request
	 *
	 * The event used depends on whether this is the only pending
	 * connection and whether the listener is also the acceptor.
3620 	 */
3621 	if (tep->te_nicon == 1) {
3622 		if (tep == acc_ep)
3623 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3624 		else
3625 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3626 	} else
3627 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3628 
3629 	/*
3630 	 * send T_DISCON_IND now if client state validation failed earlier
3631 	 */
3632 	if (err) {
3633 		tl_ok_ack(wq, ackmp, prim);
3634 		/*
3635 		 * flush the queues - why always ?
3636 		 */
3637 		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3638 
3639 		dimp = tl_resizemp(respmp, size);
3640 		if (! dimp) {
3641 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3642 			    SL_TRACE|SL_ERROR,
3643 			    "tl_conn_res:con_ind:allocb failure"));
3644 			tl_merror(wq, respmp, ENOMEM);
3645 			tl_closeok(acc_ep);
3646 			if (client_noclose_set)
3647 				tl_closeok(cl_ep);
3648 			tl_refrele(acc_ep);
3649 			return;
3650 		}
3651 		if (dimp->b_cont) {
3652 			/* no user data in provider generated discon ind */
3653 			freemsg(dimp->b_cont);
3654 			dimp->b_cont = NULL;
3655 		}
3656 
3657 		DB_TYPE(dimp) = M_PROTO;
3658 		di = (struct T_discon_ind *)dimp->b_rptr;
3659 		di->PRIM_type  = T_DISCON_IND;
3660 		di->DISCON_reason = err;
3661 		di->SEQ_number = BADSEQNUM;
3662 
3663 		tep->te_state = TS_IDLE;
3664 		/*
3665 		 * send T_DISCON_IND message
		 * (it goes up the acceptor's read queue; the pending
		 * connection entry is not removed on this path)
3666 		 */
3667 		putnext(acc_ep->te_rq, dimp);
3668 		if (client_noclose_set)
3669 			tl_closeok(cl_ep);
3670 		tl_closeok(acc_ep);
3671 		tl_refrele(acc_ep);
3672 		return;
3673 	}
3674 
3675 	/*
3676 	 * now start connecting the accepting endpoint
3677 	 */
3678 	if (tep != acc_ep)
3679 		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3680 
3681 	if (cl_ep == NULL) {
3682 		/*
3683 		 * The client has already closed. Send up any queued messages
3684 		 * and change the state accordingly.
		 * mp was not handed to reallocb() on this path, so it is
		 * still owned here and is freed below.
3685 		 */
3686 		tl_ok_ack(wq, ackmp, prim);
3687 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3688 
3689 		/*
3690 		 * remove endpoint from incoming connection
3691 		 * delete client from list of incoming connections
3692 		 */
3693 		tl_freetip(tep, tip);
3694 		freemsg(mp);
3695 		tl_closeok(acc_ep);
3696 		tl_refrele(acc_ep);
3697 		return;
3698 	} else if (tip->ti_mp != NULL) {
3699 		/*
3700 		 * The client could have queued a T_DISCON_IND which needs
3701 		 * to be sent up.
3702 		 * Note that t_discon_req can not operate the same as
3703 		 * t_data_req since it is not possible for it to putbq
3704 		 * the message and return -1 due to the use of qwriter.
3705 		 */
3706 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3707 	}
3708 
3709 	/*
3710 	 * prepare connect confirm T_CONN_CON message
3711 	 */
3712 
3713 	/*
3714 	 * allocate the message - original data blocks
3715 	 * retained in the returned mblk
	 *
	 * For early-connect sockets the T_CONN_CON was already sent by
	 * tl_conn_req_ser(), so the preallocated message is simply freed.
3716 	 */
3717 	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3718 		ccmp = tl_resizemp(respmp, size);
3719 		if (ccmp == NULL) {
3720 			tl_ok_ack(wq, ackmp, prim);
3721 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3722 			    SL_TRACE|SL_ERROR,
3723 			    "tl_conn_res:conn_con:allocb failure"));
3724 			tl_merror(wq, respmp, ENOMEM);
3725 			tl_closeok(acc_ep);
3726 			if (client_noclose_set)
3727 				tl_closeok(cl_ep);
3728 			tl_refrele(acc_ep);
3729 			return;
3730 		}
3731 
		/*
		 * T_CONN_CON layout: header, the acceptor's address at
		 * RES_offset, then the optional credential option at the
		 * next T_ALIGN boundary.
		 */
3732 		DB_TYPE(ccmp) = M_PROTO;
3733 		cc = (struct T_conn_con *)ccmp->b_rptr;
3734 		cc->PRIM_type  = T_CONN_CON;
3735 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3736 		cc->RES_length = acc_ep->te_alen;
3737 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3738 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3739 		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3740 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3741 			    cc->RES_length);
3742 			cc->OPT_length = olen;
3743 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3744 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3745 			    cl_ep->te_credp);
3746 		} else {
3747 			cc->OPT_offset = 0;
3748 			cc->OPT_length = 0;
3749 		}
3750 		/*
3751 		 * Forward the credential in the packet so it can be picked up
3752 		 * at the higher layers for more complete credential processing
3753 		 */
3754 		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3755 	} else {
3756 		freemsg(respmp);
3757 		respmp = NULL;
3758 	}
3759 
3760 	/*
3761 	 * make connection linking
3762 	 * accepting and client endpoints
3763 	 * No need to increment references:
3764 	 *	on client: it should already have one from tip->ti_tep linkage.
3765 	 *	on acceptor: it should already have one from the table lookup.
3766 	 *
3767 	 * At this point both client and acceptor can't close. Set client
3768 	 * serializer to acceptor's.
3769 	 */
3770 	ASSERT(cl_ep->te_refcnt >= 2);
3771 	ASSERT(acc_ep->te_refcnt >= 2);
3772 	ASSERT(cl_ep->te_conp == NULL);
3773 	ASSERT(acc_ep->te_conp == NULL);
3774 	cl_ep->te_conp = acc_ep;
3775 	acc_ep->te_conp = cl_ep;
3776 	ASSERT(cl_ep->te_ser == tep->te_ser);
3777 	if (switch_client_serializer) {
		/*
		 * The client can be moved only if te_ser_count is 0; the
		 * check and the switch are done under te_ser_lock.
		 */
3778 		mutex_enter(&cl_ep->te_ser_lock);
3779 		if (cl_ep->te_ser_count > 0) {
3780 			switch_client_serializer = B_FALSE;
3781 			tl_serializer_noswitch++;
3782 		} else {
3783 			/*
3784 			 * Move client to the acceptor's serializer.
3785 			 */
3786 			tl_serializer_refhold(acc_ep->te_ser);
3787 			tl_serializer_refrele(cl_ep->te_ser);
3788 			cl_ep->te_ser = acc_ep->te_ser;
3789 		}
3790 		mutex_exit(&cl_ep->te_ser_lock);
3791 	}
3792 	if (!switch_client_serializer) {
3793 		/*
3794 		 * It is not possible to switch client to use acceptor's.
3795 		 * Move acceptor to client's serializer (which is the same as
3796 		 * listener's).
3797 		 */
3798 		tl_serializer_refhold(cl_ep->te_ser);
3799 		tl_serializer_refrele(acc_ep->te_ser);
3800 		acc_ep->te_ser = cl_ep->te_ser;
3801 	}
3802 
	/* The "outgoing connection pending" links are no longer needed. */
3803 	TL_REMOVE_PEER(cl_ep->te_oconp);
3804 	TL_REMOVE_PEER(acc_ep->te_oconp);
3805 
3806 	/*
3807 	 * remove endpoint from incoming connection
3808 	 * delete client from list of incoming connections
	 * (ti_tep is cleared first; the client stays referenced through
	 * the te_conp linkage made above)
3809 	 */
3810 	tip->ti_tep = NULL;
3811 	tl_freetip(tep, tip);
3812 	tl_ok_ack(wq, ackmp, prim);
3813 
3814 	/*
3815 	 * data blocks already linked in reallocb()
3816 	 */
3817 
3818 	/*
3819 	 * link queues so that I_SENDFD will work
3820 	 */
3821 	if (! IS_SOCKET(tep)) {
3822 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3823 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3824 	}
3825 
3826 	/*
3827 	 * send T_CONN_CON up on client side unless it was already
3828 	 * done (for a socket). In cases any data or ordrel req has been
3829 	 * queued make sure that the service procedure runs.
	 *
	 * The condition below is the exact negation of the one that built
	 * ccmp above, so ccmp is still NULL in the first branch and the
	 * freemsg() there is only a safeguard.
3830 	 */
3831 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3832 		enableok(cl_ep->te_wq);
3833 		TL_QENABLE(cl_ep);
3834 		if (ccmp != NULL)
3835 			freemsg(ccmp);
3836 	} else {
3837 		/*
3838 		 * change client state on TE_CONN_CON event
3839 		 */
3840 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3841 		putnext(cl_ep->te_rq, ccmp);
3842 	}
3843 
3844 	/* Mark both endpoints as accepted */
3845 	cl_ep->te_flag |= TL_ACCEPTED;
3846 	acc_ep->te_flag |= TL_ACCEPTED;
3847 
3848 	/*
3849 	 * Allow client and acceptor to close.
3850 	 */
3851 	tl_closeok(acc_ep);
3852 	if (client_noclose_set)
3853 		tl_closeok(cl_ep);
3854 }
3854 
3855 
3856 
3857 
3858 static void
3859 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3860 {
3861 	queue_t			*wq;
3862 	struct T_discon_req	*dr;
3863 	ssize_t			msz;
3864 	tl_endpt_t		*peer_tep = tep->te_conp;
3865 	tl_endpt_t		*srv_tep = tep->te_oconp;
3866 	tl_icon_t		*tip;
3867 	size_t			size;
3868 	mblk_t			*ackmp, *dimp, *respmp;
3869 	struct T_discon_ind	*di;
3870 	t_scalar_t		save_state, new_state;
3871 
3872 	if (tep->te_closing) {
3873 		freemsg(mp);
3874 		return;
3875 	}
3876 
3877 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3878 		TL_UNCONNECT(tep->te_conp);
3879 		peer_tep = NULL;
3880 	}
3881 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3882 		TL_UNCONNECT(tep->te_oconp);
3883 		srv_tep = NULL;
3884 	}
3885 
3886 	wq = tep->te_wq;
3887 
3888 	/*
3889 	 * preallocate memory for:
3890 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3891 	 *	==> known max T_ERROR_ACK
3892 	 * 2. for  T_DISCON_IND
3893 	 */
3894 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3895 	if (! ackmp) {
3896 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3897 		return;
3898 	}
3899 	/*
3900 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3901 	 * will be committed for T_DISCON_IND  later
3902 	 */
3903 
3904 	dr = (struct T_discon_req *)mp->b_rptr;
3905 	msz = MBLKL(mp);
3906 
3907 	/*
3908 	 * validate the state
3909 	 */
3910 	save_state = new_state = tep->te_state;
3911 	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3912 	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3913 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3914 		    SL_TRACE|SL_ERROR,
3915 		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
3916 		    tep->te_state));
3917 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3918 		freemsg(mp);
3919 		return;
3920 	}
3921 	/*
3922 	 * Defer committing the state change until it is determined if
3923 	 * the message will be queued with the tl_icon or not.
3924 	 */
3925 	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3926 
3927 	/* validate the message */
3928 	if (msz < sizeof (struct T_discon_req)) {
3929 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3930 		    "tl_discon_req:invalid message"));
3931 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3932 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3933 		freemsg(mp);
3934 		return;
3935 	}
3936 
3937 	/*
3938 	 * if server, then validate that client exists
3939 	 * by connection sequence number etc.
3940 	 */
3941 	if (tep->te_nicon > 0) { /* server */
3942 
3943 		/*
3944 		 * search server list for disconnect client
3945 		 */
3946 		tip = tl_icon_find(tep, dr->SEQ_number);
3947 		if (tip == NULL) {
3948 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
3949 			    SL_TRACE|SL_ERROR,
3950 			    "tl_discon_req:no disconnect endpoint"));
3951 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3952 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3953 			freemsg(mp);
3954 			return;
3955 		}
3956 		/*
3957 		 * If ti_tep is NULL the client has already closed. In this case
3958 		 * the code below will avoid any action on the client side.
3959 		 */
3960 
3961 		ASSERT(IMPLY(tip->ti_tep != NULL,
3962 		    tip->ti_tep->te_seqno == dr->SEQ_number));
3963 		peer_tep = tip->ti_tep;
3964 	}
3965 
3966 	/*
3967 	 * preallocate now for T_DISCON_IND
3968 	 * ack validity of request (T_OK_ACK) after memory committed
3969 	 */
3970 	size = sizeof (struct T_discon_ind);
3971 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
3972 		tl_memrecover(wq, mp, size);
3973 		freemsg(ackmp);
3974 		return;
3975 	}
3976 
3977 	/*
3978 	 * prepare message to ack validity of request
3979 	 */
3980 	if (tep->te_nicon == 0)
3981 		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3982 	else
3983 		if (tep->te_nicon == 1)
3984 			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3985 		else
3986 			new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3987 
3988 	/*
3989 	 * Flushing queues according to TPI. Using the old state.
3990 	 */
3991 	if ((tep->te_nicon <= 1) &&
3992 	    ((save_state == TS_DATA_XFER) ||
3993 	    (save_state == TS_WIND_ORDREL) ||
3994 	    (save_state == TS_WREQ_ORDREL)))
3995 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3996 
3997 	/* send T_OK_ACK up  */
3998 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3999 
4000 	/*
4001 	 * now do disconnect business
4002 	 */
4003 	if (tep->te_nicon > 0) { /* listener */
4004 		if (peer_tep != NULL && !peer_tep->te_closing) {
4005 			/*
4006 			 * disconnect incoming connect request pending to tep
4007 			 */
4008 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4009 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
4010 				    SL_TRACE|SL_ERROR,
4011 				    "tl_discon_req: reallocb failed"));
4012 				tep->te_state = new_state;
4013 				tl_merror(wq, respmp, ENOMEM);
4014 				return;
4015 			}
4016 			di = (struct T_discon_ind *)dimp->b_rptr;
4017 			di->SEQ_number = BADSEQNUM;
4018 			save_state = peer_tep->te_state;
4019 			peer_tep->te_state = TS_IDLE;
4020 
4021 			TL_REMOVE_PEER(peer_tep->te_oconp);
4022 			enableok(peer_tep->te_wq);
4023 			TL_QENABLE(peer_tep);
4024 		} else {
4025 			freemsg(respmp);
4026 			dimp = NULL;
4027 		}
4028 
4029 		/*
4030 		 * remove endpoint from incoming connection list
4031 		 * - remove disconnect client from list on server
4032 		 */
4033 		tl_freetip(tep, tip);
4034 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4035 		/*
4036 		 * disconnect an outgoing request pending from tep
4037 		 */
4038 
4039 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4040 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4041 			    SL_TRACE|SL_ERROR,
4042 			    "tl_discon_req: reallocb failed"));
4043 			tep->te_state = new_state;
4044 			tl_merror(wq, respmp, ENOMEM);
4045 			return;
4046 		}
4047 		di = (struct T_discon_ind *)dimp->b_rptr;
4048 		DB_TYPE(dimp) = M_PROTO;
4049 		di->PRIM_type  = T_DISCON_IND;
4050 		di->DISCON_reason = ECONNRESET;
4051 		di->SEQ_number = tep->te_seqno;
4052 
4053 		/*
4054 		 * If this is a socket the T_DISCON_IND is queued with
4055 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4056 		 * from the list of pending connections.
4057 		 * Note that when te_oconp is set the peer better have
4058 		 * a t_connind_t for the client.
4059 		 */
4060 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4061 			/*
4062 			 * No need to check that
4063 			 * ti_tep == NULL since the T_DISCON_IND
4064 			 * takes precedence over other queued
4065 			 * messages.
4066 			 */
4067 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4068 			peer_tep = NULL;
4069 			dimp = NULL;
4070 			/*
4071 			 * Can't clear te_oconp since tl_co_unconnect needs
4072 			 * it as a hint not to free the tep.
4073 			 * Keep the state unchanged since tl_conn_res inspects
4074 			 * it.
4075 			 */
4076 			new_state = tep->te_state;
4077 		} else {
4078 			/* Found - delete it */
4079 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4080 			if (tip != NULL) {
4081 				ASSERT(tep == tip->ti_tep);
4082 				save_state = peer_tep->te_state;
4083 				if (peer_tep->te_nicon == 1)
4084 					peer_tep->te_state =
4085 					    NEXTSTATE(TE_DISCON_IND2,
4086 					    peer_tep->te_state);
4087 				else
4088 					peer_tep->te_state =
4089 					    NEXTSTATE(TE_DISCON_IND3,
4090 					    peer_tep->te_state);
4091 				tl_freetip(peer_tep, tip);
4092 			}
4093 			ASSERT(tep->te_oconp != NULL);
4094 			TL_UNCONNECT(tep->te_oconp);
4095 		}
4096 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4097 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4098 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4099 			    SL_TRACE|SL_ERROR,
4100 			    "tl_discon_req: reallocb failed"));
4101 			tep->te_state = new_state;
4102 			tl_merror(wq, respmp, ENOMEM);
4103 			return;
4104 		}
4105 		di = (struct T_discon_ind *)dimp->b_rptr;
4106 		di->SEQ_number = BADSEQNUM;
4107 
4108 		save_state = peer_tep->te_state;
4109 		peer_tep->te_state = TS_IDLE;
4110 	} else {
4111 		/* Not connected */
4112 		tep->te_state = new_state;
4113 		freemsg(respmp);
4114 		return;
4115 	}
4116 
4117 	/* Commit state changes */
4118 	tep->te_state = new_state;
4119 
4120 	if (peer_tep == NULL) {
4121 		ASSERT(dimp == NULL);
4122 		goto done;
4123 	}
4124 	/*
4125 	 * Flush queues on peer before sending up
4126 	 * T_DISCON_IND according to TPI
4127 	 */
4128 
4129 	if ((save_state == TS_DATA_XFER) ||
4130 	    (save_state == TS_WIND_ORDREL) ||
4131 	    (save_state == TS_WREQ_ORDREL))
4132 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4133 
4134 	DB_TYPE(dimp) = M_PROTO;
4135 	di->PRIM_type  = T_DISCON_IND;
4136 	di->DISCON_reason = ECONNRESET;
4137 
4138 	/*
4139 	 * data blocks already linked into dimp by reallocb()
4140 	 */
4141 	/*
4142 	 * send indication message to peer user module
4143 	 */
4144 	ASSERT(dimp != NULL);
4145 	putnext(peer_tep->te_rq, dimp);
4146 done:
4147 	if (tep->te_conp) {	/* disconnect pointers if connected */
4148 		ASSERT(! peer_tep->te_closing);
4149 
4150 		/*
4151 		 * Messages may be queued on peer's write queue
4152 		 * waiting to be processed by its write service
4153 		 * procedure. Before the pointer to the peer transport
4154 		 * structure is set to NULL, qenable the peer's write
4155 		 * queue so that the queued up messages are processed.
4156 		 */
4157 		if ((save_state == TS_DATA_XFER) ||
4158 		    (save_state == TS_WIND_ORDREL) ||
4159 		    (save_state == TS_WREQ_ORDREL))
4160 			TL_QENABLE(peer_tep);
4161 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4162 		TL_UNCONNECT(peer_tep->te_conp);
4163 		if (! IS_SOCKET(tep)) {
4164 			/*
4165 			 * unlink the streams
4166 			 */
4167 			tep->te_wq->q_next = NULL;
4168 			peer_tep->te_wq->q_next = NULL;
4169 		}
4170 		TL_UNCONNECT(tep->te_conp);
4171 	}
4172 }
4173 
4174 
4175 static void
4176 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4177 {
4178 	queue_t			*wq;
4179 	size_t			ack_sz;
4180 	mblk_t			*ackmp;
4181 	struct T_addr_ack	*taa;
4182 
4183 	if (tep->te_closing) {
4184 		freemsg(mp);
4185 		return;
4186 	}
4187 
4188 	wq = tep->te_wq;
4189 
4190 	/*
4191 	 * Note: T_ADDR_REQ message has only PRIM_type field
4192 	 * so it is already validated earlier.
4193 	 */
4194 
4195 	if (IS_CLTS(tep) ||
4196 	    (tep->te_state > TS_WREQ_ORDREL) ||
4197 	    (tep->te_state < TS_DATA_XFER)) {
4198 		/*
4199 		 * Either connectionless or connection oriented but not
4200 		 * in connected data transfer state or half-closed states.
4201 		 */
4202 		ack_sz = sizeof (struct T_addr_ack);
4203 		if (tep->te_state >= TS_IDLE)
4204 			/* is bound */
4205 			ack_sz += tep->te_alen;
4206 		ackmp = reallocb(mp, ack_sz, 0);
4207 		if (ackmp == NULL) {
4208 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4209 			    SL_TRACE|SL_ERROR,
4210 			    "tl_addr_req: reallocb failed"));
4211 			tl_memrecover(wq, mp, ack_sz);
4212 			return;
4213 		}
4214 
4215 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4216 
4217 		bzero(taa, sizeof (struct T_addr_ack));
4218 
4219 		taa->PRIM_type = T_ADDR_ACK;
4220 		ackmp->b_datap->db_type = M_PCPROTO;
4221 		ackmp->b_wptr = (uchar_t *)&taa[1];
4222 
4223 		if (tep->te_state >= TS_IDLE) {
4224 			/* endpoint is bound */
4225 			taa->LOCADDR_length = tep->te_alen;
4226 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4227 
4228 			bcopy(tep->te_abuf, ackmp->b_wptr,
4229 			    tep->te_alen);
4230 			ackmp->b_wptr += tep->te_alen;
4231 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4232 		}
4233 
4234 		(void) qreply(wq, ackmp);
4235 	} else {
4236 		ASSERT(tep->te_state == TS_DATA_XFER ||
4237 		    tep->te_state == TS_WIND_ORDREL ||
4238 		    tep->te_state == TS_WREQ_ORDREL);
4239 		/* connection oriented in data transfer */
4240 		tl_connected_cots_addr_req(mp, tep);
4241 	}
4242 }
4243 
4244 
4245 static void
4246 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4247 {
4248 	tl_endpt_t		*peer_tep;
4249 	size_t			ack_sz;
4250 	mblk_t			*ackmp;
4251 	struct T_addr_ack	*taa;
4252 	uchar_t			*addr_startp;
4253 
4254 	if (tep->te_closing) {
4255 		freemsg(mp);
4256 		return;
4257 	}
4258 
4259 	ASSERT(tep->te_state >= TS_IDLE);
4260 
4261 	ack_sz = sizeof (struct T_addr_ack);
4262 	ack_sz += T_ALIGN(tep->te_alen);
4263 	peer_tep = tep->te_conp;
4264 	ack_sz += peer_tep->te_alen;
4265 
4266 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4267 	if (ackmp == NULL) {
4268 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4269 		    "tl_connected_cots_addr_req: reallocb failed"));
4270 		tl_memrecover(tep->te_wq, mp, ack_sz);
4271 		return;
4272 	}
4273 
4274 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4275 
4276 	/* endpoint is bound */
4277 	taa->LOCADDR_length = tep->te_alen;
4278 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4279 
4280 	addr_startp = (uchar_t *)&taa[1];
4281 
4282 	bcopy(tep->te_abuf, addr_startp,
4283 	    tep->te_alen);
4284 
4285 	taa->REMADDR_length = peer_tep->te_alen;
4286 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4287 	    taa->LOCADDR_length);
4288 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4289 	bcopy(peer_tep->te_abuf, addr_startp,
4290 	    peer_tep->te_alen);
4291 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4292 	    taa->REMADDR_offset + peer_tep->te_alen;
4293 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4294 
4295 	putnext(tep->te_rq, ackmp);
4296 }
4297 
4298 static void
4299 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4300 {
4301 	if (IS_CLTS(tep)) {
4302 		*ia = tl_clts_info_ack;
4303 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4304 	} else {
4305 		*ia = tl_cots_info_ack;
4306 		if (IS_COTSORD(tep))
4307 			ia->SERV_type = T_COTS_ORD;
4308 	}
4309 	ia->TIDU_size = tl_tidusz;
4310 	ia->CURRENT_state = tep->te_state;
4311 }
4312 
4313 /*
4314  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4315  * tl_wput.
4316  */
4317 static void
4318 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4319 {
4320 	mblk_t			*ackmp;
4321 	t_uscalar_t		cap_bits1;
4322 	struct T_capability_ack	*tcap;
4323 
4324 	if (tep->te_closing) {
4325 		freemsg(mp);
4326 		return;
4327 	}
4328 
4329 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4330 
4331 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4332 	    M_PCPROTO, T_CAPABILITY_ACK);
4333 	if (ackmp == NULL) {
4334 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4335 		    "tl_capability_req: reallocb failed"));
4336 		tl_memrecover(tep->te_wq, mp,
4337 		    sizeof (struct T_capability_ack));
4338 		return;
4339 	}
4340 
4341 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4342 	tcap->CAP_bits1 = 0;
4343 
4344 	if (cap_bits1 & TC1_INFO) {
4345 		tl_copy_info(&tcap->INFO_ack, tep);
4346 		tcap->CAP_bits1 |= TC1_INFO;
4347 	}
4348 
4349 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4350 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4351 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4352 	}
4353 
4354 	putnext(tep->te_rq, ackmp);
4355 }
4356 
4357 static void
4358 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4359 {
4360 	if (! tep->te_closing)
4361 		tl_info_req(mp, tep);
4362 	else
4363 		freemsg(mp);
4364 
4365 	tl_serializer_exit(tep);
4366 	tl_refrele(tep);
4367 }
4368 
4369 static void
4370 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4371 {
4372 	mblk_t *ackmp;
4373 
4374 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4375 	    M_PCPROTO, T_INFO_ACK);
4376 	if (ackmp == NULL) {
4377 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4378 		    "tl_info_req: reallocb failed"));
4379 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4380 		return;
4381 	}
4382 
4383 	/*
4384 	 * fill in T_INFO_ACK contents
4385 	 */
4386 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4387 
4388 	/*
4389 	 * send ack message
4390 	 */
4391 	putnext(tep->te_rq, ackmp);
4392 }
4393 
4394 /*
4395  * Handle M_DATA, T_data_req and T_optdata_req.
4396  * If this is a socket pass through T_optdata_req options unmodified.
4397  */
4398 static void
4399 tl_data(mblk_t *mp, tl_endpt_t *tep)
4400 {
4401 	queue_t			*wq = tep->te_wq;
4402 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4403 	ssize_t			msz = MBLKL(mp);
4404 	tl_endpt_t		*peer_tep;
4405 	queue_t			*peer_rq;
4406 	boolean_t		closing = tep->te_closing;
4407 
4408 	if (IS_CLTS(tep)) {
4409 		(void) (STRLOG(TL_ID, tep->te_minor, 2,
4410 		    SL_TRACE|SL_ERROR,
4411 		    "tl_wput:clts:unattached M_DATA"));
4412 		if (!closing) {
4413 			tl_merror(wq, mp, EPROTO);
4414 		} else {
4415 			freemsg(mp);
4416 		}
4417 		return;
4418 	}
4419 
4420 	/*
4421 	 * If the endpoint is closing it should still forward any data to the
4422 	 * peer (if it has one). If it is not allowed to forward it can just
4423 	 * free the message.
4424 	 */
4425 	if (closing &&
4426 	    (tep->te_state != TS_DATA_XFER) &&
4427 	    (tep->te_state != TS_WREQ_ORDREL)) {
4428 		freemsg(mp);
4429 		return;
4430 	}
4431 
4432 	if (DB_TYPE(mp) == M_PROTO) {
4433 		if (prim->type == T_DATA_REQ &&
4434 		    msz < sizeof (struct T_data_req)) {
4435 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4436 				SL_TRACE|SL_ERROR,
4437 				"tl_data:T_DATA_REQ:invalid message"));
4438 			if (!closing) {
4439 				tl_merror(wq, mp, EPROTO);
4440 			} else {
4441 				freemsg(mp);
4442 			}
4443 			return;
4444 		} else if (prim->type == T_OPTDATA_REQ &&
4445 		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4446 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4447 			    SL_TRACE|SL_ERROR,
4448 			    "tl_data:T_OPTDATA_REQ:invalid message"));
4449 			if (!closing) {
4450 				tl_merror(wq, mp, EPROTO);
4451 			} else {
4452 				freemsg(mp);
4453 			}
4454 			return;
4455 		}
4456 	}
4457 
4458 	/*
4459 	 * connection oriented provider
4460 	 */
4461 	switch (tep->te_state) {
4462 	case TS_IDLE:
4463 		/*
4464 		 * Other end not here - do nothing.
4465 		 */
4466 		freemsg(mp);
4467 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4468 		    "tl_data:cots with endpoint idle"));
4469 		return;
4470 
4471 	case TS_DATA_XFER:
4472 		/* valid states */
4473 		if (tep->te_conp != NULL)
4474 			break;
4475 
4476 		if (tep->te_oconp == NULL) {
4477 			if (!closing) {
4478 				tl_merror(wq, mp, EPROTO);
4479 			} else {
4480 				freemsg(mp);
4481 			}
4482 			return;
4483 		}
4484 		/*
4485 		 * For a socket the T_CONN_CON is sent early thus
4486 		 * the peer might not yet have accepted the connection.
4487 		 * If we are closing queue the packet with the T_CONN_IND.
4488 		 * Otherwise defer processing the packet until the peer
4489 		 * accepts the connection.
4490 		 * Note that the queue is noenabled when we go into this
4491 		 * state.
4492 		 */
4493 		if (!closing) {
4494 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4495 			    SL_TRACE|SL_ERROR,
4496 			    "tl_data: ocon"));
4497 			TL_PUTBQ(tep, mp);
4498 			return;
4499 		}
4500 		if (DB_TYPE(mp) == M_PROTO) {
4501 			if (msz < sizeof (t_scalar_t)) {
4502 				freemsg(mp);
4503 				return;
4504 			}
4505 			/* reuse message block - just change REQ to IND */
4506 			if (prim->type == T_DATA_REQ)
4507 				prim->type = T_DATA_IND;
4508 			else
4509 				prim->type = T_OPTDATA_IND;
4510 		}
4511 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4512 		return;
4513 
4514 	case TS_WREQ_ORDREL:
4515 		if (tep->te_conp == NULL) {
4516 			/*
4517 			 * Other end closed - generate discon_ind
4518 			 * with reason 0 to cause an EPIPE but no
4519 			 * read side error on AF_UNIX sockets.
4520 			 */
4521 			freemsg(mp);
4522 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4523 			    SL_TRACE|SL_ERROR,
4524 			    "tl_data: WREQ_ORDREL and no peer"));
4525 			tl_discon_ind(tep, 0);
4526 			return;
4527 		}
4528 		break;
4529 
4530 	default:
4531 		/* invalid state for event TE_DATA_REQ */
4532 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4533 		    "tl_data:cots:out of state"));
4534 		tl_merror(wq, mp, EPROTO);
4535 		return;
4536 	}
4537 	/*
4538 	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4539 	 * (State stays same on this event)
4540 	 */
4541 
4542 	/*
4543 	 * get connected endpoint
4544 	 */
4545 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4546 		freemsg(mp);
4547 		/* Peer closed */
4548 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4549 		    "tl_data: peer gone"));
4550 		return;
4551 	}
4552 
4553 	ASSERT(tep->te_serializer == peer_tep->te_serializer);
4554 	peer_rq = peer_tep->te_rq;
4555 
4556 	/*
4557 	 * Put it back if flow controlled
4558 	 * Note: Messages already on queue when we are closing is bounded
4559 	 * so we can ignore flow control.
4560 	 */
4561 	if (!canputnext(peer_rq) && !closing) {
4562 		TL_PUTBQ(tep, mp);
4563 		return;
4564 	}
4565 
4566 	/*
4567 	 * validate peer state
4568 	 */
4569 	switch (peer_tep->te_state) {
4570 	case TS_DATA_XFER:
4571 	case TS_WIND_ORDREL:
4572 		/* valid states */
4573 		break;
4574 	default:
4575 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4576 		    "tl_data:rx side:invalid state"));
4577 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4578 		return;
4579 	}
4580 	if (DB_TYPE(mp) == M_PROTO) {
4581 		/* reuse message block - just change REQ to IND */
4582 		if (prim->type == T_DATA_REQ)
4583 			prim->type = T_DATA_IND;
4584 		else
4585 			prim->type = T_OPTDATA_IND;
4586 	}
4587 	/*
4588 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4589 	 * (peer state stays same on this event)
4590 	 */
4591 	/*
4592 	 * send data to connected peer
4593 	 */
4594 	putnext(peer_rq, mp);
4595 }
4596 
4597 
4598 
4599 static void
4600 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4601 {
4602 	queue_t			*wq = tep->te_wq;
4603 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4604 	ssize_t			msz = MBLKL(mp);
4605 	tl_endpt_t		*peer_tep;
4606 	queue_t			*peer_rq;
4607 	boolean_t		closing = tep->te_closing;
4608 
4609 	if (msz < sizeof (struct T_exdata_req)) {
4610 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4611 		    "tl_exdata:invalid message"));
4612 		if (!closing) {
4613 			tl_merror(wq, mp, EPROTO);
4614 		} else {
4615 			freemsg(mp);
4616 		}
4617 		return;
4618 	}
4619 
4620 	/*
4621 	 * If the endpoint is closing it should still forward any data to the
4622 	 * peer (if it has one). If it is not allowed to forward it can just
4623 	 * free the message.
4624 	 */
4625 	if (closing &&
4626 	    (tep->te_state != TS_DATA_XFER) &&
4627 	    (tep->te_state != TS_WREQ_ORDREL)) {
4628 		freemsg(mp);
4629 		return;
4630 	}
4631 
4632 	/*
4633 	 * validate state
4634 	 */
4635 	switch (tep->te_state) {
4636 	case TS_IDLE:
4637 		/*
4638 		 * Other end not here - do nothing.
4639 		 */
4640 		freemsg(mp);
4641 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4642 		    "tl_exdata:cots with endpoint idle"));
4643 		return;
4644 
4645 	case TS_DATA_XFER:
4646 		/* valid states */
4647 		if (tep->te_conp != NULL)
4648 			break;
4649 
4650 		if (tep->te_oconp == NULL) {
4651 			if (!closing) {
4652 				tl_merror(wq, mp, EPROTO);
4653 			} else {
4654 				freemsg(mp);
4655 			}
4656 			return;
4657 		}
4658 		/*
4659 		 * For a socket the T_CONN_CON is sent early thus
4660 		 * the peer might not yet have accepted the connection.
4661 		 * If we are closing queue the packet with the T_CONN_IND.
4662 		 * Otherwise defer processing the packet until the peer
4663 		 * accepts the connection.
4664 		 * Note that the queue is noenabled when we go into this
4665 		 * state.
4666 		 */
4667 		if (!closing) {
4668 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4669 			    SL_TRACE|SL_ERROR,
4670 			    "tl_exdata: ocon"));
4671 			TL_PUTBQ(tep, mp);
4672 			return;
4673 		}
4674 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4675 		    "tl_exdata: closing socket ocon"));
4676 		prim->type = T_EXDATA_IND;
4677 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4678 		return;
4679 
4680 	case TS_WREQ_ORDREL:
4681 		if (tep->te_conp == NULL) {
4682 			/*
4683 			 * Other end closed - generate discon_ind
4684 			 * with reason 0 to cause an EPIPE but no
4685 			 * read side error on AF_UNIX sockets.
4686 			 */
4687 			freemsg(mp);
4688 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4689 			    SL_TRACE|SL_ERROR,
4690 			    "tl_exdata: WREQ_ORDREL and no peer"));
4691 			tl_discon_ind(tep, 0);
4692 			return;
4693 		}
4694 		break;
4695 
4696 	default:
4697 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4698 		    SL_TRACE|SL_ERROR,
4699 		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4700 		    tep->te_state));
4701 		tl_merror(wq, mp, EPROTO);
4702 		return;
4703 	}
4704 	/*
4705 	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4706 	 * (state stays same on this event)
4707 	 */
4708 
4709 	/*
4710 	 * get connected endpoint
4711 	 */
4712 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4713 		freemsg(mp);
4714 		/* Peer closed */
4715 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4716 		    "tl_exdata: peer gone"));
4717 		return;
4718 	}
4719 
4720 	peer_rq = peer_tep->te_rq;
4721 
4722 	/*
4723 	 * Put it back if flow controlled
4724 	 * Note: Messages already on queue when we are closing is bounded
4725 	 * so we can ignore flow control.
4726 	 */
4727 	if (!canputnext(peer_rq) && !closing) {
4728 		TL_PUTBQ(tep, mp);
4729 		return;
4730 	}
4731 
4732 	/*
4733 	 * validate state on peer
4734 	 */
4735 	switch (peer_tep->te_state) {
4736 	case TS_DATA_XFER:
4737 	case TS_WIND_ORDREL:
4738 		/* valid states */
4739 		break;
4740 	default:
4741 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4742 		    "tl_exdata:rx side:invalid state"));
4743 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4744 		return;
4745 	}
4746 	/*
4747 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4748 	 * (peer state stays same on this event)
4749 	 */
4750 	/*
4751 	 * reuse message block
4752 	 */
4753 	prim->type = T_EXDATA_IND;
4754 
4755 	/*
4756 	 * send data to connected peer
4757 	 */
4758 	putnext(peer_rq, mp);
4759 }
4760 
4761 
4762 
4763 static void
4764 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4765 {
4766 	queue_t			*wq =  tep->te_wq;
4767 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4768 	ssize_t			msz = MBLKL(mp);
4769 	tl_endpt_t		*peer_tep;
4770 	queue_t			*peer_rq;
4771 	boolean_t		closing = tep->te_closing;
4772 
4773 	if (msz < sizeof (struct T_ordrel_req)) {
4774 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4775 		    "tl_ordrel:invalid message"));
4776 		if (!closing) {
4777 			tl_merror(wq, mp, EPROTO);
4778 		} else {
4779 			freemsg(mp);
4780 		}
4781 		return;
4782 	}
4783 
4784 	/*
4785 	 * validate state
4786 	 */
4787 	switch (tep->te_state) {
4788 	case TS_DATA_XFER:
4789 	case TS_WREQ_ORDREL:
4790 		/* valid states */
4791 		if (tep->te_conp != NULL)
4792 			break;
4793 
4794 		if (tep->te_oconp == NULL)
4795 			break;
4796 
4797 		/*
4798 		 * For a socket the T_CONN_CON is sent early thus
4799 		 * the peer might not yet have accepted the connection.
4800 		 * If we are closing queue the packet with the T_CONN_IND.
4801 		 * Otherwise defer processing the packet until the peer
4802 		 * accepts the connection.
4803 		 * Note that the queue is noenabled when we go into this
4804 		 * state.
4805 		 */
4806 		if (!closing) {
4807 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4808 			    SL_TRACE|SL_ERROR,
4809 			    "tl_ordlrel: ocon"));
4810 			TL_PUTBQ(tep, mp);
4811 			return;
4812 		}
4813 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4814 		    "tl_ordlrel: closing socket ocon"));
4815 		prim->type = T_ORDREL_IND;
4816 		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4817 		return;
4818 
4819 	default:
4820 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4821 		    SL_TRACE|SL_ERROR,
4822 		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4823 		    tep->te_state));
4824 		if (!closing) {
4825 			tl_merror(wq, mp, EPROTO);
4826 		} else {
4827 			freemsg(mp);
4828 		}
4829 		return;
4830 	}
4831 	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4832 
4833 	/*
4834 	 * get connected endpoint
4835 	 */
4836 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4837 		/* Peer closed */
4838 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4839 		    "tl_ordrel: peer gone"));
4840 		freemsg(mp);
4841 		return;
4842 	}
4843 
4844 	peer_rq = peer_tep->te_rq;
4845 
4846 	/*
4847 	 * Put it back if flow controlled except when we are closing.
4848 	 * Note: Messages already on queue when we are closing is bounded
4849 	 * so we can ignore flow control.
4850 	 */
4851 	if (! canputnext(peer_rq) && !closing) {
4852 		TL_PUTBQ(tep, mp);
4853 		return;
4854 	}
4855 
4856 	/*
4857 	 * validate state on peer
4858 	 */
4859 	switch (peer_tep->te_state) {
4860 	case TS_DATA_XFER:
4861 	case TS_WIND_ORDREL:
4862 		/* valid states */
4863 		break;
4864 	default:
4865 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4866 		    "tl_ordrel:rx side:invalid state"));
4867 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4868 		return;
4869 	}
4870 	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4871 
4872 	/*
4873 	 * reuse message block
4874 	 */
4875 	prim->type = T_ORDREL_IND;
4876 	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4877 	    "tl_ordrel: send ordrel_ind"));
4878 
4879 	/*
4880 	 * send data to connected peer
4881 	 */
4882 	putnext(peer_rq, mp);
4883 }
4884 
4885 
4886 /*
4887  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4888  */
4889 static void
4890 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4891 {
4892 	size_t			err_sz;
4893 	tl_endpt_t		*tep;
4894 	struct T_unitdata_req	*udreq;
4895 	mblk_t			*err_mp;
4896 	t_scalar_t		alen;
4897 	t_scalar_t		olen;
4898 	struct T_uderror_ind	*uderr;
4899 	uchar_t			*addr_startp;
4900 
4901 	err_sz = sizeof (struct T_uderror_ind);
4902 	tep = (tl_endpt_t *)wq->q_ptr;
4903 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4904 	alen = udreq->DEST_length;
4905 	olen = udreq->OPT_length;
4906 
4907 	if (alen > 0)
4908 		err_sz = T_ALIGN(err_sz + alen);
4909 	if (olen > 0)
4910 		err_sz += olen;
4911 
4912 	err_mp = allocb(err_sz, BPRI_MED);
4913 	if (! err_mp) {
4914 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4915 		    "tl_uderr:allocb failure"));
4916 		/*
4917 		 * Note: no rollback of state needed as it does
4918 		 * not change in connectionless transport
4919 		 */
4920 		tl_memrecover(wq, mp, err_sz);
4921 		return;
4922 	}
4923 
4924 	DB_TYPE(err_mp) = M_PROTO;
4925 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
4926 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4927 	uderr->PRIM_type = T_UDERROR_IND;
4928 	uderr->ERROR_type = err;
4929 	uderr->DEST_length = alen;
4930 	uderr->OPT_length = olen;
4931 	if (alen <= 0) {
4932 		uderr->DEST_offset = 0;
4933 	} else {
4934 		uderr->DEST_offset =
4935 		    (t_scalar_t)sizeof (struct T_uderror_ind);
4936 		addr_startp  = mp->b_rptr + udreq->DEST_offset;
4937 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4938 		    (size_t)alen);
4939 	}
4940 	if (olen <= 0) {
4941 		uderr->OPT_offset = 0;
4942 	} else {
4943 		uderr->OPT_offset =
4944 		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4945 		    uderr->DEST_length);
4946 		addr_startp  = mp->b_rptr + udreq->OPT_offset;
4947 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4948 		    (size_t)olen);
4949 	}
4950 	freemsg(mp);
4951 
4952 	/*
4953 	 * send indication message
4954 	 */
4955 	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4956 
4957 	qreply(wq, err_mp);
4958 }
4959 
4960 static void
4961 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4962 {
4963 	queue_t *wq = tep->te_wq;
4964 
4965 	if (!tep->te_closing && (wq->q_first != NULL)) {
4966 		TL_PUTQ(tep, mp);
4967 	} else if (tep->te_rq != NULL)
4968 		tl_unitdata(mp, tep);
4969 	else
4970 		freemsg(mp);
4971 
4972 	tl_serializer_exit(tep);
4973 	tl_refrele(tep);
4974 }
4975 
4976 /*
4977  * Handle T_unitdata_req.
4978  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4979  * If this is a socket pass through options unmodified.
4980  */
4981 static void
4982 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4983 {
4984 	queue_t			*wq = tep->te_wq;
4985 	soux_addr_t		ux_addr;
4986 	tl_addr_t		destaddr;
4987 	uchar_t			*addr_startp;
4988 	tl_endpt_t		*peer_tep;
4989 	struct T_unitdata_ind	*udind;
4990 	struct T_unitdata_req	*udreq;
4991 	ssize_t			msz, ui_sz;
4992 	t_scalar_t		alen, aoff, olen, ooff;
4993 	t_scalar_t		oldolen = 0;
4994 	cred_t			*cr = NULL;
4995 	pid_t			cpid;
4996 
4997 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4998 	msz = MBLKL(mp);
4999 
5000 	/*
5001 	 * validate the state
5002 	 */
5003 	if (tep->te_state != TS_IDLE) {
5004 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
5005 		    SL_TRACE|SL_ERROR,
5006 		    "tl_wput:T_CONN_REQ:out of state"));
5007 		tl_merror(wq, mp, EPROTO);
5008 		return;
5009 	}
5010 	/*
5011 	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5012 	 * (state does not change on this event)
5013 	 */
5014 
5015 	/*
5016 	 * validate the message
5017 	 * Note: dereference fields in struct inside message only
5018 	 * after validating the message length.
5019 	 */
5020 	if (msz < sizeof (struct T_unitdata_req)) {
5021 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5022 		    "tl_unitdata:invalid message length"));
5023 		tl_merror(wq, mp, EINVAL);
5024 		return;
5025 	}
5026 	alen = udreq->DEST_length;
5027 	aoff = udreq->DEST_offset;
5028 	oldolen = olen = udreq->OPT_length;
5029 	ooff = udreq->OPT_offset;
5030 	if (olen == 0)
5031 		ooff = 0;
5032 
5033 	if (IS_SOCKET(tep)) {
5034 		if ((alen != TL_SOUX_ADDRLEN) ||
5035 		    (aoff < 0) ||
5036 		    (aoff + alen > msz) ||
5037 		    (olen < 0) || (ooff < 0) ||
5038 		    ((olen > 0) && ((ooff + olen) > msz))) {
5039 			(void) (STRLOG(TL_ID, tep->te_minor,
5040 			    1, SL_TRACE|SL_ERROR,
5041 			    "tl_unitdata_req: invalid socket addr "
5042 			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5043 			    (int)msz, alen, aoff, olen, ooff));
5044 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5045 			return;
5046 		}
5047 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5048 
5049 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5050 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5051 			(void) (STRLOG(TL_ID, tep->te_minor,
5052 			    1, SL_TRACE|SL_ERROR,
5053 			    "tl_conn_req: invalid socket magic"));
5054 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5055 			return;
5056 		}
5057 	} else {
5058 		if ((alen < 0) ||
5059 		    (aoff < 0) ||
5060 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5061 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5062 		    ((aoff + alen) < 0) ||
5063 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5064 		    (olen < 0) ||
5065 		    (ooff < 0) ||
5066 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5067 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5068 				    SL_TRACE|SL_ERROR,
5069 				    "tl_unitdata:invalid unit data message"));
5070 			tl_merror(wq, mp, EINVAL);
5071 			return;
5072 		}
5073 	}
5074 
5075 	/* Options not supported unless it's a socket */
5076 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5077 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5078 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5079 		tl_uderr(wq, mp, EPROTO);
5080 		return;
5081 	}
5082 #ifdef DEBUG
5083 	/*
5084 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5085 	 * if (! assertion)
5086 	 *	log warning;
5087 	 */
5088 	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5089 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5090 		    "tl_unitdata:addr overlaps TPI message"));
5091 	}
5092 #endif
5093 	/*
5094 	 * get destination endpoint
5095 	 */
5096 	destaddr.ta_alen = alen;
5097 	destaddr.ta_abuf = mp->b_rptr + aoff;
5098 	destaddr.ta_zoneid = tep->te_zoneid;
5099 
5100 	/*
5101 	 * Check whether the destination is the same that was used previously
5102 	 * and the destination endpoint is in the right state. If something is
5103 	 * wrong, find destination again and cache it.
5104 	 */
5105 	peer_tep = tep->te_lastep;
5106 
5107 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5108 	    (peer_tep->te_state != TS_IDLE) ||
5109 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5110 		/*
5111 		 * Not the same as cached destination , need to find the right
5112 		 * destination.
5113 		 */
5114 		peer_tep = (IS_SOCKET(tep) ?
5115 		    tl_sock_find_peer(tep, &ux_addr) :
5116 		    tl_find_peer(tep, &destaddr));
5117 
5118 		if (peer_tep == NULL) {
5119 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5120 			    SL_TRACE|SL_ERROR,
5121 			    "tl_unitdata:no one at destination address"));
5122 			tl_uderr(wq, mp, ECONNRESET);
5123 			return;
5124 		}
5125 
5126 		/*
5127 		 * Cache the new peer.
5128 		 */
5129 		if (tep->te_lastep != NULL)
5130 			tl_refrele(tep->te_lastep);
5131 
5132 		tep->te_lastep = peer_tep;
5133 	}
5134 
5135 	if (peer_tep->te_state != TS_IDLE) {
5136 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5137 		    "tl_unitdata:provider in invalid state"));
5138 		tl_uderr(wq, mp, EPROTO);
5139 		return;
5140 	}
5141 
5142 	ASSERT(peer_tep->te_rq != NULL);
5143 
5144 	/*
5145 	 * Put it back if flow controlled except when we are closing.
5146 	 * Note: Messages already on queue when we are closing is bounded
5147 	 * so we can ignore flow control.
5148 	 */
5149 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5150 		/* record what we are flow controlled on */
5151 		if (tep->te_flowq != NULL) {
5152 			list_remove(&tep->te_flowq->te_flowlist, tep);
5153 		}
5154 		list_insert_head(&peer_tep->te_flowlist, tep);
5155 		tep->te_flowq = peer_tep;
5156 		TL_PUTBQ(tep, mp);
5157 		return;
5158 	}
5159 	/*
5160 	 * prepare indication message
5161 	 */
5162 
5163 	/*
5164 	 * calculate length of message
5165 	 */
5166 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5167 		cr = msg_getcred(mp, &cpid);
5168 		ASSERT(cr != NULL);
5169 
5170 		if (peer_tep->te_flag & TL_SETCRED) {
5171 			ASSERT(olen == 0);
5172 			olen = (t_scalar_t)sizeof (struct opthdr) +
5173 			    OPTLEN(sizeof (tl_credopt_t));
5174 						/* 1 option only */
5175 		} else if (peer_tep->te_flag & TL_SETUCRED) {
5176 			ASSERT(olen == 0);
5177 			olen = (t_scalar_t)sizeof (struct opthdr) +
5178 			    OPTLEN(ucredminsize(cr));
5179 						/* 1 option only */
5180 		} else {
5181 			/* Possibly more than one option */
5182 			olen += (t_scalar_t)sizeof (struct T_opthdr) +
5183 			    OPTLEN(ucredminsize(cr));
5184 		}
5185 	}
5186 
5187 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5188 	    olen;
5189 	/*
5190 	 * If the unitdata_ind fits and we are not adding options
5191 	 * reuse the udreq mblk.
5192 	 */
5193 	if (msz >= ui_sz && alen >= tep->te_alen &&
5194 	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5195 		/*
5196 		 * Reuse the original mblk. Leave options in place.
5197 		 */
5198 		udind =  (struct T_unitdata_ind *)mp->b_rptr;
5199 		udind->PRIM_type = T_UNITDATA_IND;
5200 		udind->SRC_length = tep->te_alen;
5201 		addr_startp = mp->b_rptr + udind->SRC_offset;
5202 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5203 	} else {
5204 		/* Allocate a new T_unidata_ind message */
5205 		mblk_t *ui_mp;
5206 
5207 		ui_mp = allocb(ui_sz, BPRI_MED);
5208 		if (! ui_mp) {
5209 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5210 			    "tl_unitdata:allocb failure:message queued"));
5211 			tl_memrecover(wq, mp, ui_sz);
5212 			return;
5213 		}
5214 
5215 		/*
5216 		 * fill in T_UNITDATA_IND contents
5217 		 */
5218 		DB_TYPE(ui_mp) = M_PROTO;
5219 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5220 		udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5221 		udind->PRIM_type = T_UNITDATA_IND;
5222 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5223 		udind->SRC_length = tep->te_alen;
5224 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5225 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5226 		udind->OPT_offset =
5227 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5228 		udind->OPT_length = olen;
5229 		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5230 
5231 			if (oldolen != 0) {
5232 				bcopy((void *)((uintptr_t)udreq + ooff),
5233 				    (void *)((uintptr_t)udind +
5234 				    udind->OPT_offset),
5235 				    oldolen);
5236 			}
5237 			ASSERT(cr != NULL);
5238 
5239 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5240 			    oldolen, cr, cpid,
5241 			    peer_tep->te_flag, peer_tep->te_credp);
5242 		} else {
5243 			bcopy((void *)((uintptr_t)udreq + ooff),
5244 			    (void *)((uintptr_t)udind + udind->OPT_offset),
5245 			    olen);
5246 		}
5247 
5248 		/*
5249 		 * relink data blocks from mp to ui_mp
5250 		 */
5251 		ui_mp->b_cont = mp->b_cont;
5252 		freeb(mp);
5253 		mp = ui_mp;
5254 	}
5255 	/*
5256 	 * send indication message
5257 	 */
5258 	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5259 	putnext(peer_tep->te_rq, mp);
5260 }
5261 
5262 
5263 
5264 /*
5265  * Check if a given addr is in use.
5266  * Endpoint ptr returned or NULL if not found.
5267  * The name space is separate for each mode. This implies that
5268  * sockets get their own name space.
5269  */
5270 static tl_endpt_t *
5271 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5272 {
5273 	tl_endpt_t *peer_tep = NULL;
5274 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5275 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5276 
5277 	ASSERT(! IS_SOCKET(tep));
5278 
5279 	ASSERT(ap != NULL && ap->ta_alen > 0);
5280 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5281 	ASSERT(ap->ta_abuf != NULL);
5282 	ASSERT(EQUIV(rc == 0, peer_tep != NULL));
5283 	ASSERT(IMPLY(rc == 0,
5284 	    (tep->te_zoneid == peer_tep->te_zoneid) &&
5285 	    (tep->te_transport == peer_tep->te_transport)));
5286 
5287 	if ((rc == 0) && (peer_tep->te_closing)) {
5288 		tl_refrele(peer_tep);
5289 		peer_tep = NULL;
5290 	}
5291 
5292 	return (peer_tep);
5293 }
5294 
5295 /*
5296  * Find peer for a socket based on unix domain address.
5297  * For implicit addresses our peer can be found by minor number in ai hash. For
5298  * explicit binds we look vnode address at addr_hash.
5299  */
5300 static tl_endpt_t *
5301 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5302 {
5303 	tl_endpt_t *peer_tep = NULL;
5304 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5305 	    tep->te_aihash : tep->te_addrhash;
5306 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5307 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5308 
5309 	ASSERT(IS_SOCKET(tep));
5310 	ASSERT(EQUIV(rc == 0, peer_tep != NULL));
5311 	ASSERT(IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport)));
5312 
5313 	if (peer_tep != NULL) {
5314 		/* Don't attempt to use closing peer. */
5315 		if (peer_tep->te_closing)
5316 			goto errout;
5317 
5318 		/*
5319 		 * Cross-zone unix sockets are permitted, but for Trusted
5320 		 * Extensions only, the "server" for these must be in the
5321 		 * global zone.
5322 		 */
5323 		if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5324 		    is_system_labeled() &&
5325 		    (peer_tep->te_zoneid != GLOBAL_ZONEID))
5326 			goto errout;
5327 	}
5328 
5329 	return (peer_tep);
5330 
5331 errout:
5332 	tl_refrele(peer_tep);
5333 	return (NULL);
5334 }
5335 
5336 /*
5337  * Generate a free addr and return it in struct pointed by ap
5338  * but allocating space for address buffer.
5339  * The generated address will be at least 4 bytes long and, if req->ta_alen
5340  * exceeds 4 bytes, be req->ta_alen bytes long.
5341  *
5342  * If address is found it will be inserted in the hash.
5343  *
5344  * If req->ta_alen is larger than the default alen (4 bytes) the last
5345  * alen-4 bytes will always be the same as in req.
5346  *
5347  * Return 0 for failure.
5348  * Return non-zero for success.
5349  */
5350 static boolean_t
5351 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5352 {
5353 	t_scalar_t	alen;
5354 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5355 
5356 	ASSERT(tep->te_hash_hndl != NULL);
5357 	ASSERT(! IS_SOCKET(tep));
5358 
5359 	if (tep->te_hash_hndl == NULL)
5360 		return (B_FALSE);
5361 
5362 	/*
5363 	 * check if default addr is in use
5364 	 * if it is - bump it and try again
5365 	 */
5366 	if (req == NULL) {
5367 		alen = sizeof (uint32_t);
5368 	} else {
5369 		alen = max(req->ta_alen, sizeof (uint32_t));
5370 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5371 	}
5372 
5373 	if (tep->te_alen < alen) {
5374 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5375 
5376 		/*
5377 		 * Not enough space in tep->ta_ap to hold the address,
5378 		 * allocate a bigger space.
5379 		 */
5380 		if (abuf == NULL)
5381 			return (B_FALSE);
5382 
5383 		if (tep->te_alen > 0)
5384 			kmem_free(tep->te_abuf, tep->te_alen);
5385 
5386 		tep->te_alen = alen;
5387 		tep->te_abuf = abuf;
5388 	}
5389 
5390 	/* Copy in the address in req */
5391 	if (req != NULL) {
5392 		ASSERT(alen >= req->ta_alen);
5393 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5394 	}
5395 
5396 	/*
5397 	 * First try minor number then try default addresses.
5398 	 */
5399 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5400 
5401 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5402 		if (mod_hash_insert_reserve(tep->te_addrhash,
5403 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5404 		    tep->te_hash_hndl) == 0) {
5405 			/*
5406 			 * found free address
5407 			 */
5408 			tep->te_flag |= TL_ADDRHASHED;
5409 			tep->te_hash_hndl = NULL;
5410 
5411 			return (B_TRUE); /* successful return */
5412 		}
5413 		/*
5414 		 * Use default address.
5415 		 */
5416 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5417 		atomic_add_32(&tep->te_defaddr, 1);
5418 	}
5419 
5420 	/*
5421 	 * Failed to find anything.
5422 	 */
5423 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5424 	    "tl_get_any_addr:looped 2^32 times"));
5425 	return (B_FALSE);
5426 }
5427 
5428 /*
5429  * reallocb + set r/w ptrs to reflect size.
5430  */
5431 static mblk_t *
5432 tl_resizemp(mblk_t *mp, ssize_t new_size)
5433 {
5434 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5435 		return (NULL);
5436 
5437 	mp->b_rptr = DB_BASE(mp);
5438 	mp->b_wptr = mp->b_rptr + new_size;
5439 	return (mp);
5440 }
5441 
5442 static void
5443 tl_cl_backenable(tl_endpt_t *tep)
5444 {
5445 	list_t *l = &tep->te_flowlist;
5446 	tl_endpt_t *elp;
5447 
5448 	ASSERT(IS_CLTS(tep));
5449 
5450 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5451 		ASSERT(tep->te_ser == elp->te_ser);
5452 		ASSERT(elp->te_flowq == tep);
5453 		if (! elp->te_closing)
5454 			TL_QENABLE(elp);
5455 		elp->te_flowq = NULL;
5456 		list_remove(l, elp);
5457 	}
5458 }
5459 
5460 /*
5461  * Unconnect endpoints.
5462  */
5463 static void
5464 tl_co_unconnect(tl_endpt_t *tep)
5465 {
5466 	tl_endpt_t	*peer_tep = tep->te_conp;
5467 	tl_endpt_t	*srv_tep = tep->te_oconp;
5468 	list_t		*l;
5469 	tl_icon_t  	*tip;
5470 	tl_endpt_t	*cl_tep;
5471 	mblk_t		*d_mp;
5472 
5473 	ASSERT(IS_COTS(tep));
5474 	/*
5475 	 * If our peer is closing, don't use it.
5476 	 */
5477 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5478 		TL_UNCONNECT(tep->te_conp);
5479 		peer_tep = NULL;
5480 	}
5481 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5482 		TL_UNCONNECT(tep->te_oconp);
5483 		srv_tep = NULL;
5484 	}
5485 
5486 	if (tep->te_nicon > 0) {
5487 		l = &tep->te_iconp;
5488 		/*
5489 		 * If incoming requests pending, change state
5490 		 * of clients on disconnect ind event and send
5491 		 * discon_ind pdu to modules above them
5492 		 * for server: all clients get disconnect
5493 		 */
5494 
5495 		while (tep->te_nicon > 0) {
5496 			tip    = list_head(l);
5497 			cl_tep = tip->ti_tep;
5498 
5499 			if (cl_tep == NULL) {
5500 				tl_freetip(tep, tip);
5501 				continue;
5502 			}
5503 
5504 			if (cl_tep->te_oconp != NULL) {
5505 				ASSERT(cl_tep != cl_tep->te_oconp);
5506 				TL_UNCONNECT(cl_tep->te_oconp);
5507 			}
5508 
5509 			if (cl_tep->te_closing) {
5510 				tl_freetip(tep, tip);
5511 				continue;
5512 			}
5513 
5514 			enableok(cl_tep->te_wq);
5515 			TL_QENABLE(cl_tep);
5516 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5517 			if (d_mp != NULL) {
5518 				cl_tep->te_state = TS_IDLE;
5519 				putnext(cl_tep->te_rq, d_mp);
5520 			} else {
5521 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5522 				    SL_TRACE|SL_ERROR,
5523 				    "tl_co_unconnect:icmng: "
5524 				    "allocb failure"));
5525 			}
5526 			tl_freetip(tep, tip);
5527 		}
5528 	} else if (srv_tep != NULL) {
5529 		/*
5530 		 * If outgoing request pending, change state
5531 		 * of server on discon ind event
5532 		 */
5533 
5534 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5535 		    IS_COTSORD(srv_tep) &&
5536 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5537 			/*
5538 			 * Queue ordrel_ind for server to be picked up
5539 			 * when the connection is accepted.
5540 			 */
5541 			d_mp = tl_ordrel_ind_alloc();
5542 		} else {
5543 			/*
5544 			 * send discon_ind to server
5545 			 */
5546 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5547 		}
5548 		if (d_mp == NULL) {
5549 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5550 			    SL_TRACE|SL_ERROR,
5551 			    "tl_co_unconnect:outgoing:allocb failure"));
5552 			TL_UNCONNECT(tep->te_oconp);
5553 			goto discon_peer;
5554 		}
5555 
5556 		/*
5557 		 * If this is a socket the T_DISCON_IND is queued with
5558 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5559 		 * from the list of pending connections.
5560 		 * Note that when te_oconp is set the peer better have
5561 		 * a t_connind_t for the client.
5562 		 */
5563 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5564 			/*
5565 			 * Queue the disconnection message.
5566 			 */
5567 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5568 		} else {
5569 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5570 			if (tip == NULL) {
5571 				freemsg(d_mp);
5572 			} else {
5573 				ASSERT(tep == tip->ti_tep);
5574 				ASSERT(tep->te_ser == srv_tep->te_ser);
5575 				/*
5576 				 * Delete tip from the server list.
5577 				 */
5578 				if (srv_tep->te_nicon == 1) {
5579 					srv_tep->te_state =
5580 					    NEXTSTATE(TE_DISCON_IND2,
5581 					    srv_tep->te_state);
5582 				} else {
5583 					srv_tep->te_state =
5584 					    NEXTSTATE(TE_DISCON_IND3,
5585 					    srv_tep->te_state);
5586 				}
5587 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5588 				    T_DISCON_IND);
5589 				putnext(srv_tep->te_rq, d_mp);
5590 				tl_freetip(srv_tep, tip);
5591 			}
5592 			TL_UNCONNECT(tep->te_oconp);
5593 			srv_tep = NULL;
5594 		}
5595 	} else if (peer_tep != NULL) {
5596 		/*
5597 		 * unconnect existing connection
5598 		 * If connected, change state of peer on
5599 		 * discon ind event and send discon ind pdu
5600 		 * to module above it
5601 		 */
5602 
5603 		ASSERT(tep->te_ser == peer_tep->te_ser);
5604 		if (IS_COTSORD(peer_tep) &&
5605 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5606 		    peer_tep->te_state == TS_DATA_XFER)) {
5607 			/*
5608 			 * send ordrel ind
5609 			 */
5610 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5611 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5612 			    peer_tep->te_state,
5613 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5614 			d_mp = tl_ordrel_ind_alloc();
5615 			if (! d_mp) {
5616 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5617 				    SL_TRACE|SL_ERROR,
5618 				    "tl_co_unconnect:connected:"
5619 				    "allocb failure"));
5620 				/*
5621 				 * Continue with cleaning up peer as
5622 				 * this side may go away with the close
5623 				 */
5624 				TL_QENABLE(peer_tep);
5625 				goto discon_peer;
5626 			}
5627 			peer_tep->te_state =
5628 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5629 
5630 			putnext(peer_tep->te_rq, d_mp);
5631 			/*
5632 			 * Handle flow control case.  This will generate
5633 			 * a t_discon_ind message with reason 0 if there
5634 			 * is data queued on the write side.
5635 			 */
5636 			TL_QENABLE(peer_tep);
5637 		} else if (IS_COTSORD(peer_tep) &&
5638 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5639 			/*
5640 			 * Sent an ordrel_ind. We send a discon with
5641 			 * with error 0 to inform that the peer is gone.
5642 			 */
5643 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5644 			    SL_TRACE|SL_ERROR,
5645 			    "tl_co_unconnect: discon in state %d",
5646 			    tep->te_state));
5647 			tl_discon_ind(peer_tep, 0);
5648 		} else {
5649 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5650 			    SL_TRACE|SL_ERROR,
5651 			    "tl_co_unconnect: state %d", tep->te_state));
5652 			tl_discon_ind(peer_tep, ECONNRESET);
5653 		}
5654 
5655 discon_peer:
5656 		/*
5657 		 * Disconnect cross-pointers only for close
5658 		 */
5659 		if (tep->te_closing) {
5660 			peer_tep = tep->te_conp;
5661 			TL_REMOVE_PEER(peer_tep->te_conp);
5662 			TL_REMOVE_PEER(tep->te_conp);
5663 		}
5664 	}
5665 }
5666 
5667 /*
5668  * Note: The following routine does not recover from allocb()
5669  * failures
5670  * The reason should be from the <sys/errno.h> space.
5671  */
5672 static void
5673 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5674 {
5675 	mblk_t *d_mp;
5676 
5677 	if (tep->te_closing)
5678 		return;
5679 
5680 	/*
5681 	 * flush the queues.
5682 	 */
5683 	flushq(tep->te_rq, FLUSHDATA);
5684 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5685 
5686 	/*
5687 	 * send discon ind
5688 	 */
5689 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5690 	if (! d_mp) {
5691 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5692 		    "tl_discon_ind:allocb failure"));
5693 		return;
5694 	}
5695 	tep->te_state = TS_IDLE;
5696 	putnext(tep->te_rq, d_mp);
5697 }
5698 
5699 /*
5700  * Note: The following routine does not recover from allocb()
5701  * failures
5702  * The reason should be from the <sys/errno.h> space.
5703  */
5704 static mblk_t *
5705 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5706 {
5707 	mblk_t *mp;
5708 	struct T_discon_ind *tdi;
5709 
5710 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5711 		DB_TYPE(mp) = M_PROTO;
5712 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5713 		tdi = (struct T_discon_ind *)mp->b_rptr;
5714 		tdi->PRIM_type = T_DISCON_IND;
5715 		tdi->DISCON_reason = reason;
5716 		tdi->SEQ_number = seqnum;
5717 	}
5718 	return (mp);
5719 }
5720 
5721 
5722 /*
5723  * Note: The following routine does not recover from allocb()
5724  * failures
5725  */
5726 static mblk_t *
5727 tl_ordrel_ind_alloc(void)
5728 {
5729 	mblk_t *mp;
5730 	struct T_ordrel_ind *toi;
5731 
5732 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5733 		DB_TYPE(mp) = M_PROTO;
5734 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5735 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5736 		toi->PRIM_type = T_ORDREL_IND;
5737 	}
5738 	return (mp);
5739 }
5740 
5741 
5742 /*
5743  * Lookup the seqno in the list of queued connections.
5744  */
5745 static tl_icon_t *
5746 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5747 {
5748 	list_t *l = &tep->te_iconp;
5749 	tl_icon_t *tip = list_head(l);
5750 
5751 	ASSERT(seqno != 0);
5752 
5753 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5754 		;
5755 
5756 	return (tip);
5757 }
5758 
5759 /*
5760  * Queue data for a given T_CONN_IND while verifying that redundant
5761  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5762  * Used when the originator of the connection closes.
5763  */
5764 static void
5765 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5766 {
5767 	tl_icon_t		*tip;
5768 	mblk_t			**mpp, *mp;
5769 	int			prim, nprim;
5770 
5771 	if (nmp->b_datap->db_type == M_PROTO)
5772 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5773 	else
5774 		nprim = -1;	/* M_DATA */
5775 
5776 	tip = tl_icon_find(tep, seqno);
5777 	if (tip == NULL) {
5778 		freemsg(nmp);
5779 		return;
5780 	}
5781 
5782 	ASSERT(tip->ti_seqno != 0);
5783 	mpp = &tip->ti_mp;
5784 	while (*mpp != NULL) {
5785 		mp = *mpp;
5786 
5787 		if (mp->b_datap->db_type == M_PROTO)
5788 			prim = ((union T_primitives *)mp->b_rptr)->type;
5789 		else
5790 			prim = -1;	/* M_DATA */
5791 
5792 		/*
5793 		 * Allow nothing after a T_DISCON_IND
5794 		 */
5795 		if (prim == T_DISCON_IND) {
5796 			freemsg(nmp);
5797 			return;
5798 		}
5799 		/*
5800 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5801 		 */
5802 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5803 			freemsg(nmp);
5804 			return;
5805 		}
5806 		mpp = &(mp->b_next);
5807 	}
5808 	*mpp = nmp;
5809 }
5810 
5811 /*
5812  * Verify if a certain TPI primitive exists on the connind queue.
5813  * Use prim -1 for M_DATA.
5814  * Return non-zero if found.
5815  */
5816 static boolean_t
5817 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5818 {
5819 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5820 	boolean_t found = B_FALSE;
5821 
5822 	if (tip != NULL) {
5823 		mblk_t *mp;
5824 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5825 			found = (DB_TYPE(mp) == M_PROTO &&
5826 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5827 		}
5828 	}
5829 	return (found);
5830 }
5831 
5832 /*
5833  * Send the b_next mblk chain that has accumulated before the connection
5834  * was accepted. Perform the necessary state transitions.
5835  */
5836 static void
5837 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5838 {
5839 	mblk_t			*mp;
5840 	union T_primitives	*primp;
5841 
5842 	if (tep->te_closing) {
5843 		tl_icon_freemsgs(mpp);
5844 		return;
5845 	}
5846 
5847 	ASSERT(tep->te_state == TS_DATA_XFER);
5848 	ASSERT(tep->te_rq->q_first == NULL);
5849 
5850 	while ((mp = *mpp) != NULL) {
5851 		*mpp = mp->b_next;
5852 		mp->b_next = NULL;
5853 
5854 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5855 		switch (DB_TYPE(mp)) {
5856 		default:
5857 			freemsg(mp);
5858 			break;
5859 		case M_DATA:
5860 			putnext(tep->te_rq, mp);
5861 			break;
5862 		case M_PROTO:
5863 			primp = (union T_primitives *)mp->b_rptr;
5864 			switch (primp->type) {
5865 			case T_UNITDATA_IND:
5866 			case T_DATA_IND:
5867 			case T_OPTDATA_IND:
5868 			case T_EXDATA_IND:
5869 				putnext(tep->te_rq, mp);
5870 				break;
5871 			case T_ORDREL_IND:
5872 				tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5873 				    tep->te_state);
5874 				putnext(tep->te_rq, mp);
5875 				break;
5876 			case T_DISCON_IND:
5877 				tep->te_state = TS_IDLE;
5878 				putnext(tep->te_rq, mp);
5879 				break;
5880 			default:
5881 #ifdef DEBUG
5882 				cmn_err(CE_PANIC,
5883 				    "tl_icon_sendmsgs: unknown primitive");
5884 #endif /* DEBUG */
5885 				freemsg(mp);
5886 				break;
5887 			}
5888 			break;
5889 		}
5890 	}
5891 }
5892 
5893 /*
5894  * Free the b_next mblk chain that has accumulated before the connection
5895  * was accepted.
5896  */
5897 static void
5898 tl_icon_freemsgs(mblk_t **mpp)
5899 {
5900 	mblk_t *mp;
5901 
5902 	while ((mp = *mpp) != NULL) {
5903 		*mpp = mp->b_next;
5904 		mp->b_next = NULL;
5905 		freemsg(mp);
5906 	}
5907 }
5908 
5909 /*
5910  * Send M_ERROR
5911  * Note: assumes caller ensured enough space in mp or enough
5912  *	memory available. Does not attempt recovery from allocb()
5913  *	failures
5914  */
5915 
5916 static void
5917 tl_merror(queue_t *wq, mblk_t *mp, int error)
5918 {
5919 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5920 
5921 	if (tep->te_closing) {
5922 		freemsg(mp);
5923 		return;
5924 	}
5925 
5926 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
5927 	    SL_TRACE|SL_ERROR,
5928 	    "tl_merror: tep=%p, err=%d", (void *)tep, error));
5929 
5930 	/*
5931 	 * flush all messages on queue. we are shutting
5932 	 * the stream down on fatal error
5933 	 */
5934 	flushq(wq, FLUSHALL);
5935 	if (IS_COTS(tep)) {
5936 		/* connection oriented - unconnect endpoints */
5937 		tl_co_unconnect(tep);
5938 	}
5939 	if (mp->b_cont) {
5940 		freemsg(mp->b_cont);
5941 		mp->b_cont = NULL;
5942 	}
5943 
5944 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5945 		freemsg(mp);
5946 		mp = allocb(1, BPRI_HI);
5947 		if (!mp) {
5948 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5949 			    SL_TRACE|SL_ERROR,
5950 			    "tl_merror:M_PROTO: out of memory"));
5951 			return;
5952 		}
5953 	}
5954 	if (mp) {
5955 		DB_TYPE(mp) = M_ERROR;
5956 		mp->b_rptr = DB_BASE(mp);
5957 		*mp->b_rptr = (char)error;
5958 		mp->b_wptr = mp->b_rptr + sizeof (char);
5959 		qreply(wq, mp);
5960 	} else {
5961 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
5962 	}
5963 }
5964 
5965 static void
5966 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5967 {
5968 	ASSERT(cr != NULL);
5969 
5970 	if (flag & TL_SETCRED) {
5971 		struct opthdr *opt = (struct opthdr *)buf;
5972 		tl_credopt_t *tlcred;
5973 
5974 		opt->level = TL_PROT_LEVEL;
5975 		opt->name = TL_OPT_PEER_CRED;
5976 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5977 
5978 		tlcred = (tl_credopt_t *)(opt + 1);
5979 		tlcred->tc_uid = crgetuid(cr);
5980 		tlcred->tc_gid = crgetgid(cr);
5981 		tlcred->tc_ruid = crgetruid(cr);
5982 		tlcred->tc_rgid = crgetrgid(cr);
5983 		tlcred->tc_suid = crgetsuid(cr);
5984 		tlcred->tc_sgid = crgetsgid(cr);
5985 		tlcred->tc_ngroups = crgetngroups(cr);
5986 	} else if (flag & TL_SETUCRED) {
5987 		struct opthdr *opt = (struct opthdr *)buf;
5988 
5989 		opt->level = TL_PROT_LEVEL;
5990 		opt->name = TL_OPT_PEER_UCRED;
5991 		opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
5992 
5993 		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
5994 	} else {
5995 		struct T_opthdr *topt = (struct T_opthdr *)buf;
5996 		ASSERT(flag & TL_SOCKUCRED);
5997 
5998 		topt->level = SOL_SOCKET;
5999 		topt->name = SCM_UCRED;
6000 		topt->len = ucredminsize(cr) + sizeof (*topt);
6001 		topt->status = 0;
6002 		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6003 	}
6004 }
6005 
6006 /* ARGSUSED */
6007 static int
6008 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6009 {
6010 	/* no default value processed in protocol specific code currently */
6011 	return (-1);
6012 }
6013 
6014 /* ARGSUSED */
6015 static int
6016 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6017 {
6018 	int len;
6019 	tl_endpt_t *tep;
6020 	int *valp;
6021 
6022 	tep = (tl_endpt_t *)wq->q_ptr;
6023 
6024 	len = 0;
6025 
6026 	/*
6027 	 * Assumes: option level and name sanity check done elsewhere
6028 	 */
6029 
6030 	switch (level) {
6031 	case SOL_SOCKET:
6032 		if (! IS_SOCKET(tep))
6033 			break;
6034 		switch (name) {
6035 		case SO_RECVUCRED:
6036 			len = sizeof (int);
6037 			valp = (int *)ptr;
6038 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6039 			break;
6040 		default:
6041 			break;
6042 		}
6043 		break;
6044 	case TL_PROT_LEVEL:
6045 		switch (name) {
6046 		case TL_OPT_PEER_CRED:
6047 		case TL_OPT_PEER_UCRED:
6048 			/*
6049 			 * option not supposed to retrieved directly
6050 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6051 			 * when some internal flags set by other options
6052 			 * Direct retrieval always designed to fail(ignored)
6053 			 * for this option.
6054 			 */
6055 			break;
6056 		}
6057 	}
6058 	return (len);
6059 }
6060 
6061 /* ARGSUSED */
6062 static int
6063 tl_set_opt(
6064 	queue_t		*wq,
6065 	uint_t		mgmt_flags,
6066 	int		level,
6067 	int		name,
6068 	uint_t		inlen,
6069 	uchar_t		*invalp,
6070 	uint_t		*outlenp,
6071 	uchar_t		*outvalp,
6072 	void		*thisdg_attrs,
6073 	cred_t		*cr)
6074 {
6075 	int error;
6076 	tl_endpt_t *tep;
6077 
6078 	tep = (tl_endpt_t *)wq->q_ptr;
6079 
6080 	error = 0;		/* NOERROR */
6081 
6082 	/*
6083 	 * Assumes: option level and name sanity checks done elsewhere
6084 	 */
6085 
6086 	switch (level) {
6087 	case SOL_SOCKET:
6088 		if (! IS_SOCKET(tep)) {
6089 			error = EINVAL;
6090 			break;
6091 		}
6092 		/*
6093 		 * TBD: fill in other AF_UNIX socket options and then stop
6094 		 * returning error.
6095 		 */
6096 		switch (name) {
6097 		case SO_RECVUCRED:
6098 			/*
6099 			 * We only support this for datagram sockets;
6100 			 * getpeerucred handles the connection oriented
6101 			 * transports.
6102 			 */
6103 			if (! IS_CLTS(tep)) {
6104 				error = EINVAL;
6105 				break;
6106 			}
6107 			if (*(int *)invalp == 0)
6108 				tep->te_flag &= ~TL_SOCKUCRED;
6109 			else
6110 				tep->te_flag |= TL_SOCKUCRED;
6111 			break;
6112 		default:
6113 			error = EINVAL;
6114 			break;
6115 		}
6116 		break;
6117 	case TL_PROT_LEVEL:
6118 		switch (name) {
6119 		case TL_OPT_PEER_CRED:
6120 		case TL_OPT_PEER_UCRED:
6121 			/*
6122 			 * option not supposed to be set directly
6123 			 * Its value in initialized for each endpoint at
6124 			 * driver open time.
6125 			 * Direct setting always designed to fail for this
6126 			 * option.
6127 			 */
6128 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6129 			    SL_TRACE|SL_ERROR,
6130 			    "tl_set_opt: option is not supported"));
6131 			error = EPROTO;
6132 			break;
6133 		}
6134 	}
6135 	return (error);
6136 }
6137 
6138 
6139 static void
6140 tl_timer(void *arg)
6141 {
6142 	queue_t *wq = arg;
6143 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6144 
6145 	ASSERT(tep);
6146 
6147 	tep->te_timoutid = 0;
6148 
6149 	enableok(wq);
6150 	/*
6151 	 * Note: can call wsrv directly here and save context switch
6152 	 * Consider change when qtimeout (not timeout) is active
6153 	 */
6154 	qenable(wq);
6155 }
6156 
6157 static void
6158 tl_buffer(void *arg)
6159 {
6160 	queue_t *wq = arg;
6161 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6162 
6163 	ASSERT(tep);
6164 
6165 	tep->te_bufcid = 0;
6166 	tep->te_nowsrv = B_FALSE;
6167 
6168 	enableok(wq);
6169 	/*
6170 	 *  Note: can call wsrv directly here and save context switch
6171 	 * Consider change when qbufcall (not bufcall) is active
6172 	 */
6173 	qenable(wq);
6174 }
6175 
6176 static void
6177 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6178 {
6179 	tl_endpt_t *tep;
6180 
6181 	tep = (tl_endpt_t *)wq->q_ptr;
6182 
6183 	if (tep->te_closing) {
6184 		freemsg(mp);
6185 		return;
6186 	}
6187 	noenable(wq);
6188 
6189 	(void) insq(wq, wq->q_first, mp);
6190 
6191 	if (tep->te_bufcid || tep->te_timoutid) {
6192 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6193 		    "tl_memrecover:recover %p pending", (void *)wq));
6194 		return;
6195 	}
6196 
6197 	if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6198 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6199 		    drv_usectohz(TL_BUFWAIT));
6200 	}
6201 }
6202 
6203 static void
6204 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6205 {
6206 	ASSERT(tip->ti_seqno != 0);
6207 
6208 	if (tip->ti_mp != NULL) {
6209 		tl_icon_freemsgs(&tip->ti_mp);
6210 		tip->ti_mp = NULL;
6211 	}
6212 	if (tip->ti_tep != NULL) {
6213 		tl_refrele(tip->ti_tep);
6214 		tip->ti_tep = NULL;
6215 	}
6216 	list_remove(&tep->te_iconp, tip);
6217 	kmem_free(tip, sizeof (tl_icon_t));
6218 	tep->te_nicon--;
6219 }
6220 
6221 /*
6222  * Remove address from address hash.
6223  */
6224 static void
6225 tl_addr_unbind(tl_endpt_t *tep)
6226 {
6227 	tl_endpt_t *elp;
6228 
6229 	if (tep->te_flag & TL_ADDRHASHED) {
6230 		if (IS_SOCKET(tep)) {
6231 			(void) mod_hash_remove(tep->te_addrhash,
6232 			    (mod_hash_key_t)tep->te_vp,
6233 			    (mod_hash_val_t *)&elp);
6234 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6235 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6236 		} else {
6237 			(void) mod_hash_remove(tep->te_addrhash,
6238 			    (mod_hash_key_t)&tep->te_ap,
6239 			    (mod_hash_val_t *)&elp);
6240 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6241 			tep->te_alen = -1;
6242 			tep->te_abuf = NULL;
6243 		}
6244 		tep->te_flag &= ~TL_ADDRHASHED;
6245 	}
6246 }
6247