xref: /titanic_51/usr/src/uts/common/io/tl.c (revision 12cc75c814f0c017004a9bbc96429911e008601b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Multithreaded STREAMS Local Transport Provider.
30  *
31  * OVERVIEW
32  * ========
33  *
34  * This driver provides TLI as well as socket semantics.  It provides
35  * connectionless, connection oriented, and connection oriented with orderly
36  * release transports for TLI and sockets. Each transport type has separate name
37  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
38  * this removes any name space conflicts when binding to socket style transport
39  * addresses.
40  *
41  * NOTE: There is one exception: Socket ticots and ticotsord transports share
42  * the same namespace. In fact, sockets always use ticotsord type transport.
43  *
44  * The driver mode is specified during open() by the minor number used for
45  * open.
46  *
47  *  The sockets in addition have the following semantic differences:
48  *  No support for passing up credentials (TL_SET[U]CRED).
49  *
50  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
51  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
52  *	T_OPTDATA_IND.
53  *
54  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
55  *	a T_CONN_RES is received from the acceptor. This means that a socket
56  *	connect will complete before the peer has called accept.
57  *
58  *
59  * MULTITHREADING
60  * ==============
61  *
62  * The driver does not use STREAMS protection mechanisms. Instead it uses a
63  * generic "serializer" abstraction. Most of the operations are executed behind
64  * the serializer and are, essentially single-threaded. All functions executed
65  * behind the same serializer are strictly serialized. So if one thread calls
66  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
67  * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
68  * was called first) the actual sequence will be foo(mp1, arg1);
69  * bar(mp2, arg1) or bar(mp2, arg1); foo(mp1, arg1). But foo() and bar() will
70  * never run at the same time.
71  *
72  * Connectionless transports use a single serializer per transport type (one
73  * for TLI and one for sockets). Connection-oriented transports use
74  * finer-grained serializers.
75  *
76  * All COTS-type endpoints start their life with private serializers. During
77  * connection request processing the endpoint serializer is switched to the
78  * listener's serializer and the rest of T_CONN_REQ processing is done on the
79  * listener serializer. During T_CONN_RES processing the eager serializer is
80  * switched from listener to acceptor serializer and after that point all
81  * processing for eager and acceptor happens on this serializer. To avoid races
82  * with endpoint closes while its serializer may be changing closes are blocked
83  * while serializers are manipulated.
84  *
85  * References accounting
86  * ---------------------
87  *
88  * Endpoints are reference counted and freed when the last reference is
89  * dropped. Functions within the serializer may access an endpoint state even
90  * after an endpoint closed. The te_closing being set on the endpoint indicates
91  * that the endpoint entered its close routine.
92  *
93  * One reference is held for each opened endpoint instance. The reference
94  * counter is incremented when the endpoint is linked to another endpoint and
95  * decremented when the link disappears. It is also incremented when the
96  * endpoint is found by the hash table lookup. This increment is atomic with the
97  * lookup itself and happens while the hash table read lock is held.
98  *
99  * Close synchronization
100  * ---------------------
101  *
102  * During close the endpoint is marked as closing using te_closing flag. It is
103  * usually enough to check for te_closing flag since all other state changes
104  * happen after this flag is set and the close entered serializer. Immediately
105  * after setting te_closing flag tl_close() enters serializer and waits until
106  * the callback finishes. This allows all functions called within serializer to
107  * simply check te_closing without any locks.
108  *
109  * Serializer management.
110  * ---------------------
111  *
112  * For COTS transports serializers are created when the endpoint is constructed
113  * and destroyed when the endpoint is destructed. CLTS transports use global
114  * serializers - one for sockets and one for TLI.
115  *
116  * COTS serializers have separate reference counts to deal with several
117  * endpoints sharing the same serializer. There is a subtle problem related to
118  * the serializer destruction. The serializer should never be destroyed by any
119  * function executed inside serializer. This means that close has to wait till
120  * all serializer activity for this endpoint is finished before it can drop the
121  * last reference on the endpoint (which may as well free the serializer).  This
122  * is only relevant for COTS transports which manage serializers
123  * dynamically. For CLTS transports close may complete without waiting for all
124  * serializer activity to finish since serializer is only destroyed at driver
125  * detach time.
126  *
127  * COTS endpoints keep track of the number of outstanding requests on the
128  * serializer for the endpoint. The code handling accept() avoids changing
129  * client serializer if it has any pending messages on the serializer and
130  * instead moves acceptor to listener's serializer.
131  *
132  *
133  * Use of hash tables
134  * ------------------
135  *
136  * The driver uses modhash hash table implementation. Each transport uses two
137  * hash tables - one for finding endpoints by acceptor ID and another one for
138  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
139  * pair of hash tables since sockets only use TICOTSORD.
140  *
141  * All hash tables lookups increment a reference count for returned endpoints,
142  * so we may safely check the endpoint state even when the endpoint is removed
143  * from the hash by another thread immediately after it is found.
144  *
145  *
146  * CLOSE processing
147  * ================
148  *
149  * The driver enters serializer twice on close(). The close sequence is the
150  * following:
151  *
152  * 1) Wait until closing is safe (te_closewait becomes zero)
153  *	This step is needed to prevent close during serializer switches. In most
154  *	cases (close happening after connection establishment) te_closewait is
155  *	zero.
156  * 2) Set te_closing.
157  * 3) Call tl_close_ser() within serializer and wait for it to complete.
158  *
159  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
160  *	It also needs to clear write-side q_next pointers - this should be done
161  *	before qprocsoff().
162  *
163  *    This synchronous serializer entry during close is needed to ensure that
164  *    the queue is valid everywhere inside the serializer.
165  *
166  *    Note that in many cases close will execute tl_close_ser() synchronously,
167  *    so it will not wait at all.
168  *
169  * 4) Calls qprocsoff().
170  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
171  *	complete (for COTS transports). For CLTS transport there is no wait.
172  *
173  *	tl_close_finish_ser() Finishes the close process and wakes up waiting
174  *	close if there is any.
175  *
176  *    Note that in most cases close will enter tl_close_finish_ser()
177  *    synchronously and will not wait at all.
178  *
179  *
180  * Flow Control
181  * ============
182  *
183  * The driver implements both read and write side service routines. No one calls
184  * putq() on the read queue. The read side service routine tl_rsrv() is called
185  * when the read side stream is back-enabled. It enters serializer synchronously
186  * (waits till serializer processing is complete). Within serializer it
187  * back-enables all endpoints blocked by the queue for connection-less
188  * transports and enables write side service processing for the peer for
189  * connection-oriented transports.
190  *
191  * Read and write side service routines use special mblk_sized space in the
192  * endpoint structure to enter perimeter.
193  *
194  * Write-side flow control
195  * -----------------------
196  *
197  * Write side flow control is a bit tricky. The driver needs to deal with two
198  * message queues - the explicit STREAMS message queue maintained by
199  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
200  * queues should be synchronized to preserve message ordering and should
201  * maintain a single order determined by the order in which messages enter
202  * tl_wput(). In order to maintain the ordering between these two queues the
203  * STREAMS queue is only manipulated within the serializer, so the ordering is
204  * provided by the serializer.
205  *
206  * Functions called from the tl_wsrv() sometimes may call putbq(). To
207  * immediately stop any further processing of the STREAMS message queues the
208  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
209  * side service processing stops when the flag is set.
210  *
211  * The tl_wsrv() function enters serializer synchronously and waits for it to
212  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
213  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
214  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
215  * always bounded by the amount of messages on the STREAMS queue at the time
216  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
217  * queue from another serialized entry which can't happen in parallel. This
218  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
219  * of it draining forever while writer places new messages on the STREAMS
220  * queue).
221  *
222  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
223  *
224  *
225  * Unix Domain Sockets
226  * ===================
227  *
228  * The driver knows the structure of Unix Domain sockets addresses and treats
229  * them differently from generic TLI addresses. For sockets implicit binds are
230  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
231  * instead of using address length of zero. Explicit binds specify
232  * SOU_MAGIC_EXPLICIT as magic.
233  *
234  * For implicit binds we always use minor number as soua_vp part of the address
235  * and avoid any hash table lookups. This saves two hash tables lookups per
236  * anonymous bind.
237  *
238  * For explicit address we hash the vnode pointer instead of hashing the
239  * full-scale address+zone+length. Hashing by pointer is more efficient than
240  * hashing by the full address.
241  *
242  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
243  * tep structure, so it should be never freed.
244  *
245  * Also for sockets the driver always uses minor number as acceptor id.
246  *
247  * TPI VIOLATIONS
248  * --------------
249  *
250  * This driver violates TPI in several respects for Unix Domain Sockets:
251  *
252  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
253  *	is requested and the endpoint is already in use. There is no point in
254  *	generating an unused address since this address will be rejected by
255  *	sockfs anyway. For implicit binds it always generates a new address
256  *	(sets soua_vp to its minor number).
257  *
258  * 2) It always uses minor number as acceptor ID and never uses queue
259  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
260  *	message and they do not use the queue pointer.
261  *
262  * 3) For Listener sockets the usual sequence is to issue bind() zero backlog
263  *	followed by listen(). The listen() should be issued with non-zero
264  *	backlog, so sotpi_listen() issues unbind request followed by bind
265  *	request to the same address but with a non-zero qlen value. Both
266  *	tl_bind() and tl_unbind() require write lock on the hash table to
267  *	insert/remove the address. The driver does not remove the address from
268  *	the hash for endpoints that are bound to the explicit address and have
269  *	backlog of zero. During T_BIND_REQ processing if the address requested
270  *	is equal to the address the endpoint already has it updates the backlog
271  *	without reinserting the address in the hash table. This optimization
272  *	avoids two hash table updates for each listener created. It always
273  *	avoids two hash table updates for each listener created. It also
274  *	the same address between the unbind and bind and suddenly listen() fails
275  *	because address is in use even though the bind() succeeded.
276  *
277  *
278  * CONNECTIONLESS TRANSPORTS
279  * =========================
280  *
281  * Connectionless transports all share the same serializer (one for TLI and one
282  * for Sockets). Functions executing behind serializer can check or modify state
283  * of any endpoint.
284  *
285  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
286  * te_lastep field. The next time X talks to some address A it checks whether A
287  * is the same as Y's address and if it is there is no need to lookup Y. If the
288  * address is different or the state of Y is not appropriate (e.g. closed or not
289  * idle) X does a lookup using tl_find_peer() and caches the new address.
290  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
291  * on the endpoint found.
292  *
293  * During close of endpoint Y it doesn't try to remove itself from other
294  * endpoints caches. They will detect that Y is gone and will search the peer
295  * endpoint again.
296  *
297  * Flow Control Handling.
298  * ----------------------
299  *
300  * Each connectionless endpoint keeps a list of endpoints which are
301  * flow-controlled by its queue. It also keeps a pointer to the queue which
302  * flow-controls itself.  Whenever flow control releases for endpoint X it
303  * enables all queues from the list. During close it also back-enables everyone
304  * in the list. If X is flow-controlled when it is closing it removes itself
305  * from the peer's list.
306  *
307  * DATA STRUCTURES
308  * ===============
309  *
310  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
311  * endpoint state. For connection-oriented transports it keeps a list
312  * of pending connections (tl_icon_t). For connectionless transports it keeps a
313  * list of endpoints flow controlled by this one.
314  *
315  * Each transport type is represented by a per-transport data structure
316  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
317  * endpoint address hash tables for each transport. It also contains pointer to
318  * transport serializer for connectionless transports.
319  *
320  * Each endpoint keeps a link to its transport structure, so the code can find
321  * all per-transport information quickly.
322  */
323 
324 #include	<sys/types.h>
325 #include	<sys/inttypes.h>
326 #include	<sys/stream.h>
327 #include	<sys/stropts.h>
328 #define	_SUN_TPI_VERSION 2
329 #include	<sys/tihdr.h>
330 #include	<sys/strlog.h>
331 #include	<sys/debug.h>
332 #include	<sys/cred.h>
333 #include	<sys/errno.h>
334 #include	<sys/kmem.h>
335 #include	<sys/id_space.h>
336 #include	<sys/modhash.h>
337 #include	<sys/mkdev.h>
338 #include	<sys/tl.h>
339 #include	<sys/stat.h>
340 #include	<sys/conf.h>
341 #include	<sys/modctl.h>
342 #include	<sys/strsun.h>
343 #include	<sys/socket.h>
344 #include	<sys/socketvar.h>
345 #include	<sys/sysmacros.h>
346 #include	<sys/xti_xtiopt.h>
347 #include	<sys/ddi.h>
348 #include	<sys/sunddi.h>
349 #include	<sys/zone.h>
350 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
351 #include	<inet/optcom.h>
352 #include	<sys/strsubr.h>
353 #include	<sys/ucred.h>
354 #include	<sys/suntpi.h>
355 #include	<sys/list.h>
356 #include	<sys/serializer.h>
357 
358 /*
359  * TBD List
360  * 14 Eliminate state changes through table
361  * 16. AF_UNIX socket options
362  * 17. connect() for ticlts
363  * 18. support for "netstat" to show AF_UNIX plus TLI local
364  *	transport connections
365  * 21. sanity check to flushing on sending M_ERROR
366  */
367 
368 /*
369  * CONSTANT DECLARATIONS
370  * --------------------
371  */
372 
373 /*
374  * Local declarations: TPI state-table lookup and driver-wide constants.
375  */
376 #define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]	/* next TPI state */
377 
378 #define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
379 #define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
380 #define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgz" is unlimited (0) */
381 /*
382  * Hash tables size.
383  */
384 #define	TL_HASH_SIZE 311
385 
386 /*
387  * Definitions for module_info (used to initialize tl_minfo)
388  */
389 #define		TL_ID		(104)		/* module ID number */
390 #define		TL_NAME		"tl"		/* module name */
391 #define		TL_MINPSZ	(0)		/* min packet size */
392 #define		TL_MAXPSZ	INFPSZ 		/* max packet size ZZZ */
393 #define		TL_HIWAT	(16*1024)	/* hi water mark */
394 #define		TL_LOWAT	(256)		/* lo water mark */
395 /*
396  * Definition of minor numbers/modes for new transport provider modes.
397  * We view the socket use as a separate mode to get a separate name space.
398  */
399 #define		TL_TICOTS	0	/* connection oriented transport */
400 #define		TL_TICOTSORD 	1	/* COTS w/ orderly release */
401 #define		TL_TICLTS 	2	/* connectionless transport */
402 #define		TL_UNUSED	3	/* not a valid mode */
403 #define		TL_SOCKET	4	/* Socket (bit OR-ed with a mode above) */
404 #define		TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)	/* == 4 */
405 #define		TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)	/* == 5 */
406 #define		TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)	/* == 6 */
407 
408 #define		TL_MINOR_MASK	0x7	/* covers every mode value above (0-6) */
409 #define		TL_MINOR_START	(TL_TICLTS + 1)	/* NOTE(review): == 3; confirm */
410 
411 /*
412  * LOCAL MACROS: T_ALIGN rounds p up to a multiple of sizeof (t_scalar_t).
413  */
414 #define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
415 
416 /*
417  * EXTERNAL VARIABLE DECLARATIONS
418  * -----------------------------
419  */
420 /*
421  * TPI state table defined in the OS space.c; indexed as [event][state].
422  */
423 extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
424 
425 /*
426  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
427  */
428 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
429 static int tl_close(queue_t *, int, cred_t *);
430 static void tl_wput(queue_t *, mblk_t *);
431 static void tl_wsrv(queue_t *);
432 static void tl_rsrv(queue_t *);
433 
434 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
435 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
436 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
437 
438 
439 /*
440  * GLOBAL DATA STRUCTURES AND VARIABLES
441  * -----------------------------------
442  */
443 
444 /*
445  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ.
446  * For now, we only manage the SO_RECVUCRED option but we also have
447  * harmless dummy options to make things work with some common code we access.
448  */
449 opdes_t	tl_opt_arr[] = {
450 	/* The SO_TYPE is needed for the hack below */
451 	{
452 		SO_TYPE,		/* option name */
453 		SOL_SOCKET,		/* option level */
454 		OA_R,
455 		OA_R,
456 		OP_NP,
457 		OP_PASSNEXT,
458 		sizeof (t_scalar_t),	/* NOTE(review): presumably value size */
459 		0
460 	},
461 	{
462 		SO_RECVUCRED,		/* option name */
463 		SOL_SOCKET,		/* option level */
464 		OA_RW,
465 		OA_RW,
466 		OP_NP,
467 		OP_PASSNEXT,
468 		sizeof (int),		/* NOTE(review): presumably value size */
469 		0
470 	}
471 };
472 
473 /*
474  * Table of all supported option levels.
475  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
476  * any supported options so we need this info separately.
477  *
478  * This is needed only for topmost TPI providers.
479  */
480 optlevel_t	tl_valid_levels_arr[] = {
481 	XTI_GENERIC,
482 	SOL_SOCKET,
483 	TL_PROT_LEVEL
484 };
485 
486 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)	/* entry count */
487 /*
488  * Current upper bound on the amount of space needed to return all options:
489  * 4 data bytes (<< 2) plus one struct opthdr per tl_opt_arr entry, plus 64
490  * bytes and the T_optmgmt_ack header. Other options must be sized by hand.
491  */
492 #define	TL_MAX_OPT_BUF_LEN						\
493 		((A_CNT(tl_opt_arr) << 2) +				\
494 		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
495 		64 + sizeof (struct T_optmgmt_ack))
496 
497 #define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)
498 
499 /*
500  *	transport addr structure
501  */
502 typedef struct tl_addr {
503 	zoneid_t	ta_zoneid;		/* Zone scope of address */
504 	t_scalar_t	ta_alen;		/* length of abuf */
505 	void		*ta_abuf;		/* the addr itself */
506 } tl_addr_t;
507 
508 /*
509  * Refcounted version of serializer.
510  */
511 typedef struct tl_serializer {
512 	uint_t		ts_refcnt;
513 	serializer_t	*ts_serializer;
514 } tl_serializer_t;
515 
516 /*
517  * Each transport type has a separate state.
518  * Per-transport state.
519  */
520 typedef struct tl_transport_state {
521 	char		*tr_name;
522 	minor_t		tr_minor;
523 	uint32_t	tr_defaddr;
524 	mod_hash_t	*tr_ai_hash;
525 	mod_hash_t	*tr_addr_hash;
526 	tl_serializer_t	*tr_serializer;
527 } tl_transport_state_t;
528 
529 #define	TL_DFADDR 0x1000	/* initial tr_defaddr of every transport */
530 
531 static tl_transport_state_t tl_transports[] = {	/* entry i: tr_minor == i */
532 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
533 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
534 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
535 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
536 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
537 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
538 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
539 };
540 
541 #define	TL_MAXTRANSPORT A_CNT(tl_transports)
542 
543 struct tl_endpt;
544 typedef struct tl_endpt tl_endpt_t;
545 
546 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
547 
548 /*
549  * Data structure used to represent pending connects.
550  * Records enough information so that the connecting peer can close
551  * before the connection gets accepted.
552  */
553 typedef struct tl_icon {
554 	list_node_t	ti_node;	/* list linkage (see te_iconp) */
555 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
556 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
557 	t_scalar_t	ti_seqno;	/* Sequence number */
558 } tl_icon_t;
559 
560 typedef struct so_ux_addr soux_addr_t;
561 #define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)
562 
563 /*
564  * Maximum number of unaccepted connection indications allowed per listener.
565  */
566 #define	TL_MAXQLEN	4096
567 int tl_maxqlen = TL_MAXQLEN;
568 
569 /*
570  *	Transport endpoint structure; holds the state of one endpoint.
571  */
572 struct tl_endpt {
573 	queue_t		*te_rq;		/* stream read queue */
574 	queue_t		*te_wq;		/* stream write queue */
575 	uint32_t	te_refcnt;	/* reference count */
576 	int32_t 	te_state;	/* TPI state of endpoint */
577 	minor_t		te_minor;	/* minor number */
578 #define	te_seqno	te_minor	/* seqno is an alias for the minor */
579 	uint_t		te_flag;	/* flag field */
580 	boolean_t	te_nowsrv;	/* stop write-side service processing */
581 	tl_serializer_t	*te_ser;	/* Serializer to use */
582 #define	te_serializer	te_ser->ts_serializer
583 
584 	soux_addr_t	te_uxaddr;	/* Socket address */
585 #define	te_magic	te_uxaddr.soua_magic
586 #define	te_vp		te_uxaddr.soua_vp
587 	tl_addr_t	te_ap;		/* addr bound to this endpt */
588 #define	te_zoneid te_ap.ta_zoneid
589 #define	te_alen	te_ap.ta_alen
590 #define	te_abuf	te_ap.ta_abuf
591 
592 	tl_transport_state_t *te_transport;	/* per-transport state */
593 #define	te_addrhash	te_transport->tr_addr_hash
594 #define	te_aihash	te_transport->tr_ai_hash
595 #define	te_defaddr	te_transport->tr_defaddr
596 	cred_t		*te_credp;	/* endpoint user credentials */
597 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
598 
599 	/*
600 	 * State specific for connection-oriented and connectionless transports.
601 	 */
602 	union {
603 		/* Connection-oriented state. */
604 		struct {
605 			t_uscalar_t _te_nicon;	/* count of conn requests */
606 			t_uscalar_t _te_qlen;	/* max conn requests */
607 			tl_endpt_t  *_te_oconp;	/* conn request pending */
608 			tl_endpt_t  *_te_conp;	/* connected endpt */
609 #ifndef _ILP32
610 			void	    *_te_pad;	/* padding, non-ILP32 only */
611 #endif
612 			list_t	_te_iconp;	/* list of conn ind. pending */
613 		} _te_cots_state;
614 		/* Connection-less state. */
615 		struct {
616 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
617 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
618 			list_node_t _te_flows;	/* lists of connections */
619 			list_t  _te_flowlist;	/* Who flowcontrols on me */
620 		} _te_clts_state;
621 	} _te_transport_state;
622 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
623 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
624 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
625 #define	te_conp		_te_transport_state._te_cots_state._te_conp
626 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
627 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
628 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
629 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
630 #define	te_flows	_te_transport_state._te_clts_state._te_flows
631 
632 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
633 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
634 	pid_t		te_cpid;	/* cached pid of endpoint */
635 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
636 	/*
637 	 * Pieces of the endpoint state needed for closing.
638 	 */
639 	kmutex_t	te_closelock;	/* presumably guards close state */
640 	kcondvar_t	te_closecv;
641 	uint8_t		te_closing;	/* The endpoint started closing */
642 	uint8_t		te_closewait;	/* Wait in close until zero */
643 	mblk_t		te_closemp;	/* for entering serializer on close */
644 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
645 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
646 	kmutex_t	te_srv_lock;	/* presumably guards *srv_active */
647 	kcondvar_t	te_srv_cv;
648 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
649 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
650 	/*
651 	 * Pieces of the endpoint state needed for serializer transitions.
652 	 */
653 	kmutex_t	te_ser_lock;	/* Protects the count below */
654 	uint_t		te_ser_count;	/* Number of messages on serializer */
655 };
656 
657 /*
658  * Flag values. Lower 4 bits specify the transport used (TL_* mode values).
659  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
660  * they make it easier to identify the endpoint.
661  */
662 #define	TL_LISTENER	0x00010	/* the listener endpoint */
663 #define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
664 #define	TL_EAGER	0x00040	/* connecting endpoint */
665 #define	TL_ACCEPTED	0x00080	/* accepted connection */
666 #define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
667 #define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
668 #define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
669 #define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
670 #define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
671 /*
672  * Boolean checks for the endpoint type, based on the mode bits in te_flag.
673  */
674 #define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
675 #define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
676 #define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
677 #define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
678 /* DB_CPID(mp) unless it is -1, in which case the endpoint's te_cpid. */
679 #define	TLPID(mp, tep)	(DB_CPID(mp) == -1 ? (tep)->te_cpid : DB_CPID(mp))
680 
681 /*
682  * Certain operations are always used together. These macros reduce the chance
683  * of missing a part. They expand to bare { } blocks and may evaluate x twice.
684  */
685 #define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
686 #define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
687 /* Put mp back on the write queue and stop write-side service processing. */
688 #define	TL_PUTBQ(x, mp) {		\
689 	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
690 	(x)->te_nowsrv = B_TRUE;	\
691 	(void) putbq((x)->te_wq, mp);	\
692 }
693 /* Both clear te_nowsrv so that write-side service processing may resume. */
694 #define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
695 #define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
696 
697 /*
698  * STREAMS driver glue data structures: module info, read/write qinit, and
699  * the streamtab that ties them together. */
700 static	struct	module_info	tl_minfo = {
701 	TL_ID,			/* mi_idnum */
702 	TL_NAME,		/* mi_idname */
703 	TL_MINPSZ,		/* mi_minpsz */
704 	TL_MAXPSZ,		/* mi_maxpsz */
705 	TL_HIWAT,		/* mi_hiwat */
706 	TL_LOWAT		/* mi_lowat */
707 };
708 
709 static	struct	qinit	tl_rinit = {
710 	NULL,			/* qi_putp - no read-side put procedure */
711 	(int (*)())tl_rsrv,	/* qi_srvp */
712 	tl_open,		/* qi_qopen */
713 	tl_close,		/* qi_qclose */
714 	NULL,			/* qi_qadmin */
715 	&tl_minfo,		/* qi_minfo */
716 	NULL			/* qi_mstat */
717 };
718 
719 static	struct	qinit	tl_winit = {
720 	(int (*)())tl_wput,	/* qi_putp */
721 	(int (*)())tl_wsrv,	/* qi_srvp */
722 	NULL,			/* qi_qopen - set on the read side only */
723 	NULL,			/* qi_qclose - set on the read side only */
724 	NULL,			/* qi_qadmin */
725 	&tl_minfo,		/* qi_minfo */
726 	NULL			/* qi_mstat */
727 };
728 
729 static	struct streamtab	tlinfo = {
730 	&tl_rinit,		/* st_rdinit */
731 	&tl_winit,		/* st_wrinit */
732 	NULL,			/* st_muxrinit */
733 	NULL			/* st_muxwrinit */
734 };
735 
736 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
737     nulldev, tl_info, D_MP, &tlinfo);
738 /* Driver linkage: ties the description string to tl_devops. */
739 static struct modldrv modldrv = {
740 	&mod_driverops,		/* Type of module -- pseudo driver here */
741 	"TPI Local Transport (tl) %I%",	/* description string */
742 	&tl_devops,		/* driver ops */
743 };
744 
745 /*
746  * Module linkage information for the kernel.
747  */
748 static struct modlinkage modlinkage = {
749 	MODREV_1,
750 	&modldrv,
751 	NULL
752 };
753 
754 /*
755  * Templates for response to info request (T_INFO_ACK).
756  * Check sanity of unlimited connect data etc.
757  */
758 
759 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
760 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
761 /* Template for connection-oriented (T_COTS) endpoints. */
762 static struct T_info_ack tl_cots_info_ack =
763 	{
764 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
765 		T_INFINITE,	/* TSDU size */
766 		T_INFINITE,	/* ETSDU size */
767 		T_INFINITE,	/* CDATA_size */
768 		T_INFINITE,	/* DDATA_size */
769 		T_INFINITE,	/* ADDR_size  */
770 		T_INFINITE,	/* OPT_size */
771 		0,		/* TIDU_size - fill at run time */
772 		T_COTS,		/* SERV_type */
773 		-1,		/* CURRENT_state */
774 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
775 	};
776 /* Template for connectionless (T_CLTS) endpoints. */
777 static struct T_info_ack tl_clts_info_ack =
778 	{
779 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
780 		0,		/* TSDU_size - fill at run time */
781 		-2,		/* ETSDU_size -2 => not supported */
782 		-2,		/* CDATA_size -2 => not supported */
783 		-2,		/* DDATA_size  -2 => not supported */
784 		-1,		/* ADDR_size -1 => unlimited */
785 		-1,		/* OPT_size */
786 		0,		/* TIDU_size - fill at run time */
787 		T_CLTS,		/* SERV_type */
788 		-1,		/* CURRENT_state */
789 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
790 	};
791 
792 /*
793  * private copy of devinfo pointer used in tl_info
794  */
795 static dev_info_t *tl_dip;
796 
797 /*
798  * Endpoints cache (kmem cache).
799  */
800 static kmem_cache_t *tl_cache;
801 /*
802  * Minor number space (id_space).
803  */
804 static id_space_t *tl_minors;
805 
806 /*
807  * Default Data Unit size.
808  */
809 static t_scalar_t tl_tidusz;
810 
811 /*
812  * Size of hash tables; defaults to TL_HASH_SIZE.
813  */
814 static size_t tl_hash_size = TL_HASH_SIZE;
815 
816 /*
817  * Debug and test variable ONLY. Turn off T_CONN_IND queueing
818  * for sockets.
819  */
820 static int tl_disable_early_connect = 0;
821 static int tl_client_closing_when_accepting;
822 /* NOTE(review): use not visible in this chunk; presumably a tunable. */
823 static int tl_serializer_noswitch;
824 
/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 *
 * Forward declarations for the file-local routines. Functions with the
 * (mblk_t *, tl_endpt_t *) signature are the ones posted to the endpoint
 * serializer via tl_serializer_enter().
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
	t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t  *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *, mblk_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);
902 
/*
 * Initialize option database object for TL.
 *
 * Bundles the driver's option callbacks (default/get/set) with its option
 * table and the table of valid option levels. Not static, so it is visible
 * outside this file.
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	B_TRUE,			/* TL is tpi provider */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};
917 
/*
 * Logical operations.
 *
 * IMPLY(X, Y) means that X implies Y, i.e. when X is true, Y
 * should also be true. Y is only evaluated when X is true.
 *
 * EQUIV(X, Y) is logical equivalence: X and Y should both be true or both be
 * false at the same time. Each argument is evaluated exactly once and is
 * reduced to its truth value before comparison, so any non-zero values
 * (including pointers) compare as equivalent.
 */
#define	IMPLY(X, Y)	(!(X) || (Y))
#define	EQUIV(X, Y)	(!(X) == !(Y))
929 
930 /*
931  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
932  * ---------------------------------------
933  */
934 
935 /*
936  * Loadable module routines
937  */
938 int
939 _init(void)
940 {
941 	return (mod_install(&modlinkage));
942 }
943 
944 int
945 _fini(void)
946 {
947 	return (mod_remove(&modlinkage));
948 }
949 
950 int
951 _info(struct modinfo *modinfop)
952 {
953 	return (mod_info(&modlinkage, modinfop));
954 }
955 
956 /*
957  * Driver Entry Points and Other routines
958  */
/*
 * attach entry point.
 *
 * DDI_RESUME is accepted as a no-op; any command other than DDI_ATTACH fails.
 * For DDI_ATTACH this computes the TIDU size, creates one minor node per
 * transport, creates the endpoint cache and the minor number space, builds
 * the per-transport hash tables (plus a shared serializer for each
 * connectionless transport) and finally records devi in tl_dip.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	char name[32];	/* scratch buffer for hash table names */

	/*
	 * Resume from a checkpoint state.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
	 * streams message sizes can be unlimited. We use a defined constant
	 * instead.
	 */
	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

	/*
	 * Create subdevices for each transport. On failure remove whatever
	 * minor nodes were already created.
	 */
	for (i = 0; i < TL_UNUSED; i++) {
		if (ddi_create_minor_node(devi,
			tl_transports[i].tr_name,
			S_IFCHR, tl_transports[i].tr_minor,
			DDI_PSEUDO, NULL) == DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	/*
	 * Create the cache of endpoint structures.
	 */
	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);

	if (tl_cache == NULL) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	/*
	 * Create ID space for minor numbers
	 */
	tl_minors = id_space_create("tl_minor_space",
	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);

	/*
	 * Set up per-transport state: the acceptor ID hash, the address hash
	 * and, for connectionless transports, the shared serializer.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if (i == TL_UNUSED)
			continue;

		/* Socket COTSORD shares namespace with COTS */
		if (i == TL_SOCK_COTSORD) {
			t->tr_ai_hash =
			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
			ASSERT(t->tr_ai_hash != NULL);
			t->tr_addr_hash =
			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
			ASSERT(t->tr_addr_hash != NULL);
			continue;
		}

		/*
		 * Create hash tables.
		 *
		 * The acceptor ID hash is an ID hash, except on ILP32 for
		 * non-socket transports where it is a pointer hash. This
		 * mirrors how tl_open() picks te_acceptor_id.
		 */
		(void) snprintf(name, sizeof (name), "%s_ai_hash",
		    t->tr_name);
#ifdef _ILP32
		if (i & TL_SOCKET)
			t->tr_ai_hash =
			    mod_hash_create_idhash(name, tl_hash_size - 1,
				mod_hash_null_valdtor);
		else
			t->tr_ai_hash =
			    mod_hash_create_ptrhash(name, tl_hash_size,
				mod_hash_null_valdtor, sizeof (queue_t));
#else
		t->tr_ai_hash =
		    mod_hash_create_idhash(name, tl_hash_size - 1,
			mod_hash_null_valdtor);
#endif /* _ILP32 */

		/*
		 * The address hash is a pointer hash for sockets and a hash
		 * keyed by tl_addr_t (tl_hash_by_addr / tl_hash_cmp_addr)
		 * for TLI transports.
		 */
		if (i & TL_SOCKET) {
			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_ptrhash(name,
			    tl_hash_size, mod_hash_null_valdtor,
			    sizeof (uintptr_t));
		} else {
			(void) snprintf(name, sizeof (name), "%s_addr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_extended(name,
			    tl_hash_size, mod_hash_null_keydtor,
			    mod_hash_null_valdtor,
			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
		}

		/* Create serializer for connectionless transports. */
		if (i & TL_TICLTS)
			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
	}

	/* Remember the devinfo node for tl_info(). */
	tl_dip = devi;

	return (DDI_SUCCESS);
}
1069 
1070 static int
1071 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1072 {
1073 	int i;
1074 
1075 	if (cmd == DDI_SUSPEND)
1076 		return (DDI_SUCCESS);
1077 
1078 	if (cmd != DDI_DETACH)
1079 		return (DDI_FAILURE);
1080 
1081 	/*
1082 	 * Destroy arenas and hash tables.
1083 	 */
1084 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1085 		tl_transport_state_t *t = &tl_transports[i];
1086 
1087 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1088 			continue;
1089 
1090 		ASSERT(EQUIV(i & TL_TICLTS, t->tr_serializer != NULL));
1091 		if (t->tr_serializer != NULL) {
1092 			tl_serializer_refrele(t->tr_serializer);
1093 			t->tr_serializer = NULL;
1094 		}
1095 
1096 #ifdef _ILP32
1097 		if (i & TL_SOCKET)
1098 			mod_hash_destroy_idhash(t->tr_ai_hash);
1099 		else
1100 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1101 #else
1102 		mod_hash_destroy_idhash(t->tr_ai_hash);
1103 #endif /* _ILP32 */
1104 		t->tr_ai_hash = NULL;
1105 		if (i & TL_SOCKET)
1106 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1107 		else
1108 			mod_hash_destroy_hash(t->tr_addr_hash);
1109 		t->tr_addr_hash = NULL;
1110 	}
1111 
1112 	kmem_cache_destroy(tl_cache);
1113 	tl_cache = NULL;
1114 	id_space_destroy(tl_minors);
1115 	tl_minors = NULL;
1116 	ddi_remove_minor_node(devi, NULL);
1117 	return (DDI_SUCCESS);
1118 }
1119 
1120 /* ARGSUSED */
1121 static int
1122 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1123 {
1124 
1125 	int retcode = DDI_FAILURE;
1126 
1127 	switch (infocmd) {
1128 
1129 	case DDI_INFO_DEVT2DEVINFO:
1130 		if (tl_dip != NULL) {
1131 			*result = (void *)tl_dip;
1132 			retcode = DDI_SUCCESS;
1133 		}
1134 		break;
1135 
1136 	case DDI_INFO_DEVT2INSTANCE:
1137 		*result = (void *)0;
1138 		retcode = DDI_SUCCESS;
1139 		break;
1140 
1141 	default:
1142 		break;
1143 	}
1144 	return (retcode);
1145 }
1146 
1147 /*
1148  * Endpoint reference management.
1149  */
1150 static void
1151 tl_refhold(tl_endpt_t *tep)
1152 {
1153 	atomic_add_32(&tep->te_refcnt, 1);
1154 }
1155 
1156 static void
1157 tl_refrele(tl_endpt_t *tep)
1158 {
1159 	ASSERT(tep->te_refcnt != 0);
1160 
1161 	if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1162 		tl_free(tep);
1163 }
1164 
1165 /*ARGSUSED*/
1166 static int
1167 tl_constructor(void *buf, void *cdrarg, int kmflags)
1168 {
1169 	tl_endpt_t *tep = buf;
1170 
1171 	bzero(tep, sizeof (tl_endpt_t));
1172 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1173 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1174 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1175 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1176 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1177 
1178 	return (0);
1179 }
1180 
1181 /*ARGSUSED*/
1182 static void
1183 tl_destructor(void *buf, void *cdrarg)
1184 {
1185 	tl_endpt_t *tep = buf;
1186 
1187 	mutex_destroy(&tep->te_closelock);
1188 	cv_destroy(&tep->te_closecv);
1189 	mutex_destroy(&tep->te_srv_lock);
1190 	cv_destroy(&tep->te_srv_cv);
1191 	mutex_destroy(&tep->te_ser_lock);
1192 }
1193 
/*
 * Final teardown of an endpoint; called from tl_refrele() once the reference
 * count reaches zero.
 *
 * Frees the bound address buffer of a non-socket endpoint, returns the minor
 * number to tl_minors, cancels a still-reserved hash handle, drops the peer
 * references and the private serializer of a COTS endpoint, resets several
 * fields and hands the structure back to tl_cache. The ASSERTs spell out the
 * state the endpoint is expected to be in by the time it gets here.
 */
static void
tl_free(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt == 0);
	ASSERT(tep->te_transport != NULL);
	ASSERT(tep->te_rq == NULL);
	ASSERT(tep->te_wq == NULL);
	ASSERT(tep->te_ser != NULL);
	ASSERT(tep->te_ser_count == 0);
	ASSERT(! (tep->te_flag & TL_ADDRHASHED));

	if (IS_SOCKET(tep)) {
		/*
		 * Socket endpoints keep their address inside the endpoint
		 * (set up in tl_open()), so there is nothing to free.
		 */
		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
		ASSERT(tep->te_abuf == &tep->te_uxaddr);
		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
	} else if (tep->te_abuf != NULL) {
		kmem_free(tep->te_abuf, tep->te_alen);
		tep->te_alen = -1; /* uninitialized */
		tep->te_abuf = NULL;
	} else {
		ASSERT(tep->te_alen == -1);
	}

	id_free(tl_minors, tep->te_minor);
	ASSERT(tep->te_credp == NULL);

	/* Give back the hash handle reserved in tl_open() if still held. */
	if (tep->te_hash_hndl != NULL)
		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

	if (IS_COTS(tep)) {
		TL_REMOVE_PEER(tep->te_conp);
		TL_REMOVE_PEER(tep->te_oconp);
		/* COTS endpoints own a private serializer (see tl_open()). */
		tl_serializer_refrele(tep->te_ser);
		tep->te_ser = NULL;
		ASSERT(tep->te_nicon == 0);
		ASSERT(list_head(&tep->te_iconp) == NULL);
	} else {
		ASSERT(tep->te_lastep == NULL);
		ASSERT(list_head(&tep->te_flowlist) == NULL);
		ASSERT(tep->te_flowq == NULL);
	}

	ASSERT(tep->te_bufcid == 0);
	ASSERT(tep->te_timoutid == 0);
	bzero(&tep->te_ap, sizeof (tep->te_ap));
	tep->te_acceptor_id = 0;

	ASSERT(tep->te_closewait == 0);
	ASSERT(!tep->te_rsrv_active);
	ASSERT(!tep->te_wsrv_active);
	/*
	 * Reset close-related state and flags before the structure goes back
	 * to the cache; presumably so a later tl_open() that reuses it starts
	 * from a clean state.
	 */
	tep->te_closing = 0;
	tep->te_nowsrv = B_FALSE;
	tep->te_flag = 0;

	kmem_cache_free(tl_cache, tep);
}
1251 
1252 /*
1253  * Allocate/free reference-counted wrappers for serializers.
1254  */
1255 static tl_serializer_t *
1256 tl_serializer_alloc(int flags)
1257 {
1258 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1259 	serializer_t *ser;
1260 
1261 	if (s == NULL)
1262 		return (NULL);
1263 
1264 	ser = serializer_create(flags);
1265 
1266 	if (ser == NULL) {
1267 		kmem_free(s, sizeof (tl_serializer_t));
1268 		return (NULL);
1269 	}
1270 
1271 	s->ts_refcnt = 1;
1272 	s->ts_serializer = ser;
1273 	return (s);
1274 }
1275 
1276 static void
1277 tl_serializer_refhold(tl_serializer_t *s)
1278 {
1279 	atomic_add_32(&s->ts_refcnt, 1);
1280 }
1281 
1282 static void
1283 tl_serializer_refrele(tl_serializer_t *s)
1284 {
1285 	if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1286 		serializer_destroy(s->ts_serializer);
1287 		kmem_free(s, sizeof (tl_serializer_t));
1288 	}
1289 }
1290 
1291 /*
1292  * Post a request on the endpoint serializer. For COTS transports keep track of
1293  * the number of pending requests.
1294  */
1295 static void
1296 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1297 {
1298 	if (IS_COTS(tep)) {
1299 		mutex_enter(&tep->te_ser_lock);
1300 		tep->te_ser_count++;
1301 		mutex_exit(&tep->te_ser_lock);
1302 	}
1303 	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1304 }
1305 
1306 /*
1307  * Complete processing the request on the serializer. Decrement the counter for
1308  * pending requests for COTS transports.
1309  */
1310 static void
1311 tl_serializer_exit(tl_endpt_t *tep)
1312 {
1313 	if (IS_COTS(tep)) {
1314 		mutex_enter(&tep->te_ser_lock);
1315 		ASSERT(tep->te_ser_count != 0);
1316 		tep->te_ser_count--;
1317 		mutex_exit(&tep->te_ser_lock);
1318 	}
1319 }
1320 
1321 /*
1322  * Hash management functions.
1323  */
1324 
1325 /*
1326  * Return TRUE if two addresses are equal, false otherwise.
1327  */
1328 static boolean_t
1329 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1330 {
1331 	return ((ap1->ta_alen > 0) &&
1332 	    (ap1->ta_alen == ap2->ta_alen) &&
1333 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1334 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1335 }
1336 
1337 /*
1338  * This function is called whenever an endpoint is found in the hash table.
1339  */
1340 /* ARGSUSED0 */
1341 static void
1342 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1343 {
1344 	tl_refhold((tl_endpt_t *)val);
1345 }
1346 
1347 /*
1348  * Address hash function.
1349  */
1350 /* ARGSUSED */
1351 static uint_t
1352 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1353 {
1354 	tl_addr_t *ap = (tl_addr_t *)key;
1355 	size_t	len = ap->ta_alen;
1356 	uchar_t *p = ap->ta_abuf;
1357 	uint_t i, g;
1358 
1359 	ASSERT((len > 0) && (p != NULL));
1360 
1361 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1362 		i = (i << 4) + (*p);
1363 		if ((g = (i & 0xf0000000U)) != 0) {
1364 			i ^= (g >> 24);
1365 			i ^= g;
1366 		}
1367 	}
1368 	return (i);
1369 }
1370 
1371 /*
1372  * This function is used by hash lookups. It compares two generic addresses.
1373  */
1374 static int
1375 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1376 {
1377 #ifdef 	DEBUG
1378 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1379 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1380 
1381 	ASSERT(key1 != NULL);
1382 	ASSERT(key2 != NULL);
1383 
1384 	ASSERT(ap1->ta_abuf != NULL);
1385 	ASSERT(ap2->ta_abuf != NULL);
1386 	ASSERT(ap1->ta_alen > 0);
1387 	ASSERT(ap2->ta_alen > 0);
1388 #endif
1389 
1390 	return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1391 }
1392 
1393 /*
1394  * Prevent endpoint from closing if possible.
1395  * Return B_TRUE on success, B_FALSE on failure.
1396  */
1397 static boolean_t
1398 tl_noclose(tl_endpt_t *tep)
1399 {
1400 	boolean_t rc = B_FALSE;
1401 
1402 	mutex_enter(&tep->te_closelock);
1403 	if (! tep->te_closing) {
1404 		ASSERT(tep->te_closewait == 0);
1405 		tep->te_closewait++;
1406 		rc = B_TRUE;
1407 	}
1408 	mutex_exit(&tep->te_closelock);
1409 	return (rc);
1410 }
1411 
/*
 * Allow endpoint to close if needed.
 *
 * Drops te_closewait back to zero under te_closelock and signals
 * te_closecv, which is what tl_close() sleeps on while te_closewait is set.
 * Used both to undo a successful tl_noclose() and by the serialized close
 * routines to wake up tl_close().
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	/* Unlocked sanity check; re-checked precisely under the lock below. */
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}
1425 
/*
 * STREAMS open entry point.
 *
 * The minor number being opened selects the transport; SO_SOCKSTR in oflag
 * selects the socket flavor of that transport. A fresh endpoint is taken
 * from tl_cache, given its own unique minor number (the device is cloned by
 * rewriting *devp) and entered into the transport's acceptor ID hash.
 *
 * Returns 0 on success (including a re-open of an already initialized
 * queue) and ENXIO for CLONEOPEN/MODOPEN or an out-of-range minor.
 */
/* ARGSUSED */
static int
tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	/* Queue already has an endpoint attached: nothing more to do. */
	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	/* From here on "minor" doubles as the index into tl_transports[]. */
	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;	/* reference dropped at the end of tl_close() */
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;

	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/* Socket-specific address handling. */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		/* No address until bind; -1 marks the length uninitialized. */
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

	/*
	 * Pick the acceptor ID: the minor number, except on ILP32 for
	 * non-socket endpoints where the read queue pointer is used. This
	 * matches the flavor of tr_ai_hash created in tl_attach().
	 */
#ifdef	_ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif	/* _ILP32 */


	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}
1536 
/*
 * STREAMS close entry point.
 *
 * Close is done in two serialized phases around qprocsoff():
 *
 *  1. tl_close_ser() is posted and waited for before qprocsoff().
 *  2. tl_close_finish_ser() is posted after qprocsoff(). A COTS endpoint
 *     waits for it to complete; a CLTS endpoint takes an extra reference and
 *     lets it finish asynchronously.
 *
 * te_closewait/te_closecv (under te_closelock) are used for the waiting.
 * Finally the credentials and the open-time reference are released.
 * Always returns 0.
 */
/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag,	cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 * (Close is held off while somebody has succeeded in tl_noclose().)
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	/* Cancel any outstanding bufcall and timeout for this queue. */
	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close processing
	 * with serializer protection. This processing may happen asynchronously
	 * with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	/* Release the credentials held in tl_open(). */
	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	/* Drop the reference taken in tl_open(). */
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 * (wq was saved in a local at the top for exactly this reason.)
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}
1642 
/*
 * First phase of close processing done behind the serializer.
 *
 * Marks the endpoint with TL_CLOSE_SER, drains queued write-side messages
 * where the transport requires it, unbinds the address, detaches the queue
 * linkage that would confuse qprocsoff(), clears te_rq and wakes up
 * tl_close().
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}
1688 
/*
 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
 * the reference for CLTS.
 *
 * Called from serializer, after tl_close() has done qprocsoff(). For COTS
 * the endpoint is disconnected via tl_co_unconnect(); for CLTS the cached
 * peer and flow-control linkage are torn down. Should drop reference count
 * for CLTS only (the one taken by tl_close() before posting this routine).
 */
/* ARGSUSED0 */
static void
tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(IMPLY(IS_CLTS(tep), tep->te_closewait == 0));
	ASSERT(IMPLY(IS_COTS(tep), tep->te_closewait == 1));

	tep->te_state = -1;	/* Uninitialized */
	if (IS_COTS(tep)) {
		tl_co_unconnect(tep);
	} else {
		/* Connectionless specific cleanup */
		TL_REMOVE_PEER(tep->te_lastep);
		/*
		 * Backenable anybody that is flow controlled waiting for
		 * this endpoint.
		 */
		tl_cl_backenable(tep);
		/* Take this endpoint off the flow list of its te_flowq. */
		if (tep->te_flowq != NULL) {
			list_remove(&(tep->te_flowq->te_flowlist), tep);
			tep->te_flowq = NULL;
		}
	}

	tl_serializer_exit(tep);
	if (IS_COTS(tep))
		tl_closeok(tep);	/* tl_close() is waiting for this */
	else
		tl_refrele(tep);	/* may free tep */
}
1726 
/*
 * STREAMS write-side put procedure.
 * Enter serializer for most of the processing.
 *
 * The message type (and, for M_PROTO/M_PCPROTO, the TPI primitive) selects
 * a serialized handler, which is then posted with a reference held on the
 * endpoint. A few cases are completed right here without entering the
 * serializer: M_FLUSH, unsupported ioctls, option management requests,
 * T_CAPABILITY_REQ, and messages rejected with an error.
 *
 * The T_CONN_REQ is processed outside of serializer.
 */
static void
tl_wput(queue_t *wq, mblk_t *mp)
{
	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
	ssize_t			msz = MBLKL(mp);
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	tlproc_t		*tl_proc = NULL;

	switch (DB_TYPE(mp)) {
	case M_DATA:
		/* Only valid for connection-oriented transports */
		if (IS_CLTS(tep)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
				SL_TRACE|SL_ERROR,
				"tl_wput:M_DATA invalid for ticlts driver"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		tl_proc = tl_wput_data_ser;
		break;

	case M_IOCTL:
		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
		case TL_IOC_CREDOPT:
			/* FALLTHROUGH */
		case TL_IOC_UCREDOPT:
			/*
			 * Serialize endpoint state change.
			 */
			tl_proc = tl_do_ioctl_ser;
			break;

		default:
			/* Any other ioctl is rejected with EINVAL. */
			miocnak(wq, mp, 0, EINVAL);
			return;
		}
		break;

	case M_FLUSH:
		/*
		 * do canonical M_FLUSH processing
		 */
		if (*mp->b_rptr & FLUSHW) {
			flushq(wq, FLUSHALL);
			*mp->b_rptr &= ~FLUSHW;
		}
		if (*mp->b_rptr & FLUSHR) {
			flushq(RD(wq), FLUSHALL);
			qreply(wq, mp);
		} else {
			freemsg(mp);
		}
		return;

	case M_PROTO:
		/* Need at least the primitive type to dispatch on. */
		if (msz < sizeof (prim->type)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
				SL_TRACE|SL_ERROR,
				"tl_wput:M_PROTO data too short"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		switch (prim->type) {
		case T_OPTMGMT_REQ:
		case T_SVR4_OPTMGMT_REQ:
			/*
			 * Process TPI option management requests immediately
			 * in put procedure regardless of in-order processing
			 * of already queued messages.
			 * (Note: This driver supports AF_UNIX socket
			 * implementation.  Unless we implement this processing,
			 * setsockopt() on socket endpoint will block on flow
			 * controlled endpoints which it should not. That is
			 * required for successful execution of VSU socket tests
			 * and is consistent with BSD socket behavior).
			 */
			tl_optmgmt(wq, mp);
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			tl_proc = tl_bind_ser;
			break;
		case T_CONN_REQ:
			/* Connect makes no sense on a connectionless endpoint. */
			if (IS_CLTS(tep)) {
				tl_merror(wq, mp, EPROTO);
				return;
			}
			tl_conn_req(wq, mp);
			return;
		case T_DATA_REQ:
		case T_OPTDATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			/* Always queued; see tl_putq_ser(). */
			tl_proc = tl_putq_ser;
			break;
		case T_UNITDATA_REQ:
			if (IS_COTS(tep) ||
			    (msz < sizeof (struct T_unitdata_req))) {
				tl_merror(wq, mp, EPROTO);
				return;
			}
			/*
			 * Send directly only when idle with nothing already
			 * queued; otherwise queue behind earlier messages.
			 */
			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
				tl_proc = tl_unitdata_ser;
			} else {
				tl_proc = tl_putq_ser;
			}
			break;
		default:
			/*
			 * process in service procedure if message already
			 * queued (maintain in-order processing)
			 */
			if (wq->q_first != NULL) {
				tl_proc = tl_putq_ser;
			} else {
				tl_proc = tl_wput_ser;
			}
			break;
		}
		break;

	case M_PCPROTO:
		/*
		 * Check that the message has enough data to figure out TPI
		 * primitive.
		 */
		if (msz < sizeof (prim->type)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
				SL_TRACE|SL_ERROR,
				"tl_wput:M_PCROTO data too short"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		switch (prim->type) {
		case T_CAPABILITY_REQ:
			/* Handled here, without entering the serializer. */
			tl_capability_req(mp, tep);
			return;
		case T_INFO_REQ:
			tl_proc = tl_info_req_ser;
			break;
		default:
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
				    SL_TRACE|SL_ERROR,
				    "tl_wput:unknown TPI msg primitive"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
			"tl_wput:default:unexpected Streams message"));
		freemsg(mp);
		return;
	}

	/*
	 * Continue processing via serializer. The reference taken here is
	 * for the posted handler; tl_putq_ser() and tl_wput_ser() release it
	 * when done.
	 */
	ASSERT(tl_proc != NULL);
	tl_refhold(tep);
	tl_serializer_enter(tep, tl_proc, mp);
}
1895 
1896 /*
1897  * Place message on the queue while preserving order.
1898  */
1899 static void
1900 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1901 {
1902 	if (tep->te_closing) {
1903 		tl_wput_ser(mp, tep);
1904 	} else {
1905 		TL_PUTQ(tep, mp);
1906 		tl_serializer_exit(tep);
1907 		tl_refrele(tep);
1908 	}
1909 
1910 }
1911 
1912 static void
1913 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1914 {
1915 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1916 
1917 	switch (DB_TYPE(mp)) {
1918 	case M_DATA:
1919 		tl_data(mp, tep);
1920 		break;
1921 	case M_PROTO:
1922 		tl_do_proto(mp, tep);
1923 		break;
1924 	default:
1925 		freemsg(mp);
1926 		break;
1927 	}
1928 }
1929 
1930 /*
1931  * Write side put procedure called from serializer.
1932  */
1933 static void
1934 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1935 {
1936 	tl_wput_common_ser(mp, tep);
1937 	tl_serializer_exit(tep);
1938 	tl_refrele(tep);
1939 }
1940 
1941 /*
1942  * M_DATA processing. Called from serializer.
1943  */
1944 static void
1945 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1946 {
1947 	tl_endpt_t	*peer_tep = tep->te_conp;
1948 	queue_t		*peer_rq;
1949 
1950 	ASSERT(DB_TYPE(mp) == M_DATA);
1951 	ASSERT(IS_COTS(tep));
1952 
1953 	ASSERT(IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer));
1954 
1955 	/*
1956 	 * fastpath for data. Ignore flow control if tep is closing.
1957 	 */
1958 	if ((peer_tep != NULL) &&
1959 	    !peer_tep->te_closing &&
1960 	    ((tep->te_state == TS_DATA_XFER) ||
1961 		(tep->te_state == TS_WREQ_ORDREL)) &&
1962 	    (tep->te_wq != NULL) &&
1963 	    (tep->te_wq->q_first == NULL) &&
1964 	    ((peer_tep->te_state == TS_DATA_XFER) ||
1965 		(peer_tep->te_state == TS_WREQ_ORDREL))	&&
1966 	    ((peer_rq = peer_tep->te_rq) != NULL) &&
1967 	    (canputnext(peer_rq) || tep->te_closing)) {
1968 		putnext(peer_rq, mp);
1969 	} else if (tep->te_closing) {
1970 		/*
1971 		 * It is possible that by the time we got here tep started to
1972 		 * close. If the write queue is not empty, and the state is
1973 		 * TS_DATA_XFER the data should be delivered in order, so we
1974 		 * call putq() instead of freeing the data.
1975 		 */
1976 		if ((tep->te_wq != NULL) &&
1977 		    ((tep->te_state == TS_DATA_XFER) ||
1978 			(tep->te_state == TS_WREQ_ORDREL))) {
1979 			TL_PUTQ(tep, mp);
1980 		} else {
1981 			freemsg(mp);
1982 		}
1983 	} else {
1984 		TL_PUTQ(tep, mp);
1985 	}
1986 
1987 	tl_serializer_exit(tep);
1988 	tl_refrele(tep);
1989 }
1990 
1991 /*
1992  * Write side service routine.
1993  *
1994  * All actual processing happens within serializer which is entered
1995  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1996  * messages that need processing may have arrived, so tl_wsrv repeats until
1997  * queue is empty or te_nowsrv is set.
1998  */
1999 static void
2000 tl_wsrv(queue_t *wq)
2001 {
2002 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2003 
2004 	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
2005 		mutex_enter(&tep->te_srv_lock);
2006 		ASSERT(tep->te_wsrv_active == B_FALSE);
2007 		tep->te_wsrv_active = B_TRUE;
2008 		mutex_exit(&tep->te_srv_lock);
2009 
2010 		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2011 
2012 		/*
2013 		 * Wait for serializer job to complete.
2014 		 */
2015 		mutex_enter(&tep->te_srv_lock);
2016 		while (tep->te_wsrv_active) {
2017 			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2018 		}
2019 		cv_signal(&tep->te_srv_cv);
2020 		mutex_exit(&tep->te_srv_lock);
2021 	}
2022 }
2023 
2024 /*
2025  * Serialized write side processing of the STREAMS queue.
2026  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2027  * is NULL.
2028  */
2029 static void
2030 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2031 {
2032 	mblk_t *mp;
2033 	queue_t *wq = tep->te_wq;
2034 
2035 	ASSERT(wq != NULL);
2036 	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2037 		tl_wput_common_ser(mp, tep);
2038 	}
2039 
2040 	/*
2041 	 * Wakeup service routine unless called from close.
2042 	 * If ser_mp is specified, the caller is tl_wsrv().
2043 	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2044 	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2045 	 * be no matching tl_serializer_exit() in this case.
2046 	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2047 	 * waiting on te_srv_cv.
2048 	 */
2049 	if (ser_mp != NULL) {
2050 		/*
2051 		 * We are called from tl_wsrv.
2052 		 */
2053 		mutex_enter(&tep->te_srv_lock);
2054 		ASSERT(tep->te_wsrv_active);
2055 		tep->te_wsrv_active = B_FALSE;
2056 		cv_signal(&tep->te_srv_cv);
2057 		mutex_exit(&tep->te_srv_lock);
2058 		tl_serializer_exit(tep);
2059 	}
2060 }
2061 
2062 /*
2063  * Called when the stream is backenabled. Enter serializer and qenable everyone
2064  * flow controlled by tep.
2065  *
2066  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2067  * is possible that two instances of tl_rsrv will be running reusing the same
2068  * rsrv mblk.
2069  */
2070 static void
2071 tl_rsrv(queue_t *rq)
2072 {
2073 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2074 
2075 	ASSERT(rq->q_first == NULL);
2076 	ASSERT(tep->te_rsrv_active == 0);
2077 
2078 	tep->te_rsrv_active = B_TRUE;
2079 	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2080 	/*
2081 	 * Wait for serializer job to complete.
2082 	 */
2083 	mutex_enter(&tep->te_srv_lock);
2084 	while (tep->te_rsrv_active) {
2085 		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2086 	}
2087 	cv_signal(&tep->te_srv_cv);
2088 	mutex_exit(&tep->te_srv_lock);
2089 }
2090 
2091 /* ARGSUSED */
2092 static void
2093 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2094 {
2095 	tl_endpt_t *peer_tep;
2096 
2097 	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2098 		tl_cl_backenable(tep);
2099 	} else if (
2100 		IS_COTS(tep) &&
2101 		    ((peer_tep = tep->te_conp) != NULL) &&
2102 		    !peer_tep->te_closing &&
2103 		    ((tep->te_state == TS_DATA_XFER) ||
2104 			(tep->te_state == TS_WIND_ORDREL)||
2105 			(tep->te_state == TS_WREQ_ORDREL))) {
2106 		TL_QENABLE(peer_tep);
2107 	}
2108 
2109 	/*
2110 	 * Wakeup read side service routine.
2111 	 */
2112 	mutex_enter(&tep->te_srv_lock);
2113 	ASSERT(tep->te_rsrv_active);
2114 	tep->te_rsrv_active = B_FALSE;
2115 	cv_signal(&tep->te_srv_cv);
2116 	mutex_exit(&tep->te_srv_lock);
2117 	tl_serializer_exit(tep);
2118 }
2119 
2120 /*
2121  * process M_PROTO messages. Always called from serializer.
2122  */
2123 static void
2124 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2125 {
2126 	ssize_t			msz = MBLKL(mp);
2127 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2128 
2129 	/* Message size was validated by tl_wput(). */
2130 	ASSERT(msz >= sizeof (prim->type));
2131 
2132 	switch (prim->type) {
2133 	case T_UNBIND_REQ:
2134 		tl_unbind(mp, tep);
2135 		break;
2136 
2137 	case T_ADDR_REQ:
2138 		tl_addr_req(mp, tep);
2139 		break;
2140 
2141 	case O_T_CONN_RES:
2142 	case T_CONN_RES:
2143 		if (IS_CLTS(tep)) {
2144 			tl_merror(tep->te_wq, mp, EPROTO);
2145 			break;
2146 		}
2147 		tl_conn_res(mp, tep);
2148 		break;
2149 
2150 	case T_DISCON_REQ:
2151 		if (IS_CLTS(tep)) {
2152 			tl_merror(tep->te_wq, mp, EPROTO);
2153 			break;
2154 		}
2155 		tl_discon_req(mp, tep);
2156 		break;
2157 
2158 	case T_DATA_REQ:
2159 		if (IS_CLTS(tep)) {
2160 			tl_merror(tep->te_wq, mp, EPROTO);
2161 			break;
2162 		}
2163 		tl_data(mp, tep);
2164 		break;
2165 
2166 	case T_OPTDATA_REQ:
2167 		if (IS_CLTS(tep)) {
2168 			tl_merror(tep->te_wq, mp, EPROTO);
2169 			break;
2170 		}
2171 		tl_data(mp, tep);
2172 		break;
2173 
2174 	case T_EXDATA_REQ:
2175 		if (IS_CLTS(tep)) {
2176 			tl_merror(tep->te_wq, mp, EPROTO);
2177 			break;
2178 		}
2179 		tl_exdata(mp, tep);
2180 		break;
2181 
2182 	case T_ORDREL_REQ:
2183 		if (! IS_COTSORD(tep)) {
2184 			tl_merror(tep->te_wq, mp, EPROTO);
2185 			break;
2186 		}
2187 		tl_ordrel(mp, tep);
2188 		break;
2189 
2190 	case T_UNITDATA_REQ:
2191 		if (IS_COTS(tep)) {
2192 			tl_merror(tep->te_wq, mp, EPROTO);
2193 			break;
2194 		}
2195 		tl_unitdata(mp, tep);
2196 		break;
2197 
2198 	default:
2199 		tl_merror(tep->te_wq, mp, EPROTO);
2200 		break;
2201 	}
2202 }
2203 
2204 /*
2205  * Process ioctl from serializer.
2206  * This is a wrapper around tl_do_ioctl().
2207  */
2208 static void
2209 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2210 {
2211 	if (! tep->te_closing)
2212 		tl_do_ioctl(mp, tep);
2213 	else
2214 		freemsg(mp);
2215 
2216 	tl_serializer_exit(tep);
2217 	tl_refrele(tep);
2218 }
2219 
2220 static void
2221 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2222 {
2223 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2224 	int cmd = iocbp->ioc_cmd;
2225 	queue_t *wq = tep->te_wq;
2226 	int error;
2227 	int thisopt, otheropt;
2228 
2229 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2230 
2231 	switch (cmd) {
2232 	case TL_IOC_CREDOPT:
2233 		if (cmd == TL_IOC_CREDOPT) {
2234 			thisopt = TL_SETCRED;
2235 			otheropt = TL_SETUCRED;
2236 		} else {
2237 			/* FALLTHROUGH */
2238 	case TL_IOC_UCREDOPT:
2239 			thisopt = TL_SETUCRED;
2240 			otheropt = TL_SETCRED;
2241 		}
2242 		/*
2243 		 * The credentials passing does not apply to sockets.
2244 		 * Only one of the cred options can be set at a given time.
2245 		 */
2246 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2247 			miocnak(wq, mp, 0, EINVAL);
2248 			return;
2249 		}
2250 
2251 		/*
2252 		 * Turn on generation of credential options for
2253 		 * T_conn_req, T_conn_con, T_unidata_ind.
2254 		 */
2255 		error = miocpullup(mp, sizeof (uint32_t));
2256 		if (error != 0) {
2257 			miocnak(wq, mp, 0, error);
2258 			return;
2259 		}
2260 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2261 			miocnak(wq, mp, 0, EINVAL);
2262 			return;
2263 		}
2264 
2265 		if (*(uint32_t *)mp->b_cont->b_rptr)
2266 			tep->te_flag |= thisopt;
2267 		else
2268 			tep->te_flag &= ~thisopt;
2269 
2270 		miocack(wq, mp, 0, 0);
2271 		break;
2272 
2273 	default:
2274 		/* Should not be here */
2275 		miocnak(wq, mp, 0, EINVAL);
2276 		break;
2277 	}
2278 }
2279 
2280 
2281 /*
2282  * send T_ERROR_ACK
2283  * Note: assumes enough memory or caller passed big enough mp
2284  *	- no recovery from allocb failures
2285  */
2286 
2287 static void
2288 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2289     t_scalar_t unix_err, t_scalar_t type)
2290 {
2291 	struct T_error_ack *err_ack;
2292 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2293 	    M_PCPROTO, T_ERROR_ACK);
2294 
2295 	if (ackmp == NULL) {
2296 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2297 			    "tl_error_ack:out of mblk memory"));
2298 		tl_merror(wq, NULL, ENOSR);
2299 		return;
2300 	}
2301 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2302 	err_ack->ERROR_prim = type;
2303 	err_ack->TLI_error = tli_err;
2304 	err_ack->UNIX_error = unix_err;
2305 
2306 	/*
2307 	 * send error ack message
2308 	 */
2309 	qreply(wq, ackmp);
2310 }
2311 
2312 
2313 
2314 /*
2315  * send T_OK_ACK
2316  * Note: assumes enough memory or caller passed big enough mp
2317  *	- no recovery from allocb failures
2318  */
2319 static void
2320 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2321 {
2322 	struct T_ok_ack *ok_ack;
2323 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2324 	    M_PCPROTO, T_OK_ACK);
2325 
2326 	if (ackmp == NULL) {
2327 		tl_merror(wq, NULL, ENOMEM);
2328 		return;
2329 	}
2330 
2331 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2332 	ok_ack->CORRECT_prim = type;
2333 
2334 	(void) qreply(wq, ackmp);
2335 }
2336 
2337 /*
2338  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2339  * This is a wrapper around tl_bind().
2340  */
2341 static void
2342 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2343 {
2344 	if (! tep->te_closing)
2345 		tl_bind(mp, tep);
2346 	else
2347 		freemsg(mp);
2348 
2349 	tl_serializer_exit(tep);
2350 	tl_refrele(tep);
2351 }
2352 
2353 /*
2354  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2355  * Assumes that the endpoint is in the unbound.
2356  */
2357 static void
2358 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2359 {
2360 	queue_t			*wq = tep->te_wq;
2361 	struct T_bind_ack	*b_ack;
2362 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2363 	mblk_t			*ackmp, *bamp;
2364 	soux_addr_t		ux_addr;
2365 	t_uscalar_t		qlen = 0;
2366 	t_scalar_t		alen, aoff;
2367 	tl_addr_t		addr_req;
2368 	void			*addr_startp;
2369 	ssize_t			msz = MBLKL(mp), basize;
2370 	t_scalar_t		tli_err = 0, unix_err = 0;
2371 	t_scalar_t		save_prim_type = bind->PRIM_type;
2372 	t_scalar_t		save_state = tep->te_state;
2373 
2374 	if (tep->te_state != TS_UNBND) {
2375 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2376 			SL_TRACE|SL_ERROR,
2377 			"tl_wput:bind_request:out of state, state=%d",
2378 			tep->te_state));
2379 		tli_err = TOUTSTATE;
2380 		goto error;
2381 	}
2382 
2383 	if (msz < sizeof (struct T_bind_req)) {
2384 		tli_err = TSYSERR; unix_err = EINVAL;
2385 		goto error;
2386 	}
2387 
2388 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2389 
2390 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2391 	    (bind->PRIM_type == T_BIND_REQ));
2392 
2393 	alen = bind->ADDR_length;
2394 	aoff = bind->ADDR_offset;
2395 
2396 	/* negotiate max conn req pending */
2397 	if (IS_COTS(tep)) {
2398 		qlen = bind->CONIND_number;
2399 		if (qlen > tl_maxqlen)
2400 			qlen = tl_maxqlen;
2401 	}
2402 
2403 	/*
2404 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2405 	 * and bound again.
2406 	 */
2407 	if ((tep->te_hash_hndl == NULL) &&
2408 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2409 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2410 		&tep->te_hash_hndl) != 0) {
2411 		tli_err = TSYSERR; unix_err = ENOSR;
2412 		goto error;
2413 	}
2414 
2415 	/*
2416 	 * Verify address correctness.
2417 	 */
2418 	if (IS_SOCKET(tep)) {
2419 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2420 
2421 		if ((alen != TL_SOUX_ADDRLEN) ||
2422 		    (aoff < 0) ||
2423 		    (aoff + alen > msz)) {
2424 			(void) (STRLOG(TL_ID, tep->te_minor,
2425 				    1, SL_TRACE|SL_ERROR,
2426 				    "tl_bind: invalid socket addr"));
2427 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2428 			tli_err = TSYSERR; unix_err = EINVAL;
2429 			goto error;
2430 		}
2431 		/* Copy address from message to local buffer. */
2432 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2433 		/*
2434 		 * Check that we got correct address from sockets
2435 		 */
2436 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2437 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2438 			(void) (STRLOG(TL_ID, tep->te_minor,
2439 				    1, SL_TRACE|SL_ERROR,
2440 				    "tl_bind: invalid socket magic"));
2441 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2442 			tli_err = TSYSERR; unix_err = EINVAL;
2443 			goto error;
2444 		}
2445 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2446 		    (ux_addr.soua_vp != NULL)) {
2447 			(void) (STRLOG(TL_ID, tep->te_minor,
2448 				    1, SL_TRACE|SL_ERROR,
2449 				    "tl_bind: implicit addr non-empty"));
2450 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2451 			tli_err = TSYSERR; unix_err = EINVAL;
2452 			goto error;
2453 		}
2454 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2455 		    (ux_addr.soua_vp == NULL)) {
2456 			(void) (STRLOG(TL_ID, tep->te_minor,
2457 				    1, SL_TRACE|SL_ERROR,
2458 				    "tl_bind: explicit addr empty"));
2459 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2460 			tli_err = TSYSERR; unix_err = EINVAL;
2461 			goto error;
2462 		}
2463 	} else {
2464 		if ((alen > 0) && ((aoff < 0) ||
2465 			((ssize_t)(aoff + alen) > msz) ||
2466 			((aoff + alen) < 0))) {
2467 			(void) (STRLOG(TL_ID, tep->te_minor,
2468 				    1, SL_TRACE|SL_ERROR,
2469 				    "tl_bind: invalid message"));
2470 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2471 			tli_err = TSYSERR; unix_err = EINVAL;
2472 			goto error;
2473 		}
2474 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2475 			(void) (STRLOG(TL_ID, tep->te_minor,
2476 				    1, SL_TRACE|SL_ERROR,
2477 				    "tl_bind: bad addr in  message"));
2478 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2479 			tli_err = TBADADDR;
2480 			goto error;
2481 		}
2482 #ifdef DEBUG
2483 		/*
2484 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2485 		 * if (! assertion)
2486 		 *	log warning;
2487 		 */
2488 		if (! ((alen == 0 && aoff == 0) ||
2489 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2490 			(void) (STRLOG(TL_ID, tep->te_minor,
2491 				    3, SL_TRACE|SL_ERROR,
2492 				    "tl_bind: addr overlaps TPI message"));
2493 		}
2494 #endif
2495 	}
2496 
2497 	/*
2498 	 * Bind the address provided or allocate one if requested.
2499 	 * Allow rebinds with a new qlen value.
2500 	 */
2501 	if (IS_SOCKET(tep)) {
2502 		/*
2503 		 * For anonymous requests the te_ap is already set up properly
2504 		 * so use minor number as an address.
2505 		 * For explicit requests need to check whether the address is
2506 		 * already in use.
2507 		 */
2508 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2509 			int rc;
2510 
2511 			if (tep->te_flag & TL_ADDRHASHED) {
2512 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2513 				if (tep->te_vp == ux_addr.soua_vp)
2514 					goto skip_addr_bind;
2515 				else /* Rebind to a new address. */
2516 					tl_addr_unbind(tep);
2517 			}
2518 			/*
2519 			 * Insert address in the hash if it is not already
2520 			 * there.  Since we use preallocated handle, the insert
2521 			 * can fail only if the key is already present.
2522 			 */
2523 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2524 			    (mod_hash_key_t)ux_addr.soua_vp,
2525 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2526 
2527 			if (rc != 0) {
2528 				ASSERT(rc == MH_ERR_DUPLICATE);
2529 				/*
2530 				 * Violate O_T_BIND_REQ semantics and fail with
2531 				 * TADDRBUSY - sockets will not use any address
2532 				 * other than supplied one for explicit binds.
2533 				 */
2534 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2535 					SL_TRACE|SL_ERROR,
2536 					"tl_bind:requested addr %p is busy",
2537 					    ux_addr.soua_vp));
2538 				tli_err = TADDRBUSY; unix_err = 0;
2539 				goto error;
2540 			}
2541 			tep->te_uxaddr = ux_addr;
2542 			tep->te_flag |= TL_ADDRHASHED;
2543 			tep->te_hash_hndl = NULL;
2544 		}
2545 	} else if (alen == 0) {
2546 		/*
2547 		 * assign any free address
2548 		 */
2549 		if (! tl_get_any_addr(tep, NULL)) {
2550 			(void) (STRLOG(TL_ID, tep->te_minor,
2551 				1, SL_TRACE|SL_ERROR,
2552 				"tl_bind:failed to get buffer for any "
2553 				"address"));
2554 			tli_err = TSYSERR; unix_err = ENOSR;
2555 			goto error;
2556 		}
2557 	} else {
2558 		addr_req.ta_alen = alen;
2559 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2560 		addr_req.ta_zoneid = tep->te_zoneid;
2561 
2562 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2563 		if (tep->te_abuf == NULL) {
2564 			tli_err = TSYSERR; unix_err = ENOSR;
2565 			goto error;
2566 		}
2567 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2568 		tep->te_alen = alen;
2569 
2570 		if (mod_hash_insert_reserve(tep->te_addrhash,
2571 			(mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2572 			tep->te_hash_hndl) != 0) {
2573 			if (save_prim_type == T_BIND_REQ) {
2574 				/*
2575 				 * The bind semantics for this primitive
2576 				 * require a failure if the exact address
2577 				 * requested is busy
2578 				 */
2579 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2580 					SL_TRACE|SL_ERROR,
2581 					"tl_bind:requested addr is busy"));
2582 				tli_err = TADDRBUSY; unix_err = 0;
2583 				goto error;
2584 			}
2585 
2586 			/*
2587 			 * O_T_BIND_REQ semantics say if address if requested
2588 			 * address is busy, bind to any available free address
2589 			 */
2590 			if (! tl_get_any_addr(tep, &addr_req)) {
2591 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2592 					SL_TRACE|SL_ERROR,
2593 					"tl_bind:unable to get any addr buf"));
2594 				tli_err = TSYSERR; unix_err = ENOMEM;
2595 				goto error;
2596 			}
2597 		} else {
2598 			tep->te_flag |= TL_ADDRHASHED;
2599 			tep->te_hash_hndl = NULL;
2600 		}
2601 	}
2602 
2603 	ASSERT(tep->te_alen >= 0);
2604 
2605 skip_addr_bind:
2606 	/*
2607 	 * prepare T_BIND_ACK TPI message
2608 	 */
2609 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2610 	bamp = reallocb(mp, basize, 0);
2611 	if (bamp == NULL) {
2612 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2613 			"tl_wput:tl_bind: allocb failed"));
2614 		/*
2615 		 * roll back state changes
2616 		 */
2617 		tl_addr_unbind(tep);
2618 		tep->te_state = TS_UNBND;
2619 		tl_memrecover(wq, mp, basize);
2620 		return;
2621 	}
2622 
2623 	DB_TYPE(bamp) = M_PCPROTO;
2624 	bamp->b_wptr = bamp->b_rptr + basize;
2625 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2626 	b_ack->PRIM_type = T_BIND_ACK;
2627 	b_ack->CONIND_number = qlen;
2628 	b_ack->ADDR_length = tep->te_alen;
2629 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2630 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2631 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2632 
2633 	if (IS_COTS(tep)) {
2634 		tep->te_qlen = qlen;
2635 		if (qlen > 0)
2636 			tep->te_flag |= TL_LISTENER;
2637 	}
2638 
2639 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2640 	/*
2641 	 * send T_BIND_ACK message
2642 	 */
2643 	(void) qreply(wq, bamp);
2644 	return;
2645 
2646 error:
2647 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2648 	if (ackmp == NULL) {
2649 		/*
2650 		 * roll back state changes
2651 		 */
2652 		tep->te_state = save_state;
2653 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2654 		return;
2655 	}
2656 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2657 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2658 }
2659 
2660 /*
2661  * Process T_UNBIND_REQ.
2662  * Called from serializer.
2663  */
2664 static void
2665 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2666 {
2667 	queue_t *wq;
2668 	mblk_t *ackmp;
2669 
2670 	if (tep->te_closing) {
2671 		freemsg(mp);
2672 		return;
2673 	}
2674 
2675 	wq = tep->te_wq;
2676 
2677 	/*
2678 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2679 	 * ==> allocate for T_ERROR_ACK (known max)
2680 	 */
2681 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2682 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2683 		return;
2684 	}
2685 	/*
2686 	 * memory resources committed
2687 	 * Note: no message validation. T_UNBIND_REQ message is
2688 	 * same size as PRIM_type field so already verified earlier.
2689 	 */
2690 
2691 	/*
2692 	 * validate state
2693 	 */
2694 	if (tep->te_state != TS_IDLE) {
2695 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2696 			SL_TRACE|SL_ERROR,
2697 			"tl_wput:T_UNBIND_REQ:out of state, state=%d",
2698 			tep->te_state));
2699 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2700 		return;
2701 	}
2702 	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2703 
2704 	/*
2705 	 * TPI says on T_UNBIND_REQ:
2706 	 *    send up a M_FLUSH to flush both
2707 	 *    read and write queues
2708 	 */
2709 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2710 
2711 	if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2712 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2713 
2714 		/*
2715 		 * Sockets use bind with qlen==0 followed by bind() to
2716 		 * the same address with qlen > 0 for listeners.
2717 		 * We allow rebind with a new qlen value.
2718 		 */
2719 		tl_addr_unbind(tep);
2720 	}
2721 
2722 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2723 	/*
2724 	 * send  T_OK_ACK
2725 	 */
2726 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2727 }
2728 
2729 
2730 /*
2731  * Option management code from drv/ip is used here
2732  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2733  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2734  *	However, that is what we want as that option is 'unorthodox'
2735  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2736  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2737  * Note2: use of optcom_req means this routine is an exception to
2738  *	 recovery from allocb() failures.
2739  */
2740 
2741 static void
2742 tl_optmgmt(queue_t *wq, mblk_t *mp)
2743 {
2744 	tl_endpt_t *tep;
2745 	mblk_t *ackmp;
2746 	union T_primitives *prim;
2747 
2748 	tep = (tl_endpt_t *)wq->q_ptr;
2749 	prim = (union T_primitives *)mp->b_rptr;
2750 
2751 	/*  all states OK for AF_UNIX options ? */
2752 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2753 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2754 		/*
2755 		 * Broken TLI semantics that options can only be managed
2756 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2757 		 * tests this TLI (mis)feature using this device driver.
2758 		 */
2759 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2760 			SL_TRACE|SL_ERROR,
2761 			"tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2762 			tep->te_state));
2763 		/*
2764 		 * preallocate memory for T_ERROR_ACK
2765 		 */
2766 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2767 		if (! ackmp) {
2768 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2769 			return;
2770 		}
2771 
2772 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2773 		freemsg(mp);
2774 		return;
2775 	}
2776 
2777 	/*
2778 	 * call common option management routine from drv/ip
2779 	 */
2780 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2781 		(void) svr4_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj);
2782 	} else {
2783 		ASSERT(prim->type == T_OPTMGMT_REQ);
2784 		(void) tpi_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj);
2785 	}
2786 }
2787 
2788 /*
2789  * Handle T_conn_req - the driver part of accept().
2790  * If TL_SET[U]CRED generate the credentials options.
2791  * If this is a socket pass through options unmodified.
2792  * For sockets generate the T_CONN_CON here instead of
2793  * waiting for the T_CONN_RES.
2794  */
2795 static void
2796 tl_conn_req(queue_t *wq, mblk_t *mp)
2797 {
2798 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2799 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2800 	ssize_t			msz = MBLKL(mp);
2801 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2802 	tl_endpt_t		*peer_tep = NULL;
2803 	mblk_t			*ackmp;
2804 	mblk_t			*dimp;
2805 	struct T_discon_ind	*di;
2806 	soux_addr_t		ux_addr;
2807 	tl_addr_t		dst;
2808 
2809 	ASSERT(IS_COTS(tep));
2810 
2811 	if (tep->te_closing) {
2812 		freemsg(mp);
2813 		return;
2814 	}
2815 
2816 	/*
2817 	 * preallocate memory for:
2818 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2819 	 *	==> known max T_ERROR_ACK
2820 	 * 2. max of T_DISCON_IND and T_CONN_IND
2821 	 */
2822 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2823 	if (! ackmp) {
2824 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2825 		return;
2826 	}
2827 	/*
2828 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2829 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2830 	 */
2831 
2832 	if (tep->te_state != TS_IDLE) {
2833 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2834 			SL_TRACE|SL_ERROR,
2835 			"tl_wput:T_CONN_REQ:out of state, state=%d",
2836 			tep->te_state));
2837 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2838 		freemsg(mp);
2839 		return;
2840 	}
2841 
2842 	/*
2843 	 * validate the message
2844 	 * Note: dereference fields in struct inside message only
2845 	 * after validating the message length.
2846 	 */
2847 	if (msz < sizeof (struct T_conn_req)) {
2848 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2849 			"tl_conn_req:invalid message length"));
2850 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2851 		freemsg(mp);
2852 		return;
2853 	}
2854 	alen = creq->DEST_length;
2855 	aoff = creq->DEST_offset;
2856 	olen = creq->OPT_length;
2857 	ooff = creq->OPT_offset;
2858 	if (olen == 0)
2859 		ooff = 0;
2860 
2861 	if (IS_SOCKET(tep)) {
2862 		if ((alen != TL_SOUX_ADDRLEN) ||
2863 		    (aoff < 0) ||
2864 		    (aoff + alen > msz) ||
2865 		    (alen > msz - sizeof (struct T_conn_req))) {
2866 			(void) (STRLOG(TL_ID, tep->te_minor,
2867 				    1, SL_TRACE|SL_ERROR,
2868 				    "tl_conn_req: invalid socket addr"));
2869 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2870 			freemsg(mp);
2871 			return;
2872 		}
2873 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2874 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2875 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2876 			(void) (STRLOG(TL_ID, tep->te_minor,
2877 				    1, SL_TRACE|SL_ERROR,
2878 				    "tl_conn_req: invalid socket magic"));
2879 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2880 			freemsg(mp);
2881 			return;
2882 		}
2883 	} else {
2884 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2885 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2886 			ooff + olen < 0)) ||
2887 		    olen < 0 || ooff < 0) {
2888 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2889 				    SL_TRACE|SL_ERROR,
2890 				    "tl_conn_req:invalid message"));
2891 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2892 			freemsg(mp);
2893 			return;
2894 		}
2895 
2896 		if (alen <= 0 || aoff < 0 ||
2897 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2898 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2899 				    SL_TRACE|SL_ERROR,
2900 				    "tl_conn_req:bad addr in message, "
2901 				    "alen=%d, msz=%ld",
2902 				    alen, msz));
2903 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2904 			freemsg(mp);
2905 			return;
2906 		}
2907 #ifdef DEBUG
2908 		/*
2909 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2910 		 * if (! assertion)
2911 		 *	log warning;
2912 		 */
2913 		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2914 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
2915 				    SL_TRACE|SL_ERROR,
2916 				    "tl_conn_req: addr overlaps TPI message"));
2917 		}
2918 #endif
2919 		if (olen) {
2920 			/*
2921 			 * no opts in connect req
2922 			 * supported in this provider except for sockets.
2923 			 */
2924 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2925 				    SL_TRACE|SL_ERROR,
2926 				    "tl_conn_req:options not supported "
2927 				    "in message"));
2928 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2929 			freemsg(mp);
2930 			return;
2931 		}
2932 	}
2933 
2934 	/*
2935 	 * Prevent tep from closing on us.
2936 	 */
2937 	if (! tl_noclose(tep)) {
2938 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2939 			"tl_conn_req:endpoint is closing"));
2940 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2941 		freemsg(mp);
2942 		return;
2943 	}
2944 
2945 	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2946 	/*
2947 	 * get endpoint to connect to
2948 	 * check that peer with DEST addr is bound to addr
2949 	 * and has CONIND_number > 0
2950 	 */
2951 	dst.ta_alen = alen;
2952 	dst.ta_abuf = mp->b_rptr + aoff;
2953 	dst.ta_zoneid = tep->te_zoneid;
2954 
2955 	/*
2956 	 * Verify if remote addr is in use
2957 	 */
2958 	peer_tep = (IS_SOCKET(tep) ?
2959 	    tl_sock_find_peer(tep, &ux_addr) :
2960 	    tl_find_peer(tep, &dst));
2961 
2962 	if (peer_tep == NULL) {
2963 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2964 			"tl_conn_req:no one at connect address"));
2965 		err = ECONNREFUSED;
2966 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2967 		/*
2968 		 * validate that number of incoming connection is
2969 		 * not to capacity on destination endpoint
2970 		 */
2971 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2972 			"tl_conn_req: qlen overflow connection refused"));
2973 			err = ECONNREFUSED;
2974 	}
2975 
2976 	/*
2977 	 * Send T_DISCON_IND in case of error
2978 	 */
2979 	if (err != 0) {
2980 		if (peer_tep != NULL)
2981 			tl_refrele(peer_tep);
2982 		/* We are still expected to send T_OK_ACK */
2983 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2984 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2985 		tl_closeok(tep);
2986 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2987 		    M_PROTO, T_DISCON_IND);
2988 		if (dimp == NULL) {
2989 			tl_merror(wq, NULL, ENOSR);
2990 			return;
2991 		}
2992 		di = (struct T_discon_ind *)dimp->b_rptr;
2993 		di->DISCON_reason = err;
2994 		di->SEQ_number = BADSEQNUM;
2995 
2996 		tep->te_state = TS_IDLE;
2997 		/*
2998 		 * send T_DISCON_IND message
2999 		 */
3000 		putnext(tep->te_rq, dimp);
3001 		return;
3002 	}
3003 
3004 	ASSERT(IS_COTS(peer_tep));
3005 
3006 	/*
3007 	 * Found the listener. At this point processing will continue on
3008 	 * listener serializer. Close of the endpoint should be blocked while we
3009 	 * switch serializers.
3010 	 */
3011 	tl_serializer_refhold(peer_tep->te_ser);
3012 	tl_serializer_refrele(tep->te_ser);
3013 	tep->te_ser = peer_tep->te_ser;
3014 	ASSERT(tep->te_oconp == NULL);
3015 	tep->te_oconp = peer_tep;
3016 
3017 	/*
3018 	 * It is safe to close now. Close may continue on listener serializer.
3019 	 */
3020 	tl_closeok(tep);
3021 
3022 	/*
3023 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3024 	 * data, so we link mp to ackmp.
3025 	 */
3026 	ackmp->b_cont = mp;
3027 	mp = ackmp;
3028 
3029 	tl_refhold(tep);
3030 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3031 }
3032 
3033 /*
3034  * Finish T_CONN_REQ processing on listener serializer.
3035  */
3036 static void
3037 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3038 {
3039 	queue_t		*wq;
3040 	tl_endpt_t	*peer_tep = tep->te_oconp;
3041 	mblk_t		*confmp, *cimp, *indmp;
3042 	void		*opts = NULL;
3043 	mblk_t		*ackmp = mp;
3044 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3045 	struct T_conn_ind	*ci;
3046 	tl_icon_t	*tip;
3047 	void		*addr_startp;
3048 	t_scalar_t	olen = creq->OPT_length;
3049 	t_scalar_t	ooff = creq->OPT_offset;
3050 	size_t 		ci_msz;
3051 	size_t		size;
3052 
3053 	if (tep->te_closing) {
3054 		TL_UNCONNECT(tep->te_oconp);
3055 		tl_serializer_exit(tep);
3056 		tl_refrele(tep);
3057 		freemsg(mp);
3058 		return;
3059 	}
3060 
3061 	wq = tep->te_wq;
3062 	tep->te_flag |= TL_EAGER;
3063 
3064 	/*
3065 	 * Extract preallocated ackmp from mp.
3066 	 */
3067 	mp = mp->b_cont;
3068 	ackmp->b_cont = NULL;
3069 
3070 	if (olen == 0)
3071 		ooff = 0;
3072 
3073 	if (peer_tep->te_closing ||
3074 	    !((peer_tep->te_state == TS_IDLE) ||
3075 		(peer_tep->te_state == TS_WRES_CIND))) {
3076 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3077 			"tl_conn_req:peer in bad state (%d)",
3078 			    peer_tep->te_state));
3079 		TL_UNCONNECT(tep->te_oconp);
3080 		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3081 		freemsg(ackmp);
3082 		tl_serializer_exit(tep);
3083 		tl_refrele(tep);
3084 		return;
3085 	}
3086 
3087 	/*
3088 	 * preallocate now for T_DISCON_IND or T_CONN_IND
3089 	 */
3090 	/*
3091 	 * calculate length of T_CONN_IND message
3092 	 */
3093 	if (peer_tep->te_flag & TL_SETCRED) {
3094 		ooff = 0;
3095 		olen = (t_scalar_t) sizeof (struct opthdr) +
3096 		    OPTLEN(sizeof (tl_credopt_t));
3097 		/* 1 option only */
3098 	} else if (peer_tep->te_flag & TL_SETUCRED) {
3099 		ooff = 0;
3100 		olen = (t_scalar_t)sizeof (struct opthdr) +
3101 		    OPTLEN(ucredsize);
3102 		/* 1 option only */
3103 	}
3104 	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3105 	ci_msz = T_ALIGN(ci_msz) + olen;
3106 	size = max(ci_msz, sizeof (struct T_discon_ind));
3107 
3108 	/*
3109 	 * Save options from mp - we'll need them for T_CONN_IND.
3110 	 */
3111 	if (ooff != 0) {
3112 		opts = kmem_alloc(olen, KM_NOSLEEP);
3113 		if (opts == NULL) {
3114 			/*
3115 			 * roll back state changes
3116 			 */
3117 			tep->te_state = TS_IDLE;
3118 			tl_memrecover(wq, mp, size);
3119 			freemsg(ackmp);
3120 			TL_UNCONNECT(tep->te_oconp);
3121 			tl_serializer_exit(tep);
3122 			tl_refrele(tep);
3123 			return;
3124 		}
3125 		/* Copy options to a temp buffer */
3126 		bcopy(mp->b_rptr + ooff, opts, olen);
3127 	}
3128 
3129 	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3130 		/*
3131 		 * Generate a T_CONN_CON that has the identical address
3132 		 * (and options) as the T_CONN_REQ.
3133 		 * NOTE: assumes that the T_conn_req and T_conn_con structures
3134 		 * are isomorphic.
3135 		 */
3136 		confmp = copyb(mp);
3137 		if (! confmp) {
3138 			/*
3139 			 * roll back state changes
3140 			 */
3141 			tep->te_state = TS_IDLE;
3142 			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3143 			freemsg(ackmp);
3144 			if (opts != NULL)
3145 				kmem_free(opts, olen);
3146 			TL_UNCONNECT(tep->te_oconp);
3147 			tl_serializer_exit(tep);
3148 			tl_refrele(tep);
3149 			return;
3150 		}
3151 		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3152 			T_CONN_CON;
3153 	} else {
3154 		confmp = NULL;
3155 	}
3156 	if ((indmp = reallocb(mp, size, 0)) == NULL) {
3157 		/*
3158 		 * roll back state changes
3159 		 */
3160 		tep->te_state = TS_IDLE;
3161 		tl_memrecover(wq, mp, size);
3162 		freemsg(ackmp);
3163 		if (opts != NULL)
3164 			kmem_free(opts, olen);
3165 		freemsg(confmp);
3166 		TL_UNCONNECT(tep->te_oconp);
3167 		tl_serializer_exit(tep);
3168 		tl_refrele(tep);
3169 		return;
3170 	}
3171 
3172 	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3173 	if (tip == NULL) {
3174 		/*
3175 		 * roll back state changes
3176 		 */
3177 		tep->te_state = TS_IDLE;
3178 		tl_memrecover(wq, indmp, sizeof (*tip));
3179 		freemsg(ackmp);
3180 		if (opts != NULL)
3181 			kmem_free(opts, olen);
3182 		freemsg(confmp);
3183 		TL_UNCONNECT(tep->te_oconp);
3184 		tl_serializer_exit(tep);
3185 		tl_refrele(tep);
3186 		return;
3187 	}
3188 	tip->ti_mp = NULL;
3189 
3190 	/*
3191 	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3192 	 * and tl_icon_t cell.
3193 	 */
3194 
3195 	/*
3196 	 * ack validity of request and send the peer credential in the ACK.
3197 	 */
3198 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3199 
3200 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3201 	    confmp != NULL) {
3202 		mblk_setcred(confmp, peer_tep->te_credp);
3203 		DB_CPID(confmp) = peer_tep->te_cpid;
3204 	}
3205 
3206 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3207 
3208 	/*
3209 	 * prepare message to send T_CONN_IND
3210 	 */
3211 	/*
3212 	 * allocate the message - original data blocks retained
3213 	 * in the returned mblk
3214 	 */
3215 	cimp = tl_resizemp(indmp, size);
3216 	if (! cimp) {
3217 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3218 			"tl_conn_req:con_ind:allocb failure"));
3219 		tl_merror(wq, indmp, ENOMEM);
3220 		TL_UNCONNECT(tep->te_oconp);
3221 		tl_serializer_exit(tep);
3222 		tl_refrele(tep);
3223 		if (opts != NULL)
3224 			kmem_free(opts, olen);
3225 		freemsg(confmp);
3226 		ASSERT(tip->ti_mp == NULL);
3227 		kmem_free(tip, sizeof (*tip));
3228 		return;
3229 	}
3230 
3231 	DB_TYPE(cimp) = M_PROTO;
3232 	ci = (struct T_conn_ind *)cimp->b_rptr;
3233 	ci->PRIM_type  = T_CONN_IND;
3234 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3235 	ci->SRC_length = tep->te_alen;
3236 	ci->SEQ_number = tep->te_seqno;
3237 
3238 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3239 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3240 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3241 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3242 					ci->SRC_length);
3243 		ci->OPT_length = olen; /* because only 1 option */
3244 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3245 			DB_CREDDEF(cimp, tep->te_credp),
3246 			TLPID(cimp, tep),
3247 			peer_tep->te_flag, peer_tep->te_credp);
3248 	} else if (ooff != 0) {
3249 		/* Copy option from T_CONN_REQ */
3250 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3251 					ci->SRC_length);
3252 		ci->OPT_length = olen;
3253 		ASSERT(opts != NULL);
3254 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3255 	} else {
3256 		ci->OPT_offset = 0;
3257 		ci->OPT_length = 0;
3258 	}
3259 	if (opts != NULL)
3260 		kmem_free(opts, olen);
3261 
3262 	/*
3263 	 * register connection request with server peer
3264 	 * append to list of incoming connections
3265 	 * increment references for both peer_tep and tep: peer_tep is placed on
3266 	 * te_oconp and tep is placed on listeners queue.
3267 	 */
3268 	tip->ti_tep = tep;
3269 	tip->ti_seqno = tep->te_seqno;
3270 	list_insert_tail(&peer_tep->te_iconp, tip);
3271 	peer_tep->te_nicon++;
3272 
3273 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3274 	/*
3275 	 * send the T_CONN_IND message
3276 	 */
3277 	putnext(peer_tep->te_rq, cimp);
3278 
3279 	/*
3280 	 * Send a T_CONN_CON message for sockets.
3281 	 * Disable the queues until we have reached the correct state!
3282 	 */
3283 	if (confmp != NULL) {
3284 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3285 		noenable(wq);
3286 		putnext(tep->te_rq, confmp);
3287 	}
3288 	/*
3289 	 * Now we need to increment tep reference because tep is referenced by
3290 	 * server list of pending connections. We also need to decrement
3291 	 * reference before exiting serializer. Two operations void each other
3292 	 * so we don't modify reference at all.
3293 	 */
3294 	ASSERT(tep->te_refcnt >= 2);
3295 	ASSERT(peer_tep->te_refcnt >= 2);
3296 	tl_serializer_exit(tep);
3297 }
3298 
3299 
3300 
3301 /*
3302  * Handle T_CONN_RES (or O_T_CONN_RES) on the listener stream. Called on the
3303  * listener serializer.
3304  *
3305  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3306  * Switch eager (client) serializer to acceptor's when that is possible;
3307  * otherwise the acceptor is moved onto the client's serializer instead.
3308  * If TL_SET[U]CRED generate the credentials options.
3309  * For sockets tl_conn_req_ser has already generated the T_CONN_CON.
 *
 * Arguments:
 *	mp  - the T_CONN_RES message. It is freed, reused for the reply sent
 *	      to the client, or handed to tl_memrecover() on allocation
 *	      failure.
 *	tep - the listening endpoint that received the T_CONN_IND.
 *
 * Outline:
 *	1. Preallocate the ack and validate listener state, message length,
 *	   options (none are supported) and sequence number.
 *	2. Look up the acceptor by ACCEPTOR_id, block its close and validate
 *	   it.
 *	3. Find the pending connection by SEQ_number. The client may already
 *	   be gone (ti_tep == NULL) or be in the middle of closing.
 *	4. Ack the request, then either report a client-state error with
 *	   T_DISCON_IND, pass queued messages to the acceptor if the client is
 *	   gone, or link client and acceptor and send T_CONN_CON.
3310  */
3311 static void
3312 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3313 {
3314 	queue_t			*wq;
3315 	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
3316 	ssize_t			msz = MBLKL(mp);
3317 	t_scalar_t		olen, ooff, err = 0;
3318 	t_scalar_t		prim = cres->PRIM_type;
3319 	uchar_t			*addr_startp;
3320 	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
3321 	tl_icon_t		*tip;
3322 	size_t			size;
3323 	mblk_t			*ackmp, *respmp;
3324 	mblk_t			*dimp, *ccmp = NULL;
3325 	struct T_discon_ind	*di;
3326 	struct T_conn_con	*cc;
	/* B_TRUE once tl_noclose(cl_ep) succeeded and must be undone. */
3327 	boolean_t		client_noclose_set = B_FALSE;
	/* B_TRUE: move client to acceptor's serializer; else the reverse. */
3328 	boolean_t		switch_client_serializer = B_TRUE;
3329 
3330 	ASSERT(IS_COTS(tep));
3331 
3332 	if (tep->te_closing) {
3333 		freemsg(mp);
3334 		return;
3335 	}
3336 
3337 	wq = tep->te_wq;
3338 
3339 	/*
3340 	 * preallocate memory for:
3341 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3342 	 *	==> known max T_ERROR_ACK
3343 	 * 2. max of T_DISCON_IND and T_CONN_CON
3344 	 */
3345 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3346 	if (! ackmp) {
3347 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3348 		return;
3349 	}
3350 	/*
3351 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3352 	 * will be committed for T_DISCON_IND/T_CONN_CON later
3353 	 */
3354 
3355 
3356 	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3357 
3358 	/*
3359 	 * validate state
3360 	 */
3361 	if (tep->te_state != TS_WRES_CIND) {
3362 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3363 			SL_TRACE|SL_ERROR,
3364 			"tl_wput:T_CONN_RES:out of state, state=%d",
3365 			tep->te_state));
3366 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3367 		freemsg(mp);
3368 		return;
3369 	}
3370 
3371 	/*
3372 	 * validate the message
3373 	 * Note: dereference fields in struct inside message only
3374 	 * after validating the message length.
3375 	 */
3376 	if (msz < sizeof (struct T_conn_res)) {
3377 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3378 			"tl_conn_res:invalid message length"));
3379 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3380 		freemsg(mp);
3381 		return;
3382 	}
3383 	olen = cres->OPT_length;
3384 	ooff = cres->OPT_offset;
	/*
	 * Any non-zero option length is rejected: EINVAL when the options
	 * would run past the end of the message, TBADOPT otherwise.
	 */
3385 	if (((olen > 0) && ((ooff + olen) > msz))) {
3386 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3387 			"tl_conn_res:invalid message"));
3388 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3389 		freemsg(mp);
3390 		return;
3391 	}
3392 	if (olen) {
3393 		/*
3394 		 * no opts in connect res
3395 		 * supported in this provider
3396 		 */
3397 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3398 			"tl_conn_res:options not supported in message"));
3399 		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3400 		freemsg(mp);
3401 		return;
3402 	}
3403 
	/*
	 * The message is well formed. From here on every error return must
	 * move the listener state forward with TE_ERROR_ACK.
	 */
3404 	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3405 	ASSERT(tep->te_state == TS_WACK_CRES);
3406 
	/*
	 * Reject sequence numbers in the range [BADSEQNUM, TL_MINOR_START).
	 * Any other value is looked up with tl_icon_find() further below and
	 * fails there with TBADSEQ if it does not match a pending connection.
	 */
3407 	if (cres->SEQ_number < TL_MINOR_START &&
3408 		cres->SEQ_number >= BADSEQNUM) {
3409 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3410 			"tl_conn_res:remote endpoint sequence number bad"));
3411 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3412 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3413 		freemsg(mp);
3414 		return;
3415 	}
3416 
3417 	/*
3418 	 * find accepting endpoint. Will have extra reference if found.
3419 	 */
3420 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3421 		(mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3422 		(mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3423 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3424 			"tl_conn_res:bad accepting endpoint"));
3425 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3426 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3427 		freemsg(mp);
3428 		return;
3429 	}
3430 
3431 	/*
3432 	 * Prevent acceptor from closing.
	 * Every return below this point must undo this with tl_closeok()
	 * and drop the lookup reference with tl_refrele().
3433 	 */
3434 	if (! tl_noclose(acc_ep)) {
3435 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3436 			"tl_conn_res:bad accepting endpoint"));
3437 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3438 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3439 		tl_refrele(acc_ep);
3440 		freemsg(mp);
3441 		return;
3442 	}
3443 
	/*
	 * NOTE(review): the flag is set before the remaining validation and
	 * is not cleared by any of the error returns in this function -
	 * confirm that this is intended.
	 */
3444 	acc_ep->te_flag |= TL_ACCEPTOR;
3445 
3446 	/*
3447 	 * validate that accepting endpoint, if different from listening
3448 	 * has address bound => state is TS_IDLE
3449 	 * TROUBLE in XPG4 !!?
3450 	 */
3451 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3452 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3453 			"tl_conn_res:accepting endpoint has no address bound,"
3454 			"state=%d", acc_ep->te_state));
3455 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3456 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3457 		freemsg(mp);
3458 		tl_closeok(acc_ep);
3459 		tl_refrele(acc_ep);
3460 		return;
3461 	}
3462 
3463 	/*
3464 	 * validate if accepting endpt same as listening, then
3465 	 * no other incoming connection should be on the queue
3466 	 */
3467 
3468 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3469 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3470 			"tl_conn_res: > 1 conn_ind on listener-acceptor"));
3471 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3472 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3473 		freemsg(mp);
3474 		tl_closeok(acc_ep);
3475 		tl_refrele(acc_ep);
3476 		return;
3477 	}
3478 
3479 	/*
3480 	 * Find the entry for this client on the listener's list of
3481 	 * pending connections (the entry is deleted further below).
3482 	 * If no entry carries this sequence number the request is
3483 	 * rejected with TBADSEQ.
3484 	 */
3485 	tip = tl_icon_find(tep, cres->SEQ_number);
3486 	if (tip == NULL) {
3487 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3488 			"tl_conn_res:no client in listener list"));
3489 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3490 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3491 		freemsg(mp);
3492 		tl_closeok(acc_ep);
3493 		tl_refrele(acc_ep);
3494 		return;
3495 	}
3496 
3497 	/*
3498 	 * If ti_tep is NULL the client has already closed. In this case
3499 	 * the code below will avoid any action on the client side
3500 	 * but complete the server and acceptor state transitions.
3501 	 */
3502 	ASSERT(tip->ti_tep == NULL ||
3503 		tip->ti_tep->te_seqno == cres->SEQ_number);
3504 	cl_ep = tip->ti_tep;
3505 
3506 	/*
3507 	 * If the client is present it is switched from listener's to acceptor's
3508 	 * serializer. We should block client closes while serializers are
3509 	 * being switched.
3510 	 *
3511 	 * It is possible that the client is present but is currently being
3512 	 * closed. There are two possible cases:
3513 	 *
3514 	 * 1) The client has already entered tl_close_finish_ser() and sent
3515 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3516 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3517 	 *
3518 	 * 2) The client started the close but has not entered
3519 	 *    tl_close_finish_ser() yet. In this case, the client is already
3520 	 *    proceeding asynchronously on the listener's serializer, so we're
3521 	 *    forced to change the acceptor to use the listener's serializer to
3522 	 *    ensure that any operations on the acceptor are serialized with
3523 	 *    respect to the close that's in-progress.
3524 	 */
3525 	if (cl_ep != NULL) {
3526 		if (tl_noclose(cl_ep)) {
3527 			client_noclose_set = B_TRUE;
3528 		} else {
3529 			/*
3530 			 * Client is closing. If it has sent the
3531 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3532 			 * we have to let the client continue until it is
3533 			 * sent.
3534 			 *
3535 			 * If we do continue using the client, acceptor will
3536 			 * switch to client's serializer which is used by client
3537 			 * for its close.
			 *
			 * NOTE(review): a te_state of -1 presumably marks a
			 * client that already went through
			 * tl_close_finish_ser() (case 1 above) - confirm.
3538 			 */
3539 			tl_client_closing_when_accepting++;
3540 			switch_client_serializer = B_FALSE;
3541 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3542 			    cl_ep->te_state == -1)
3543 				cl_ep = NULL;
3544 		}
3545 	}
3546 
3547 	if (cl_ep != NULL) {
3548 		/*
3549 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3550 		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, the test below only fails a
		 * socket client whose state is neither TS_WCON_CREQ nor
		 * TS_DATA_XFER. A non-socket client is never rejected
		 * here, whatever its state - confirm this is the intent.
3551 		 */
3552 		if (cl_ep->te_state != TS_WCON_CREQ &&
3553 		    (cl_ep->te_state != TS_DATA_XFER &&
3554 		    IS_SOCKET(cl_ep))) {
3555 			err = ECONNREFUSED;
3556 			/*
3557 			 * T_DISCON_IND sent later after committing memory
3558 			 * and acking validity of request
3559 			 */
3560 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3561 				"tl_conn_res:peer in bad state"));
3562 		}
3563 
3564 		/*
3565 		 * preallocate now for T_DISCON_IND or T_CONN_CON
3566 		 * ack validity of request (T_OK_ACK) after memory committed
3567 		 */
3568 
3569 		if (err)
3570 			size = sizeof (struct T_discon_ind);
3571 		else {
3572 			/*
3573 			 * calculate length of T_CONN_CON message
3574 			 */
3575 			olen = 0;
3576 			if (cl_ep->te_flag & TL_SETCRED) {
3577 				olen = (t_scalar_t)sizeof (struct opthdr) +
3578 					OPTLEN(sizeof (tl_credopt_t));
3579 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3580 				olen = (t_scalar_t)sizeof (struct opthdr) +
3581 					OPTLEN(ucredsize);
3582 			}
3583 			size = T_ALIGN(sizeof (struct T_conn_con) +
3584 					acc_ep->te_alen) + olen;
3585 		}
3586 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3587 			/*
3588 			 * roll back state changes
3589 			 */
3590 			tep->te_state = TS_WRES_CIND;
3591 			tl_memrecover(wq, mp, size);
3592 			freemsg(ackmp);
3593 			if (client_noclose_set)
3594 				tl_closeok(cl_ep);
3595 			tl_closeok(acc_ep);
3596 			tl_refrele(acc_ep);
3597 			return;
3598 		}
		/*
		 * The request is now carried by respmp; clear mp so that it
		 * is not used or freed a second time.
		 */
3599 		mp = NULL;
3600 	}
3601 
3602 	/*
3603 	 * Now ack validity of request
	 * The event used depends on whether this is the last pending
	 * connection and whether the listener itself is the acceptor.
3604 	 */
3605 	if (tep->te_nicon == 1) {
3606 		if (tep == acc_ep)
3607 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3608 		else
3609 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3610 	} else
3611 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3612 
3613 	/*
3614 	 * send T_DISCON_IND now if client state validation failed earlier
	 *
	 * NOTE(review): on this path the T_DISCON_IND goes up the
	 * acceptor's read queue, the listener state is forced to TS_IDLE
	 * and tip is left on the listener's list - confirm this is intended.
3615 	 */
3616 	if (err) {
3617 		tl_ok_ack(wq, ackmp, prim);
3618 		/*
3619 		 * flush the queues - why always ?
3620 		 */
3621 		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3622 
3623 		dimp = tl_resizemp(respmp, size);
3624 		if (! dimp) {
3625 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3626 				SL_TRACE|SL_ERROR,
3627 				"tl_conn_res:con_ind:allocb failure"));
3628 			tl_merror(wq, respmp, ENOMEM);
3629 			tl_closeok(acc_ep);
3630 			if (client_noclose_set)
3631 				tl_closeok(cl_ep);
3632 			tl_refrele(acc_ep);
3633 			return;
3634 		}
3635 		if (dimp->b_cont) {
3636 			/* no user data in provider generated discon ind */
3637 			freemsg(dimp->b_cont);
3638 			dimp->b_cont = NULL;
3639 		}
3640 
3641 		DB_TYPE(dimp) = M_PROTO;
3642 		di = (struct T_discon_ind *)dimp->b_rptr;
3643 		di->PRIM_type  = T_DISCON_IND;
3644 		di->DISCON_reason = err;
3645 		di->SEQ_number = BADSEQNUM;
3646 
3647 		tep->te_state = TS_IDLE;
3648 		/*
3649 		 * send T_DISCON_IND message
3650 		 */
3651 		putnext(acc_ep->te_rq, dimp);
3652 		if (client_noclose_set)
3653 			tl_closeok(cl_ep);
3654 		tl_closeok(acc_ep);
3655 		tl_refrele(acc_ep);
3656 		return;
3657 	}
3658 
3659 	/*
3660 	 * now start connecting the accepting endpoint
3661 	 */
3662 	if (tep != acc_ep)
3663 		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3664 
3665 	if (cl_ep == NULL) {
3666 		/*
3667 		 * The client has already closed. Send up any queued messages
3668 		 * and change the state accordingly.
3669 		 */
3670 		tl_ok_ack(wq, ackmp, prim);
3671 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3672 
3673 		/*
3674 		 * remove endpoint from incoming connection
3675 		 * delete client from list of incoming connections
3676 		 */
3677 		tl_freetip(tep, tip);
		/* mp was not reused for a reply on this path; free it here. */
3678 		freemsg(mp);
3679 		tl_closeok(acc_ep);
3680 		tl_refrele(acc_ep);
3681 		return;
3682 	} else if (tip->ti_mp != NULL) {
3683 		/*
3684 		 * The client could have queued a T_DISCON_IND which needs
3685 		 * to be sent up.
3686 		 * Note that t_discon_req can not operate the same as
3687 		 * t_data_req since it is not possible for it to putbq
3688 		 * the message and return -1 due to the use of qwriter.
3689 		 */
3690 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3691 	}
3692 
3693 	/*
3694 	 * prepare connect confirm T_CONN_CON message
3695 	 */
3696 
3697 	/*
3698 	 * allocate the message - original data blocks
3699 	 * retained in the returned mblk
	 *
	 * A T_CONN_CON is only built for non-socket clients, or when early
	 * connect is disabled; otherwise respmp is simply freed.
3700 	 */
3701 	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3702 		ccmp = tl_resizemp(respmp, size);
3703 		if (ccmp == NULL) {
3704 			tl_ok_ack(wq, ackmp, prim);
3705 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3706 				    SL_TRACE|SL_ERROR,
3707 				    "tl_conn_res:conn_con:allocb failure"));
3708 			tl_merror(wq, respmp, ENOMEM);
3709 			tl_closeok(acc_ep);
3710 			if (client_noclose_set)
3711 				tl_closeok(cl_ep);
3712 			tl_refrele(acc_ep);
3713 			return;
3714 		}
3715 
3716 		DB_TYPE(ccmp) = M_PROTO;
3717 		cc = (struct T_conn_con *)ccmp->b_rptr;
3718 		cc->PRIM_type  = T_CONN_CON;
3719 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3720 		cc->RES_length = acc_ep->te_alen;
3721 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3722 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3723 		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3724 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3725 			    cc->RES_length);
3726 			cc->OPT_length = olen;
3727 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3728 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3729 			    cl_ep->te_credp);
3730 		} else {
3731 			cc->OPT_offset = 0;
3732 			cc->OPT_length = 0;
3733 		}
3734 		/*
3735 		 * Forward the credential in the packet so it can be picked up
3736 		 * at the higher layers for more complete credential processing
3737 		 */
3738 		mblk_setcred(ccmp, acc_ep->te_credp);
3739 		DB_CPID(ccmp) = acc_ep->te_cpid;
3740 	} else {
3741 		freemsg(respmp);
3742 		respmp = NULL;
3743 	}
3744 
3745 	/*
3746 	 * make connection linking
3747 	 * accepting and client endpoints
3748 	 * No need to increment references:
3749 	 *	on client: it should already have one from tip->ti_tep linkage.
3750 	 *	on acceptor: it should already have one from the table lookup.
3751 	 *
3752 	 * At this point both client and acceptor can't close. Set client
3753 	 * serializer to acceptor's.
3754 	 */
3755 	ASSERT(cl_ep->te_refcnt >= 2);
3756 	ASSERT(acc_ep->te_refcnt >= 2);
3757 	ASSERT(cl_ep->te_conp == NULL);
3758 	ASSERT(acc_ep->te_conp == NULL);
3759 	cl_ep->te_conp = acc_ep;
3760 	acc_ep->te_conp = cl_ep;
3761 	ASSERT(cl_ep->te_ser == tep->te_ser);
3762 	if (switch_client_serializer) {
3763 		mutex_enter(&cl_ep->te_ser_lock);
		/*
		 * A non-zero te_ser_count prevents moving the client; fall
		 * back to moving the acceptor instead (handled below).
		 */
3764 		if (cl_ep->te_ser_count > 0) {
3765 			switch_client_serializer = B_FALSE;
3766 			tl_serializer_noswitch++;
3767 		} else {
3768 			/*
3769 			 * Move client to the acceptor's serializer.
3770 			 */
3771 			tl_serializer_refhold(acc_ep->te_ser);
3772 			tl_serializer_refrele(cl_ep->te_ser);
3773 			cl_ep->te_ser = acc_ep->te_ser;
3774 		}
3775 		mutex_exit(&cl_ep->te_ser_lock);
3776 	}
3777 	if (!switch_client_serializer) {
3778 		/*
3779 		 * It is not possible to switch client to use acceptor's.
3780 		 * Move acceptor to client's serializer (which is the same as
3781 		 * listener's).
3782 		 */
3783 		tl_serializer_refhold(cl_ep->te_ser);
3784 		tl_serializer_refrele(acc_ep->te_ser);
3785 		acc_ep->te_ser = cl_ep->te_ser;
3786 	}
3787 
3788 	TL_REMOVE_PEER(cl_ep->te_oconp);
3789 	TL_REMOVE_PEER(acc_ep->te_oconp);
3790 
3791 	/*
3792 	 * remove endpoint from incoming connection
3793 	 * delete client from list of incoming connections
	 * ti_tep is cleared first: the client stays referenced through the
	 * te_conp linkage made above.
3794 	 */
3795 	tip->ti_tep = NULL;
3796 	tl_freetip(tep, tip);
3797 	tl_ok_ack(wq, ackmp, prim);
3798 
3799 	/*
3800 	 * data blocks already linked in reallocb()
3801 	 */
3802 
3803 	/*
3804 	 * link queues so that I_SENDFD will work
3805 	 */
3806 	if (! IS_SOCKET(tep)) {
3807 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3808 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3809 	}
3810 
3811 	/*
3812 	 * send T_CONN_CON up on client side unless it was already
3813 	 * done (for a socket). In cases any data or ordrel req has been
3814 	 * queued make sure that the service procedure runs.
3815 	 */
3816 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3817 		enableok(cl_ep->te_wq);
3818 		TL_QENABLE(cl_ep);
		/*
		 * ccmp is only assigned in the opposite branch of the same
		 * condition above, so it is expected to be NULL here.
		 */
3819 		if (ccmp != NULL)
3820 			freemsg(ccmp);
3821 	} else {
3822 		/*
3823 		 * change client state on TE_CONN_CON event
3824 		 */
3825 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3826 		putnext(cl_ep->te_rq, ccmp);
3827 	}
3828 
3829 	/* Mark both endpoints as accepted */
3830 	cl_ep->te_flag |= TL_ACCEPTED;
3831 	acc_ep->te_flag |= TL_ACCEPTED;
3832 
3833 	/*
3834 	 * Allow client and acceptor to close.
3835 	 */
3836 	tl_closeok(acc_ep);
3837 	if (client_noclose_set)
3838 		tl_closeok(cl_ep);
3839 }
3840 
3841 
3842 
3843 
3844 static void
3845 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3846 {
3847 	queue_t			*wq;
3848 	struct T_discon_req	*dr;
3849 	ssize_t			msz;
3850 	tl_endpt_t		*peer_tep = tep->te_conp;
3851 	tl_endpt_t		*srv_tep = tep->te_oconp;
3852 	tl_icon_t		*tip;
3853 	size_t			size;
3854 	mblk_t			*ackmp, *dimp, *respmp;
3855 	struct T_discon_ind	*di;
3856 	t_scalar_t		save_state, new_state;
3857 
3858 	if (tep->te_closing) {
3859 		freemsg(mp);
3860 		return;
3861 	}
3862 
3863 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3864 		TL_UNCONNECT(tep->te_conp);
3865 		peer_tep = NULL;
3866 	}
3867 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3868 		TL_UNCONNECT(tep->te_oconp);
3869 		srv_tep = NULL;
3870 	}
3871 
3872 	wq = tep->te_wq;
3873 
3874 	/*
3875 	 * preallocate memory for:
3876 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3877 	 *	==> known max T_ERROR_ACK
3878 	 * 2. for  T_DISCON_IND
3879 	 */
3880 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3881 	if (! ackmp) {
3882 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3883 		return;
3884 	}
3885 	/*
3886 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3887 	 * will be committed for T_DISCON_IND  later
3888 	 */
3889 
3890 	dr = (struct T_discon_req *)mp->b_rptr;
3891 	msz = MBLKL(mp);
3892 
3893 	/*
3894 	 * validate the state
3895 	 */
3896 	save_state = new_state = tep->te_state;
3897 	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3898 	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3899 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3900 			SL_TRACE|SL_ERROR,
3901 			"tl_wput:T_DISCON_REQ:out of state, state=%d",
3902 			tep->te_state));
3903 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3904 		freemsg(mp);
3905 		return;
3906 	}
3907 	/*
3908 	 * Defer committing the state change until it is determined if
3909 	 * the message will be queued with the tl_icon or not.
3910 	 */
3911 	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3912 
3913 	/* validate the message */
3914 	if (msz < sizeof (struct T_discon_req)) {
3915 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3916 			"tl_discon_req:invalid message"));
3917 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3918 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3919 		freemsg(mp);
3920 		return;
3921 	}
3922 
3923 	/*
3924 	 * if server, then validate that client exists
3925 	 * by connection sequence number etc.
3926 	 */
3927 	if (tep->te_nicon > 0) { /* server */
3928 
3929 		/*
3930 		 * search server list for disconnect client
3931 		 */
3932 		tip = tl_icon_find(tep, dr->SEQ_number);
3933 		if (tip == NULL) {
3934 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
3935 				SL_TRACE|SL_ERROR,
3936 				"tl_discon_req:no disconnect endpoint"));
3937 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3938 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3939 			freemsg(mp);
3940 			return;
3941 		}
3942 		/*
3943 		 * If ti_tep is NULL the client has already closed. In this case
3944 		 * the code below will avoid any action on the client side.
3945 		 */
3946 
3947 		ASSERT(IMPLY(tip->ti_tep != NULL,
3948 			tip->ti_tep->te_seqno == dr->SEQ_number));
3949 		peer_tep = tip->ti_tep;
3950 	}
3951 
3952 	/*
3953 	 * preallocate now for T_DISCON_IND
3954 	 * ack validity of request (T_OK_ACK) after memory committed
3955 	 */
3956 	size = sizeof (struct T_discon_ind);
3957 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
3958 		tl_memrecover(wq, mp, size);
3959 		freemsg(ackmp);
3960 		return;
3961 	}
3962 
3963 	/*
3964 	 * prepare message to ack validity of request
3965 	 */
3966 	if (tep->te_nicon == 0)
3967 		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3968 	else
3969 		if (tep->te_nicon == 1)
3970 			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3971 		else
3972 			new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3973 
3974 	/*
3975 	 * Flushing queues according to TPI. Using the old state.
3976 	 */
3977 	if ((tep->te_nicon <= 1) &&
3978 	    ((save_state == TS_DATA_XFER) ||
3979 	    (save_state == TS_WIND_ORDREL) ||
3980 	    (save_state == TS_WREQ_ORDREL)))
3981 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3982 
3983 	/* send T_OK_ACK up  */
3984 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3985 
3986 	/*
3987 	 * now do disconnect business
3988 	 */
3989 	if (tep->te_nicon > 0) { /* listener */
3990 		if (peer_tep != NULL && !peer_tep->te_closing) {
3991 			/*
3992 			 * disconnect incoming connect request pending to tep
3993 			 */
3994 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
3995 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
3996 					SL_TRACE|SL_ERROR,
3997 					"tl_discon_req: reallocb failed"));
3998 				tep->te_state = new_state;
3999 				tl_merror(wq, respmp, ENOMEM);
4000 				return;
4001 			}
4002 			di = (struct T_discon_ind *)dimp->b_rptr;
4003 			di->SEQ_number = BADSEQNUM;
4004 			save_state = peer_tep->te_state;
4005 			peer_tep->te_state = TS_IDLE;
4006 
4007 			TL_REMOVE_PEER(peer_tep->te_oconp);
4008 			enableok(peer_tep->te_wq);
4009 			TL_QENABLE(peer_tep);
4010 		} else {
4011 			freemsg(respmp);
4012 			dimp = NULL;
4013 		}
4014 
4015 		/*
4016 		 * remove endpoint from incoming connection list
4017 		 * - remove disconnect client from list on server
4018 		 */
4019 		tl_freetip(tep, tip);
4020 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4021 		/*
4022 		 * disconnect an outgoing request pending from tep
4023 		 */
4024 
4025 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4026 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4027 				SL_TRACE|SL_ERROR,
4028 				"tl_discon_req: reallocb failed"));
4029 			tep->te_state = new_state;
4030 			tl_merror(wq, respmp, ENOMEM);
4031 			return;
4032 		}
4033 		di = (struct T_discon_ind *)dimp->b_rptr;
4034 		DB_TYPE(dimp) = M_PROTO;
4035 		di->PRIM_type  = T_DISCON_IND;
4036 		di->DISCON_reason = ECONNRESET;
4037 		di->SEQ_number = tep->te_seqno;
4038 
4039 		/*
4040 		 * If this is a socket the T_DISCON_IND is queued with
4041 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4042 		 * from the list of pending connections.
4043 		 * Note that when te_oconp is set the peer better have
4044 		 * a t_connind_t for the client.
4045 		 */
4046 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4047 			/*
4048 			 * No need to check that
4049 			 * ti_tep == NULL since the T_DISCON_IND
4050 			 * takes precedence over other queued
4051 			 * messages.
4052 			 */
4053 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4054 			peer_tep = NULL;
4055 			dimp = NULL;
4056 			/*
4057 			 * Can't clear te_oconp since tl_co_unconnect needs
4058 			 * it as a hint not to free the tep.
4059 			 * Keep the state unchanged since tl_conn_res inspects
4060 			 * it.
4061 			 */
4062 			new_state = tep->te_state;
4063 		} else {
4064 			/* Found - delete it */
4065 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4066 			if (tip != NULL) {
4067 				ASSERT(tep == tip->ti_tep);
4068 				save_state = peer_tep->te_state;
4069 				if (peer_tep->te_nicon == 1)
4070 					peer_tep->te_state =
4071 					    NEXTSTATE(TE_DISCON_IND2,
4072 						peer_tep->te_state);
4073 				else
4074 					peer_tep->te_state =
4075 					    NEXTSTATE(TE_DISCON_IND3,
4076 						peer_tep->te_state);
4077 				tl_freetip(peer_tep, tip);
4078 			}
4079 			ASSERT(tep->te_oconp != NULL);
4080 			TL_UNCONNECT(tep->te_oconp);
4081 		}
4082 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4083 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4084 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4085 				SL_TRACE|SL_ERROR,
4086 				"tl_discon_req: reallocb failed"));
4087 			tep->te_state = new_state;
4088 			tl_merror(wq, respmp, ENOMEM);
4089 			return;
4090 		}
4091 		di = (struct T_discon_ind *)dimp->b_rptr;
4092 		di->SEQ_number = BADSEQNUM;
4093 
4094 		save_state = peer_tep->te_state;
4095 		peer_tep->te_state = TS_IDLE;
4096 	} else {
4097 		/* Not connected */
4098 		tep->te_state = new_state;
4099 		freemsg(respmp);
4100 		return;
4101 	}
4102 
4103 	/* Commit state changes */
4104 	tep->te_state = new_state;
4105 
4106 	if (peer_tep == NULL) {
4107 		ASSERT(dimp == NULL);
4108 		goto done;
4109 	}
4110 	/*
4111 	 * Flush queues on peer before sending up
4112 	 * T_DISCON_IND according to TPI
4113 	 */
4114 
4115 	if ((save_state == TS_DATA_XFER) ||
4116 	    (save_state == TS_WIND_ORDREL) ||
4117 	    (save_state == TS_WREQ_ORDREL))
4118 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4119 
4120 	DB_TYPE(dimp) = M_PROTO;
4121 	di->PRIM_type  = T_DISCON_IND;
4122 	di->DISCON_reason = ECONNRESET;
4123 
4124 	/*
4125 	 * data blocks already linked into dimp by reallocb()
4126 	 */
4127 	/*
4128 	 * send indication message to peer user module
4129 	 */
4130 	ASSERT(dimp != NULL);
4131 	putnext(peer_tep->te_rq, dimp);
4132 done:
4133 	if (tep->te_conp) {	/* disconnect pointers if connected */
4134 		ASSERT(! peer_tep->te_closing);
4135 
4136 		/*
4137 		 * Messages may be queued on peer's write queue
4138 		 * waiting to be processed by its write service
4139 		 * procedure. Before the pointer to the peer transport
4140 		 * structure is set to NULL, qenable the peer's write
4141 		 * queue so that the queued up messages are processed.
4142 		 */
4143 		if ((save_state == TS_DATA_XFER) ||
4144 		    (save_state == TS_WIND_ORDREL) ||
4145 		    (save_state == TS_WREQ_ORDREL))
4146 			TL_QENABLE(peer_tep);
4147 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4148 		TL_UNCONNECT(peer_tep->te_conp);
4149 		if (! IS_SOCKET(tep)) {
4150 			/*
4151 			 * unlink the streams
4152 			 */
4153 			tep->te_wq->q_next = NULL;
4154 			peer_tep->te_wq->q_next = NULL;
4155 		}
4156 		TL_UNCONNECT(tep->te_conp);
4157 	}
4158 }
4159 
4160 
4161 static void
4162 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4163 {
4164 	queue_t			*wq;
4165 	size_t			ack_sz;
4166 	mblk_t			*ackmp;
4167 	struct T_addr_ack	*taa;
4168 
4169 	if (tep->te_closing) {
4170 		freemsg(mp);
4171 		return;
4172 	}
4173 
4174 	wq = tep->te_wq;
4175 
4176 	/*
4177 	 * Note: T_ADDR_REQ message has only PRIM_type field
4178 	 * so it is already validated earlier.
4179 	 */
4180 
4181 	if (IS_CLTS(tep) ||
4182 	    (tep->te_state > TS_WREQ_ORDREL) ||
4183 	    (tep->te_state < TS_DATA_XFER)) {
4184 		/*
4185 		 * Either connectionless or connection oriented but not
4186 		 * in connected data transfer state or half-closed states.
4187 		 */
4188 		ack_sz = sizeof (struct T_addr_ack);
4189 		if (tep->te_state >= TS_IDLE)
4190 			/* is bound */
4191 			ack_sz += tep->te_alen;
4192 		ackmp = reallocb(mp, ack_sz, 0);
4193 		if (ackmp == NULL) {
4194 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4195 				SL_TRACE|SL_ERROR,
4196 				"tl_addr_req: reallocb failed"));
4197 			tl_memrecover(wq, mp, ack_sz);
4198 			return;
4199 		}
4200 
4201 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4202 
4203 		bzero(taa, sizeof (struct T_addr_ack));
4204 
4205 		taa->PRIM_type = T_ADDR_ACK;
4206 		ackmp->b_datap->db_type = M_PCPROTO;
4207 		ackmp->b_wptr = (uchar_t *)&taa[1];
4208 
4209 		if (tep->te_state >= TS_IDLE) {
4210 			/* endpoint is bound */
4211 			taa->LOCADDR_length = tep->te_alen;
4212 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4213 
4214 			bcopy(tep->te_abuf, ackmp->b_wptr,
4215 				tep->te_alen);
4216 			ackmp->b_wptr += tep->te_alen;
4217 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4218 		}
4219 
4220 		(void) qreply(wq, ackmp);
4221 	} else {
4222 		ASSERT(tep->te_state == TS_DATA_XFER ||
4223 			tep->te_state == TS_WIND_ORDREL ||
4224 			tep->te_state == TS_WREQ_ORDREL);
4225 		/* connection oriented in data transfer */
4226 		tl_connected_cots_addr_req(mp, tep);
4227 	}
4228 }
4229 
4230 
4231 static void
4232 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4233 {
4234 	tl_endpt_t		*peer_tep;
4235 	size_t			ack_sz;
4236 	mblk_t			*ackmp;
4237 	struct T_addr_ack	*taa;
4238 	uchar_t			*addr_startp;
4239 
4240 	if (tep->te_closing) {
4241 		freemsg(mp);
4242 		return;
4243 	}
4244 
4245 	ASSERT(tep->te_state >= TS_IDLE);
4246 
4247 	ack_sz = sizeof (struct T_addr_ack);
4248 	ack_sz += T_ALIGN(tep->te_alen);
4249 	peer_tep = tep->te_conp;
4250 	ack_sz += peer_tep->te_alen;
4251 
4252 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4253 	if (ackmp == NULL) {
4254 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4255 			"tl_connected_cots_addr_req: reallocb failed"));
4256 		tl_memrecover(tep->te_wq, mp, ack_sz);
4257 		return;
4258 	}
4259 
4260 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4261 
4262 	/* endpoint is bound */
4263 	taa->LOCADDR_length = tep->te_alen;
4264 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4265 
4266 	addr_startp = (uchar_t *)&taa[1];
4267 
4268 	bcopy(tep->te_abuf, addr_startp,
4269 	    tep->te_alen);
4270 
4271 	taa->REMADDR_length = peer_tep->te_alen;
4272 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4273 				    taa->LOCADDR_length);
4274 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4275 	bcopy(peer_tep->te_abuf, addr_startp,
4276 	    peer_tep->te_alen);
4277 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4278 	    taa->REMADDR_offset + peer_tep->te_alen;
4279 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4280 
4281 	putnext(tep->te_rq, ackmp);
4282 }
4283 
4284 static void
4285 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4286 {
4287 	if (IS_CLTS(tep)) {
4288 		*ia = tl_clts_info_ack;
4289 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4290 	} else {
4291 		*ia = tl_cots_info_ack;
4292 		if (IS_COTSORD(tep))
4293 			ia->SERV_type = T_COTS_ORD;
4294 	}
4295 	ia->TIDU_size = tl_tidusz;
4296 	ia->CURRENT_state = tep->te_state;
4297 }
4298 
4299 /*
4300  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4301  * tl_wput.
4302  */
4303 static void
4304 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4305 {
4306 	mblk_t			*ackmp;
4307 	t_uscalar_t		cap_bits1;
4308 	struct T_capability_ack	*tcap;
4309 
4310 	if (tep->te_closing) {
4311 		freemsg(mp);
4312 		return;
4313 	}
4314 
4315 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4316 
4317 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4318 	    M_PCPROTO, T_CAPABILITY_ACK);
4319 	if (ackmp == NULL) {
4320 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4321 			"tl_capability_req: reallocb failed"));
4322 		tl_memrecover(tep->te_wq, mp,
4323 		    sizeof (struct T_capability_ack));
4324 		return;
4325 	}
4326 
4327 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4328 	tcap->CAP_bits1 = 0;
4329 
4330 	if (cap_bits1 & TC1_INFO) {
4331 		tl_copy_info(&tcap->INFO_ack, tep);
4332 		tcap->CAP_bits1 |= TC1_INFO;
4333 	}
4334 
4335 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4336 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4337 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4338 	}
4339 
4340 	putnext(tep->te_rq, ackmp);
4341 }
4342 
4343 static void
4344 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4345 {
4346 	if (! tep->te_closing)
4347 		tl_info_req(mp, tep);
4348 	else
4349 		freemsg(mp);
4350 
4351 	tl_serializer_exit(tep);
4352 	tl_refrele(tep);
4353 }
4354 
4355 static void
4356 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4357 {
4358 	mblk_t *ackmp;
4359 
4360 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4361 	    M_PCPROTO, T_INFO_ACK);
4362 	if (ackmp == NULL) {
4363 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4364 			"tl_info_req: reallocb failed"));
4365 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4366 		return;
4367 	}
4368 
4369 	/*
4370 	 * fill in T_INFO_ACK contents
4371 	 */
4372 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4373 
4374 	/*
4375 	 * send ack message
4376 	 */
4377 	putnext(tep->te_rq, ackmp);
4378 }
4379 
4380 /*
4381  * Handle M_DATA, T_data_req and T_optdata_req.
4382  * If this is a socket pass through T_optdata_req options unmodified.
4383  */
4384 static void
4385 tl_data(mblk_t *mp, tl_endpt_t *tep)
4386 {
4387 	queue_t			*wq = tep->te_wq;
4388 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4389 	ssize_t			msz = MBLKL(mp);
4390 	tl_endpt_t		*peer_tep;
4391 	queue_t			*peer_rq;
4392 	boolean_t		closing = tep->te_closing;
4393 
4394 	if (IS_CLTS(tep)) {
4395 		(void) (STRLOG(TL_ID, tep->te_minor, 2,
4396 			    SL_TRACE|SL_ERROR,
4397 			    "tl_wput:clts:unattached M_DATA"));
4398 		if (!closing) {
4399 			tl_merror(wq, mp, EPROTO);
4400 		} else {
4401 			freemsg(mp);
4402 		}
4403 		return;
4404 	}
4405 
4406 	/*
4407 	 * If the endpoint is closing it should still forward any data to the
4408 	 * peer (if it has one). If it is not allowed to forward it can just
4409 	 * free the message.
4410 	 */
4411 	if (closing &&
4412 	    (tep->te_state != TS_DATA_XFER) &&
4413 	    (tep->te_state != TS_WREQ_ORDREL)) {
4414 		freemsg(mp);
4415 		return;
4416 	}
4417 
4418 	if (DB_TYPE(mp) == M_PROTO) {
4419 		if (prim->type == T_DATA_REQ &&
4420 		    msz < sizeof (struct T_data_req)) {
4421 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4422 				SL_TRACE|SL_ERROR,
4423 				"tl_data:T_DATA_REQ:invalid message"));
4424 			if (!closing) {
4425 				tl_merror(wq, mp, EPROTO);
4426 			} else {
4427 				freemsg(mp);
4428 			}
4429 			return;
4430 		} else if (prim->type == T_OPTDATA_REQ &&
4431 			    (msz < sizeof (struct T_optdata_req) ||
4432 			    !IS_SOCKET(tep))) {
4433 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4434 				SL_TRACE|SL_ERROR,
4435 				"tl_data:T_OPTDATA_REQ:invalid message"));
4436 			if (!closing) {
4437 				tl_merror(wq, mp, EPROTO);
4438 			} else {
4439 				freemsg(mp);
4440 			}
4441 			return;
4442 		}
4443 	}
4444 
4445 	/*
4446 	 * connection oriented provider
4447 	 */
4448 	switch (tep->te_state) {
4449 	case TS_IDLE:
4450 		/*
4451 		 * Other end not here - do nothing.
4452 		 */
4453 		freemsg(mp);
4454 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4455 			"tl_data:cots with endpoint idle"));
4456 		return;
4457 
4458 	case TS_DATA_XFER:
4459 		/* valid states */
4460 		if (tep->te_conp != NULL)
4461 			break;
4462 
4463 		if (tep->te_oconp == NULL) {
4464 			if (!closing) {
4465 				tl_merror(wq, mp, EPROTO);
4466 			} else {
4467 				freemsg(mp);
4468 			}
4469 			return;
4470 		}
4471 		/*
4472 		 * For a socket the T_CONN_CON is sent early thus
4473 		 * the peer might not yet have accepted the connection.
4474 		 * If we are closing queue the packet with the T_CONN_IND.
4475 		 * Otherwise defer processing the packet until the peer
4476 		 * accepts the connection.
4477 		 * Note that the queue is noenabled when we go into this
4478 		 * state.
4479 		 */
4480 		if (!closing) {
4481 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4482 				    SL_TRACE|SL_ERROR,
4483 				    "tl_data: ocon"));
4484 			TL_PUTBQ(tep, mp);
4485 			return;
4486 		}
4487 		if (DB_TYPE(mp) == M_PROTO) {
4488 			if (msz < sizeof (t_scalar_t)) {
4489 				freemsg(mp);
4490 				return;
4491 			}
4492 			/* reuse message block - just change REQ to IND */
4493 			if (prim->type == T_DATA_REQ)
4494 				prim->type = T_DATA_IND;
4495 			else
4496 				prim->type = T_OPTDATA_IND;
4497 		}
4498 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4499 		return;
4500 
4501 	case TS_WREQ_ORDREL:
4502 		if (tep->te_conp == NULL) {
4503 			/*
4504 			 * Other end closed - generate discon_ind
4505 			 * with reason 0 to cause an EPIPE but no
4506 			 * read side error on AF_UNIX sockets.
4507 			 */
4508 			freemsg(mp);
4509 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4510 				SL_TRACE|SL_ERROR,
4511 				"tl_data: WREQ_ORDREL and no peer"));
4512 			tl_discon_ind(tep, 0);
4513 			return;
4514 		}
4515 		break;
4516 
4517 	default:
4518 		/* invalid state for event TE_DATA_REQ */
4519 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4520 			"tl_data:cots:out of state"));
4521 		tl_merror(wq, mp, EPROTO);
4522 		return;
4523 	}
4524 	/*
4525 	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4526 	 * (State stays same on this event)
4527 	 */
4528 
4529 	/*
4530 	 * get connected endpoint
4531 	 */
4532 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4533 		freemsg(mp);
4534 		/* Peer closed */
4535 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4536 			"tl_data: peer gone"));
4537 		return;
4538 	}
4539 
4540 	ASSERT(tep->te_serializer == peer_tep->te_serializer);
4541 	peer_rq = peer_tep->te_rq;
4542 
4543 	/*
4544 	 * Put it back if flow controlled
4545 	 * Note: Messages already on queue when we are closing is bounded
4546 	 * so we can ignore flow control.
4547 	 */
4548 	if (!canputnext(peer_rq) && !closing) {
4549 		TL_PUTBQ(tep, mp);
4550 		return;
4551 	}
4552 
4553 	/*
4554 	 * validate peer state
4555 	 */
4556 	switch (peer_tep->te_state) {
4557 	case TS_DATA_XFER:
4558 	case TS_WIND_ORDREL:
4559 		/* valid states */
4560 		break;
4561 	default:
4562 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4563 			"tl_data:rx side:invalid state"));
4564 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4565 		return;
4566 	}
4567 	if (DB_TYPE(mp) == M_PROTO) {
4568 		/* reuse message block - just change REQ to IND */
4569 		if (prim->type == T_DATA_REQ)
4570 			prim->type = T_DATA_IND;
4571 		else
4572 			prim->type = T_OPTDATA_IND;
4573 	}
4574 	/*
4575 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4576 	 * (peer state stays same on this event)
4577 	 */
4578 	/*
4579 	 * send data to connected peer
4580 	 */
4581 	putnext(peer_rq, mp);
4582 }
4583 
4584 
4585 
4586 static void
4587 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4588 {
4589 	queue_t			*wq = tep->te_wq;
4590 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4591 	ssize_t			msz = MBLKL(mp);
4592 	tl_endpt_t		*peer_tep;
4593 	queue_t			*peer_rq;
4594 	boolean_t		closing = tep->te_closing;
4595 
4596 	if (msz < sizeof (struct T_exdata_req)) {
4597 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4598 			"tl_exdata:invalid message"));
4599 		if (!closing) {
4600 			tl_merror(wq, mp, EPROTO);
4601 		} else {
4602 			freemsg(mp);
4603 		}
4604 		return;
4605 	}
4606 
4607 	/*
4608 	 * If the endpoint is closing it should still forward any data to the
4609 	 * peer (if it has one). If it is not allowed to forward it can just
4610 	 * free the message.
4611 	 */
4612 	if (closing &&
4613 	    (tep->te_state != TS_DATA_XFER) &&
4614 	    (tep->te_state != TS_WREQ_ORDREL)) {
4615 		freemsg(mp);
4616 		return;
4617 	}
4618 
4619 	/*
4620 	 * validate state
4621 	 */
4622 	switch (tep->te_state) {
4623 	case TS_IDLE:
4624 		/*
4625 		 * Other end not here - do nothing.
4626 		 */
4627 		freemsg(mp);
4628 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4629 			"tl_exdata:cots with endpoint idle"));
4630 		return;
4631 
4632 	case TS_DATA_XFER:
4633 		/* valid states */
4634 		if (tep->te_conp != NULL)
4635 			break;
4636 
4637 		if (tep->te_oconp == NULL) {
4638 			if (!closing) {
4639 				tl_merror(wq, mp, EPROTO);
4640 			} else {
4641 				freemsg(mp);
4642 			}
4643 			return;
4644 		}
4645 		/*
4646 		 * For a socket the T_CONN_CON is sent early thus
4647 		 * the peer might not yet have accepted the connection.
4648 		 * If we are closing queue the packet with the T_CONN_IND.
4649 		 * Otherwise defer processing the packet until the peer
4650 		 * accepts the connection.
4651 		 * Note that the queue is noenabled when we go into this
4652 		 * state.
4653 		 */
4654 		if (!closing) {
4655 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4656 				    SL_TRACE|SL_ERROR,
4657 				    "tl_exdata: ocon"));
4658 			TL_PUTBQ(tep, mp);
4659 			return;
4660 		}
4661 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4662 			    "tl_exdata: closing socket ocon"));
4663 		prim->type = T_EXDATA_IND;
4664 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4665 		return;
4666 
4667 	case TS_WREQ_ORDREL:
4668 		if (tep->te_conp == NULL) {
4669 			/*
4670 			 * Other end closed - generate discon_ind
4671 			 * with reason 0 to cause an EPIPE but no
4672 			 * read side error on AF_UNIX sockets.
4673 			 */
4674 			freemsg(mp);
4675 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4676 				SL_TRACE|SL_ERROR,
4677 				"tl_exdata: WREQ_ORDREL and no peer"));
4678 			tl_discon_ind(tep, 0);
4679 			return;
4680 		}
4681 		break;
4682 
4683 	default:
4684 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4685 			SL_TRACE|SL_ERROR,
4686 			"tl_wput:T_EXDATA_REQ:out of state, state=%d",
4687 			tep->te_state));
4688 		tl_merror(wq, mp, EPROTO);
4689 		return;
4690 	}
4691 	/*
4692 	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4693 	 * (state stays same on this event)
4694 	 */
4695 
4696 	/*
4697 	 * get connected endpoint
4698 	 */
4699 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4700 		freemsg(mp);
4701 		/* Peer closed */
4702 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4703 			"tl_exdata: peer gone"));
4704 		return;
4705 	}
4706 
4707 	peer_rq = peer_tep->te_rq;
4708 
4709 	/*
4710 	 * Put it back if flow controlled
4711 	 * Note: Messages already on queue when we are closing is bounded
4712 	 * so we can ignore flow control.
4713 	 */
4714 	if (!canputnext(peer_rq) && !closing) {
4715 		TL_PUTBQ(tep, mp);
4716 		return;
4717 	}
4718 
4719 	/*
4720 	 * validate state on peer
4721 	 */
4722 	switch (peer_tep->te_state) {
4723 	case TS_DATA_XFER:
4724 	case TS_WIND_ORDREL:
4725 		/* valid states */
4726 		break;
4727 	default:
4728 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4729 			"tl_exdata:rx side:invalid state"));
4730 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4731 		return;
4732 	}
4733 	/*
4734 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4735 	 * (peer state stays same on this event)
4736 	 */
4737 	/*
4738 	 * reuse message block
4739 	 */
4740 	prim->type = T_EXDATA_IND;
4741 
4742 	/*
4743 	 * send data to connected peer
4744 	 */
4745 	putnext(peer_rq, mp);
4746 }
4747 
4748 
4749 
4750 static void
4751 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4752 {
4753 	queue_t			*wq =  tep->te_wq;
4754 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4755 	ssize_t			msz = MBLKL(mp);
4756 	tl_endpt_t		*peer_tep;
4757 	queue_t			*peer_rq;
4758 	boolean_t		closing = tep->te_closing;
4759 
4760 	if (msz < sizeof (struct T_ordrel_req)) {
4761 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4762 			"tl_ordrel:invalid message"));
4763 		if (!closing) {
4764 			tl_merror(wq, mp, EPROTO);
4765 		} else {
4766 			freemsg(mp);
4767 		}
4768 		return;
4769 	}
4770 
4771 	/*
4772 	 * validate state
4773 	 */
4774 	switch (tep->te_state) {
4775 	case TS_DATA_XFER:
4776 	case TS_WREQ_ORDREL:
4777 		/* valid states */
4778 		if (tep->te_conp != NULL)
4779 			break;
4780 
4781 		if (tep->te_oconp == NULL)
4782 			break;
4783 
4784 		/*
4785 		 * For a socket the T_CONN_CON is sent early thus
4786 		 * the peer might not yet have accepted the connection.
4787 		 * If we are closing queue the packet with the T_CONN_IND.
4788 		 * Otherwise defer processing the packet until the peer
4789 		 * accepts the connection.
4790 		 * Note that the queue is noenabled when we go into this
4791 		 * state.
4792 		 */
4793 		if (!closing) {
4794 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4795 				SL_TRACE|SL_ERROR,
4796 				"tl_ordlrel: ocon"));
4797 			TL_PUTBQ(tep, mp);
4798 			return;
4799 		}
4800 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4801 			"tl_ordlrel: closing socket ocon"));
4802 		prim->type = T_ORDREL_IND;
4803 		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4804 		return;
4805 
4806 	default:
4807 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4808 			SL_TRACE|SL_ERROR,
4809 			"tl_wput:T_ORDREL_REQ:out of state, state=%d",
4810 			tep->te_state));
4811 		if (!closing) {
4812 			tl_merror(wq, mp, EPROTO);
4813 		} else {
4814 			freemsg(mp);
4815 		}
4816 		return;
4817 	}
4818 	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4819 
4820 	/*
4821 	 * get connected endpoint
4822 	 */
4823 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4824 		/* Peer closed */
4825 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4826 			"tl_ordrel: peer gone"));
4827 		freemsg(mp);
4828 		return;
4829 	}
4830 
4831 	peer_rq = peer_tep->te_rq;
4832 
4833 	/*
4834 	 * Put it back if flow controlled except when we are closing.
4835 	 * Note: Messages already on queue when we are closing is bounded
4836 	 * so we can ignore flow control.
4837 	 */
4838 	if (! canputnext(peer_rq) && !closing) {
4839 		TL_PUTBQ(tep, mp);
4840 		return;
4841 	}
4842 
4843 	/*
4844 	 * validate state on peer
4845 	 */
4846 	switch (peer_tep->te_state) {
4847 	case TS_DATA_XFER:
4848 	case TS_WIND_ORDREL:
4849 		/* valid states */
4850 		break;
4851 	default:
4852 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4853 			"tl_ordrel:rx side:invalid state"));
4854 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4855 		return;
4856 	}
4857 	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4858 
4859 	/*
4860 	 * reuse message block
4861 	 */
4862 	prim->type = T_ORDREL_IND;
4863 	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4864 		"tl_ordrel: send ordrel_ind"));
4865 
4866 	/*
4867 	 * send data to connected peer
4868 	 */
4869 	putnext(peer_rq, mp);
4870 }
4871 
4872 
4873 /*
4874  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4875  */
4876 static void
4877 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4878 {
4879 	size_t			err_sz;
4880 	tl_endpt_t		*tep;
4881 	struct T_unitdata_req	*udreq;
4882 	mblk_t			*err_mp;
4883 	t_scalar_t		alen;
4884 	t_scalar_t		olen;
4885 	struct T_uderror_ind	*uderr;
4886 	uchar_t			*addr_startp;
4887 
4888 	err_sz = sizeof (struct T_uderror_ind);
4889 	tep = (tl_endpt_t *)wq->q_ptr;
4890 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4891 	alen = udreq->DEST_length;
4892 	olen = udreq->OPT_length;
4893 
4894 	if (alen > 0)
4895 		err_sz = T_ALIGN(err_sz + alen);
4896 	if (olen > 0)
4897 		err_sz += olen;
4898 
4899 	err_mp = allocb(err_sz, BPRI_MED);
4900 	if (! err_mp) {
4901 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4902 			"tl_uderr:allocb failure"));
4903 		/*
4904 		 * Note: no rollback of state needed as it does
4905 		 * not change in connectionless transport
4906 		 */
4907 		tl_memrecover(wq, mp, err_sz);
4908 		return;
4909 	}
4910 
4911 	DB_TYPE(err_mp) = M_PROTO;
4912 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
4913 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4914 	uderr->PRIM_type = T_UDERROR_IND;
4915 	uderr->ERROR_type = err;
4916 	uderr->DEST_length = alen;
4917 	uderr->OPT_length = olen;
4918 	if (alen <= 0) {
4919 		uderr->DEST_offset = 0;
4920 	} else {
4921 		uderr->DEST_offset =
4922 			(t_scalar_t)sizeof (struct T_uderror_ind);
4923 		addr_startp  = mp->b_rptr + udreq->DEST_offset;
4924 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4925 			(size_t)alen);
4926 	}
4927 	if (olen <= 0) {
4928 		uderr->OPT_offset = 0;
4929 	} else {
4930 		uderr->OPT_offset =
4931 			(t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4932 						uderr->DEST_length);
4933 		addr_startp  = mp->b_rptr + udreq->OPT_offset;
4934 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4935 			(size_t)olen);
4936 	}
4937 	freemsg(mp);
4938 
4939 	/*
4940 	 * send indication message
4941 	 */
4942 	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4943 
4944 	qreply(wq, err_mp);
4945 }
4946 
4947 static void
4948 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4949 {
4950 	queue_t *wq = tep->te_wq;
4951 
4952 	if (!tep->te_closing && (wq->q_first != NULL)) {
4953 		TL_PUTQ(tep, mp);
4954 	} else if (tep->te_rq != NULL)
4955 		tl_unitdata(mp, tep);
4956 	else
4957 		freemsg(mp);
4958 
4959 	tl_serializer_exit(tep);
4960 	tl_refrele(tep);
4961 }
4962 
4963 /*
4964  * Handle T_unitdata_req.
4965  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4966  * If this is a socket pass through options unmodified.
4967  */
4968 static void
4969 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4970 {
4971 	queue_t			*wq = tep->te_wq;
4972 	soux_addr_t		ux_addr;
4973 	tl_addr_t		destaddr;
4974 	uchar_t			*addr_startp;
4975 	tl_endpt_t		*peer_tep;
4976 	struct T_unitdata_ind	*udind;
4977 	struct T_unitdata_req	*udreq;
4978 	ssize_t			msz, ui_sz;
4979 	t_scalar_t		alen, aoff, olen, ooff;
4980 	t_scalar_t		oldolen = 0;
4981 
4982 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4983 	msz = MBLKL(mp);
4984 
4985 	/*
4986 	 * validate the state
4987 	 */
4988 	if (tep->te_state != TS_IDLE) {
4989 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4990 			SL_TRACE|SL_ERROR,
4991 			"tl_wput:T_CONN_REQ:out of state"));
4992 		tl_merror(wq, mp, EPROTO);
4993 		return;
4994 	}
4995 	/*
4996 	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
4997 	 * (state does not change on this event)
4998 	 */
4999 
5000 	/*
5001 	 * validate the message
5002 	 * Note: dereference fields in struct inside message only
5003 	 * after validating the message length.
5004 	 */
5005 	if (msz < sizeof (struct T_unitdata_req)) {
5006 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5007 			"tl_unitdata:invalid message length"));
5008 		tl_merror(wq, mp, EINVAL);
5009 		return;
5010 	}
5011 	alen = udreq->DEST_length;
5012 	aoff = udreq->DEST_offset;
5013 	oldolen = olen = udreq->OPT_length;
5014 	ooff = udreq->OPT_offset;
5015 	if (olen == 0)
5016 		ooff = 0;
5017 
5018 	if (IS_SOCKET(tep)) {
5019 		if ((alen != TL_SOUX_ADDRLEN) ||
5020 		    (aoff < 0) ||
5021 		    (aoff + alen > msz) ||
5022 		    (olen < 0) || (ooff < 0) ||
5023 		    ((olen > 0) && ((ooff + olen) > msz))) {
5024 			(void) (STRLOG(TL_ID, tep->te_minor,
5025 				    1, SL_TRACE|SL_ERROR,
5026 				    "tl_unitdata_req: invalid socket addr "
5027 				    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5028 				    (int)msz, alen, aoff, olen, ooff));
5029 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5030 			return;
5031 		}
5032 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5033 
5034 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5035 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5036 			(void) (STRLOG(TL_ID, tep->te_minor,
5037 				    1, SL_TRACE|SL_ERROR,
5038 				    "tl_conn_req: invalid socket magic"));
5039 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5040 			return;
5041 		}
5042 	} else {
5043 		if ((alen < 0) ||
5044 		    (aoff < 0) ||
5045 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5046 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5047 		    ((aoff + alen) < 0) ||
5048 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5049 		    (olen < 0) ||
5050 		    (ooff < 0) ||
5051 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5052 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5053 				    SL_TRACE|SL_ERROR,
5054 				    "tl_unitdata:invalid unit data message"));
5055 			tl_merror(wq, mp, EINVAL);
5056 			return;
5057 		}
5058 	}
5059 
5060 	/* Options not supported unless it's a socket */
5061 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5062 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5063 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5064 		tl_uderr(wq, mp, EPROTO);
5065 		return;
5066 	}
5067 #ifdef DEBUG
5068 	/*
5069 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5070 	 * if (! assertion)
5071 	 *	log warning;
5072 	 */
5073 	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5074 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5075 			"tl_unitdata:addr overlaps TPI message"));
5076 	}
5077 #endif
5078 	/*
5079 	 * get destination endpoint
5080 	 */
5081 	destaddr.ta_alen = alen;
5082 	destaddr.ta_abuf = mp->b_rptr + aoff;
5083 	destaddr.ta_zoneid = tep->te_zoneid;
5084 
5085 	/*
5086 	 * Check whether the destination is the same that was used previously
5087 	 * and the destination endpoint is in the right state. If something is
5088 	 * wrong, find destination again and cache it.
5089 	 */
5090 	peer_tep = tep->te_lastep;
5091 
5092 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5093 	    (peer_tep->te_state != TS_IDLE) ||
5094 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5095 		/*
5096 		 * Not the same as cached destination , need to find the right
5097 		 * destination.
5098 		 */
5099 		peer_tep = (IS_SOCKET(tep) ?
5100 		    tl_sock_find_peer(tep, &ux_addr) :
5101 		    tl_find_peer(tep, &destaddr));
5102 
5103 		if (peer_tep == NULL) {
5104 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5105 				SL_TRACE|SL_ERROR,
5106 				"tl_unitdata:no one at destination address"));
5107 			tl_uderr(wq, mp, ECONNRESET);
5108 			return;
5109 		}
5110 
5111 		/*
5112 		 * Cache the new peer.
5113 		 */
5114 		if (tep->te_lastep != NULL)
5115 			tl_refrele(tep->te_lastep);
5116 
5117 		tep->te_lastep = peer_tep;
5118 	}
5119 
5120 	if (peer_tep->te_state != TS_IDLE) {
5121 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5122 			"tl_unitdata:provider in invalid state"));
5123 		tl_uderr(wq, mp, EPROTO);
5124 		return;
5125 	}
5126 
5127 	ASSERT(peer_tep->te_rq != NULL);
5128 
5129 	/*
5130 	 * Put it back if flow controlled except when we are closing.
5131 	 * Note: Messages already on queue when we are closing is bounded
5132 	 * so we can ignore flow control.
5133 	 */
5134 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5135 		/* record what we are flow controlled on */
5136 		if (tep->te_flowq != NULL) {
5137 			list_remove(&tep->te_flowq->te_flowlist, tep);
5138 		}
5139 		list_insert_head(&peer_tep->te_flowlist, tep);
5140 		tep->te_flowq = peer_tep;
5141 		TL_PUTBQ(tep, mp);
5142 		return;
5143 	}
5144 	/*
5145 	 * prepare indication message
5146 	 */
5147 
5148 	/*
5149 	 * calculate length of message
5150 	 */
5151 	if (peer_tep->te_flag & TL_SETCRED) {
5152 		ASSERT(olen == 0);
5153 		olen = (t_scalar_t)sizeof (struct opthdr) +
5154 				OPTLEN(sizeof (tl_credopt_t));
5155 					/* 1 option only */
5156 	} else if (peer_tep->te_flag & TL_SETUCRED) {
5157 		ASSERT(olen == 0);
5158 		olen = (t_scalar_t)sizeof (struct opthdr) + OPTLEN(ucredsize);
5159 					/* 1 option only */
5160 	} else if (peer_tep->te_flag & TL_SOCKUCRED) {
5161 		/* Possibly more than one option */
5162 		olen += (t_scalar_t)sizeof (struct T_opthdr) +
5163 		    OPTLEN(ucredsize);
5164 	}
5165 
5166 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5167 		olen;
5168 	/*
5169 	 * If the unitdata_ind fits and we are not adding options
5170 	 * reuse the udreq mblk.
5171 	 */
5172 	if (msz >= ui_sz && alen >= tep->te_alen &&
5173 	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5174 		/*
5175 		 * Reuse the original mblk. Leave options in place.
5176 		 */
5177 		udind =  (struct T_unitdata_ind *)mp->b_rptr;
5178 		udind->PRIM_type = T_UNITDATA_IND;
5179 		udind->SRC_length = tep->te_alen;
5180 		addr_startp = mp->b_rptr + udind->SRC_offset;
5181 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5182 	} else {
5183 		/* Allocate a new T_unidata_ind message */
5184 		mblk_t *ui_mp;
5185 
5186 		ui_mp = allocb(ui_sz, BPRI_MED);
5187 		if (! ui_mp) {
5188 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5189 				"tl_unitdata:allocb failure:message queued"));
5190 			tl_memrecover(wq, mp, ui_sz);
5191 			return;
5192 		}
5193 
5194 		/*
5195 		 * fill in T_UNITDATA_IND contents
5196 		 */
5197 		DB_TYPE(ui_mp) = M_PROTO;
5198 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5199 		udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5200 		udind->PRIM_type = T_UNITDATA_IND;
5201 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5202 		udind->SRC_length = tep->te_alen;
5203 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5204 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5205 		udind->OPT_offset =
5206 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5207 		udind->OPT_length = olen;
5208 		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5209 			if (oldolen != 0) {
5210 				bcopy((void *)((uintptr_t)udreq + ooff),
5211 				    (void *)((uintptr_t)udind +
5212 				    udind->OPT_offset),
5213 				    oldolen);
5214 			}
5215 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5216 			    oldolen,
5217 			    DB_CREDDEF(mp, tep->te_credp), TLPID(mp, tep),
5218 			    peer_tep->te_flag, peer_tep->te_credp);
5219 		} else {
5220 			bcopy((void *)((uintptr_t)udreq + ooff),
5221 				(void *)((uintptr_t)udind + udind->OPT_offset),
5222 				olen);
5223 		}
5224 
5225 		/*
5226 		 * relink data blocks from mp to ui_mp
5227 		 */
5228 		ui_mp->b_cont = mp->b_cont;
5229 		freeb(mp);
5230 		mp = ui_mp;
5231 	}
5232 	/*
5233 	 * send indication message
5234 	 */
5235 	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5236 	putnext(peer_tep->te_rq, mp);
5237 }
5238 
5239 
5240 
5241 /*
5242  * Check if a given addr is in use.
5243  * Endpoint ptr returned or NULL if not found.
5244  * The name space is separate for each mode. This implies that
5245  * sockets get their own name space.
5246  */
5247 static tl_endpt_t *
5248 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5249 {
5250 	tl_endpt_t *peer_tep = NULL;
5251 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5252 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5253 
5254 	ASSERT(! IS_SOCKET(tep));
5255 
5256 	ASSERT(ap != NULL && ap->ta_alen > 0);
5257 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5258 	ASSERT(ap->ta_abuf != NULL);
5259 	ASSERT(EQUIV(rc == 0, peer_tep != NULL));
5260 	ASSERT(IMPLY(rc == 0,
5261 		(tep->te_zoneid == peer_tep->te_zoneid) &&
5262 		(tep->te_transport == peer_tep->te_transport)));
5263 
5264 	if ((rc == 0) && (peer_tep->te_closing)) {
5265 		tl_refrele(peer_tep);
5266 		peer_tep = NULL;
5267 	}
5268 
5269 	return (peer_tep);
5270 }
5271 
5272 /*
5273  * Find peer for a socket based on unix domain address.
5274  * For implicit addresses our peer can be found by minor number in ai hash. For
5275  * explici binds we look vnode address at addr_hash.
5276  */
5277 static tl_endpt_t *
5278 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5279 {
5280 	tl_endpt_t *peer_tep = NULL;
5281 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5282 	    tep->te_aihash : tep->te_addrhash;
5283 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5284 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5285 
5286 	ASSERT(IS_SOCKET(tep));
5287 	ASSERT(EQUIV(rc == 0, peer_tep != NULL));
5288 	ASSERT(IMPLY(rc == 0,
5289 		(tep->te_zoneid == peer_tep->te_zoneid) &&
5290 		(tep->te_transport == peer_tep->te_transport)));
5291 	/*
5292 	 * Don't attempt to use closing peer.
5293 	 */
5294 	if ((peer_tep != NULL) &&
5295 	    (peer_tep->te_closing ||
5296 		(peer_tep->te_zoneid != tep->te_zoneid))) {
5297 		tl_refrele(peer_tep);
5298 		peer_tep = NULL;
5299 	}
5300 
5301 	return (peer_tep);
5302 }
5303 
5304 /*
5305  * Generate a free addr and return it in struct pointed by ap
5306  * but allocating space for address buffer.
5307  * The generated address will be at least 4 bytes long and, if req->ta_alen
5308  * exceeds 4 bytes, be req->ta_alen bytes long.
5309  *
5310  * If address is found it will be inserted in the hash.
5311  *
5312  * If req->ta_alen is larger than the default alen (4 bytes) the last
5313  * alen-4 bytes will always be the same as in req.
5314  *
5315  * Return 0 for failure.
5316  * Return non-zero for success.
5317  */
5318 static boolean_t
5319 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5320 {
5321 	t_scalar_t	alen;
5322 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5323 
5324 	ASSERT(tep->te_hash_hndl != NULL);
5325 	ASSERT(! IS_SOCKET(tep));
5326 
5327 	if (tep->te_hash_hndl == NULL)
5328 		return (B_FALSE);
5329 
5330 	/*
5331 	 * check if default addr is in use
5332 	 * if it is - bump it and try again
5333 	 */
5334 	if (req == NULL) {
5335 		alen = sizeof (uint32_t);
5336 	} else {
5337 		alen = max(req->ta_alen, sizeof (uint32_t));
5338 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5339 	}
5340 
5341 	if (tep->te_alen < alen) {
5342 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5343 
5344 		/*
5345 		 * Not enough space in tep->ta_ap to hold the address,
5346 		 * allocate a bigger space.
5347 		 */
5348 		if (abuf == NULL)
5349 			return (B_FALSE);
5350 
5351 		if (tep->te_alen > 0)
5352 			kmem_free(tep->te_abuf, tep->te_alen);
5353 
5354 		tep->te_alen = alen;
5355 		tep->te_abuf = abuf;
5356 	}
5357 
5358 	/* Copy in the address in req */
5359 	if (req != NULL) {
5360 		ASSERT(alen >= req->ta_alen);
5361 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5362 	}
5363 
5364 	/*
5365 	 * First try minor number then try default addresses.
5366 	 */
5367 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5368 
5369 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5370 		if (mod_hash_insert_reserve(tep->te_addrhash,
5371 			(mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5372 			tep->te_hash_hndl) == 0) {
5373 			/*
5374 			 * found free address
5375 			 */
5376 			tep->te_flag |= TL_ADDRHASHED;
5377 			tep->te_hash_hndl = NULL;
5378 
5379 			return (B_TRUE); /* successful return */
5380 		}
5381 		/*
5382 		 * Use default address.
5383 		 */
5384 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5385 		atomic_add_32(&tep->te_defaddr, 1);
5386 	}
5387 
5388 	/*
5389 	 * Failed to find anything.
5390 	 */
5391 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5392 		"tl_get_any_addr:looped 2^32 times"));
5393 	return (B_FALSE);
5394 }
5395 
5396 /*
5397  * reallocb + set r/w ptrs to reflect size.
5398  */
5399 static mblk_t *
5400 tl_resizemp(mblk_t *mp, ssize_t new_size)
5401 {
5402 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5403 		return (NULL);
5404 
5405 	mp->b_rptr = DB_BASE(mp);
5406 	mp->b_wptr = mp->b_rptr + new_size;
5407 	return (mp);
5408 }
5409 
5410 static void
5411 tl_cl_backenable(tl_endpt_t *tep)
5412 {
5413 	list_t *l = &tep->te_flowlist;
5414 	tl_endpt_t *elp;
5415 
5416 	ASSERT(IS_CLTS(tep));
5417 
5418 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5419 		ASSERT(tep->te_ser == elp->te_ser);
5420 		ASSERT(elp->te_flowq == tep);
5421 		if (! elp->te_closing)
5422 			TL_QENABLE(elp);
5423 		elp->te_flowq = NULL;
5424 		list_remove(l, elp);
5425 	}
5426 }
5427 
5428 /*
5429  * Unconnect endpoints.
5430  */
5431 static void
5432 tl_co_unconnect(tl_endpt_t *tep)
5433 {
5434 	tl_endpt_t	*peer_tep = tep->te_conp;
5435 	tl_endpt_t	*srv_tep = tep->te_oconp;
5436 	list_t		*l;
5437 	tl_icon_t  	*tip;
5438 	tl_endpt_t	*cl_tep;
5439 	mblk_t		*d_mp;
5440 
5441 	ASSERT(IS_COTS(tep));
5442 	/*
5443 	 * If our peer is closing, don't use it.
5444 	 */
5445 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5446 		TL_UNCONNECT(tep->te_conp);
5447 		peer_tep = NULL;
5448 	}
5449 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5450 		TL_UNCONNECT(tep->te_oconp);
5451 		srv_tep = NULL;
5452 	}
5453 
5454 	if (tep->te_nicon > 0) {
5455 		l = &tep->te_iconp;
5456 		/*
5457 		 * If incoming requests pending, change state
5458 		 * of clients on disconnect ind event and send
5459 		 * discon_ind pdu to modules above them
5460 		 * for server: all clients get disconnect
5461 		 */
5462 
5463 		while (tep->te_nicon > 0) {
5464 			tip    = list_head(l);
5465 			cl_tep = tip->ti_tep;
5466 
5467 			if (cl_tep == NULL) {
5468 				tl_freetip(tep, tip);
5469 				continue;
5470 			}
5471 
5472 			if (cl_tep->te_oconp != NULL) {
5473 				ASSERT(cl_tep != cl_tep->te_oconp);
5474 				TL_UNCONNECT(cl_tep->te_oconp);
5475 			}
5476 
5477 			if (cl_tep->te_closing) {
5478 				tl_freetip(tep, tip);
5479 				continue;
5480 			}
5481 
5482 			enableok(cl_tep->te_wq);
5483 			TL_QENABLE(cl_tep);
5484 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5485 			if (d_mp != NULL) {
5486 				cl_tep->te_state = TS_IDLE;
5487 				putnext(cl_tep->te_rq, d_mp);
5488 			} else {
5489 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5490 					    SL_TRACE|SL_ERROR,
5491 					    "tl_co_unconnect:icmng: "
5492 					    "allocb failure"));
5493 			}
5494 			tl_freetip(tep, tip);
5495 		}
5496 	} else if (srv_tep != NULL) {
5497 		/*
5498 		 * If outgoing request pending, change state
5499 		 * of server on discon ind event
5500 		 */
5501 
5502 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5503 		    IS_COTSORD(srv_tep) &&
5504 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5505 			/*
5506 			 * Queue ordrel_ind for server to be picked up
5507 			 * when the connection is accepted.
5508 			 */
5509 			d_mp = tl_ordrel_ind_alloc();
5510 		} else {
5511 			/*
5512 			 * send discon_ind to server
5513 			 */
5514 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5515 		}
5516 		if (d_mp == NULL) {
5517 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5518 				SL_TRACE|SL_ERROR,
5519 				"tl_co_unconnect:outgoing:allocb failure"));
5520 			TL_UNCONNECT(tep->te_oconp);
5521 			goto discon_peer;
5522 		}
5523 
5524 		/*
5525 		 * If this is a socket the T_DISCON_IND is queued with
5526 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5527 		 * from the list of pending connections.
5528 		 * Note that when te_oconp is set the peer better have
5529 		 * a t_connind_t for the client.
5530 		 */
5531 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5532 			/*
5533 			 * Queue the disconnection message.
5534 			 */
5535 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5536 		} else {
5537 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5538 			if (tip == NULL) {
5539 				freemsg(d_mp);
5540 			} else {
5541 				ASSERT(tep == tip->ti_tep);
5542 				ASSERT(tep->te_ser == srv_tep->te_ser);
5543 				/*
5544 				 * Delete tip from the server list.
5545 				 */
5546 				if (srv_tep->te_nicon == 1) {
5547 					srv_tep->te_state =
5548 					    NEXTSTATE(TE_DISCON_IND2,
5549 						srv_tep->te_state);
5550 				} else {
5551 					srv_tep->te_state =
5552 					    NEXTSTATE(TE_DISCON_IND3,
5553 						srv_tep->te_state);
5554 				}
5555 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5556 				    T_DISCON_IND);
5557 				putnext(srv_tep->te_rq, d_mp);
5558 				tl_freetip(srv_tep, tip);
5559 			}
5560 			TL_UNCONNECT(tep->te_oconp);
5561 			srv_tep = NULL;
5562 		}
5563 	} else if (peer_tep != NULL) {
5564 		/*
5565 		 * unconnect existing connection
5566 		 * If connected, change state of peer on
5567 		 * discon ind event and send discon ind pdu
5568 		 * to module above it
5569 		 */
5570 
5571 		ASSERT(tep->te_ser == peer_tep->te_ser);
5572 		if (IS_COTSORD(peer_tep) &&
5573 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5574 		    peer_tep->te_state == TS_DATA_XFER)) {
5575 			/*
5576 			 * send ordrel ind
5577 			 */
5578 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5579 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5580 				peer_tep->te_state,
5581 				NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5582 			d_mp = tl_ordrel_ind_alloc();
5583 			if (! d_mp) {
5584 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5585 				    SL_TRACE|SL_ERROR,
5586 				    "tl_co_unconnect:connected:"
5587 				    "allocb failure"));
5588 				/*
5589 				 * Continue with cleaning up peer as
5590 				 * this side may go away with the close
5591 				 */
5592 				TL_QENABLE(peer_tep);
5593 				goto discon_peer;
5594 			}
5595 			peer_tep->te_state =
5596 				NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5597 
5598 			putnext(peer_tep->te_rq, d_mp);
5599 			/*
5600 			 * Handle flow control case.  This will generate
5601 			 * a t_discon_ind message with reason 0 if there
5602 			 * is data queued on the write side.
5603 			 */
5604 			TL_QENABLE(peer_tep);
5605 		} else if (IS_COTSORD(peer_tep) &&
5606 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5607 			/*
5608 			 * Sent an ordrel_ind. We send a discon with
5609 			 * with error 0 to inform that the peer is gone.
5610 			 */
5611 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5612 				SL_TRACE|SL_ERROR,
5613 				"tl_co_unconnect: discon in state %d",
5614 				tep->te_state));
5615 			tl_discon_ind(peer_tep, 0);
5616 		} else {
5617 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5618 				SL_TRACE|SL_ERROR,
5619 				"tl_co_unconnect: state %d", tep->te_state));
5620 			tl_discon_ind(peer_tep, ECONNRESET);
5621 		}
5622 
5623 discon_peer:
5624 		/*
5625 		 * Disconnect cross-pointers only for close
5626 		 */
5627 		if (tep->te_closing) {
5628 			peer_tep = tep->te_conp;
5629 			TL_REMOVE_PEER(peer_tep->te_conp);
5630 			TL_REMOVE_PEER(tep->te_conp);
5631 		}
5632 	}
5633 }
5634 
5635 /*
5636  * Note: The following routine does not recover from allocb()
5637  * failures
5638  * The reason should be from the <sys/errno.h> space.
5639  */
5640 static void
5641 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5642 {
5643 	mblk_t *d_mp;
5644 
5645 	if (tep->te_closing)
5646 		return;
5647 
5648 	/*
5649 	 * flush the queues.
5650 	 */
5651 	flushq(tep->te_rq, FLUSHDATA);
5652 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5653 
5654 	/*
5655 	 * send discon ind
5656 	 */
5657 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5658 	if (! d_mp) {
5659 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5660 			"tl_discon_ind:allocb failure"));
5661 		return;
5662 	}
5663 	tep->te_state = TS_IDLE;
5664 	putnext(tep->te_rq, d_mp);
5665 }
5666 
5667 /*
5668  * Note: The following routine does not recover from allocb()
5669  * failures
5670  * The reason should be from the <sys/errno.h> space.
5671  */
5672 static mblk_t *
5673 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5674 {
5675 	mblk_t *mp;
5676 	struct T_discon_ind *tdi;
5677 
5678 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5679 		DB_TYPE(mp) = M_PROTO;
5680 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5681 		tdi = (struct T_discon_ind *)mp->b_rptr;
5682 		tdi->PRIM_type = T_DISCON_IND;
5683 		tdi->DISCON_reason = reason;
5684 		tdi->SEQ_number = seqnum;
5685 	}
5686 	return (mp);
5687 }
5688 
5689 
5690 /*
5691  * Note: The following routine does not recover from allocb()
5692  * failures
5693  */
5694 static mblk_t *
5695 tl_ordrel_ind_alloc(void)
5696 {
5697 	mblk_t *mp;
5698 	struct T_ordrel_ind *toi;
5699 
5700 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5701 		DB_TYPE(mp) = M_PROTO;
5702 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5703 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5704 		toi->PRIM_type = T_ORDREL_IND;
5705 	}
5706 	return (mp);
5707 }
5708 
5709 
5710 /*
5711  * Lookup the seqno in the list of queued connections.
5712  */
5713 static tl_icon_t *
5714 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5715 {
5716 	list_t *l = &tep->te_iconp;
5717 	tl_icon_t *tip = list_head(l);
5718 
5719 	ASSERT(seqno != 0);
5720 
5721 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5722 		;
5723 
5724 	return (tip);
5725 }
5726 
5727 /*
5728  * Queue data for a given T_CONN_IND while verifying that redundant
5729  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5730  * Used when the originator of the connection closes.
5731  */
5732 static void
5733 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5734 {
5735 	tl_icon_t		*tip;
5736 	mblk_t			**mpp, *mp;
5737 	int			prim, nprim;
5738 
5739 	if (nmp->b_datap->db_type == M_PROTO)
5740 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5741 	else
5742 		nprim = -1;	/* M_DATA */
5743 
5744 	tip = tl_icon_find(tep, seqno);
5745 	if (tip == NULL) {
5746 		freemsg(nmp);
5747 		return;
5748 	}
5749 
5750 	ASSERT(tip->ti_seqno != 0);
5751 	mpp = &tip->ti_mp;
5752 	while (*mpp != NULL) {
5753 		mp = *mpp;
5754 
5755 		if (mp->b_datap->db_type == M_PROTO)
5756 			prim = ((union T_primitives *)mp->b_rptr)->type;
5757 		else
5758 			prim = -1;	/* M_DATA */
5759 
5760 		/*
5761 		 * Allow nothing after a T_DISCON_IND
5762 		 */
5763 		if (prim == T_DISCON_IND) {
5764 			freemsg(nmp);
5765 			return;
5766 		}
5767 		/*
5768 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5769 		 */
5770 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5771 			freemsg(nmp);
5772 			return;
5773 		}
5774 		mpp = &(mp->b_next);
5775 	}
5776 	*mpp = nmp;
5777 }
5778 
5779 /*
5780  * Verify if a certain TPI primitive exists on the connind queue.
5781  * Use prim -1 for M_DATA.
5782  * Return non-zero if found.
5783  */
5784 static boolean_t
5785 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5786 {
5787 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5788 	boolean_t found = B_FALSE;
5789 
5790 	if (tip != NULL) {
5791 		mblk_t *mp;
5792 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5793 			found = (DB_TYPE(mp) == M_PROTO &&
5794 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5795 		}
5796 	}
5797 	return (found);
5798 }
5799 
5800 /*
5801  * Send the b_next mblk chain that has accumulated before the connection
5802  * was accepted. Perform the necessary state transitions.
5803  */
5804 static void
5805 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5806 {
5807 	mblk_t			*mp;
5808 	union T_primitives	*primp;
5809 
5810 	if (tep->te_closing) {
5811 		tl_icon_freemsgs(mpp);
5812 		return;
5813 	}
5814 
5815 	ASSERT(tep->te_state == TS_DATA_XFER);
5816 	ASSERT(tep->te_rq->q_first == NULL);
5817 
5818 	while ((mp = *mpp) != NULL) {
5819 		*mpp = mp->b_next;
5820 		mp->b_next = NULL;
5821 
5822 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5823 		switch (DB_TYPE(mp)) {
5824 		default:
5825 			freemsg(mp);
5826 			break;
5827 		case M_DATA:
5828 			putnext(tep->te_rq, mp);
5829 			break;
5830 		case M_PROTO:
5831 			primp = (union T_primitives *)mp->b_rptr;
5832 			switch (primp->type) {
5833 			case T_UNITDATA_IND:
5834 			case T_DATA_IND:
5835 			case T_OPTDATA_IND:
5836 			case T_EXDATA_IND:
5837 				putnext(tep->te_rq, mp);
5838 				break;
5839 			case T_ORDREL_IND:
5840 				tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5841 							tep->te_state);
5842 				putnext(tep->te_rq, mp);
5843 				break;
5844 			case T_DISCON_IND:
5845 				tep->te_state = TS_IDLE;
5846 				putnext(tep->te_rq, mp);
5847 				break;
5848 			default:
5849 #ifdef DEBUG
5850 				cmn_err(CE_PANIC,
5851 					"tl_icon_sendmsgs: unknown primitive");
5852 #endif /* DEBUG */
5853 				freemsg(mp);
5854 				break;
5855 			}
5856 			break;
5857 		}
5858 	}
5859 }
5860 
5861 /*
5862  * Free the b_next mblk chain that has accumulated before the connection
5863  * was accepted.
5864  */
5865 static void
5866 tl_icon_freemsgs(mblk_t **mpp)
5867 {
5868 	mblk_t *mp;
5869 
5870 	while ((mp = *mpp) != NULL) {
5871 		*mpp = mp->b_next;
5872 		mp->b_next = NULL;
5873 		freemsg(mp);
5874 	}
5875 }
5876 
5877 /*
5878  * Send M_ERROR
5879  * Note: assumes caller ensured enough space in mp or enough
5880  *	memory available. Does not attempt recovery from allocb()
5881  *	failures
5882  */
5883 
5884 static void
5885 tl_merror(queue_t *wq, mblk_t *mp, int error)
5886 {
5887 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5888 
5889 	if (tep->te_closing) {
5890 		freemsg(mp);
5891 		return;
5892 	}
5893 
5894 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
5895 		    SL_TRACE|SL_ERROR,
5896 		    "tl_merror: tep=%p, err=%d", tep, error));
5897 
5898 	/*
5899 	 * flush all messages on queue. we are shutting
5900 	 * the stream down on fatal error
5901 	 */
5902 	flushq(wq, FLUSHALL);
5903 	if (IS_COTS(tep)) {
5904 		/* connection oriented - unconnect endpoints */
5905 		tl_co_unconnect(tep);
5906 	}
5907 	if (mp->b_cont) {
5908 		freemsg(mp->b_cont);
5909 		mp->b_cont = NULL;
5910 	}
5911 
5912 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5913 		freemsg(mp);
5914 		mp = allocb(1, BPRI_HI);
5915 		if (!mp) {
5916 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5917 				SL_TRACE|SL_ERROR,
5918 				"tl_merror:M_PROTO: out of memory"));
5919 			return;
5920 		}
5921 	}
5922 	if (mp) {
5923 		DB_TYPE(mp) = M_ERROR;
5924 		mp->b_rptr = DB_BASE(mp);
5925 		*mp->b_rptr = (char)error;
5926 		mp->b_wptr = mp->b_rptr + sizeof (char);
5927 		qreply(wq, mp);
5928 	} else {
5929 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
5930 	}
5931 }
5932 
5933 static void
5934 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5935 {
5936 	if (flag & TL_SETCRED) {
5937 		struct opthdr *opt = (struct opthdr *)buf;
5938 		tl_credopt_t *tlcred;
5939 
5940 		opt->level = TL_PROT_LEVEL;
5941 		opt->name = TL_OPT_PEER_CRED;
5942 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5943 
5944 		tlcred = (tl_credopt_t *)(opt + 1);
5945 		tlcred->tc_uid = crgetuid(cr);
5946 		tlcred->tc_gid = crgetgid(cr);
5947 		tlcred->tc_ruid = crgetruid(cr);
5948 		tlcred->tc_rgid = crgetrgid(cr);
5949 		tlcred->tc_suid = crgetsuid(cr);
5950 		tlcred->tc_sgid = crgetsgid(cr);
5951 		tlcred->tc_ngroups = crgetngroups(cr);
5952 	} else if (flag & TL_SETUCRED) {
5953 		struct opthdr *opt = (struct opthdr *)buf;
5954 
5955 		opt->level = TL_PROT_LEVEL;
5956 		opt->name = TL_OPT_PEER_UCRED;
5957 		opt->len = (t_uscalar_t)OPTLEN(ucredsize);
5958 
5959 		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
5960 	} else {
5961 		struct T_opthdr *topt = (struct T_opthdr *)buf;
5962 		ASSERT(flag & TL_SOCKUCRED);
5963 
5964 		topt->level = SOL_SOCKET;
5965 		topt->name = SCM_UCRED;
5966 		topt->len = ucredsize + sizeof (*topt);
5967 		topt->status = 0;
5968 		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
5969 	}
5970 }
5971 
5972 /* ARGSUSED */
5973 static int
5974 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
5975 {
5976 	/* no default value processed in protocol specific code currently */
5977 	return (-1);
5978 }
5979 
5980 /* ARGSUSED */
5981 static int
5982 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
5983 {
5984 	int len;
5985 	tl_endpt_t *tep;
5986 	int *valp;
5987 
5988 	tep = (tl_endpt_t *)wq->q_ptr;
5989 
5990 	len = 0;
5991 
5992 	/*
5993 	 * Assumes: option level and name sanity check done elsewhere
5994 	 */
5995 
5996 	switch (level) {
5997 	case SOL_SOCKET:
5998 		if (! IS_SOCKET(tep))
5999 			break;
6000 		switch (name) {
6001 		case SO_RECVUCRED:
6002 			len = sizeof (int);
6003 			valp = (int *)ptr;
6004 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6005 			break;
6006 		default:
6007 			break;
6008 		}
6009 		break;
6010 	case TL_PROT_LEVEL:
6011 		switch (name) {
6012 		case TL_OPT_PEER_CRED:
6013 		case TL_OPT_PEER_UCRED:
6014 			/*
6015 			 * option not supposed to retrieved directly
6016 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6017 			 * when some internal flags set by other options
6018 			 * Direct retrieval always designed to fail(ignored)
6019 			 * for this option.
6020 			 */
6021 			break;
6022 		}
6023 	}
6024 	return (len);
6025 }
6026 
6027 /* ARGSUSED */
6028 static int
6029 tl_set_opt(
6030 	queue_t		*wq,
6031 	uint_t		mgmt_flags,
6032 	int		level,
6033 	int		name,
6034 	uint_t		inlen,
6035 	uchar_t		*invalp,
6036 	uint_t		*outlenp,
6037 	uchar_t		*outvalp,
6038 	void		*thisdg_attrs,
6039 	cred_t		*cr,
6040 	mblk_t		*mblk)
6041 {
6042 	int error;
6043 	tl_endpt_t *tep;
6044 
6045 	tep = (tl_endpt_t *)wq->q_ptr;
6046 
6047 	error = 0;		/* NOERROR */
6048 
6049 	/*
6050 	 * Assumes: option level and name sanity checks done elsewhere
6051 	 */
6052 
6053 	switch (level) {
6054 	case SOL_SOCKET:
6055 		if (! IS_SOCKET(tep)) {
6056 			error = EINVAL;
6057 			break;
6058 		}
6059 		/*
6060 		 * TBD: fill in other AF_UNIX socket options and then stop
6061 		 * returning error.
6062 		 */
6063 		switch (name) {
6064 		case SO_RECVUCRED:
6065 			/*
6066 			 * We only support this for datagram sockets;
6067 			 * getpeerucred handles the connection oriented
6068 			 * transports.
6069 			 */
6070 			if (! IS_CLTS(tep)) {
6071 				error = EINVAL;
6072 				break;
6073 			}
6074 			if (*(int *)invalp == 0)
6075 				tep->te_flag &= ~TL_SOCKUCRED;
6076 			else
6077 				tep->te_flag |= TL_SOCKUCRED;
6078 			break;
6079 		default:
6080 			error = EINVAL;
6081 			break;
6082 		}
6083 		break;
6084 	case TL_PROT_LEVEL:
6085 		switch (name) {
6086 		case TL_OPT_PEER_CRED:
6087 		case TL_OPT_PEER_UCRED:
6088 			/*
6089 			 * option not supposed to be set directly
6090 			 * Its value in initialized for each endpoint at
6091 			 * driver open time.
6092 			 * Direct setting always designed to fail for this
6093 			 * option.
6094 			 */
6095 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6096 				    SL_TRACE|SL_ERROR,
6097 				    "tl_set_opt: option is not supported"));
6098 			error = EPROTO;
6099 			break;
6100 		}
6101 	}
6102 	return (error);
6103 }
6104 
6105 
6106 static void
6107 tl_timer(void *arg)
6108 {
6109 	queue_t *wq = arg;
6110 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6111 
6112 	ASSERT(tep);
6113 
6114 	tep->te_timoutid = 0;
6115 
6116 	enableok(wq);
6117 	/*
6118 	 * Note: can call wsrv directly here and save context switch
6119 	 * Consider change when qtimeout (not timeout) is active
6120 	 */
6121 	qenable(wq);
6122 }
6123 
6124 static void
6125 tl_buffer(void *arg)
6126 {
6127 	queue_t *wq = arg;
6128 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6129 
6130 	ASSERT(tep);
6131 
6132 	tep->te_bufcid = 0;
6133 	tep->te_nowsrv = B_FALSE;
6134 
6135 	enableok(wq);
6136 	/*
6137 	 *  Note: can call wsrv directly here and save context switch
6138 	 * Consider change when qbufcall (not bufcall) is active
6139 	 */
6140 	qenable(wq);
6141 }
6142 
6143 static void
6144 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6145 {
6146 	tl_endpt_t *tep;
6147 
6148 	tep = (tl_endpt_t *)wq->q_ptr;
6149 
6150 	if (tep->te_closing) {
6151 		freemsg(mp);
6152 		return;
6153 	}
6154 	noenable(wq);
6155 
6156 	(void) insq(wq, wq->q_first, mp);
6157 
6158 	if (tep->te_bufcid || tep->te_timoutid) {
6159 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6160 			"tl_memrecover:recover %p pending", (void *)wq));
6161 		return;
6162 	}
6163 
6164 	if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6165 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6166 		    drv_usectohz(TL_BUFWAIT));
6167 	}
6168 }
6169 
6170 static void
6171 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6172 {
6173 	ASSERT(tip->ti_seqno != 0);
6174 
6175 	if (tip->ti_mp != NULL) {
6176 		tl_icon_freemsgs(&tip->ti_mp);
6177 		tip->ti_mp = NULL;
6178 	}
6179 	if (tip->ti_tep != NULL) {
6180 		tl_refrele(tip->ti_tep);
6181 		tip->ti_tep = NULL;
6182 	}
6183 	list_remove(&tep->te_iconp, tip);
6184 	kmem_free(tip, sizeof (tl_icon_t));
6185 	tep->te_nicon--;
6186 }
6187 
6188 /*
6189  * Remove address from address hash.
6190  */
6191 static void
6192 tl_addr_unbind(tl_endpt_t *tep)
6193 {
6194 	tl_endpt_t *elp;
6195 
6196 	if (tep->te_flag & TL_ADDRHASHED) {
6197 		if (IS_SOCKET(tep)) {
6198 			(void) mod_hash_remove(tep->te_addrhash,
6199 			    (mod_hash_key_t)tep->te_vp,
6200 			    (mod_hash_val_t *)&elp);
6201 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6202 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6203 		} else {
6204 			(void) mod_hash_remove(tep->te_addrhash,
6205 			    (mod_hash_key_t)&tep->te_ap,
6206 			    (mod_hash_val_t *)&elp);
6207 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6208 			tep->te_alen = -1;
6209 			tep->te_abuf = NULL;
6210 		}
6211 		tep->te_flag &= ~TL_ADDRHASHED;
6212 	}
6213 }
6214