xref: /titanic_51/usr/src/uts/common/io/tl.c (revision 5a7763bf3e9db4cfe6cb523b096cb74af71e3793)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Multithreaded STREAMS Local Transport Provider.
30  *
31  * OVERVIEW
32  * ========
33  *
34  * This driver provides TLI as well as socket semantics.  It provides
35  * connectionless, connection oriented, and connection oriented with orderly
36  * release transports for TLI and sockets. Each transport type has separate name
37  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
38  * this removes any name space conflicts when binding to socket style transport
39  * addresses.
40  *
41  * NOTE: There is one exception: Socket ticots and ticotsord transports share
42  * the same namespace. In fact, sockets always use ticotsord type transport.
43  *
44  * The driver mode is specified during open() by the minor number used for
45  * open.
46  *
47  *  The sockets in addition have the following semantic differences:
48  *  No support for passing up credentials (TL_SET[U]CRED).
49  *
50  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
51  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
52  *	T_OPTDATA_IND.
53  *
54  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
55  *	a T_CONN_RES is received from the acceptor. This means that a socket
56  *	connect will complete before the peer has called accept.
57  *
58  *
59  * MULTITHREADING
60  * ==============
61  *
62  * The driver does not use STREAMS protection mechanisms. Instead it uses a
63  * generic "serializer" abstraction. Most of the operations are executed behind
64  * the serializer and are, essentially single-threaded. All functions executed
65  * behind the same serializer are strictly serialized. So if one thread calls
66  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
67  * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
68  * was called) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1) or
69  * bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the
70  * same time.
71  *
72  * Connectionless transports use a single serializer per transport type (one for
73  * TLI and one for sockets). Connection-oriented transports use finer-grained
74  * serializers.
75  *
76  * All COTS-type endpoints start their life with private serializers. During
77  * connection request processing the endpoint serializer is switched to the
78  * listener's serializer and the rest of T_CONN_REQ processing is done on the
79  * listener serializer. During T_CONN_RES processing the eager serializer is
80  * switched from listener to acceptor serializer and after that point all
81  * processing for eager and acceptor happens on this serializer. To avoid races
82  * with endpoint closes while its serializer may be changing closes are blocked
83  * while serializers are manipulated.
84  *
85  * References accounting
86  * ---------------------
87  *
88  * Endpoints are reference counted and freed when the last reference is
89  * dropped. Functions within the serializer may access an endpoint state even
90  * after an endpoint closed. The te_closing being set on the endpoint indicates
91  * that the endpoint entered its close routine.
92  *
93  * One reference is held for each opened endpoint instance. The reference
94  * counter is incremented when the endpoint is linked to another endpoint and
95  * decremented when the link disappears. It is also incremented when the
96  * endpoint is found by the hash table lookup. This increment is atomic with the
97  * lookup itself and happens while the hash table read lock is held.
98  *
99  * Close synchronization
100  * ---------------------
101  *
102  * During close the endpoint is marked as closing using te_closing flag. It is
103  * usually enough to check for te_closing flag since all other state changes
104  * happen after this flag is set and the close entered serializer. Immediately
105  * after setting te_closing flag tl_close() enters serializer and waits until
106  * the callback finishes. This allows all functions called within serializer to
107  * simply check te_closing without any locks.
108  *
109  * Serializer management.
110  * ---------------------
111  *
112  * For COTS transports serializers are created when the endpoint is constructed
113  * and destroyed when the endpoint is destructed. CLTS transports use global
114  * serializers - one for sockets and one for TLI.
115  *
116  * COTS serializers have separate reference counts to deal with several
117  * endpoints sharing the same serializer. There is a subtle problem related to
118  * the serializer destruction. The serializer should never be destroyed by any
119  * function executed inside serializer. This means that close has to wait till
120  * all serializer activity for this endpoint is finished before it can drop the
121  * last reference on the endpoint (which may as well free the serializer).  This
122  * is only relevant for COTS transports which manage serializers
123  * dynamically. For CLTS transports close may complete without waiting for all
124  * serializer activity to finish since serializer is only destroyed at driver
125  * detach time.
126  *
127  * COTS endpoints keep track of the number of outstanding requests on the
128  * serializer for the endpoint. The code handling accept() avoids changing
129  * client serializer if it has any pending messages on the serializer and
130  * instead moves acceptor to listener's serializer.
131  *
132  *
133  * Use of hash tables
134  * ------------------
135  *
136  * The driver uses modhash hash table implementation. Each transport uses two
137  * hash tables - one for finding endpoints by acceptor ID and another one for
138  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
139  * pair of hash tables since sockets only use TICOTSORD.
140  *
141  * All hash tables lookups increment a reference count for returned endpoints,
142  * so we may safely check the endpoint state even when the endpoint is removed
143  * from the hash by another thread immediately after it is found.
144  *
145  *
146  * CLOSE processing
147  * ================
148  *
149  * The driver enters serializer twice on close(). The close sequence is the
150  * following:
151  *
152  * 1) Wait until closing is safe (te_closewait becomes zero)
153  *	This step is needed to prevent close during serializer switches. In most
154  *	cases (close happening after connection establishment) te_closewait is
155  *	zero.
156  * 2) Set te_closing.
157  * 3) Call tl_close_ser() within serializer and wait for it to complete.
158  *
159  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
160  *	It also needs to clear write-side q_next pointers - this should be done
161  *	before qprocsoff().
162  *
163  *    This synchronous serializer entry during close is needed to ensure that
164  *    the queue is valid everywhere inside the serializer.
165  *
166  *    Note that in many cases close will execute tl_close_ser() synchronously,
167  *    so it will not wait at all.
168  *
169  * 4) Calls qprocsoff().
170  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
171  *	complete (for COTS transports). For CLTS transport there is no wait.
172  *
173  *	tl_close_finish_ser() Finishes the close process and wakes up waiting
174  *	close if there is any.
175  *
176  *    Note that in most cases close will enter tl_close_finish_ser()
177  *    synchronously and will not wait at all.
178  *
179  *
180  * Flow Control
181  * ============
182  *
183  * The driver implements both read and write side service routines. No one calls
184  * putq() on the read queue. The read side service routine tl_rsrv() is called
185  * when the read side stream is back-enabled. It enters serializer synchronously
186  * (waits till serializer processing is complete). Within serializer it
187  * back-enables all endpoints blocked by the queue for connection-less
188  * transports and enables write side service processing for the peer for
189  * connection-oriented transports.
190  *
191  * Read and write side service routines use special mblk_sized space in the
192  * endpoint structure to enter perimeter.
193  *
194  * Write-side flow control
195  * -----------------------
196  *
197  * Write side flow control is a bit tricky. The driver needs to deal with two
198  * message queues - the explicit STREAMS message queue maintained by
199  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
200  * queues should be synchronized to preserve message ordering and should
201  * maintain a single order determined by the order in which messages enter
202  * tl_wput(). In order to maintain the ordering between these two queues the
203  * STREAMS queue is only manipulated within the serializer, so the ordering is
204  * provided by the serializer.
205  *
206  * Functions called from the tl_wsrv() sometimes may call putbq(). To
207  * immediately stop any further processing of the STREAMS message queues the
208  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
209  * side service processing stops when the flag is set.
210  *
211  * The tl_wsrv() function enters serializer synchronously and waits for it to
212  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
213  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
214  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
215  * always bounded by the amount of messages on the STREAMS queue at the time
216  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
217  * queue from another serialized entry which can't happen in parallel. This
218  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
219  * of it draining forever while writer places new messages on the STREAMS
220  * queue).
221  *
222  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
223  *
224  *
225  * Unix Domain Sockets
226  * ===================
227  *
228  * The driver knows the structure of Unix Domain sockets addresses and treats
229  * them differently from generic TLI addresses. For sockets implicit binds are
230  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
231  * instead of using address length of zero. Explicit binds specify
232  * SOU_MAGIC_EXPLICIT as magic.
233  *
234  * For implicit binds we always use minor number as soua_vp part of the address
235  * and avoid any hash table lookups. This saves two hash tables lookups per
236  * anonymous bind.
237  *
238  * For explicit address we hash the vnode pointer instead of hashing the
239  * full-scale address+zone+length. Hashing by pointer is more efficient than
240  * hashing by the full address.
241  *
242  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
243  * tep structure, so it should be never freed.
244  *
245  * Also for sockets the driver always uses minor number as acceptor id.
246  *
247  * TPI VIOLATIONS
248  * --------------
249  *
250  * This driver violates TPI in several respects for Unix Domain Sockets:
251  *
252  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
253  *	is requested and the endpoint is already in use. There is no point in
254  *	generating an unused address since this address will be rejected by
255  *	sockfs anyway. For implicit binds it always generates a new address
256  *	(sets soua_vp to its minor number).
257  *
258  * 2) It always uses minor number as acceptor ID and never uses queue
259  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
260  *	message and they do not use the queue pointer.
261  *
262  * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog
263  *	followed by listen(). The listen() should be issued with non-zero
264  *	backlog, so sotpi_listen() issues unbind request followed by bind
265  *	request to the same address but with a non-zero qlen value. Both
266  *	tl_bind() and tl_unbind() require write lock on the hash table to
267  *	insert/remove the address. The driver does not remove the address from
268  *	the hash for endpoints that are bound to the explicit address and have
269  *	backlog of zero. During T_BIND_REQ processing if the address requested
270  *	is equal to the address the endpoint already has it updates the backlog
271  *	without reinserting the address in the hash table. This optimization
272  *	avoids two hash table updates for each listener created. It also
273  *	avoids the problem of a "stolen" address when another listener may use
274  *	the same address between the unbind and bind and suddenly listen() fails
275  *	because address is in use even though the bind() succeeded.
276  *
277  *
278  * CONNECTIONLESS TRANSPORTS
279  * =========================
280  *
281  * Connectionless transports all share the same serializer (one for TLI and one
282  * for Sockets). Functions executing behind serializer can check or modify state
283  * of any endpoint.
284  *
285  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
286  * te_lastep field. The next time X talks to some address A it checks whether A
287  * is the same as Y's address and if it is there is no need to lookup Y. If the
288  * address is different or the state of Y is not appropriate (e.g. closed or not
289  * idle) X does a lookup using tl_find_peer() and caches the new address.
290  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
291  * on the endpoint found.
292  *
293  * During close of endpoint Y it doesn't try to remove itself from other
294  * endpoints caches. They will detect that Y is gone and will search the peer
295  * endpoint again.
296  *
297  * Flow Control Handling.
298  * ----------------------
299  *
300  * Each connectionless endpoint keeps a list of endpoints which are
301  * flow-controlled by its queue. It also keeps a pointer to the queue which
302  * flow-controls itself.  Whenever flow control releases for endpoint X it
303  * enables all queues from the list. During close it also back-enables everyone
304  * in the list. If X is flow-controlled when it is closing it removes itself from
305  * the peer's list.
306  *
307  * DATA STRUCTURES
308  * ===============
309  *
310  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
311  * endpoint state. For connection-oriented transports it keeps a list
312  * of pending connections (tl_icon_t). For connectionless transports it keeps a
313  * list of endpoints flow controlled by this one.
314  *
315  * Each transport type is represented by a per-transport data structure
316  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
317  * endpoint address hash tables for each transport. It also contains pointer to
318  * transport serializer for connectionless transports.
319  *
320  * Each endpoint keeps a link to its transport structure, so the code can find
321  * all per-transport information quickly.
322  */
323 
324 #include	<sys/types.h>
325 #include	<sys/inttypes.h>
326 #include	<sys/stream.h>
327 #include	<sys/stropts.h>
328 #define	_SUN_TPI_VERSION 2
329 #include	<sys/tihdr.h>
330 #include	<sys/strlog.h>
331 #include	<sys/debug.h>
332 #include	<sys/cred.h>
333 #include	<sys/errno.h>
334 #include	<sys/kmem.h>
335 #include	<sys/id_space.h>
336 #include	<sys/modhash.h>
337 #include	<sys/mkdev.h>
338 #include	<sys/tl.h>
339 #include	<sys/stat.h>
340 #include	<sys/conf.h>
341 #include	<sys/modctl.h>
342 #include	<sys/strsun.h>
343 #include	<sys/socket.h>
344 #include	<sys/socketvar.h>
345 #include	<sys/sysmacros.h>
346 #include	<sys/xti_xtiopt.h>
347 #include	<sys/ddi.h>
348 #include	<sys/sunddi.h>
349 #include	<sys/zone.h>
350 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
351 #include	<inet/optcom.h>
352 #include	<sys/strsubr.h>
353 #include	<sys/ucred.h>
354 #include	<sys/suntpi.h>
355 #include	<sys/list.h>
356 #include	<sys/serializer.h>
357 
358 /*
359  * TBD List
360  * 14. Eliminate state changes through table
361  * 16. AF_UNIX socket options
362  * 17. connect() for ticlts
363  * 18. support for "netstat" to show AF_UNIX plus TLI local
364  *	transport connections
365  * 21. sanity check to flushing on sending M_ERROR
366  */
367 
368 /*
369  * CONSTANT DECLARATIONS
370  * --------------------
371  */
372 
373 /*
374  * Local declarations. NEXTSTATE() indexes ti_statetbl by event, then by state.
375  */
376 #define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]
377 
378 #define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
379 #define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
380 #define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgz" is unlimited (0) */
381 /*
382  * Hash tables size (initial value of the tl_hash_size variable).
383  */
384 #define	TL_HASH_SIZE 311
385 
386 /*
387  * Definitions for module_info (these initialize tl_minfo).
388  */
389 #define		TL_ID		(104)		/* module ID number */
390 #define		TL_NAME		"tl"		/* module name */
391 #define		TL_MINPSZ	(0)		/* min packet size */
392 #define		TL_MAXPSZ	INFPSZ 		/* max packet size ZZZ */
393 #define		TL_HIWAT	(16*1024)	/* hi water mark */
394 #define		TL_LOWAT	(256)		/* lo water mark */
395 /*
396  * Definition of minor numbers/modes for new transport provider modes.
397  * We view the socket use as a separate mode to get a separate name space.
398  */
399 #define		TL_TICOTS	0	/* connection oriented transport */
400 #define		TL_TICOTSORD 	1	/* COTS w/ orderly release */
401 #define		TL_TICLTS 	2	/* connectionless transport */
402 #define		TL_UNUSED	3	/* "undefined" slot in tl_transports */
403 #define		TL_SOCKET	4	/* Socket */
404 #define		TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)	/* socket + COTS */
405 #define		TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD) /* socket + COTS ord */
406 #define		TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)	/* socket + CLTS */
407 
408 #define		TL_MINOR_MASK	0x7	/* covers every mode value above */
409 #define		TL_MINOR_START	(TL_TICLTS + 1)	/* NOTE(review): use not visible here */
410 
411 /*
412  * LOCAL MACROS. T_ALIGN(p) applies P2ROUNDUP with sizeof (t_scalar_t) to p.
413  */
414 #define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
415 
416 /*
417  * EXTERNAL VARIABLE DECLARATIONS
418  * -----------------------------
419  */
420 /*
421  * state table defined in the OS space.c; [event][state], read via NEXTSTATE()
422  */
423 extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
424 
425 /*
426  * STREAMS DRIVER ENTRY POINTS PROTOTYPES (wired into tl_rinit/tl_winit below)
427  */
428 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
429 static int tl_close(queue_t *, int, cred_t *);
430 static void tl_wput(queue_t *, mblk_t *);
431 static void tl_wsrv(queue_t *);
432 static void tl_rsrv(queue_t *);
433 /* Entry points handed to DDI_DEFINE_STREAM_OPS() below. */
434 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
435 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
436 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
437 
438 
439 /*
440  * GLOBAL DATA STRUCTURES AND VARIABLES
441  * -----------------------------------
442  */
443 
444 /*
445  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
446  * For now, we only manage the SO_RECVUCRED option but we also have
447  * harmless dummy options to make things work with some common code we access.
448  */
449 opdes_t	tl_opt_arr[] = {
450 	/* The SO_TYPE is needed for the hack below */
451 	{
452 		SO_TYPE,		/* option name */
453 		SOL_SOCKET,		/* option level */
454 		OA_R,			/* NOTE(review): the two OA_* and the */
455 		OA_R,			/* two OP_* entries look like access */
456 		OP_NP,			/* and property settings; confirm the */
457 		OP_PASSNEXT,		/* field order against opdes_t */
458 		sizeof (t_scalar_t),	/* presumably the option value size */
459 		0
460 	},
461 	{
462 		SO_RECVUCRED,		/* option name */
463 		SOL_SOCKET,		/* option level */
464 		OA_RW,			/* same layout as the entry above */
465 		OA_RW,
466 		OP_NP,
467 		OP_PASSNEXT,
468 		sizeof (int),		/* presumably the option value size */
469 		0
470 	}
471 };
472 
473 /*
474  * Table of all supported levels
475  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
476  * any supported options so we need this info separately.
477  *
478  * This is needed only for topmost tpi providers.
479  */
480 optlevel_t	tl_valid_levels_arr[] = {
481 	XTI_GENERIC,
482 	SOL_SOCKET,	/* the level used by both tl_opt_arr entries */
483 	TL_PROT_LEVEL	/* not defined in this file; presumably from sys/tl.h */
484 };
485 
486 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr) /* entry count */
487 /*
488  * Current upper bound on the amount of space needed to return all options.
489  * Additional options with data size of sizeof(long) are handled automatically.
490  * Others need hand job. Sum: 4 bytes (<< 2) and one struct opthdr per option,
491  * plus 64 bytes and one struct T_optmgmt_ack. */
492 #define	TL_MAX_OPT_BUF_LEN						\
493 		((A_CNT(tl_opt_arr) << 2) +				\
494 		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
495 		64 + sizeof (struct T_optmgmt_ack))
496 
497 #define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)	/* number of tl_opt_arr entries */
498 
499 /*
500  *	transport addr structure: zone id, length, and pointer to the address bytes
501  */
502 typedef struct tl_addr {
503 	zoneid_t	ta_zoneid;		/* Zone scope of address */
504 	t_scalar_t	ta_alen;		/* length of abuf */
505 	void		*ta_abuf;		/* the addr itself */
506 } tl_addr_t;
507 
508 /*
509  * Refcounted version of serializer (several endpoints may share one serializer).
510  */
511 typedef struct tl_serializer {
512 	uint_t		ts_refcnt;	/* reference count */
513 	serializer_t	*ts_serializer;	/* the serializer being counted */
514 } tl_serializer_t;
515 
516 /*
517  * Each transport type has a separate state.
518  * Per-transport state; the tl_transports[] table below holds one per mode.
519  */
520 typedef struct tl_transport_state {
521 	char		*tr_name;	/* transport name, e.g. "ticots" */
522 	minor_t		tr_minor;	/* mode value (TL_TICOTS etc.) */
523 	uint32_t	tr_defaddr;	/* starts as TL_DFADDR; presumably default addr */
524 	mod_hash_t	*tr_ai_hash;	/* acceptor ID hash */
525 	mod_hash_t	*tr_addr_hash;	/* endpoint address hash */
526 	tl_serializer_t	*tr_serializer;	/* transport serializer (connectionless) */
527 } tl_transport_state_t;
528 
529 #define	TL_DFADDR 0x1000	/* initial tr_defaddr of every transport */
530 /* One row per mode; tr_minor equals the row index. Hash/serializer pointers start NULL. */
531 static tl_transport_state_t tl_transports[] = {
532 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
533 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
534 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
535 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
536 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
537 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
538 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
539 };
540 
541 #define	TL_MAXTRANSPORT A_CNT(tl_transports)	/* number of rows above */
542 
543 struct tl_endpt;	/* full definition follows below */
544 typedef struct tl_endpt tl_endpt_t;
545 /* Signature shared by the (mblk_t *, tl_endpt_t *) handlers; presumably serializer callbacks. */
546 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
547 
548 /*
549  * Data structure used to represent pending connects.
550  * Records enough information so that the connecting peer can close
551  * before the connection gets accepted.
552  */
553 typedef struct tl_icon {
554 	list_node_t	ti_node;	/* list linkage (presumably te_iconp) */
555 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
556 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
557 	t_scalar_t	ti_seqno;	/* Sequence number */
558 } tl_icon_t;
559 
560 typedef struct so_ux_addr soux_addr_t;	/* Unix Domain socket address */
561 #define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)	/* size of that address */
562 
563 /*
564  * Maximum number of unaccepted connection indications allowed per listener.
565  */
566 #define	TL_MAXQLEN	4096
567 int tl_maxqlen = TL_MAXQLEN;	/* non-static; presumably a tunable - confirm */
568 
569 /*
570  *	transport endpoint structure (holds all state for one endpoint)
571  */
572 struct tl_endpt {
573 	queue_t		*te_rq;		/* stream read queue */
574 	queue_t		*te_wq;		/* stream write queue */
575 	uint32_t	te_refcnt;	/* reference count; freed on last drop */
576 	int32_t 	te_state;	/* TPI state of endpoint */
577 	minor_t		te_minor;	/* minor number */
578 #define	te_seqno	te_minor	/* te_seqno is an alias for te_minor */
579 	uint_t		te_flag;	/* flag field */
580 	boolean_t	te_nowsrv;	/* set by TL_PUTBQ to stop wsrv work */
581 	tl_serializer_t	*te_ser;	/* Serializer to use */
582 #define	te_serializer	te_ser->ts_serializer
583 
584 	soux_addr_t	te_uxaddr;	/* Socket address */
585 #define	te_magic	te_uxaddr.soua_magic
586 #define	te_vp		te_uxaddr.soua_vp
587 	tl_addr_t	te_ap;		/* addr bound to this endpt */
588 #define	te_zoneid te_ap.ta_zoneid
589 #define	te_alen	te_ap.ta_alen
590 #define	te_abuf	te_ap.ta_abuf
591 
592 	tl_transport_state_t *te_transport;	/* per-transport state */
593 #define	te_addrhash	te_transport->tr_addr_hash
594 #define	te_aihash	te_transport->tr_ai_hash
595 #define	te_defaddr	te_transport->tr_defaddr
596 	cred_t		*te_credp;	/* endpoint user credentials */
597 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
598 
599 	/*
600 	 * State specific for connection-oriented and connectionless transports.
601 	 */
602 	union {
603 		/* Connection-oriented state. */
604 		struct {
605 			t_uscalar_t _te_nicon;	/* count of conn requests */
606 			t_uscalar_t _te_qlen;	/* max conn requests */
607 			tl_endpt_t  *_te_oconp;	/* conn request pending */
608 			tl_endpt_t  *_te_conp;	/* connected endpt */
609 #ifndef _ILP32
610 			void	    *_te_pad;
611 #endif
612 			list_t	_te_iconp;	/* list of conn ind. pending */
613 		} _te_cots_state;
614 		/* Connection-less state. */
615 		struct {
616 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
617 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
618 			list_node_t _te_flows;	/* lists of connections */
619 			list_t  _te_flowlist;	/* Who flowcontrols on me */
620 		} _te_clts_state;
621 	} _te_transport_state;
622 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
623 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
624 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
625 #define	te_conp		_te_transport_state._te_cots_state._te_conp
626 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
627 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
628 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
629 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
630 #define	te_flows	_te_transport_state._te_clts_state._te_flows
631 
632 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
633 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
634 	pid_t		te_cpid;	/* cached pid of endpoint */
635 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
636 	/*
637 	 * Pieces of the endpoint state needed for closing.
638 	 */
639 	kmutex_t	te_closelock;	/* NOTE(review): presumably guards */
640 	kcondvar_t	te_closecv;	/* the close fields below - confirm */
641 	uint8_t		te_closing;	/* The endpoint started closing */
642 	uint8_t		te_closewait;	/* Wait in close until zero */
643 	mblk_t		te_closemp;	/* for entering serializer on close */
644 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
645 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
646 	kmutex_t	te_srv_lock;	/* NOTE(review): presumably guards */
647 	kcondvar_t	te_srv_cv;	/* the *_active flags - confirm */
648 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
649 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
650 	/*
651 	 * Pieces of the endpoint state needed for serializer transitions.
652 	 */
653 	kmutex_t	te_ser_lock;	/* Protects the count below */
654 	uint_t		te_ser_count;	/* Number of messages on serializer */
655 };
656 
657 /*
658  * Flag values (te_flag). Lower 4 bits specify the transport used.
659  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
660  * they make it easier to identify the endpoint.
661  */
662 #define	TL_LISTENER	0x00010	/* the listener endpoint */
663 #define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
664 #define	TL_EAGER	0x00040	/* connecting endpoint */
665 #define	TL_ACCEPTED	0x00080	/* accepted connection */
666 #define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
667 #define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
668 #define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
669 #define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
670 #define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
671 /*
672  * Boolean checks for the endpoint type; each tests one mode bit in te_flag.
673  */
674 #define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
675 #define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
676 #define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
677 #define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
678 /* TLPID: DB_CPID(mp), or the endpoint's cached te_cpid when DB_CPID(mp) is -1. */
679 #define	TLPID(mp, tep)	(DB_CPID(mp) == -1 ? (tep)->te_cpid : DB_CPID(mp))
680 
681 /*
682  * Certain operations are always used together. These macros reduce the chance of missing a part of a combination.
683  * NOTE(review): each expands to a bare { } block (not do/while (0)) and evaluates x more than once.
684  */
685 #define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
686 #define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
687 /* TL_PUTBQ: assert TL_CLOSE_SER is clear, set te_nowsrv, then putbq() mp on te_wq. */
688 #define	TL_PUTBQ(x, mp) {		\
689 	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
690 	(x)->te_nowsrv = B_TRUE;	\
691 	(void) putbq((x)->te_wq, mp);	\
692 }
693 /* Both macros below clear te_nowsrv first, then qenable()/putq() on te_wq. */
694 #define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
695 #define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
696 
697 /*
698  * STREAMS driver glue data structures. tl_minfo is shared by both qinits below.
699  */
700 static	struct	module_info	tl_minfo = {
701 	TL_ID,			/* mi_idnum */
702 	TL_NAME,		/* mi_idname */
703 	TL_MINPSZ,		/* mi_minpsz */
704 	TL_MAXPSZ,		/* mi_maxpsz */
705 	TL_HIWAT,		/* mi_hiwat */
706 	TL_LOWAT		/* mi_lowat */
707 };
708 
709 static	struct	qinit	tl_rinit = {	/* read-side queue procedures */
710 	NULL,			/* qi_putp - none on the read side */
711 	(int (*)())tl_rsrv,	/* qi_srvp - void function, cast to fit */
712 	tl_open,		/* qi_qopen */
713 	tl_close,		/* qi_qclose */
714 	NULL,			/* qi_qadmin */
715 	&tl_minfo,		/* qi_minfo */
716 	NULL			/* qi_mstat */
717 };
718 
719 static	struct	qinit	tl_winit = {	/* write-side queue procedures */
720 	(int (*)())tl_wput,	/* qi_putp - void function, cast to fit */
721 	(int (*)())tl_wsrv,	/* qi_srvp - void function, cast to fit */
722 	NULL,			/* qi_qopen - open is set in tl_rinit only */
723 	NULL,			/* qi_qclose - close is set in tl_rinit only */
724 	NULL,			/* qi_qadmin */
725 	&tl_minfo,		/* qi_minfo */
726 	NULL			/* qi_mstat */
727 };
728 
729 static	struct streamtab	tlinfo = {	/* passed to DDI_DEFINE_STREAM_OPS */
730 	&tl_rinit,		/* st_rdinit */
731 	&tl_winit,		/* st_wrinit */
732 	NULL,			/* st_muxrinit - not set */
733 	NULL			/* st_muxwrinit - not set */
734 };
735 
736 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
737     nulldev, tl_info, D_MP, &tlinfo);
738 /* Driver linkage record; refers to the tl_devops defined just above. */
739 static struct modldrv modldrv = {
740 	&mod_driverops,		/* Type of module -- pseudo driver here */
741 	"TPI Local Transport (tl) %I%",	/* description; %I% presumably SCCS keyword */
742 	&tl_devops,		/* driver ops */
743 };
744 
745 /*
746  * Module linkage information for the kernel.
747  */
748 static struct modlinkage modlinkage = {
749 	MODREV_1,
750 	&modldrv,	/* the single linkage record for this driver */
751 	NULL		/* terminates the list */
752 };
753 
754 /*
755  * Templates for response to info request
756  * Check sanity of unlimited connect data etc.
757  */
758 
759 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
760 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
761 /* Template for connection-oriented (T_COTS) endpoints. */
762 static struct T_info_ack tl_cots_info_ack =
763 	{
764 		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
765 		T_INFINITE,	/* TSDU size */
766 		T_INFINITE,	/* ETSDU size */
767 		T_INFINITE,	/* CDATA_size */
768 		T_INFINITE,	/* DDATA_size */
769 		T_INFINITE,	/* ADDR_size  */
770 		T_INFINITE,	/* OPT_size */
771 		0,		/* TIDU_size - fill at run time */
772 		T_COTS,		/* SERV_type */
773 		-1,		/* CURRENT_state - placeholder; presumably set later */
774 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
775 	};
776 /* Template for connectionless (T_CLTS) endpoints; sizes are raw -1/-2 literals. */
777 static struct T_info_ack tl_clts_info_ack =
778 	{
779 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
780 		0,		/* TSDU_size - fill at run time */
781 		-2,		/* ETSDU_size -2 => not supported */
782 		-2,		/* CDATA_size -2 => not supported */
783 		-2,		/* DDATA_size  -2 => not supported */
784 		-1,		/* ADDR_size -1 => unlimited */
785 		-1,		/* OPT_size -1 => unlimited */
786 		0,		/* TIDU_size - fill at run time */
787 		T_CLTS,		/* SERV_type */
788 		-1,		/* CURRENT_state - placeholder; presumably set later */
789 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
790 	};
791 
792 /*
793  * private copy of devinfo pointer used in tl_info
794  */
795 static dev_info_t *tl_dip;
796 
797 /*
798  * Endpoints cache (kmem cache; presumably of tl_endpt_t objects).
799  */
800 static kmem_cache_t *tl_cache;
801 /*
802  * Minor number space (id_space the minor numbers come from).
803  */
804 static id_space_t *tl_minors;
805 
806 /*
807  * Default Data Unit size.
808  */
809 static t_scalar_t tl_tidusz;
810 
811 /*
812  * Size of hash tables; starts at TL_HASH_SIZE.
813  */
814 static size_t tl_hash_size = TL_HASH_SIZE;
815 
816 /*
817  * Debug and test variable ONLY. Turn off T_CONN_IND queueing
818  * for sockets.
819  */
820 static int tl_disable_early_connect = 0;
821 static int tl_client_closing_when_accepting;	/* NOTE(review): use not visible here */
822 /* NOTE(review): by its name, presumably disables serializer switching - confirm. */
823 static int tl_serializer_noswitch;
824 
825 /*
826  * LOCAL FUNCTION PROTOTYPES
827  * -------------------------
828  */
829 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
830 static void tl_do_proto(mblk_t *, tl_endpt_t *);
831 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
832 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
833 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
834 	t_scalar_t);
835 static void tl_bind(mblk_t *, tl_endpt_t *);
836 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
837 static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
838 static void tl_unbind(mblk_t *, tl_endpt_t *);
839 static void tl_optmgmt(queue_t *, mblk_t *);
840 static void tl_conn_req(queue_t *, mblk_t *);
841 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
842 static void tl_conn_res(mblk_t *, tl_endpt_t *);
843 static void tl_discon_req(mblk_t *, tl_endpt_t *);
844 static void tl_capability_req(mblk_t *, tl_endpt_t *);
845 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
846 static void tl_info_req(mblk_t *, tl_endpt_t *);
847 static void tl_addr_req(mblk_t *, tl_endpt_t *);
848 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
849 static void tl_data(mblk_t  *, tl_endpt_t *);
850 static void tl_exdata(mblk_t *, tl_endpt_t *);
851 static void tl_ordrel(mblk_t *, tl_endpt_t *);
852 static void tl_unitdata(mblk_t *, tl_endpt_t *);
853 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
854 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
855 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
856 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
857 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
858 static void tl_cl_backenable(tl_endpt_t *);
859 static void tl_co_unconnect(tl_endpt_t *);
860 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
861 static void tl_discon_ind(tl_endpt_t *, uint32_t);
862 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
863 static mblk_t *tl_ordrel_ind_alloc(void);
864 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
865 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
866 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
867 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
868 static void tl_icon_freemsgs(mblk_t **);
869 static void tl_merror(queue_t *, mblk_t *, int);
870 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
871 static int tl_default_opt(queue_t *, int, int, uchar_t *);
872 static int tl_get_opt(queue_t *, int, int, uchar_t *);
873 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
874     uchar_t *, void *, cred_t *, mblk_t *);
875 static void tl_memrecover(queue_t *, mblk_t *, size_t);
876 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
877 static void tl_free(tl_endpt_t *);
878 static int  tl_constructor(void *, void *, int);
879 static void tl_destructor(void *, void *);
880 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
881 static tl_serializer_t *tl_serializer_alloc(int);
882 static void tl_serializer_refhold(tl_serializer_t *);
883 static void tl_serializer_refrele(tl_serializer_t *);
884 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
885 static void tl_serializer_exit(tl_endpt_t *);
886 static boolean_t tl_noclose(tl_endpt_t *);
887 static void tl_closeok(tl_endpt_t *);
888 static void tl_refhold(tl_endpt_t *);
889 static void tl_refrele(tl_endpt_t *);
890 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
891 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
892 static void tl_close_ser(mblk_t *, tl_endpt_t *);
893 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
894 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
895 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
896 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
897 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
898 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
899 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
900 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
901 static void tl_addr_unbind(tl_endpt_t *);
902 
/*
 * Initialize option database object for TL.
 *
 * Bundles the driver's option callbacks (default/get/set) with its option
 * table and the table of valid option levels.
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	B_TRUE,			/* TL is tpi provider */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};
917 
/*
 * Logical operations.
 *
 * IMPLY(X, Y) means that X implies Y, i.e. when X is true, Y
 * should also be true. Y is evaluated only when X is true.
 *
 * EQUIV(X, Y) is logical equivalence: X and Y should be both true or both
 * false at the same time. Each argument is evaluated exactly once, so an
 * argument with side effects is safe to pass.
 */
#define	IMPLY(X, Y)	(!(X) || (Y))
#define	EQUIV(X, Y)	(!(X) == !(Y))
929 
930 /*
931  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
932  * ---------------------------------------
933  */
934 
935 /*
936  * Loadable module routines
937  */
938 int
939 _init(void)
940 {
941 	return (mod_install(&modlinkage));
942 }
943 
944 int
945 _fini(void)
946 {
947 	return (mod_remove(&modlinkage));
948 }
949 
950 int
951 _info(struct modinfo *modinfop)
952 {
953 	return (mod_info(&modlinkage, modinfop));
954 }
955 
956 /*
957  * Driver Entry Points and Other routines
958  */
/*
 * Driver attach entry point.
 *
 * DDI_RESUME succeeds without doing any work; any command other than
 * DDI_ATTACH fails. For DDI_ATTACH the routine picks the TIDU size, creates
 * one minor node per transport, creates the endpoint cache and the minor
 * number ID space, and then builds the per-transport hash tables and (for
 * connectionless transports) the shared serializer. On success devi is
 * remembered in tl_dip.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	char name[32];	/* scratch buffer for hash table names */

	/*
	 * Resume from a checkpoint state.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
	 * streams message sizes can be unlimited. We use a defined constant
	 * instead.
	 */
	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

	/*
	 * Create subdevices for each transport.
	 * On failure remove whatever nodes were already created.
	 */
	for (i = 0; i < TL_UNUSED; i++) {
		if (ddi_create_minor_node(devi,
		    tl_transports[i].tr_name,
		    S_IFCHR, tl_transports[i].tr_minor,
		    DDI_PSEUDO, NULL) == DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	/*
	 * Create the endpoint cache.
	 */
	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);

	if (tl_cache == NULL) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	/*
	 * Create ID space for minor numbers
	 */
	tl_minors = id_space_create("tl_minor_space",
	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);

	/*
	 * Set up per-transport state: the acceptor ID hash, the address hash
	 * and, for connectionless transports, the shared serializer.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if (i == TL_UNUSED)
			continue;

		/* Socket COTSORD shares namespace with COTS */
		if (i == TL_SOCK_COTSORD) {
			t->tr_ai_hash =
			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
			ASSERT(t->tr_ai_hash != NULL);
			t->tr_addr_hash =
			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
			ASSERT(t->tr_addr_hash != NULL);
			continue;
		}

		/*
		 * Create hash tables.
		 *
		 * The kind of acceptor ID hash matches how tl_open() picks the
		 * acceptor ID: the read queue pointer for ILP32 non-socket
		 * endpoints (pointer hash), the minor number otherwise (ID
		 * hash).
		 */
		(void) snprintf(name, sizeof (name), "%s_ai_hash",
		    t->tr_name);
#ifdef _ILP32
		if (i & TL_SOCKET)
			t->tr_ai_hash =
			    mod_hash_create_idhash(name, tl_hash_size - 1,
			    mod_hash_null_valdtor);
		else
			t->tr_ai_hash =
			    mod_hash_create_ptrhash(name, tl_hash_size,
			    mod_hash_null_valdtor, sizeof (queue_t));
#else
		t->tr_ai_hash =
		    mod_hash_create_idhash(name, tl_hash_size - 1,
		    mod_hash_null_valdtor);
#endif /* _ILP32 */

		/*
		 * Address hash: a pointer hash for sockets, a hash with the
		 * driver's own hash/compare callbacks for TLI addresses.
		 */
		if (i & TL_SOCKET) {
			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_ptrhash(name,
			    tl_hash_size, mod_hash_null_valdtor,
			    sizeof (uintptr_t));
		} else {
			(void) snprintf(name, sizeof (name), "%s_addr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_extended(name,
			    tl_hash_size, mod_hash_null_keydtor,
			    mod_hash_null_valdtor,
			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
		}

		/* Create serializer for connectionless transports. */
		if (i & TL_TICLTS)
			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
	}

	/* Remember the devinfo node for tl_info(). */
	tl_dip = devi;

	return (DDI_SUCCESS);
}
1069 
1070 static int
1071 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1072 {
1073 	int i;
1074 
1075 	if (cmd == DDI_SUSPEND)
1076 		return (DDI_SUCCESS);
1077 
1078 	if (cmd != DDI_DETACH)
1079 		return (DDI_FAILURE);
1080 
1081 	/*
1082 	 * Destroy arenas and hash tables.
1083 	 */
1084 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1085 		tl_transport_state_t *t = &tl_transports[i];
1086 
1087 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1088 			continue;
1089 
1090 		ASSERT(EQUIV(i & TL_TICLTS, t->tr_serializer != NULL));
1091 		if (t->tr_serializer != NULL) {
1092 			tl_serializer_refrele(t->tr_serializer);
1093 			t->tr_serializer = NULL;
1094 		}
1095 
1096 #ifdef _ILP32
1097 		if (i & TL_SOCKET)
1098 			mod_hash_destroy_idhash(t->tr_ai_hash);
1099 		else
1100 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1101 #else
1102 		mod_hash_destroy_idhash(t->tr_ai_hash);
1103 #endif /* _ILP32 */
1104 		t->tr_ai_hash = NULL;
1105 		if (i & TL_SOCKET)
1106 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1107 		else
1108 			mod_hash_destroy_hash(t->tr_addr_hash);
1109 		t->tr_addr_hash = NULL;
1110 	}
1111 
1112 	kmem_cache_destroy(tl_cache);
1113 	tl_cache = NULL;
1114 	id_space_destroy(tl_minors);
1115 	tl_minors = NULL;
1116 	ddi_remove_minor_node(devi, NULL);
1117 	return (DDI_SUCCESS);
1118 }
1119 
1120 /* ARGSUSED */
1121 static int
1122 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1123 {
1124 
1125 	int retcode = DDI_FAILURE;
1126 
1127 	switch (infocmd) {
1128 
1129 	case DDI_INFO_DEVT2DEVINFO:
1130 		if (tl_dip != NULL) {
1131 			*result = (void *)tl_dip;
1132 			retcode = DDI_SUCCESS;
1133 		}
1134 		break;
1135 
1136 	case DDI_INFO_DEVT2INSTANCE:
1137 		*result = (void *)0;
1138 		retcode = DDI_SUCCESS;
1139 		break;
1140 
1141 	default:
1142 		break;
1143 	}
1144 	return (retcode);
1145 }
1146 
1147 /*
1148  * Endpoint reference management.
1149  */
/*
 * Take one more reference on the endpoint. The count is bumped atomically;
 * each hold is expected to be paired with a tl_refrele().
 */
static void
tl_refhold(tl_endpt_t *tep)
{
	atomic_add_32(&tep->te_refcnt, 1);
}
1155 
1156 static void
1157 tl_refrele(tl_endpt_t *tep)
1158 {
1159 	ASSERT(tep->te_refcnt != 0);
1160 
1161 	if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1162 		tl_free(tep);
1163 }
1164 
1165 /*ARGSUSED*/
1166 static int
1167 tl_constructor(void *buf, void *cdrarg, int kmflags)
1168 {
1169 	tl_endpt_t *tep = buf;
1170 
1171 	bzero(tep, sizeof (tl_endpt_t));
1172 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1173 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1174 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1175 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1176 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1177 
1178 	return (0);
1179 }
1180 
1181 /*ARGSUSED*/
1182 static void
1183 tl_destructor(void *buf, void *cdrarg)
1184 {
1185 	tl_endpt_t *tep = buf;
1186 
1187 	mutex_destroy(&tep->te_closelock);
1188 	cv_destroy(&tep->te_closecv);
1189 	mutex_destroy(&tep->te_srv_lock);
1190 	cv_destroy(&tep->te_srv_cv);
1191 	mutex_destroy(&tep->te_ser_lock);
1192 }
1193 
/*
 * Release an endpoint whose reference count has dropped to zero (called from
 * tl_refrele()).
 *
 * By this time the endpoint must already be detached from its queues and from
 * the address hash; the assertions below document that state. The routine
 * frees the TLI address buffer, returns the minor number, cancels an unused
 * hash reservation, drops the private serializer of COTS endpoints and resets
 * the remaining fields before the buffer goes back to tl_cache.
 */
static void
tl_free(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt == 0);
	ASSERT(tep->te_transport != NULL);
	ASSERT(tep->te_rq == NULL);
	ASSERT(tep->te_wq == NULL);
	ASSERT(tep->te_ser != NULL);
	ASSERT(tep->te_ser_count == 0);
	ASSERT(! (tep->te_flag & TL_ADDRHASHED));

	if (IS_SOCKET(tep)) {
		/* Socket address lives inside the endpoint: nothing to free */
		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
		ASSERT(tep->te_abuf == &tep->te_uxaddr);
		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
	} else if (tep->te_abuf != NULL) {
		/* TLI address buffer was allocated separately */
		kmem_free(tep->te_abuf, tep->te_alen);
		tep->te_alen = -1; /* uninitialized */
		tep->te_abuf = NULL;
	} else {
		ASSERT(tep->te_alen == -1);
	}

	/* Give back the minor number allocated in tl_open() */
	id_free(tl_minors, tep->te_minor);
	ASSERT(tep->te_credp == NULL);

	/* Cancel the hash reservation made in tl_open() if it is still held */
	if (tep->te_hash_hndl != NULL)
		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

	if (IS_COTS(tep)) {
		TL_REMOVE_PEER(tep->te_conp);
		TL_REMOVE_PEER(tep->te_oconp);
		/* COTS endpoints own a private serializer (see tl_open()) */
		tl_serializer_refrele(tep->te_ser);
		tep->te_ser = NULL;
		ASSERT(tep->te_nicon == 0);
		ASSERT(list_head(&tep->te_iconp) == NULL);
	} else {
		ASSERT(tep->te_lastep == NULL);
		ASSERT(list_head(&tep->te_flowlist) == NULL);
		ASSERT(tep->te_flowq == NULL);
	}

	ASSERT(tep->te_bufcid == 0);
	ASSERT(tep->te_timoutid == 0);
	bzero(&tep->te_ap, sizeof (tep->te_ap));
	tep->te_acceptor_id = 0;

	/* Reset close-related state for the next user of this buffer */
	ASSERT(tep->te_closewait == 0);
	ASSERT(!tep->te_rsrv_active);
	ASSERT(!tep->te_wsrv_active);
	tep->te_closing = 0;
	tep->te_nowsrv = B_FALSE;
	tep->te_flag = 0;

	kmem_cache_free(tl_cache, tep);
}
1251 
1252 /*
1253  * Allocate/free reference-counted wrappers for serializers.
1254  */
1255 static tl_serializer_t *
1256 tl_serializer_alloc(int flags)
1257 {
1258 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1259 	serializer_t *ser;
1260 
1261 	if (s == NULL)
1262 		return (NULL);
1263 
1264 	ser = serializer_create(flags);
1265 
1266 	if (ser == NULL) {
1267 		kmem_free(s, sizeof (tl_serializer_t));
1268 		return (NULL);
1269 	}
1270 
1271 	s->ts_refcnt = 1;
1272 	s->ts_serializer = ser;
1273 	return (s);
1274 }
1275 
/*
 * Take one more reference on a serializer wrapper. The count is bumped
 * atomically; pair each hold with tl_serializer_refrele().
 */
static void
tl_serializer_refhold(tl_serializer_t *s)
{
	atomic_add_32(&s->ts_refcnt, 1);
}
1281 
1282 static void
1283 tl_serializer_refrele(tl_serializer_t *s)
1284 {
1285 	if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1286 		serializer_destroy(s->ts_serializer);
1287 		kmem_free(s, sizeof (tl_serializer_t));
1288 	}
1289 }
1290 
/*
 * Post a request on the endpoint serializer. For COTS transports keep track of
 * the number of pending requests.
 *
 * tlproc is the routine to run behind the serializer; it is called with mp
 * and tep. The routine is expected to call tl_serializer_exit() when done so
 * that the pending count stays balanced.
 */
static void
tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
{
	if (IS_COTS(tep)) {
		/* Count the request before it can possibly run. */
		mutex_enter(&tep->te_ser_lock);
		tep->te_ser_count++;
		mutex_exit(&tep->te_ser_lock);
	}
	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
}
1305 
1306 /*
1307  * Complete processing the request on the serializer. Decrement the counter for
1308  * pending requests for COTS transports.
1309  */
1310 static void
1311 tl_serializer_exit(tl_endpt_t *tep)
1312 {
1313 	if (IS_COTS(tep)) {
1314 		mutex_enter(&tep->te_ser_lock);
1315 		ASSERT(tep->te_ser_count != 0);
1316 		tep->te_ser_count--;
1317 		mutex_exit(&tep->te_ser_lock);
1318 	}
1319 }
1320 
1321 /*
1322  * Hash management functions.
1323  */
1324 
1325 /*
1326  * Return TRUE if two addresses are equal, false otherwise.
1327  */
1328 static boolean_t
1329 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1330 {
1331 	return ((ap1->ta_alen > 0) &&
1332 	    (ap1->ta_alen == ap2->ta_alen) &&
1333 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1334 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1335 }
1336 
1337 /*
1338  * This function is called whenever an endpoint is found in the hash table.
1339  */
1340 /* ARGSUSED0 */
1341 static void
1342 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1343 {
1344 	tl_refhold((tl_endpt_t *)val);
1345 }
1346 
1347 /*
1348  * Address hash function.
1349  */
1350 /* ARGSUSED */
1351 static uint_t
1352 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1353 {
1354 	tl_addr_t *ap = (tl_addr_t *)key;
1355 	size_t	len = ap->ta_alen;
1356 	uchar_t *p = ap->ta_abuf;
1357 	uint_t i, g;
1358 
1359 	ASSERT((len > 0) && (p != NULL));
1360 
1361 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1362 		i = (i << 4) + (*p);
1363 		if ((g = (i & 0xf0000000U)) != 0) {
1364 			i ^= (g >> 24);
1365 			i ^= g;
1366 		}
1367 	}
1368 	return (i);
1369 }
1370 
1371 /*
1372  * This function is used by hash lookups. It compares two generic addresses.
1373  */
1374 static int
1375 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1376 {
1377 #ifdef 	DEBUG
1378 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1379 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1380 
1381 	ASSERT(key1 != NULL);
1382 	ASSERT(key2 != NULL);
1383 
1384 	ASSERT(ap1->ta_abuf != NULL);
1385 	ASSERT(ap2->ta_abuf != NULL);
1386 	ASSERT(ap1->ta_alen > 0);
1387 	ASSERT(ap2->ta_alen > 0);
1388 #endif
1389 
1390 	return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1391 }
1392 
1393 /*
1394  * Prevent endpoint from closing if possible.
1395  * Return B_TRUE on success, B_FALSE on failure.
1396  */
1397 static boolean_t
1398 tl_noclose(tl_endpt_t *tep)
1399 {
1400 	boolean_t rc = B_FALSE;
1401 
1402 	mutex_enter(&tep->te_closelock);
1403 	if (! tep->te_closing) {
1404 		ASSERT(tep->te_closewait == 0);
1405 		tep->te_closewait++;
1406 		rc = B_TRUE;
1407 	}
1408 	mutex_exit(&tep->te_closelock);
1409 	return (rc);
1410 }
1411 
/*
 * Allow endpoint to close if needed.
 *
 * Drops te_closewait back to zero and signals te_closecv, which wakes up a
 * tl_close() sleeping on it. The caller must currently hold the endpoint
 * against close, i.e. te_closewait must be 1.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}
1425 
/*
 * STREAMS open entry point.
 *
 * The minor number of *devp selects the transport. A fresh endpoint is
 * allocated from tl_cache, attached to both queues, given a unique minor
 * number (the device is cloned by rewriting *devp) and entered into the
 * transport's acceptor ID hash.
 *
 * Returns 0 on success (including re-open of an already open queue) and ENXIO
 * for CLONEOPEN/MODOPEN or an out of range minor.
 */
/* ARGSUSED */
static int
tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	/* Queue already has an endpoint attached: nothing more to do. */
	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	/* Socket opens use the socket flavor of the selected transport. */
	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;	/* reference dropped at the end of tl_close() */
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;

	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/* Socket-specific address handling. */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		/* TLI endpoints have no address until they are bound. */
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

	/*
	 * Pick the acceptor ID: the read queue pointer for ILP32 non-socket
	 * endpoints, the minor number in all other cases.
	 */
#ifdef	_ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif	/* _ILP32 */


	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}
1536 
/*
 * STREAMS close entry point.
 *
 * Close runs in two phases, both executed behind the endpoint serializer:
 * tl_close_ser() runs before qprocsoff() and tl_close_finish_ser() after it.
 * te_closewait/te_closecv are used to wait for the serializer work. The
 * reference taken in tl_open() is dropped at the end. Always returns 0.
 */
/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag,	cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	/* Cancel any outstanding bufcall and timeout for this endpoint. */
	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close processing
	 * with serializer protection. This processing may happen asynchronously
	 * with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	/* Drop the credential hold and the reference taken in tl_open(). */
	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}
1642 
/*
 * First phase of close processing done behind the serializer.
 *
 * Drains the write queue where the transport semantics require it, removes
 * the address from the hash, detaches the write queues from each other and
 * clears te_rq before waking up tl_close().
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	/* Record that the first close phase has run for this endpoint. */
	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}
1688 
/*
 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
 * the reference for CLTS.
 *
 * Called from serializer. Should drop reference count for CLTS only; that
 * reference is the one tl_close() took right before posting this request.
 */
/* ARGSUSED0 */
static void
tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(IMPLY(IS_CLTS(tep), tep->te_closewait == 0));
	ASSERT(IMPLY(IS_COTS(tep), tep->te_closewait == 1));

	tep->te_state = -1;	/* Uninitialized */
	if (IS_COTS(tep)) {
		tl_co_unconnect(tep);
	} else {
		/* Connectionless specific cleanup */
		TL_REMOVE_PEER(tep->te_lastep);
		/*
		 * Backenable anybody that is flow controlled waiting for
		 * this endpoint.
		 */
		tl_cl_backenable(tep);
		/* Leave the flow control list of the endpoint we waited on. */
		if (tep->te_flowq != NULL) {
			list_remove(&(tep->te_flowq->te_flowlist), tep);
			tep->te_flowq = NULL;
		}
	}

	tl_serializer_exit(tep);
	if (IS_COTS(tep))
		tl_closeok(tep);
	else
		tl_refrele(tep);
}
1726 
1727 /*
1728  * STREAMS write-side put procedure.
1729  * Enter serializer for most of the processing.
1730  *
1731  * The T_CONN_REQ is processed outside of serializer.
1732  */
1733 static void
1734 tl_wput(queue_t *wq, mblk_t *mp)
1735 {
1736 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
1737 	ssize_t			msz = MBLKL(mp);
1738 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
1739 	tlproc_t		*tl_proc = NULL;
1740 
1741 	switch (DB_TYPE(mp)) {
1742 	case M_DATA:
1743 		/* Only valid for connection-oriented transports */
1744 		if (IS_CLTS(tep)) {
1745 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1746 			    SL_TRACE|SL_ERROR,
1747 			    "tl_wput:M_DATA invalid for ticlts driver"));
1748 			tl_merror(wq, mp, EPROTO);
1749 			return;
1750 		}
1751 		tl_proc = tl_wput_data_ser;
1752 		break;
1753 
1754 	case M_IOCTL:
1755 		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1756 		case TL_IOC_CREDOPT:
1757 			/* FALLTHROUGH */
1758 		case TL_IOC_UCREDOPT:
1759 			/*
1760 			 * Serialize endpoint state change.
1761 			 */
1762 			tl_proc = tl_do_ioctl_ser;
1763 			break;
1764 
1765 		default:
1766 			miocnak(wq, mp, 0, EINVAL);
1767 			return;
1768 		}
1769 		break;
1770 
1771 	case M_FLUSH:
1772 		/*
1773 		 * do canonical M_FLUSH processing
1774 		 */
1775 		if (*mp->b_rptr & FLUSHW) {
1776 			flushq(wq, FLUSHALL);
1777 			*mp->b_rptr &= ~FLUSHW;
1778 		}
1779 		if (*mp->b_rptr & FLUSHR) {
1780 			flushq(RD(wq), FLUSHALL);
1781 			qreply(wq, mp);
1782 		} else {
1783 			freemsg(mp);
1784 		}
1785 		return;
1786 
1787 	case M_PROTO:
1788 		if (msz < sizeof (prim->type)) {
1789 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1790 			    SL_TRACE|SL_ERROR,
1791 			    "tl_wput:M_PROTO data too short"));
1792 			tl_merror(wq, mp, EPROTO);
1793 			return;
1794 		}
1795 		switch (prim->type) {
1796 		case T_OPTMGMT_REQ:
1797 		case T_SVR4_OPTMGMT_REQ:
1798 			/*
1799 			 * Process TPI option management requests immediately
1800 			 * in put procedure regardless of in-order processing
1801 			 * of already queued messages.
1802 			 * (Note: This driver supports AF_UNIX socket
1803 			 * implementation.  Unless we implement this processing,
1804 			 * setsockopt() on socket endpoint will block on flow
1805 			 * controlled endpoints which it should not. That is
1806 			 * required for successful execution of VSU socket tests
1807 			 * and is consistent with BSD socket behavior).
1808 			 */
1809 			tl_optmgmt(wq, mp);
1810 			return;
1811 		case O_T_BIND_REQ:
1812 		case T_BIND_REQ:
1813 			tl_proc = tl_bind_ser;
1814 			break;
1815 		case T_CONN_REQ:
1816 			if (IS_CLTS(tep)) {
1817 				tl_merror(wq, mp, EPROTO);
1818 				return;
1819 			}
1820 			tl_conn_req(wq, mp);
1821 			return;
1822 		case T_DATA_REQ:
1823 		case T_OPTDATA_REQ:
1824 		case T_EXDATA_REQ:
1825 		case T_ORDREL_REQ:
1826 			tl_proc = tl_putq_ser;
1827 			break;
1828 		case T_UNITDATA_REQ:
1829 			if (IS_COTS(tep) ||
1830 			    (msz < sizeof (struct T_unitdata_req))) {
1831 				tl_merror(wq, mp, EPROTO);
1832 				return;
1833 			}
1834 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1835 				tl_proc = tl_unitdata_ser;
1836 			} else {
1837 				tl_proc = tl_putq_ser;
1838 			}
1839 			break;
1840 		default:
1841 			/*
1842 			 * process in service procedure if message already
1843 			 * queued (maintain in-order processing)
1844 			 */
1845 			if (wq->q_first != NULL) {
1846 				tl_proc = tl_putq_ser;
1847 			} else {
1848 				tl_proc = tl_wput_ser;
1849 			}
1850 			break;
1851 		}
1852 		break;
1853 
1854 	case M_PCPROTO:
1855 		/*
1856 		 * Check that the message has enough data to figure out TPI
1857 		 * primitive.
1858 		 */
1859 		if (msz < sizeof (prim->type)) {
1860 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1861 			    SL_TRACE|SL_ERROR,
1862 			    "tl_wput:M_PCROTO data too short"));
1863 			tl_merror(wq, mp, EPROTO);
1864 			return;
1865 		}
1866 		switch (prim->type) {
1867 		case T_CAPABILITY_REQ:
1868 			tl_capability_req(mp, tep);
1869 			return;
1870 		case T_INFO_REQ:
1871 			tl_proc = tl_info_req_ser;
1872 			break;
1873 		default:
1874 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1875 			    SL_TRACE|SL_ERROR,
1876 			    "tl_wput:unknown TPI msg primitive"));
1877 			tl_merror(wq, mp, EPROTO);
1878 			return;
1879 		}
1880 		break;
1881 	default:
1882 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1883 		    "tl_wput:default:unexpected Streams message"));
1884 		freemsg(mp);
1885 		return;
1886 	}
1887 
1888 	/*
1889 	 * Continue processing via serializer.
1890 	 */
1891 	ASSERT(tl_proc != NULL);
1892 	tl_refhold(tep);
1893 	tl_serializer_enter(tep, tl_proc, mp);
1894 }
1895 
1896 /*
1897  * Place message on the queue while preserving order.
1898  */
1899 static void
1900 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1901 {
1902 	if (tep->te_closing) {
1903 		tl_wput_ser(mp, tep);
1904 	} else {
1905 		TL_PUTQ(tep, mp);
1906 		tl_serializer_exit(tep);
1907 		tl_refrele(tep);
1908 	}
1909 
1910 }
1911 
1912 static void
1913 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1914 {
1915 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1916 
1917 	switch (DB_TYPE(mp)) {
1918 	case M_DATA:
1919 		tl_data(mp, tep);
1920 		break;
1921 	case M_PROTO:
1922 		tl_do_proto(mp, tep);
1923 		break;
1924 	default:
1925 		freemsg(mp);
1926 		break;
1927 	}
1928 }
1929 
1930 /*
1931  * Write side put procedure called from serializer.
1932  */
1933 static void
1934 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1935 {
1936 	tl_wput_common_ser(mp, tep);
1937 	tl_serializer_exit(tep);
1938 	tl_refrele(tep);
1939 }
1940 
1941 /*
1942  * M_DATA processing. Called from serializer.
1943  */
1944 static void
1945 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1946 {
1947 	tl_endpt_t	*peer_tep = tep->te_conp;
1948 	queue_t		*peer_rq;
1949 
1950 	ASSERT(DB_TYPE(mp) == M_DATA);
1951 	ASSERT(IS_COTS(tep));
1952 
1953 	ASSERT(IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer));
1954 
1955 	/*
1956 	 * fastpath for data. Ignore flow control if tep is closing.
1957 	 */
1958 	if ((peer_tep != NULL) &&
1959 	    !peer_tep->te_closing &&
1960 	    ((tep->te_state == TS_DATA_XFER) ||
1961 	    (tep->te_state == TS_WREQ_ORDREL)) &&
1962 	    (tep->te_wq != NULL) &&
1963 	    (tep->te_wq->q_first == NULL) &&
1964 	    ((peer_tep->te_state == TS_DATA_XFER) ||
1965 	    (peer_tep->te_state == TS_WREQ_ORDREL))	&&
1966 	    ((peer_rq = peer_tep->te_rq) != NULL) &&
1967 	    (canputnext(peer_rq) || tep->te_closing)) {
1968 		putnext(peer_rq, mp);
1969 	} else if (tep->te_closing) {
1970 		/*
1971 		 * It is possible that by the time we got here tep started to
1972 		 * close. If the write queue is not empty, and the state is
1973 		 * TS_DATA_XFER the data should be delivered in order, so we
1974 		 * call putq() instead of freeing the data.
1975 		 */
1976 		if ((tep->te_wq != NULL) &&
1977 		    ((tep->te_state == TS_DATA_XFER) ||
1978 		    (tep->te_state == TS_WREQ_ORDREL))) {
1979 			TL_PUTQ(tep, mp);
1980 		} else {
1981 			freemsg(mp);
1982 		}
1983 	} else {
1984 		TL_PUTQ(tep, mp);
1985 	}
1986 
1987 	tl_serializer_exit(tep);
1988 	tl_refrele(tep);
1989 }
1990 
1991 /*
1992  * Write side service routine.
1993  *
1994  * All actual processing happens within serializer which is entered
1995  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1996  * messages that need processing may have arrived, so tl_wsrv repeats until
1997  * queue is empty or te_nowsrv is set.
1998  */
1999 static void
2000 tl_wsrv(queue_t *wq)
2001 {
2002 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2003 
2004 	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
2005 		mutex_enter(&tep->te_srv_lock);
2006 		ASSERT(tep->te_wsrv_active == B_FALSE);
2007 		tep->te_wsrv_active = B_TRUE;
2008 		mutex_exit(&tep->te_srv_lock);
2009 
2010 		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2011 
2012 		/*
2013 		 * Wait for serializer job to complete.
2014 		 */
2015 		mutex_enter(&tep->te_srv_lock);
2016 		while (tep->te_wsrv_active) {
2017 			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2018 		}
2019 		cv_signal(&tep->te_srv_cv);
2020 		mutex_exit(&tep->te_srv_lock);
2021 	}
2022 }
2023 
2024 /*
2025  * Serialized write side processing of the STREAMS queue.
2026  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2027  * is NULL.
2028  */
2029 static void
2030 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2031 {
2032 	mblk_t *mp;
2033 	queue_t *wq = tep->te_wq;
2034 
2035 	ASSERT(wq != NULL);
2036 	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2037 		tl_wput_common_ser(mp, tep);
2038 	}
2039 
2040 	/*
2041 	 * Wakeup service routine unless called from close.
2042 	 * If ser_mp is specified, the caller is tl_wsrv().
2043 	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2044 	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2045 	 * be no matching tl_serializer_exit() in this case.
2046 	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2047 	 * waiting on te_srv_cv.
2048 	 */
2049 	if (ser_mp != NULL) {
2050 		/*
2051 		 * We are called from tl_wsrv.
2052 		 */
2053 		mutex_enter(&tep->te_srv_lock);
2054 		ASSERT(tep->te_wsrv_active);
2055 		tep->te_wsrv_active = B_FALSE;
2056 		cv_signal(&tep->te_srv_cv);
2057 		mutex_exit(&tep->te_srv_lock);
2058 		tl_serializer_exit(tep);
2059 	}
2060 }
2061 
2062 /*
2063  * Called when the stream is backenabled. Enter serializer and qenable everyone
2064  * flow controlled by tep.
2065  *
2066  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2067  * is possible that two instances of tl_rsrv will be running reusing the same
2068  * rsrv mblk.
2069  */
2070 static void
2071 tl_rsrv(queue_t *rq)
2072 {
2073 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2074 
2075 	ASSERT(rq->q_first == NULL);
2076 	ASSERT(tep->te_rsrv_active == 0);
2077 
2078 	tep->te_rsrv_active = B_TRUE;
2079 	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2080 	/*
2081 	 * Wait for serializer job to complete.
2082 	 */
2083 	mutex_enter(&tep->te_srv_lock);
2084 	while (tep->te_rsrv_active) {
2085 		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2086 	}
2087 	cv_signal(&tep->te_srv_cv);
2088 	mutex_exit(&tep->te_srv_lock);
2089 }
2090 
2091 /* ARGSUSED */
2092 static void
2093 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2094 {
2095 	tl_endpt_t *peer_tep;
2096 
2097 	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2098 		tl_cl_backenable(tep);
2099 	} else if (
2100 	    IS_COTS(tep) &&
2101 	    ((peer_tep = tep->te_conp) != NULL) &&
2102 	    !peer_tep->te_closing &&
2103 	    ((tep->te_state == TS_DATA_XFER) ||
2104 	    (tep->te_state == TS_WIND_ORDREL)||
2105 	    (tep->te_state == TS_WREQ_ORDREL))) {
2106 		TL_QENABLE(peer_tep);
2107 	}
2108 
2109 	/*
2110 	 * Wakeup read side service routine.
2111 	 */
2112 	mutex_enter(&tep->te_srv_lock);
2113 	ASSERT(tep->te_rsrv_active);
2114 	tep->te_rsrv_active = B_FALSE;
2115 	cv_signal(&tep->te_srv_cv);
2116 	mutex_exit(&tep->te_srv_lock);
2117 	tl_serializer_exit(tep);
2118 }
2119 
2120 /*
2121  * process M_PROTO messages. Always called from serializer.
2122  */
2123 static void
2124 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2125 {
2126 	ssize_t			msz = MBLKL(mp);
2127 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2128 
2129 	/* Message size was validated by tl_wput(). */
2130 	ASSERT(msz >= sizeof (prim->type));
2131 
2132 	switch (prim->type) {
2133 	case T_UNBIND_REQ:
2134 		tl_unbind(mp, tep);
2135 		break;
2136 
2137 	case T_ADDR_REQ:
2138 		tl_addr_req(mp, tep);
2139 		break;
2140 
2141 	case O_T_CONN_RES:
2142 	case T_CONN_RES:
2143 		if (IS_CLTS(tep)) {
2144 			tl_merror(tep->te_wq, mp, EPROTO);
2145 			break;
2146 		}
2147 		tl_conn_res(mp, tep);
2148 		break;
2149 
2150 	case T_DISCON_REQ:
2151 		if (IS_CLTS(tep)) {
2152 			tl_merror(tep->te_wq, mp, EPROTO);
2153 			break;
2154 		}
2155 		tl_discon_req(mp, tep);
2156 		break;
2157 
2158 	case T_DATA_REQ:
2159 		if (IS_CLTS(tep)) {
2160 			tl_merror(tep->te_wq, mp, EPROTO);
2161 			break;
2162 		}
2163 		tl_data(mp, tep);
2164 		break;
2165 
2166 	case T_OPTDATA_REQ:
2167 		if (IS_CLTS(tep)) {
2168 			tl_merror(tep->te_wq, mp, EPROTO);
2169 			break;
2170 		}
2171 		tl_data(mp, tep);
2172 		break;
2173 
2174 	case T_EXDATA_REQ:
2175 		if (IS_CLTS(tep)) {
2176 			tl_merror(tep->te_wq, mp, EPROTO);
2177 			break;
2178 		}
2179 		tl_exdata(mp, tep);
2180 		break;
2181 
2182 	case T_ORDREL_REQ:
2183 		if (! IS_COTSORD(tep)) {
2184 			tl_merror(tep->te_wq, mp, EPROTO);
2185 			break;
2186 		}
2187 		tl_ordrel(mp, tep);
2188 		break;
2189 
2190 	case T_UNITDATA_REQ:
2191 		if (IS_COTS(tep)) {
2192 			tl_merror(tep->te_wq, mp, EPROTO);
2193 			break;
2194 		}
2195 		tl_unitdata(mp, tep);
2196 		break;
2197 
2198 	default:
2199 		tl_merror(tep->te_wq, mp, EPROTO);
2200 		break;
2201 	}
2202 }
2203 
2204 /*
2205  * Process ioctl from serializer.
2206  * This is a wrapper around tl_do_ioctl().
2207  */
2208 static void
2209 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2210 {
2211 	if (! tep->te_closing)
2212 		tl_do_ioctl(mp, tep);
2213 	else
2214 		freemsg(mp);
2215 
2216 	tl_serializer_exit(tep);
2217 	tl_refrele(tep);
2218 }
2219 
2220 static void
2221 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2222 {
2223 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2224 	int cmd = iocbp->ioc_cmd;
2225 	queue_t *wq = tep->te_wq;
2226 	int error;
2227 	int thisopt, otheropt;
2228 
2229 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2230 
2231 	switch (cmd) {
2232 	case TL_IOC_CREDOPT:
2233 		if (cmd == TL_IOC_CREDOPT) {
2234 			thisopt = TL_SETCRED;
2235 			otheropt = TL_SETUCRED;
2236 		} else {
2237 			/* FALLTHROUGH */
2238 	case TL_IOC_UCREDOPT:
2239 			thisopt = TL_SETUCRED;
2240 			otheropt = TL_SETCRED;
2241 		}
2242 		/*
2243 		 * The credentials passing does not apply to sockets.
2244 		 * Only one of the cred options can be set at a given time.
2245 		 */
2246 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2247 			miocnak(wq, mp, 0, EINVAL);
2248 			return;
2249 		}
2250 
2251 		/*
2252 		 * Turn on generation of credential options for
2253 		 * T_conn_req, T_conn_con, T_unidata_ind.
2254 		 */
2255 		error = miocpullup(mp, sizeof (uint32_t));
2256 		if (error != 0) {
2257 			miocnak(wq, mp, 0, error);
2258 			return;
2259 		}
2260 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2261 			miocnak(wq, mp, 0, EINVAL);
2262 			return;
2263 		}
2264 
2265 		if (*(uint32_t *)mp->b_cont->b_rptr)
2266 			tep->te_flag |= thisopt;
2267 		else
2268 			tep->te_flag &= ~thisopt;
2269 
2270 		miocack(wq, mp, 0, 0);
2271 		break;
2272 
2273 	default:
2274 		/* Should not be here */
2275 		miocnak(wq, mp, 0, EINVAL);
2276 		break;
2277 	}
2278 }
2279 
2280 
2281 /*
2282  * send T_ERROR_ACK
2283  * Note: assumes enough memory or caller passed big enough mp
2284  *	- no recovery from allocb failures
2285  */
2286 
2287 static void
2288 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2289     t_scalar_t unix_err, t_scalar_t type)
2290 {
2291 	struct T_error_ack *err_ack;
2292 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2293 	    M_PCPROTO, T_ERROR_ACK);
2294 
2295 	if (ackmp == NULL) {
2296 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2297 		    "tl_error_ack:out of mblk memory"));
2298 		tl_merror(wq, NULL, ENOSR);
2299 		return;
2300 	}
2301 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2302 	err_ack->ERROR_prim = type;
2303 	err_ack->TLI_error = tli_err;
2304 	err_ack->UNIX_error = unix_err;
2305 
2306 	/*
2307 	 * send error ack message
2308 	 */
2309 	qreply(wq, ackmp);
2310 }
2311 
2312 
2313 
2314 /*
2315  * send T_OK_ACK
2316  * Note: assumes enough memory or caller passed big enough mp
2317  *	- no recovery from allocb failures
2318  */
2319 static void
2320 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2321 {
2322 	struct T_ok_ack *ok_ack;
2323 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2324 	    M_PCPROTO, T_OK_ACK);
2325 
2326 	if (ackmp == NULL) {
2327 		tl_merror(wq, NULL, ENOMEM);
2328 		return;
2329 	}
2330 
2331 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2332 	ok_ack->CORRECT_prim = type;
2333 
2334 	(void) qreply(wq, ackmp);
2335 }
2336 
2337 /*
2338  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2339  * This is a wrapper around tl_bind().
2340  */
2341 static void
2342 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2343 {
2344 	if (! tep->te_closing)
2345 		tl_bind(mp, tep);
2346 	else
2347 		freemsg(mp);
2348 
2349 	tl_serializer_exit(tep);
2350 	tl_refrele(tep);
2351 }
2352 
2353 /*
2354  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2355  * Assumes that the endpoint is in the unbound.
2356  */
2357 static void
2358 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2359 {
2360 	queue_t			*wq = tep->te_wq;
2361 	struct T_bind_ack	*b_ack;
2362 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2363 	mblk_t			*ackmp, *bamp;
2364 	soux_addr_t		ux_addr;
2365 	t_uscalar_t		qlen = 0;
2366 	t_scalar_t		alen, aoff;
2367 	tl_addr_t		addr_req;
2368 	void			*addr_startp;
2369 	ssize_t			msz = MBLKL(mp), basize;
2370 	t_scalar_t		tli_err = 0, unix_err = 0;
2371 	t_scalar_t		save_prim_type = bind->PRIM_type;
2372 	t_scalar_t		save_state = tep->te_state;
2373 
2374 	if (tep->te_state != TS_UNBND) {
2375 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2376 		    SL_TRACE|SL_ERROR,
2377 		    "tl_wput:bind_request:out of state, state=%d",
2378 		    tep->te_state));
2379 		tli_err = TOUTSTATE;
2380 		goto error;
2381 	}
2382 
2383 	if (msz < sizeof (struct T_bind_req)) {
2384 		tli_err = TSYSERR; unix_err = EINVAL;
2385 		goto error;
2386 	}
2387 
2388 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2389 
2390 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2391 	    (bind->PRIM_type == T_BIND_REQ));
2392 
2393 	alen = bind->ADDR_length;
2394 	aoff = bind->ADDR_offset;
2395 
2396 	/* negotiate max conn req pending */
2397 	if (IS_COTS(tep)) {
2398 		qlen = bind->CONIND_number;
2399 		if (qlen > tl_maxqlen)
2400 			qlen = tl_maxqlen;
2401 	}
2402 
2403 	/*
2404 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2405 	 * and bound again.
2406 	 */
2407 	if ((tep->te_hash_hndl == NULL) &&
2408 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2409 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2410 	    &tep->te_hash_hndl) != 0) {
2411 		tli_err = TSYSERR; unix_err = ENOSR;
2412 		goto error;
2413 	}
2414 
2415 	/*
2416 	 * Verify address correctness.
2417 	 */
2418 	if (IS_SOCKET(tep)) {
2419 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2420 
2421 		if ((alen != TL_SOUX_ADDRLEN) ||
2422 		    (aoff < 0) ||
2423 		    (aoff + alen > msz)) {
2424 			(void) (STRLOG(TL_ID, tep->te_minor,
2425 			    1, SL_TRACE|SL_ERROR,
2426 			    "tl_bind: invalid socket addr"));
2427 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2428 			tli_err = TSYSERR; unix_err = EINVAL;
2429 			goto error;
2430 		}
2431 		/* Copy address from message to local buffer. */
2432 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2433 		/*
2434 		 * Check that we got correct address from sockets
2435 		 */
2436 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2437 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2438 			(void) (STRLOG(TL_ID, tep->te_minor,
2439 			    1, SL_TRACE|SL_ERROR,
2440 			    "tl_bind: invalid socket magic"));
2441 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2442 			tli_err = TSYSERR; unix_err = EINVAL;
2443 			goto error;
2444 		}
2445 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2446 		    (ux_addr.soua_vp != NULL)) {
2447 			(void) (STRLOG(TL_ID, tep->te_minor,
2448 			    1, SL_TRACE|SL_ERROR,
2449 			    "tl_bind: implicit addr non-empty"));
2450 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2451 			tli_err = TSYSERR; unix_err = EINVAL;
2452 			goto error;
2453 		}
2454 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2455 		    (ux_addr.soua_vp == NULL)) {
2456 			(void) (STRLOG(TL_ID, tep->te_minor,
2457 			    1, SL_TRACE|SL_ERROR,
2458 			    "tl_bind: explicit addr empty"));
2459 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2460 			tli_err = TSYSERR; unix_err = EINVAL;
2461 			goto error;
2462 		}
2463 	} else {
2464 		if ((alen > 0) && ((aoff < 0) ||
2465 		    ((ssize_t)(aoff + alen) > msz) ||
2466 		    ((aoff + alen) < 0))) {
2467 			(void) (STRLOG(TL_ID, tep->te_minor,
2468 			    1, SL_TRACE|SL_ERROR,
2469 			    "tl_bind: invalid message"));
2470 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2471 			tli_err = TSYSERR; unix_err = EINVAL;
2472 			goto error;
2473 		}
2474 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2475 			(void) (STRLOG(TL_ID, tep->te_minor,
2476 			    1, SL_TRACE|SL_ERROR,
2477 			    "tl_bind: bad addr in  message"));
2478 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2479 			tli_err = TBADADDR;
2480 			goto error;
2481 		}
2482 #ifdef DEBUG
2483 		/*
2484 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2485 		 * if (! assertion)
2486 		 *	log warning;
2487 		 */
2488 		if (! ((alen == 0 && aoff == 0) ||
2489 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2490 			(void) (STRLOG(TL_ID, tep->te_minor,
2491 				    3, SL_TRACE|SL_ERROR,
2492 				    "tl_bind: addr overlaps TPI message"));
2493 		}
2494 #endif
2495 	}
2496 
2497 	/*
2498 	 * Bind the address provided or allocate one if requested.
2499 	 * Allow rebinds with a new qlen value.
2500 	 */
2501 	if (IS_SOCKET(tep)) {
2502 		/*
2503 		 * For anonymous requests the te_ap is already set up properly
2504 		 * so use minor number as an address.
2505 		 * For explicit requests need to check whether the address is
2506 		 * already in use.
2507 		 */
2508 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2509 			int rc;
2510 
2511 			if (tep->te_flag & TL_ADDRHASHED) {
2512 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2513 				if (tep->te_vp == ux_addr.soua_vp)
2514 					goto skip_addr_bind;
2515 				else /* Rebind to a new address. */
2516 					tl_addr_unbind(tep);
2517 			}
2518 			/*
2519 			 * Insert address in the hash if it is not already
2520 			 * there.  Since we use preallocated handle, the insert
2521 			 * can fail only if the key is already present.
2522 			 */
2523 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2524 			    (mod_hash_key_t)ux_addr.soua_vp,
2525 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2526 
2527 			if (rc != 0) {
2528 				ASSERT(rc == MH_ERR_DUPLICATE);
2529 				/*
2530 				 * Violate O_T_BIND_REQ semantics and fail with
2531 				 * TADDRBUSY - sockets will not use any address
2532 				 * other than supplied one for explicit binds.
2533 				 */
2534 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2535 				    SL_TRACE|SL_ERROR,
2536 				    "tl_bind:requested addr %p is busy",
2537 				    ux_addr.soua_vp));
2538 				tli_err = TADDRBUSY; unix_err = 0;
2539 				goto error;
2540 			}
2541 			tep->te_uxaddr = ux_addr;
2542 			tep->te_flag |= TL_ADDRHASHED;
2543 			tep->te_hash_hndl = NULL;
2544 		}
2545 	} else if (alen == 0) {
2546 		/*
2547 		 * assign any free address
2548 		 */
2549 		if (! tl_get_any_addr(tep, NULL)) {
2550 			(void) (STRLOG(TL_ID, tep->te_minor,
2551 			    1, SL_TRACE|SL_ERROR,
2552 			    "tl_bind:failed to get buffer for any "
2553 			    "address"));
2554 			tli_err = TSYSERR; unix_err = ENOSR;
2555 			goto error;
2556 		}
2557 	} else {
2558 		addr_req.ta_alen = alen;
2559 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2560 		addr_req.ta_zoneid = tep->te_zoneid;
2561 
2562 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2563 		if (tep->te_abuf == NULL) {
2564 			tli_err = TSYSERR; unix_err = ENOSR;
2565 			goto error;
2566 		}
2567 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2568 		tep->te_alen = alen;
2569 
2570 		if (mod_hash_insert_reserve(tep->te_addrhash,
2571 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2572 		    tep->te_hash_hndl) != 0) {
2573 			if (save_prim_type == T_BIND_REQ) {
2574 				/*
2575 				 * The bind semantics for this primitive
2576 				 * require a failure if the exact address
2577 				 * requested is busy
2578 				 */
2579 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2580 				    SL_TRACE|SL_ERROR,
2581 				    "tl_bind:requested addr is busy"));
2582 				tli_err = TADDRBUSY; unix_err = 0;
2583 				goto error;
2584 			}
2585 
2586 			/*
2587 			 * O_T_BIND_REQ semantics say if address if requested
2588 			 * address is busy, bind to any available free address
2589 			 */
2590 			if (! tl_get_any_addr(tep, &addr_req)) {
2591 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2592 				    SL_TRACE|SL_ERROR,
2593 				    "tl_bind:unable to get any addr buf"));
2594 				tli_err = TSYSERR; unix_err = ENOMEM;
2595 				goto error;
2596 			}
2597 		} else {
2598 			tep->te_flag |= TL_ADDRHASHED;
2599 			tep->te_hash_hndl = NULL;
2600 		}
2601 	}
2602 
2603 	ASSERT(tep->te_alen >= 0);
2604 
2605 skip_addr_bind:
2606 	/*
2607 	 * prepare T_BIND_ACK TPI message
2608 	 */
2609 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2610 	bamp = reallocb(mp, basize, 0);
2611 	if (bamp == NULL) {
2612 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2613 		    "tl_wput:tl_bind: allocb failed"));
2614 		/*
2615 		 * roll back state changes
2616 		 */
2617 		tl_addr_unbind(tep);
2618 		tep->te_state = TS_UNBND;
2619 		tl_memrecover(wq, mp, basize);
2620 		return;
2621 	}
2622 
2623 	DB_TYPE(bamp) = M_PCPROTO;
2624 	bamp->b_wptr = bamp->b_rptr + basize;
2625 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2626 	b_ack->PRIM_type = T_BIND_ACK;
2627 	b_ack->CONIND_number = qlen;
2628 	b_ack->ADDR_length = tep->te_alen;
2629 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2630 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2631 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2632 
2633 	if (IS_COTS(tep)) {
2634 		tep->te_qlen = qlen;
2635 		if (qlen > 0)
2636 			tep->te_flag |= TL_LISTENER;
2637 	}
2638 
2639 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2640 	/*
2641 	 * send T_BIND_ACK message
2642 	 */
2643 	(void) qreply(wq, bamp);
2644 	return;
2645 
2646 error:
2647 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2648 	if (ackmp == NULL) {
2649 		/*
2650 		 * roll back state changes
2651 		 */
2652 		tep->te_state = save_state;
2653 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2654 		return;
2655 	}
2656 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2657 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2658 }
2659 
2660 /*
2661  * Process T_UNBIND_REQ.
2662  * Called from serializer.
2663  */
2664 static void
2665 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2666 {
2667 	queue_t *wq;
2668 	mblk_t *ackmp;
2669 
2670 	if (tep->te_closing) {
2671 		freemsg(mp);
2672 		return;
2673 	}
2674 
2675 	wq = tep->te_wq;
2676 
2677 	/*
2678 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2679 	 * ==> allocate for T_ERROR_ACK (known max)
2680 	 */
2681 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2682 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2683 		return;
2684 	}
2685 	/*
2686 	 * memory resources committed
2687 	 * Note: no message validation. T_UNBIND_REQ message is
2688 	 * same size as PRIM_type field so already verified earlier.
2689 	 */
2690 
2691 	/*
2692 	 * validate state
2693 	 */
2694 	if (tep->te_state != TS_IDLE) {
2695 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2696 		    SL_TRACE|SL_ERROR,
2697 		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2698 		    tep->te_state));
2699 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2700 		return;
2701 	}
2702 	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2703 
2704 	/*
2705 	 * TPI says on T_UNBIND_REQ:
2706 	 *    send up a M_FLUSH to flush both
2707 	 *    read and write queues
2708 	 */
2709 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2710 
2711 	if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2712 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2713 
2714 		/*
2715 		 * Sockets use bind with qlen==0 followed by bind() to
2716 		 * the same address with qlen > 0 for listeners.
2717 		 * We allow rebind with a new qlen value.
2718 		 */
2719 		tl_addr_unbind(tep);
2720 	}
2721 
2722 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2723 	/*
2724 	 * send  T_OK_ACK
2725 	 */
2726 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2727 }
2728 
2729 
2730 /*
2731  * Option management code from drv/ip is used here
2732  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2733  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2734  *	However, that is what we want as that option is 'unorthodox'
2735  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2736  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2737  * Note2: use of optcom_req means this routine is an exception to
2738  *	 recovery from allocb() failures.
2739  */
2740 
2741 static void
2742 tl_optmgmt(queue_t *wq, mblk_t *mp)
2743 {
2744 	tl_endpt_t *tep;
2745 	mblk_t *ackmp;
2746 	union T_primitives *prim;
2747 
2748 	tep = (tl_endpt_t *)wq->q_ptr;
2749 	prim = (union T_primitives *)mp->b_rptr;
2750 
2751 	/*  all states OK for AF_UNIX options ? */
2752 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2753 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2754 		/*
2755 		 * Broken TLI semantics that options can only be managed
2756 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2757 		 * tests this TLI (mis)feature using this device driver.
2758 		 */
2759 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2760 		    SL_TRACE|SL_ERROR,
2761 		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2762 		    tep->te_state));
2763 		/*
2764 		 * preallocate memory for T_ERROR_ACK
2765 		 */
2766 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2767 		if (! ackmp) {
2768 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2769 			return;
2770 		}
2771 
2772 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2773 		freemsg(mp);
2774 		return;
2775 	}
2776 
2777 	/*
2778 	 * call common option management routine from drv/ip
2779 	 */
2780 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2781 		(void) svr4_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj,
2782 		    B_FALSE);
2783 	} else {
2784 		ASSERT(prim->type == T_OPTMGMT_REQ);
2785 		(void) tpi_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj,
2786 		    B_FALSE);
2787 	}
2788 }
2789 
2790 /*
2791  * Handle T_conn_req - the driver part of accept().
2792  * If TL_SET[U]CRED generate the credentials options.
2793  * If this is a socket pass through options unmodified.
2794  * For sockets generate the T_CONN_CON here instead of
2795  * waiting for the T_CONN_RES.
2796  */
2797 static void
2798 tl_conn_req(queue_t *wq, mblk_t *mp)
2799 {
2800 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2801 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2802 	ssize_t			msz = MBLKL(mp);
2803 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2804 	tl_endpt_t		*peer_tep = NULL;
2805 	mblk_t			*ackmp;
2806 	mblk_t			*dimp;
2807 	struct T_discon_ind	*di;
2808 	soux_addr_t		ux_addr;
2809 	tl_addr_t		dst;
2810 
2811 	ASSERT(IS_COTS(tep));
2812 
2813 	if (tep->te_closing) {
2814 		freemsg(mp);
2815 		return;
2816 	}
2817 
2818 	/*
2819 	 * preallocate memory for:
2820 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2821 	 *	==> known max T_ERROR_ACK
2822 	 * 2. max of T_DISCON_IND and T_CONN_IND
2823 	 */
2824 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2825 	if (! ackmp) {
2826 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2827 		return;
2828 	}
2829 	/*
2830 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2831 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2832 	 */
2833 
2834 	if (tep->te_state != TS_IDLE) {
2835 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2836 		    SL_TRACE|SL_ERROR,
2837 		    "tl_wput:T_CONN_REQ:out of state, state=%d",
2838 		    tep->te_state));
2839 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2840 		freemsg(mp);
2841 		return;
2842 	}
2843 
2844 	/*
2845 	 * validate the message
2846 	 * Note: dereference fields in struct inside message only
2847 	 * after validating the message length.
2848 	 */
2849 	if (msz < sizeof (struct T_conn_req)) {
2850 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2851 		    "tl_conn_req:invalid message length"));
2852 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2853 		freemsg(mp);
2854 		return;
2855 	}
2856 	alen = creq->DEST_length;
2857 	aoff = creq->DEST_offset;
2858 	olen = creq->OPT_length;
2859 	ooff = creq->OPT_offset;
2860 	if (olen == 0)
2861 		ooff = 0;
2862 
2863 	if (IS_SOCKET(tep)) {
2864 		if ((alen != TL_SOUX_ADDRLEN) ||
2865 		    (aoff < 0) ||
2866 		    (aoff + alen > msz) ||
2867 		    (alen > msz - sizeof (struct T_conn_req))) {
2868 			(void) (STRLOG(TL_ID, tep->te_minor,
2869 				    1, SL_TRACE|SL_ERROR,
2870 				    "tl_conn_req: invalid socket addr"));
2871 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2872 			freemsg(mp);
2873 			return;
2874 		}
2875 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2876 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2877 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2878 			(void) (STRLOG(TL_ID, tep->te_minor,
2879 			    1, SL_TRACE|SL_ERROR,
2880 			    "tl_conn_req: invalid socket magic"));
2881 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2882 			freemsg(mp);
2883 			return;
2884 		}
2885 	} else {
2886 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2887 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2888 		    ooff + olen < 0)) ||
2889 		    olen < 0 || ooff < 0) {
2890 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2891 			    SL_TRACE|SL_ERROR,
2892 			    "tl_conn_req:invalid message"));
2893 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2894 			freemsg(mp);
2895 			return;
2896 		}
2897 
2898 		if (alen <= 0 || aoff < 0 ||
2899 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2900 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2901 				    SL_TRACE|SL_ERROR,
2902 				    "tl_conn_req:bad addr in message, "
2903 				    "alen=%d, msz=%ld",
2904 				    alen, msz));
2905 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2906 			freemsg(mp);
2907 			return;
2908 		}
2909 #ifdef DEBUG
2910 		/*
2911 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2912 		 * if (! assertion)
2913 		 *	log warning;
2914 		 */
2915 		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2916 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
2917 			    SL_TRACE|SL_ERROR,
2918 			    "tl_conn_req: addr overlaps TPI message"));
2919 		}
2920 #endif
2921 		if (olen) {
2922 			/*
2923 			 * no opts in connect req
2924 			 * supported in this provider except for sockets.
2925 			 */
2926 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2927 			    SL_TRACE|SL_ERROR,
2928 			    "tl_conn_req:options not supported "
2929 			    "in message"));
2930 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2931 			freemsg(mp);
2932 			return;
2933 		}
2934 	}
2935 
2936 	/*
2937 	 * Prevent tep from closing on us.
2938 	 */
2939 	if (! tl_noclose(tep)) {
2940 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2941 		    "tl_conn_req:endpoint is closing"));
2942 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2943 		freemsg(mp);
2944 		return;
2945 	}
2946 
2947 	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2948 	/*
2949 	 * get endpoint to connect to
2950 	 * check that peer with DEST addr is bound to addr
2951 	 * and has CONIND_number > 0
2952 	 */
2953 	dst.ta_alen = alen;
2954 	dst.ta_abuf = mp->b_rptr + aoff;
2955 	dst.ta_zoneid = tep->te_zoneid;
2956 
2957 	/*
2958 	 * Verify if remote addr is in use
2959 	 */
2960 	peer_tep = (IS_SOCKET(tep) ?
2961 	    tl_sock_find_peer(tep, &ux_addr) :
2962 	    tl_find_peer(tep, &dst));
2963 
2964 	if (peer_tep == NULL) {
2965 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2966 		    "tl_conn_req:no one at connect address"));
2967 		err = ECONNREFUSED;
2968 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2969 		/*
2970 		 * validate that number of incoming connection is
2971 		 * not to capacity on destination endpoint
2972 		 */
2973 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2974 		    "tl_conn_req: qlen overflow connection refused"));
2975 			err = ECONNREFUSED;
2976 	}
2977 
2978 	/*
2979 	 * Send T_DISCON_IND in case of error
2980 	 */
2981 	if (err != 0) {
2982 		if (peer_tep != NULL)
2983 			tl_refrele(peer_tep);
2984 		/* We are still expected to send T_OK_ACK */
2985 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2986 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2987 		tl_closeok(tep);
2988 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2989 		    M_PROTO, T_DISCON_IND);
2990 		if (dimp == NULL) {
2991 			tl_merror(wq, NULL, ENOSR);
2992 			return;
2993 		}
2994 		di = (struct T_discon_ind *)dimp->b_rptr;
2995 		di->DISCON_reason = err;
2996 		di->SEQ_number = BADSEQNUM;
2997 
2998 		tep->te_state = TS_IDLE;
2999 		/*
3000 		 * send T_DISCON_IND message
3001 		 */
3002 		putnext(tep->te_rq, dimp);
3003 		return;
3004 	}
3005 
3006 	ASSERT(IS_COTS(peer_tep));
3007 
3008 	/*
3009 	 * Found the listener. At this point processing will continue on
3010 	 * listener serializer. Close of the endpoint should be blocked while we
3011 	 * switch serializers.
3012 	 */
3013 	tl_serializer_refhold(peer_tep->te_ser);
3014 	tl_serializer_refrele(tep->te_ser);
3015 	tep->te_ser = peer_tep->te_ser;
3016 	ASSERT(tep->te_oconp == NULL);
3017 	tep->te_oconp = peer_tep;
3018 
3019 	/*
3020 	 * It is safe to close now. Close may continue on listener serializer.
3021 	 */
3022 	tl_closeok(tep);
3023 
3024 	/*
3025 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3026 	 * data, so we link mp to ackmp.
3027 	 */
3028 	ackmp->b_cont = mp;
3029 	mp = ackmp;
3030 
3031 	tl_refhold(tep);
3032 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3033 }
3034 
3035 /*
3036  * Finish T_CONN_REQ processing on listener serializer.
      *
      * Scheduled with tl_serializer_enter() by the T_CONN_REQ handler after the
      * connecting endpoint has been switched to the listener's serializer.
      *
      * mp:  the preallocated ack mblk with the original T_CONN_REQ message
      *      linked on its b_cont.
      * tep: the connecting endpoint; tep->te_oconp is the listener it is
      *      connecting to.
      *
      * On the success path the request is acknowledged with tl_ok_ack(), a
      * T_CONN_IND is passed up the listener's read queue and a tl_icon_t
      * describing tep is appended to the listener's te_iconp list. When tep is
      * a socket and tl_disable_early_connect is clear, a T_CONN_CON built from a
      * copy of the request is also passed up tep's own read queue.
      *
      * Every early-return path calls both tl_serializer_exit(tep) and
      * tl_refrele(tep). The success path calls only tl_serializer_exit(tep); see
      * the comment at the end of the function for why the reference is kept.
3037  */
3038 static void
3039 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3040 {
3041 	queue_t		*wq;
3042 	tl_endpt_t	*peer_tep = tep->te_oconp;
3043 	mblk_t		*confmp, *cimp, *indmp;
3044 	void		*opts = NULL;
3045 	mblk_t		*ackmp = mp;
3046 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3047 	struct T_conn_ind	*ci;
3048 	tl_icon_t	*tip;
3049 	void		*addr_startp;
3050 	t_scalar_t	olen = creq->OPT_length;
3051 	t_scalar_t	ooff = creq->OPT_offset;
3052 	size_t 		ci_msz;
3053 	size_t		size;
3054 
     	/*
     	 * The connecting endpoint is closing: abandon the connect.
     	 * mp is still the ack mblk with the request chained on b_cont, so
     	 * the single freemsg() below releases both messages.
     	 */
3055 	if (tep->te_closing) {
3056 		TL_UNCONNECT(tep->te_oconp);
3057 		tl_serializer_exit(tep);
3058 		tl_refrele(tep);
3059 		freemsg(mp);
3060 		return;
3061 	}
3062 
3063 	wq = tep->te_wq;
3064 	tep->te_flag |= TL_EAGER;
3065 
3066 	/*
3067 	 * Extract preallocated ackmp from mp.
     	 * From here on mp is the T_CONN_REQ message and ackmp is a separate,
     	 * unlinked mblk; each must be consumed or freed on its own.
3068 	 */
3069 	mp = mp->b_cont;
3070 	ackmp->b_cont = NULL;
3071 
     	/* With no options, ooff == 0 is the "nothing to copy" marker below. */
3072 	if (olen == 0)
3073 		ooff = 0;
3074 
     	/*
     	 * The listener must not be closing and must be in TS_IDLE or
     	 * TS_WRES_CIND. Otherwise the request message is handed to
     	 * tl_error_ack() (it is not freed here, so tl_error_ack() presumably
     	 * consumes it - confirm) and the unused preallocated ackmp is freed.
     	 */
3075 	if (peer_tep->te_closing ||
3076 	    !((peer_tep->te_state == TS_IDLE) ||
3077 	    (peer_tep->te_state == TS_WRES_CIND))) {
3078 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3079 		    "tl_conn_req:peer in bad state (%d)",
3080 		    peer_tep->te_state));
3081 		TL_UNCONNECT(tep->te_oconp);
3082 		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3083 		freemsg(ackmp);
3084 		tl_serializer_exit(tep);
3085 		tl_refrele(tep);
3086 		return;
3087 	}
3088 
3089 	/*
3090 	 * preallocate now for T_DISCON_IND or T_CONN_IND
3091 	 */
3092 	/*
3093 	 * calculate length of T_CONN_IND message
     	 *
     	 * If the listener asked for credentials (TL_SETCRED/TL_SETUCRED) the
     	 * options carried in the T_CONN_REQ are ignored (ooff is cleared) and
     	 * olen is recomputed to hold exactly one credential option.
3094 	 */
3095 	if (peer_tep->te_flag & TL_SETCRED) {
3096 		ooff = 0;
3097 		olen = (t_scalar_t) sizeof (struct opthdr) +
3098 		    OPTLEN(sizeof (tl_credopt_t));
3099 		/* 1 option only */
3100 	} else if (peer_tep->te_flag & TL_SETUCRED) {
3101 		ooff = 0;
3102 		olen = (t_scalar_t)sizeof (struct opthdr) +
3103 		    OPTLEN(ucredsize);
3104 		/* 1 option only */
3105 	}
     	/*
     	 * T_CONN_IND layout: header, source address (tep's bound address),
     	 * then the options at an aligned offset. size is large enough for
     	 * either a T_CONN_IND or a T_DISCON_IND.
     	 */
3106 	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3107 	ci_msz = T_ALIGN(ci_msz) + olen;
3108 	size = max(ci_msz, sizeof (struct T_discon_ind));
3109 
3110 	/*
3111 	 * Save options from mp - we'll need them for T_CONN_IND.
     	 * They are copied out because mp itself is reused below (reallocb)
     	 * to build the T_CONN_IND.
3112 	 */
3113 	if (ooff != 0) {
3114 		opts = kmem_alloc(olen, KM_NOSLEEP);
3115 		if (opts == NULL) {
3116 			/*
3117 			 * roll back state changes
     			 * (mp is handed to tl_memrecover(); it is not freed
     			 * here, so presumably tl_memrecover() keeps it for
     			 * a later retry - confirm)
3118 			 */
3119 			tep->te_state = TS_IDLE;
3120 			tl_memrecover(wq, mp, size);
3121 			freemsg(ackmp);
3122 			TL_UNCONNECT(tep->te_oconp);
3123 			tl_serializer_exit(tep);
3124 			tl_refrele(tep);
3125 			return;
3126 		}
3127 		/* Copy options to a temp buffer */
3128 		bcopy(mp->b_rptr + ooff, opts, olen);
3129 	}
3130 
3131 	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3132 		/*
3133 		 * Generate a T_CONN_CON that has the identical address
3134 		 * (and options) as the T_CONN_REQ.
3135 		 * NOTE: assumes that the T_conn_req and T_conn_con structures
3136 		 * are isomorphic.
3137 		 */
3138 		confmp = copyb(mp);
3139 		if (! confmp) {
3140 			/*
3141 			 * roll back state changes
3142 			 */
3143 			tep->te_state = TS_IDLE;
3144 			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3145 			freemsg(ackmp);
3146 			if (opts != NULL)
3147 				kmem_free(opts, olen);
3148 			TL_UNCONNECT(tep->te_oconp);
3149 			tl_serializer_exit(tep);
3150 			tl_refrele(tep);
3151 			return;
3152 		}
     		/* Only the primitive type needs rewriting in the copy. */
3153 		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3154 		    T_CONN_CON;
3155 	} else {
     		/* No early T_CONN_CON; NULL also keeps freemsg(confmp) safe. */
3156 		confmp = NULL;
3157 	}
     	/* Grow the request message so it can later hold the indication. */
3158 	if ((indmp = reallocb(mp, size, 0)) == NULL) {
3159 		/*
3160 		 * roll back state changes
3161 		 */
3162 		tep->te_state = TS_IDLE;
3163 		tl_memrecover(wq, mp, size);
3164 		freemsg(ackmp);
3165 		if (opts != NULL)
3166 			kmem_free(opts, olen);
3167 		freemsg(confmp);
3168 		TL_UNCONNECT(tep->te_oconp);
3169 		tl_serializer_exit(tep);
3170 		tl_refrele(tep);
3171 		return;
3172 	}
3173 
     	/* Cell that will represent this pending connection on the listener. */
3174 	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3175 	if (tip == NULL) {
3176 		/*
3177 		 * roll back state changes
     		 * (indmp, not mp, is passed on: reallocb() succeeded above)
3178 		 */
3179 		tep->te_state = TS_IDLE;
3180 		tl_memrecover(wq, indmp, sizeof (*tip));
3181 		freemsg(ackmp);
3182 		if (opts != NULL)
3183 			kmem_free(opts, olen);
3184 		freemsg(confmp);
3185 		TL_UNCONNECT(tep->te_oconp);
3186 		tl_serializer_exit(tep);
3187 		tl_refrele(tep);
3188 		return;
3189 	}
     	/* Already zero from kmem_zalloc(); kept explicit. */
3190 	tip->ti_mp = NULL;
3191 
3192 	/*
3193 	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3194 	 * and tl_icon_t cell.
3195 	 */
3196 
3197 	/*
3198 	 * ack validity of request and send the peer credential in the ACK.
3199 	 */
3200 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3201 
     	/*
     	 * Attach the listener's credential and pid to the early T_CONN_CON.
     	 * peer_tep has already been dereferenced above, so the NULL test on
     	 * it is redundant at this point.
     	 */
3202 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3203 	    confmp != NULL) {
3204 		mblk_setcred(confmp, peer_tep->te_credp);
3205 		DB_CPID(confmp) = peer_tep->te_cpid;
3206 	}
3207 
3208 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3209 
3210 	/*
3211 	 * prepare message to send T_CONN_IND
3212 	 */
3213 	/*
3214 	 * allocate the message - original data blocks retained
3215 	 * in the returned mblk
3216 	 */
3217 	cimp = tl_resizemp(indmp, size);
3218 	if (! cimp) {
     		/*
     		 * The request has already been acked, so there is no state
     		 * rollback here; the failure is reported through tl_merror()
     		 * and everything still owned locally is released.
     		 */
3219 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3220 		    "tl_conn_req:con_ind:allocb failure"));
3221 		tl_merror(wq, indmp, ENOMEM);
3222 		TL_UNCONNECT(tep->te_oconp);
3223 		tl_serializer_exit(tep);
3224 		tl_refrele(tep);
3225 		if (opts != NULL)
3226 			kmem_free(opts, olen);
3227 		freemsg(confmp);
3228 		ASSERT(tip->ti_mp == NULL);
3229 		kmem_free(tip, sizeof (*tip));
3230 		return;
3231 	}
3232 
     	/* Fill in the T_CONN_IND: source is tep's address and sequence number. */
3233 	DB_TYPE(cimp) = M_PROTO;
3234 	ci = (struct T_conn_ind *)cimp->b_rptr;
3235 	ci->PRIM_type  = T_CONN_IND;
3236 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3237 	ci->SRC_length = tep->te_alen;
3238 	ci->SEQ_number = tep->te_seqno;
3239 
3240 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3241 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
     	/*
     	 * Options: a generated credential option if the listener requested
     	 * one, else the options saved from the T_CONN_REQ, else none.
     	 */
3242 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3243 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3244 		    ci->SRC_length);
3245 		ci->OPT_length = olen; /* because only 1 option */
3246 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3247 		    DB_CREDDEF(cimp, tep->te_credp),
3248 		    TLPID(cimp, tep),
3249 		    peer_tep->te_flag, peer_tep->te_credp);
3250 	} else if (ooff != 0) {
3251 		/* Copy option from T_CONN_REQ */
3252 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3253 		    ci->SRC_length);
3254 		ci->OPT_length = olen;
3255 		ASSERT(opts != NULL);
3256 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3257 	} else {
3258 		ci->OPT_offset = 0;
3259 		ci->OPT_length = 0;
3260 	}
     	/* The temporary option copy is no longer needed. */
3261 	if (opts != NULL)
3262 		kmem_free(opts, olen);
3263 
3264 	/*
3265 	 * register connection request with server peer
3266 	 * append to list of incoming connections
3267 	 * increment references for both peer_tep and tep: peer_tep is placed on
3268 	 * te_oconp and tep is placed on listeners queue.
3269 	 */
3270 	tip->ti_tep = tep;
3271 	tip->ti_seqno = tep->te_seqno;
3272 	list_insert_tail(&peer_tep->te_iconp, tip);
3273 	peer_tep->te_nicon++;
3274 
3275 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3276 	/*
3277 	 * send the T_CONN_IND message
3278 	 */
3279 	putnext(peer_tep->te_rq, cimp);
3280 
3281 	/*
3282 	 * Send a T_CONN_CON message for sockets.
3283 	 * Disable the queues until we have reached the correct state!
     	 * (noenable() is applied to tep's write queue here; tl_conn_res()
     	 * calls enableok() on the client's write queue once the connection
     	 * has been accepted.)
3284 	 */
3285 	if (confmp != NULL) {
3286 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3287 		noenable(wq);
3288 		putnext(tep->te_rq, confmp);
3289 	}
3290 	/*
3291 	 * Now we need to increment tep reference because tep is referenced by
3292 	 * server list of pending connections. We also need to decrement
3293 	 * reference before exiting serializer. Two operations void each other
3294 	 * so we don't modify reference at all.
3295 	 */
3296 	ASSERT(tep->te_refcnt >= 2);
3297 	ASSERT(peer_tep->te_refcnt >= 2);
3298 	tl_serializer_exit(tep);
3299 }
3300 
3301 
3302 
3303 /*
3304  * Handle T_conn_res on listener stream. Called on listener serializer.
3305  *
3306  * mp is the T_CONN_RES or O_T_CONN_RES message and tep is the listener.
3307  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3308  * Switch eager serializer to acceptor's.
3309  *
3310  * If TL_SET[U]CRED generate the credentials options.
3311  * For sockets tl_conn_req has already generated the T_CONN_CON.
      *
      * Outline:
      *  1. Preallocate the ack mblk, then validate listener state, message
      *     length, options (none are supported) and the sequence number.
      *  2. Look up the accepting endpoint by ACCEPTOR_id and block its close.
      *  3. Find the pending connection (tl_icon_t) for SEQ_number. Its client
      *     may be absent (ti_tep == NULL) or in the middle of closing.
      *  4. Ack the request, pass up any messages queued on the tl_icon_t,
      *     build a T_CONN_CON for the client unless it already got one, link
      *     client and acceptor via te_conp and put both on one serializer.
      *
      * Every path that returns after the acceptor lookup succeeded drops the
      * lookup reference with tl_refrele(acc_ep), except the final success
      * path, where that reference is kept for the te_conp linkage (see the
      * "make connection linking" comment). Once tl_noclose(acc_ep) has
      * succeeded, every return is preceded by tl_closeok(acc_ep); likewise
      * tl_closeok(cl_ep) is called whenever client_noclose_set is true.
3312  */
3313 static void
3314 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3315 {
3316 	queue_t			*wq;
3317 	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
3318 	ssize_t			msz = MBLKL(mp);
3319 	t_scalar_t		olen, ooff, err = 0;
     	/*
     	 * NOTE(review): PRIM_type is read here, before msz is checked against
     	 * sizeof (struct T_conn_res) below. Presumably the caller dispatched
     	 * on PRIM_type already, so at least that field is present - confirm.
     	 */
3320 	t_scalar_t		prim = cres->PRIM_type;
3321 	uchar_t			*addr_startp;
3322 	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
3323 	tl_icon_t		*tip;
3324 	size_t			size;
3325 	mblk_t			*ackmp, *respmp;
3326 	mblk_t			*dimp, *ccmp = NULL;
3327 	struct T_discon_ind	*di;
3328 	struct T_conn_con	*cc;
     	/* B_TRUE once tl_noclose(cl_ep) succeeded and must be undone. */
3329 	boolean_t		client_noclose_set = B_FALSE;
     	/* B_FALSE when the acceptor must move to the client's serializer. */
3330 	boolean_t		switch_client_serializer = B_TRUE;
3331 
3332 	ASSERT(IS_COTS(tep));
3333 
     	/* A closing listener silently drops the response. */
3334 	if (tep->te_closing) {
3335 		freemsg(mp);
3336 		return;
3337 	}
3338 
3339 	wq = tep->te_wq;
3340 
3341 	/*
3342 	 * preallocate memory for:
3343 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3344 	 *	==> known max T_ERROR_ACK
3345 	 * 2. max of T_DISCON_IND and T_CONN_CON
3346 	 */
3347 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3348 	if (! ackmp) {
3349 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3350 		return;
3351 	}
3352 	/*
3353 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3354 	 * will be committed for T_DISCON_IND/T_CONN_CON later
3355 	 */
3356 
3357 
3358 	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3359 
3360 	/*
3361 	 * validate state
3362 	 */
3363 	if (tep->te_state != TS_WRES_CIND) {
3364 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3365 		    SL_TRACE|SL_ERROR,
3366 		    "tl_wput:T_CONN_RES:out of state, state=%d",
3367 		    tep->te_state));
3368 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3369 		freemsg(mp);
3370 		return;
3371 	}
3372 
3373 	/*
3374 	 * validate the message
3375 	 * Note: dereference fields in struct inside message only
3376 	 * after validating the message length.
3377 	 */
3378 	if (msz < sizeof (struct T_conn_res)) {
3379 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3380 		    "tl_conn_res:invalid message length"));
3381 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3382 		freemsg(mp);
3383 		return;
3384 	}
3385 	olen = cres->OPT_length;
3386 	ooff = cres->OPT_offset;
     	/* Options, if any, must lie entirely inside the message. */
3387 	if (((olen > 0) && ((ooff + olen) > msz))) {
3388 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3389 		    "tl_conn_res:invalid message"));
3390 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3391 		freemsg(mp);
3392 		return;
3393 	}
3394 	if (olen) {
3395 		/*
3396 		 * no opts in connect res
3397 		 * supported in this provider
3398 		 */
3399 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3400 		    "tl_conn_res:options not supported in message"));
3401 		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3402 		freemsg(mp);
3403 		return;
3404 	}
3405 
     	/*
     	 * The state machine advances here. Every error return below first
     	 * applies TE_ERROR_ACK (or explicitly restores TS_WRES_CIND) so the
     	 * listener does not stay in TS_WACK_CRES.
     	 */
3406 	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3407 	ASSERT(tep->te_state == TS_WACK_CRES);
3408 
     	/*
     	 * As written this rejects sequence numbers satisfying
     	 * BADSEQNUM <= SEQ_number < TL_MINOR_START.
     	 */
3409 	if (cres->SEQ_number < TL_MINOR_START &&
3410 	    cres->SEQ_number >= BADSEQNUM) {
3411 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3412 		    "tl_conn_res:remote endpoint sequence number bad"));
3413 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3414 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3415 		freemsg(mp);
3416 		return;
3417 	}
3418 
3419 	/*
3420 	 * find accepting endpoint. Will have extra reference if found.
3421 	 */
3422 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3423 	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3424 	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3425 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3426 		    "tl_conn_res:bad accepting endpoint"));
3427 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3428 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3429 		freemsg(mp);
3430 		return;
3431 	}
3432 
3433 	/*
3434 	 * Prevent acceptor from closing.
     	 * On failure only the lookup reference has to be dropped; there is
     	 * no tl_closeok() because tl_noclose() did not succeed.
3435 	 */
3436 	if (! tl_noclose(acc_ep)) {
3437 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3438 		    "tl_conn_res:bad accepting endpoint"));
3439 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3440 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3441 		tl_refrele(acc_ep);
3442 		freemsg(mp);
3443 		return;
3444 	}
3445 
3446 	acc_ep->te_flag |= TL_ACCEPTOR;
3447 
3448 	/*
3449 	 * validate that accepting endpoint, if different from listening
3450 	 * has address bound => state is TS_IDLE
3451 	 * TROUBLE in XPG4 !!?
3452 	 */
3453 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3454 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3455 		    "tl_conn_res:accepting endpoint has no address bound,"
3456 		    "state=%d", acc_ep->te_state));
3457 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3458 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3459 		freemsg(mp);
3460 		tl_closeok(acc_ep);
3461 		tl_refrele(acc_ep);
3462 		return;
3463 	}
3464 
3465 	/*
3466 	 * validate if accepting endpt same as listening, then
3467 	 * no other incoming connection should be on the queue
3468 	 */
3469 
3470 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3471 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3472 		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3473 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3474 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3475 		freemsg(mp);
3476 		tl_closeok(acc_ep);
3477 		tl_refrele(acc_ep);
3478 		return;
3479 	}
3480 
3481 	/*
3482 	 * Look up the entry for this client on the listener's list of
3483 	 * pending connections, using the sequence number from the
3484 	 * response. The entry is removed later with tl_freetip().
3485 	 * A missing entry is reported as TBADSEQ.
3486 	 */
3487 	tip = tl_icon_find(tep, cres->SEQ_number);
3488 	if (tip == NULL) {
3489 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3490 		    "tl_conn_res:no client in listener list"));
3491 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3492 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3493 		freemsg(mp);
3494 		tl_closeok(acc_ep);
3495 		tl_refrele(acc_ep);
3496 		return;
3497 	}
3498 
3499 	/*
3500 	 * If ti_tep is NULL the client has already closed. In this case
3501 	 * the code below will avoid any action on the client side
3502 	 * but complete the server and acceptor state transitions.
3503 	 */
3504 	ASSERT(tip->ti_tep == NULL ||
3505 	    tip->ti_tep->te_seqno == cres->SEQ_number);
3506 	cl_ep = tip->ti_tep;
3507 
3508 	/*
3509 	 * If the client is present it is switched from listener's to acceptor's
3510 	 * serializer. We should block client closes while serializers are
3511 	 * being switched.
3512 	 *
3513 	 * It is possible that the client is present but is currently being
3514 	 * closed. There are two possible cases:
3515 	 *
3516 	 * 1) The client has already entered tl_close_finish_ser() and sent
3517 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3518 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3519 	 *
3520 	 * 2) The client started the close but has not entered
3521 	 *    tl_close_finish_ser() yet. In this case, the client is already
3522 	 *    proceeding asynchronously on the listener's serializer, so we're
3523 	 *    forced to change the acceptor to use the listener's serializer to
3524 	 *    ensure that any operations on the acceptor are serialized with
3525 	 *    respect to the close that's in-progress.
3526 	 */
3527 	if (cl_ep != NULL) {
3528 		if (tl_noclose(cl_ep)) {
3529 			client_noclose_set = B_TRUE;
3530 		} else {
3531 			/*
3532 			 * Client is closing. If it has sent the
3533 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3534 			 * we have to let the client continue until it is
3535 			 * sent.
3536 			 *
3537 			 * If we do continue using the client, acceptor will
3538 			 * switch to client's serializer which is used by client
3539 			 * for its close.
     			 *
     			 * The client is dropped (cl_ep = NULL) when it is
     			 * not a socket, when early connect is disabled, or
     			 * when its te_state is -1 (presumably the marker for
     			 * case 1 above - confirm).
3540 			 */
3541 			tl_client_closing_when_accepting++;
3542 			switch_client_serializer = B_FALSE;
3543 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3544 			    cl_ep->te_state == -1)
3545 				cl_ep = NULL;
3546 		}
3547 	}
3548 
3549 	if (cl_ep != NULL) {
3550 		/*
3551 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3552 		 * (latter for sockets only)
     		 *
     		 * NOTE(review): as written the test below is
     		 *   state != TS_WCON_CREQ && state != TS_DATA_XFER &&
     		 *   IS_SOCKET(cl_ep)
     		 * so a non-socket client is never rejected here, whatever
     		 * its state. Confirm this matches the intent stated above.
3553 		 */
3554 		if (cl_ep->te_state != TS_WCON_CREQ &&
3555 		    (cl_ep->te_state != TS_DATA_XFER &&
3556 		    IS_SOCKET(cl_ep))) {
3557 			err = ECONNREFUSED;
3558 			/*
3559 			 * T_DISCON_IND sent later after committing memory
3560 			 * and acking validity of request
3561 			 */
3562 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3563 			    "tl_conn_res:peer in bad state"));
3564 		}
3565 
3566 		/*
3567 		 * preallocate now for T_DISCON_IND or T_CONN_CON
3568 		 * ack validity of request (T_OK_ACK) after memory committed
3569 		 */
3570 
3571 		if (err)
3572 			size = sizeof (struct T_discon_ind);
3573 		else {
3574 			/*
3575 			 * calculate length of T_CONN_CON message
     			 * (header, acceptor address, then an optional
     			 * credential option at an aligned offset)
3576 			 */
3577 			olen = 0;
3578 			if (cl_ep->te_flag & TL_SETCRED) {
3579 				olen = (t_scalar_t)sizeof (struct opthdr) +
3580 				    OPTLEN(sizeof (tl_credopt_t));
3581 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3582 				olen = (t_scalar_t)sizeof (struct opthdr) +
3583 				    OPTLEN(ucredsize);
3584 			}
3585 			size = T_ALIGN(sizeof (struct T_conn_con) +
3586 			    acc_ep->te_alen) + olen;
3587 		}
3588 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3589 			/*
3590 			 * roll back state changes
3591 			 */
3592 			tep->te_state = TS_WRES_CIND;
3593 			tl_memrecover(wq, mp, size);
3594 			freemsg(ackmp);
3595 			if (client_noclose_set)
3596 				tl_closeok(cl_ep);
3597 			tl_closeok(acc_ep);
3598 			tl_refrele(acc_ep);
3599 			return;
3600 		}
     		/* The message now lives on as respmp; mp must not be reused. */
3601 		mp = NULL;
3602 	}
3603 
3604 	/*
3605 	 * Now ack validity of request
     	 * The event depends on whether this was the last pending connection
     	 * and on whether the listener is also the acceptor.
3606 	 */
3607 	if (tep->te_nicon == 1) {
3608 		if (tep == acc_ep)
3609 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3610 		else
3611 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3612 	} else
3613 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3614 
3615 	/*
3616 	 * send T_DISCON_IND now if client state validation failed earlier
     	 *
     	 * NOTE(review): this path returns without calling tl_freetip() for
     	 * tip and forces tep->te_state to TS_IDLE, overriding the NEXTSTATE()
     	 * result computed just above - confirm both are intended.
3617 	 */
3618 	if (err) {
3619 		tl_ok_ack(wq, ackmp, prim);
3620 		/*
3621 		 * flush the queues - why always ?
3622 		 */
3623 		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3624 
3625 		dimp = tl_resizemp(respmp, size);
3626 		if (! dimp) {
3627 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3628 			    SL_TRACE|SL_ERROR,
3629 			    "tl_conn_res:con_ind:allocb failure"));
3630 			tl_merror(wq, respmp, ENOMEM);
3631 			tl_closeok(acc_ep);
3632 			if (client_noclose_set)
3633 				tl_closeok(cl_ep);
3634 			tl_refrele(acc_ep);
3635 			return;
3636 		}
3637 		if (dimp->b_cont) {
3638 			/* no user data in provider generated discon ind */
3639 			freemsg(dimp->b_cont);
3640 			dimp->b_cont = NULL;
3641 		}
3642 
3643 		DB_TYPE(dimp) = M_PROTO;
3644 		di = (struct T_discon_ind *)dimp->b_rptr;
3645 		di->PRIM_type  = T_DISCON_IND;
3646 		di->DISCON_reason = err;
3647 		di->SEQ_number = BADSEQNUM;
3648 
3649 		tep->te_state = TS_IDLE;
3650 		/*
3651 		 * send T_DISCON_IND message
3652 		 */
3653 		putnext(acc_ep->te_rq, dimp);
3654 		if (client_noclose_set)
3655 			tl_closeok(cl_ep);
3656 		tl_closeok(acc_ep);
3657 		tl_refrele(acc_ep);
3658 		return;
3659 	}
3660 
3661 	/*
3662 	 * now start connecting the accepting endpoint
3663 	 */
3664 	if (tep != acc_ep)
3665 		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3666 
3667 	if (cl_ep == NULL) {
3668 		/*
3669 		 * The client has already closed. Send up any queued messages
3670 		 * and change the state accordingly.
     		 * mp is still the original message on this path (reallocb()
     		 * above only runs when a client is present), so it is freed
     		 * here.
3671 		 */
3672 		tl_ok_ack(wq, ackmp, prim);
3673 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3674 
3675 		/*
3676 		 * remove endpoint from incoming connection
3677 		 * delete client from list of incoming connections
3678 		 */
3679 		tl_freetip(tep, tip);
3680 		freemsg(mp);
3681 		tl_closeok(acc_ep);
3682 		tl_refrele(acc_ep);
3683 		return;
3684 	} else if (tip->ti_mp != NULL) {
3685 		/*
3686 		 * The client could have queued a T_DISCON_IND which needs
3687 		 * to be sent up.
3688 		 * Note that t_discon_req can not operate the same as
3689 		 * t_data_req since it is not possible for it to putbq
3690 		 * the message and return -1 due to the use of qwriter.
3691 		 */
3692 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3693 	}
3694 
3695 	/*
3696 	 * prepare connect confirm T_CONN_CON message
3697 	 */
3698 
3699 	/*
3700 	 * allocate the message - original data blocks
3701 	 * retained in the returned mblk
     	 *
     	 * A socket client with early connect enabled was already sent its
     	 * T_CONN_CON by tl_conn_req_ser(), so in that case respmp is simply
     	 * freed and ccmp stays NULL.
3702 	 */
3703 	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3704 		ccmp = tl_resizemp(respmp, size);
3705 		if (ccmp == NULL) {
3706 			tl_ok_ack(wq, ackmp, prim);
3707 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3708 			    SL_TRACE|SL_ERROR,
3709 			    "tl_conn_res:conn_con:allocb failure"));
3710 			tl_merror(wq, respmp, ENOMEM);
3711 			tl_closeok(acc_ep);
3712 			if (client_noclose_set)
3713 				tl_closeok(cl_ep);
3714 			tl_refrele(acc_ep);
3715 			return;
3716 		}
3717 
     		/* Responding address in the T_CONN_CON is the acceptor's. */
3718 		DB_TYPE(ccmp) = M_PROTO;
3719 		cc = (struct T_conn_con *)ccmp->b_rptr;
3720 		cc->PRIM_type  = T_CONN_CON;
3721 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3722 		cc->RES_length = acc_ep->te_alen;
3723 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3724 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3725 		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3726 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3727 			    cc->RES_length);
3728 			cc->OPT_length = olen;
3729 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3730 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3731 			    cl_ep->te_credp);
3732 		} else {
3733 			cc->OPT_offset = 0;
3734 			cc->OPT_length = 0;
3735 		}
3736 		/*
3737 		 * Forward the credential in the packet so it can be picked up
3738 		 * at the higher layers for more complete credential processing
3739 		 */
3740 		mblk_setcred(ccmp, acc_ep->te_credp);
3741 		DB_CPID(ccmp) = acc_ep->te_cpid;
3742 	} else {
3743 		freemsg(respmp);
3744 		respmp = NULL;
3745 	}
3746 
3747 	/*
3748 	 * make connection linking
3749 	 * accepting and client endpoints
3750 	 * No need to increment references:
3751 	 *	on client: it should already have one from tip->ti_tep linkage.
3752 	 *	on acceptor it should already have one from the table lookup.
3753 	 *
3754 	 * At this point both client and acceptor can't close. Set client
3755 	 * serializer to acceptor's.
3756 	 */
3757 	ASSERT(cl_ep->te_refcnt >= 2);
3758 	ASSERT(acc_ep->te_refcnt >= 2);
3759 	ASSERT(cl_ep->te_conp == NULL);
3760 	ASSERT(acc_ep->te_conp == NULL);
3761 	cl_ep->te_conp = acc_ep;
3762 	acc_ep->te_conp = cl_ep;
3763 	ASSERT(cl_ep->te_ser == tep->te_ser);
     	/*
     	 * Preferred direction: move the client onto the acceptor's
     	 * serializer. That is skipped (and counted in tl_serializer_noswitch)
     	 * when te_ser_count is non-zero under te_ser_lock - presumably the
     	 * client still has work outstanding on its current serializer;
     	 * confirm.
     	 */
3764 	if (switch_client_serializer) {
3765 		mutex_enter(&cl_ep->te_ser_lock);
3766 		if (cl_ep->te_ser_count > 0) {
3767 			switch_client_serializer = B_FALSE;
3768 			tl_serializer_noswitch++;
3769 		} else {
3770 			/*
3771 			 * Move client to the acceptor's serializer.
3772 			 */
3773 			tl_serializer_refhold(acc_ep->te_ser);
3774 			tl_serializer_refrele(cl_ep->te_ser);
3775 			cl_ep->te_ser = acc_ep->te_ser;
3776 		}
3777 		mutex_exit(&cl_ep->te_ser_lock);
3778 	}
3779 	if (!switch_client_serializer) {
3780 		/*
3781 		 * It is not possible to switch client to use acceptor's.
3782 		 * Move acceptor to client's serializer (which is the same as
3783 		 * listener's).
3784 		 */
3785 		tl_serializer_refhold(cl_ep->te_ser);
3786 		tl_serializer_refrele(acc_ep->te_ser);
3787 		acc_ep->te_ser = cl_ep->te_ser;
3788 	}
3789 
     	/* The pending-connect (te_oconp) links are replaced by te_conp. */
3790 	TL_REMOVE_PEER(cl_ep->te_oconp);
3791 	TL_REMOVE_PEER(acc_ep->te_oconp);
3792 
3793 	/*
3794 	 * remove endpoint from incoming connection
3795 	 * delete client from list of incoming connections
     	 * (ti_tep is cleared before tl_freetip() is called on the cell)
3796 	 */
3797 	tip->ti_tep = NULL;
3798 	tl_freetip(tep, tip);
3799 	tl_ok_ack(wq, ackmp, prim);
3800 
3801 	/*
3802 	 * data blocks already linked in reallocb()
3803 	 */
3804 
3805 	/*
3806 	 * link queues so that I_SENDFD will work
3807 	 */
3808 	if (! IS_SOCKET(tep)) {
3809 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3810 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3811 	}
3812 
3813 	/*
3814 	 * send T_CONN_CON up on client side unless it was already
3815 	 * done (for a socket). In cases any data or ordrel req has been
3816 	 * queued make sure that the service procedure runs.
     	 * In the socket branch ccmp is expected to be NULL, because it is only
     	 * assigned when the client is not a socket or early connect is
     	 * disabled; the freemsg() there is purely defensive.
3817 	 */
3818 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3819 		enableok(cl_ep->te_wq);
3820 		TL_QENABLE(cl_ep);
3821 		if (ccmp != NULL)
3822 			freemsg(ccmp);
3823 	} else {
3824 		/*
3825 		 * change client state on TE_CONN_CON event
3826 		 */
3827 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3828 		putnext(cl_ep->te_rq, ccmp);
3829 	}
3830 
3831 	/* Mark both endpoints as accepted */
3832 	cl_ep->te_flag |= TL_ACCEPTED;
3833 	acc_ep->te_flag |= TL_ACCEPTED;
3834 
3835 	/*
3836 	 * Allow client and acceptor to close.
3837 	 */
3838 	tl_closeok(acc_ep);
3839 	if (client_noclose_set)
3840 		tl_closeok(cl_ep);
3841 }
3842 
3843 
3844 
3845 
3846 static void
3847 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3848 {
3849 	queue_t			*wq;
3850 	struct T_discon_req	*dr;
3851 	ssize_t			msz;
3852 	tl_endpt_t		*peer_tep = tep->te_conp;
3853 	tl_endpt_t		*srv_tep = tep->te_oconp;
3854 	tl_icon_t		*tip;
3855 	size_t			size;
3856 	mblk_t			*ackmp, *dimp, *respmp;
3857 	struct T_discon_ind	*di;
3858 	t_scalar_t		save_state, new_state;
3859 
3860 	if (tep->te_closing) {
3861 		freemsg(mp);
3862 		return;
3863 	}
3864 
3865 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3866 		TL_UNCONNECT(tep->te_conp);
3867 		peer_tep = NULL;
3868 	}
3869 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3870 		TL_UNCONNECT(tep->te_oconp);
3871 		srv_tep = NULL;
3872 	}
3873 
3874 	wq = tep->te_wq;
3875 
3876 	/*
3877 	 * preallocate memory for:
3878 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3879 	 *	==> known max T_ERROR_ACK
3880 	 * 2. for  T_DISCON_IND
3881 	 */
3882 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3883 	if (! ackmp) {
3884 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3885 		return;
3886 	}
3887 	/*
3888 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3889 	 * will be committed for T_DISCON_IND  later
3890 	 */
3891 
3892 	dr = (struct T_discon_req *)mp->b_rptr;
3893 	msz = MBLKL(mp);
3894 
3895 	/*
3896 	 * validate the state
3897 	 */
3898 	save_state = new_state = tep->te_state;
3899 	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3900 	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3901 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3902 		    SL_TRACE|SL_ERROR,
3903 		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
3904 		    tep->te_state));
3905 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3906 		freemsg(mp);
3907 		return;
3908 	}
3909 	/*
3910 	 * Defer committing the state change until it is determined if
3911 	 * the message will be queued with the tl_icon or not.
3912 	 */
3913 	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3914 
3915 	/* validate the message */
3916 	if (msz < sizeof (struct T_discon_req)) {
3917 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3918 		    "tl_discon_req:invalid message"));
3919 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3920 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3921 		freemsg(mp);
3922 		return;
3923 	}
3924 
3925 	/*
3926 	 * if server, then validate that client exists
3927 	 * by connection sequence number etc.
3928 	 */
3929 	if (tep->te_nicon > 0) { /* server */
3930 
3931 		/*
3932 		 * search server list for disconnect client
3933 		 */
3934 		tip = tl_icon_find(tep, dr->SEQ_number);
3935 		if (tip == NULL) {
3936 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
3937 			    SL_TRACE|SL_ERROR,
3938 			    "tl_discon_req:no disconnect endpoint"));
3939 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3940 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3941 			freemsg(mp);
3942 			return;
3943 		}
3944 		/*
3945 		 * If ti_tep is NULL the client has already closed. In this case
3946 		 * the code below will avoid any action on the client side.
3947 		 */
3948 
3949 		ASSERT(IMPLY(tip->ti_tep != NULL,
3950 		    tip->ti_tep->te_seqno == dr->SEQ_number));
3951 		peer_tep = tip->ti_tep;
3952 	}
3953 
3954 	/*
3955 	 * preallocate now for T_DISCON_IND
3956 	 * ack validity of request (T_OK_ACK) after memory committed
3957 	 */
3958 	size = sizeof (struct T_discon_ind);
3959 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
3960 		tl_memrecover(wq, mp, size);
3961 		freemsg(ackmp);
3962 		return;
3963 	}
3964 
3965 	/*
3966 	 * prepare message to ack validity of request
3967 	 */
3968 	if (tep->te_nicon == 0)
3969 		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3970 	else
3971 		if (tep->te_nicon == 1)
3972 			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3973 		else
3974 			new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3975 
3976 	/*
3977 	 * Flushing queues according to TPI. Using the old state.
3978 	 */
3979 	if ((tep->te_nicon <= 1) &&
3980 	    ((save_state == TS_DATA_XFER) ||
3981 	    (save_state == TS_WIND_ORDREL) ||
3982 	    (save_state == TS_WREQ_ORDREL)))
3983 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3984 
3985 	/* send T_OK_ACK up  */
3986 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3987 
3988 	/*
3989 	 * now do disconnect business
3990 	 */
3991 	if (tep->te_nicon > 0) { /* listener */
3992 		if (peer_tep != NULL && !peer_tep->te_closing) {
3993 			/*
3994 			 * disconnect incoming connect request pending to tep
3995 			 */
3996 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
3997 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
3998 				    SL_TRACE|SL_ERROR,
3999 				    "tl_discon_req: reallocb failed"));
4000 				tep->te_state = new_state;
4001 				tl_merror(wq, respmp, ENOMEM);
4002 				return;
4003 			}
4004 			di = (struct T_discon_ind *)dimp->b_rptr;
4005 			di->SEQ_number = BADSEQNUM;
4006 			save_state = peer_tep->te_state;
4007 			peer_tep->te_state = TS_IDLE;
4008 
4009 			TL_REMOVE_PEER(peer_tep->te_oconp);
4010 			enableok(peer_tep->te_wq);
4011 			TL_QENABLE(peer_tep);
4012 		} else {
4013 			freemsg(respmp);
4014 			dimp = NULL;
4015 		}
4016 
4017 		/*
4018 		 * remove endpoint from incoming connection list
4019 		 * - remove disconnect client from list on server
4020 		 */
4021 		tl_freetip(tep, tip);
4022 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4023 		/*
4024 		 * disconnect an outgoing request pending from tep
4025 		 */
4026 
4027 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4028 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4029 			    SL_TRACE|SL_ERROR,
4030 			    "tl_discon_req: reallocb failed"));
4031 			tep->te_state = new_state;
4032 			tl_merror(wq, respmp, ENOMEM);
4033 			return;
4034 		}
4035 		di = (struct T_discon_ind *)dimp->b_rptr;
4036 		DB_TYPE(dimp) = M_PROTO;
4037 		di->PRIM_type  = T_DISCON_IND;
4038 		di->DISCON_reason = ECONNRESET;
4039 		di->SEQ_number = tep->te_seqno;
4040 
4041 		/*
4042 		 * If this is a socket the T_DISCON_IND is queued with
4043 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4044 		 * from the list of pending connections.
4045 		 * Note that when te_oconp is set the peer better have
4046 		 * a t_connind_t for the client.
4047 		 */
4048 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4049 			/*
4050 			 * No need to check that
4051 			 * ti_tep == NULL since the T_DISCON_IND
4052 			 * takes precedence over other queued
4053 			 * messages.
4054 			 */
4055 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4056 			peer_tep = NULL;
4057 			dimp = NULL;
4058 			/*
4059 			 * Can't clear te_oconp since tl_co_unconnect needs
4060 			 * it as a hint not to free the tep.
4061 			 * Keep the state unchanged since tl_conn_res inspects
4062 			 * it.
4063 			 */
4064 			new_state = tep->te_state;
4065 		} else {
4066 			/* Found - delete it */
4067 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4068 			if (tip != NULL) {
4069 				ASSERT(tep == tip->ti_tep);
4070 				save_state = peer_tep->te_state;
4071 				if (peer_tep->te_nicon == 1)
4072 					peer_tep->te_state =
4073 					    NEXTSTATE(TE_DISCON_IND2,
4074 					    peer_tep->te_state);
4075 				else
4076 					peer_tep->te_state =
4077 					    NEXTSTATE(TE_DISCON_IND3,
4078 					    peer_tep->te_state);
4079 				tl_freetip(peer_tep, tip);
4080 			}
4081 			ASSERT(tep->te_oconp != NULL);
4082 			TL_UNCONNECT(tep->te_oconp);
4083 		}
4084 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4085 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4086 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4087 			    SL_TRACE|SL_ERROR,
4088 			    "tl_discon_req: reallocb failed"));
4089 			tep->te_state = new_state;
4090 			tl_merror(wq, respmp, ENOMEM);
4091 			return;
4092 		}
4093 		di = (struct T_discon_ind *)dimp->b_rptr;
4094 		di->SEQ_number = BADSEQNUM;
4095 
4096 		save_state = peer_tep->te_state;
4097 		peer_tep->te_state = TS_IDLE;
4098 	} else {
4099 		/* Not connected */
4100 		tep->te_state = new_state;
4101 		freemsg(respmp);
4102 		return;
4103 	}
4104 
4105 	/* Commit state changes */
4106 	tep->te_state = new_state;
4107 
4108 	if (peer_tep == NULL) {
4109 		ASSERT(dimp == NULL);
4110 		goto done;
4111 	}
4112 	/*
4113 	 * Flush queues on peer before sending up
4114 	 * T_DISCON_IND according to TPI
4115 	 */
4116 
4117 	if ((save_state == TS_DATA_XFER) ||
4118 	    (save_state == TS_WIND_ORDREL) ||
4119 	    (save_state == TS_WREQ_ORDREL))
4120 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4121 
4122 	DB_TYPE(dimp) = M_PROTO;
4123 	di->PRIM_type  = T_DISCON_IND;
4124 	di->DISCON_reason = ECONNRESET;
4125 
4126 	/*
4127 	 * data blocks already linked into dimp by reallocb()
4128 	 */
4129 	/*
4130 	 * send indication message to peer user module
4131 	 */
4132 	ASSERT(dimp != NULL);
4133 	putnext(peer_tep->te_rq, dimp);
4134 done:
4135 	if (tep->te_conp) {	/* disconnect pointers if connected */
4136 		ASSERT(! peer_tep->te_closing);
4137 
4138 		/*
4139 		 * Messages may be queued on peer's write queue
4140 		 * waiting to be processed by its write service
4141 		 * procedure. Before the pointer to the peer transport
4142 		 * structure is set to NULL, qenable the peer's write
4143 		 * queue so that the queued up messages are processed.
4144 		 */
4145 		if ((save_state == TS_DATA_XFER) ||
4146 		    (save_state == TS_WIND_ORDREL) ||
4147 		    (save_state == TS_WREQ_ORDREL))
4148 			TL_QENABLE(peer_tep);
4149 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4150 		TL_UNCONNECT(peer_tep->te_conp);
4151 		if (! IS_SOCKET(tep)) {
4152 			/*
4153 			 * unlink the streams
4154 			 */
4155 			tep->te_wq->q_next = NULL;
4156 			peer_tep->te_wq->q_next = NULL;
4157 		}
4158 		TL_UNCONNECT(tep->te_conp);
4159 	}
4160 }
4161 
4162 
4163 static void
4164 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4165 {
4166 	queue_t			*wq;
4167 	size_t			ack_sz;
4168 	mblk_t			*ackmp;
4169 	struct T_addr_ack	*taa;
4170 
4171 	if (tep->te_closing) {
4172 		freemsg(mp);
4173 		return;
4174 	}
4175 
4176 	wq = tep->te_wq;
4177 
4178 	/*
4179 	 * Note: T_ADDR_REQ message has only PRIM_type field
4180 	 * so it is already validated earlier.
4181 	 */
4182 
4183 	if (IS_CLTS(tep) ||
4184 	    (tep->te_state > TS_WREQ_ORDREL) ||
4185 	    (tep->te_state < TS_DATA_XFER)) {
4186 		/*
4187 		 * Either connectionless or connection oriented but not
4188 		 * in connected data transfer state or half-closed states.
4189 		 */
4190 		ack_sz = sizeof (struct T_addr_ack);
4191 		if (tep->te_state >= TS_IDLE)
4192 			/* is bound */
4193 			ack_sz += tep->te_alen;
4194 		ackmp = reallocb(mp, ack_sz, 0);
4195 		if (ackmp == NULL) {
4196 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4197 			    SL_TRACE|SL_ERROR,
4198 			    "tl_addr_req: reallocb failed"));
4199 			tl_memrecover(wq, mp, ack_sz);
4200 			return;
4201 		}
4202 
4203 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4204 
4205 		bzero(taa, sizeof (struct T_addr_ack));
4206 
4207 		taa->PRIM_type = T_ADDR_ACK;
4208 		ackmp->b_datap->db_type = M_PCPROTO;
4209 		ackmp->b_wptr = (uchar_t *)&taa[1];
4210 
4211 		if (tep->te_state >= TS_IDLE) {
4212 			/* endpoint is bound */
4213 			taa->LOCADDR_length = tep->te_alen;
4214 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4215 
4216 			bcopy(tep->te_abuf, ackmp->b_wptr,
4217 			    tep->te_alen);
4218 			ackmp->b_wptr += tep->te_alen;
4219 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4220 		}
4221 
4222 		(void) qreply(wq, ackmp);
4223 	} else {
4224 		ASSERT(tep->te_state == TS_DATA_XFER ||
4225 		    tep->te_state == TS_WIND_ORDREL ||
4226 		    tep->te_state == TS_WREQ_ORDREL);
4227 		/* connection oriented in data transfer */
4228 		tl_connected_cots_addr_req(mp, tep);
4229 	}
4230 }
4231 
4232 
4233 static void
4234 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4235 {
4236 	tl_endpt_t		*peer_tep;
4237 	size_t			ack_sz;
4238 	mblk_t			*ackmp;
4239 	struct T_addr_ack	*taa;
4240 	uchar_t			*addr_startp;
4241 
4242 	if (tep->te_closing) {
4243 		freemsg(mp);
4244 		return;
4245 	}
4246 
4247 	ASSERT(tep->te_state >= TS_IDLE);
4248 
4249 	ack_sz = sizeof (struct T_addr_ack);
4250 	ack_sz += T_ALIGN(tep->te_alen);
4251 	peer_tep = tep->te_conp;
4252 	ack_sz += peer_tep->te_alen;
4253 
4254 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4255 	if (ackmp == NULL) {
4256 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4257 		    "tl_connected_cots_addr_req: reallocb failed"));
4258 		tl_memrecover(tep->te_wq, mp, ack_sz);
4259 		return;
4260 	}
4261 
4262 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4263 
4264 	/* endpoint is bound */
4265 	taa->LOCADDR_length = tep->te_alen;
4266 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4267 
4268 	addr_startp = (uchar_t *)&taa[1];
4269 
4270 	bcopy(tep->te_abuf, addr_startp,
4271 	    tep->te_alen);
4272 
4273 	taa->REMADDR_length = peer_tep->te_alen;
4274 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4275 	    taa->LOCADDR_length);
4276 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4277 	bcopy(peer_tep->te_abuf, addr_startp,
4278 	    peer_tep->te_alen);
4279 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4280 	    taa->REMADDR_offset + peer_tep->te_alen;
4281 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4282 
4283 	putnext(tep->te_rq, ackmp);
4284 }
4285 
4286 static void
4287 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4288 {
4289 	if (IS_CLTS(tep)) {
4290 		*ia = tl_clts_info_ack;
4291 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4292 	} else {
4293 		*ia = tl_cots_info_ack;
4294 		if (IS_COTSORD(tep))
4295 			ia->SERV_type = T_COTS_ORD;
4296 	}
4297 	ia->TIDU_size = tl_tidusz;
4298 	ia->CURRENT_state = tep->te_state;
4299 }
4300 
4301 /*
4302  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4303  * tl_wput.
4304  */
4305 static void
4306 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4307 {
4308 	mblk_t			*ackmp;
4309 	t_uscalar_t		cap_bits1;
4310 	struct T_capability_ack	*tcap;
4311 
4312 	if (tep->te_closing) {
4313 		freemsg(mp);
4314 		return;
4315 	}
4316 
4317 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4318 
4319 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4320 	    M_PCPROTO, T_CAPABILITY_ACK);
4321 	if (ackmp == NULL) {
4322 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4323 		    "tl_capability_req: reallocb failed"));
4324 		tl_memrecover(tep->te_wq, mp,
4325 		    sizeof (struct T_capability_ack));
4326 		return;
4327 	}
4328 
4329 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4330 	tcap->CAP_bits1 = 0;
4331 
4332 	if (cap_bits1 & TC1_INFO) {
4333 		tl_copy_info(&tcap->INFO_ack, tep);
4334 		tcap->CAP_bits1 |= TC1_INFO;
4335 	}
4336 
4337 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4338 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4339 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4340 	}
4341 
4342 	putnext(tep->te_rq, ackmp);
4343 }
4344 
4345 static void
4346 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4347 {
4348 	if (! tep->te_closing)
4349 		tl_info_req(mp, tep);
4350 	else
4351 		freemsg(mp);
4352 
4353 	tl_serializer_exit(tep);
4354 	tl_refrele(tep);
4355 }
4356 
4357 static void
4358 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4359 {
4360 	mblk_t *ackmp;
4361 
4362 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4363 	    M_PCPROTO, T_INFO_ACK);
4364 	if (ackmp == NULL) {
4365 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4366 		    "tl_info_req: reallocb failed"));
4367 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4368 		return;
4369 	}
4370 
4371 	/*
4372 	 * fill in T_INFO_ACK contents
4373 	 */
4374 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4375 
4376 	/*
4377 	 * send ack message
4378 	 */
4379 	putnext(tep->te_rq, ackmp);
4380 }
4381 
4382 /*
4383  * Handle M_DATA, T_data_req and T_optdata_req.
4384  * If this is a socket pass through T_optdata_req options unmodified.
4385  */
4386 static void
4387 tl_data(mblk_t *mp, tl_endpt_t *tep)
4388 {
4389 	queue_t			*wq = tep->te_wq;
4390 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4391 	ssize_t			msz = MBLKL(mp);
4392 	tl_endpt_t		*peer_tep;
4393 	queue_t			*peer_rq;
4394 	boolean_t		closing = tep->te_closing;
4395 
4396 	if (IS_CLTS(tep)) {
4397 		(void) (STRLOG(TL_ID, tep->te_minor, 2,
4398 		    SL_TRACE|SL_ERROR,
4399 		    "tl_wput:clts:unattached M_DATA"));
4400 		if (!closing) {
4401 			tl_merror(wq, mp, EPROTO);
4402 		} else {
4403 			freemsg(mp);
4404 		}
4405 		return;
4406 	}
4407 
4408 	/*
4409 	 * If the endpoint is closing it should still forward any data to the
4410 	 * peer (if it has one). If it is not allowed to forward it can just
4411 	 * free the message.
4412 	 */
4413 	if (closing &&
4414 	    (tep->te_state != TS_DATA_XFER) &&
4415 	    (tep->te_state != TS_WREQ_ORDREL)) {
4416 		freemsg(mp);
4417 		return;
4418 	}
4419 
4420 	if (DB_TYPE(mp) == M_PROTO) {
4421 		if (prim->type == T_DATA_REQ &&
4422 		    msz < sizeof (struct T_data_req)) {
4423 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4424 				SL_TRACE|SL_ERROR,
4425 				"tl_data:T_DATA_REQ:invalid message"));
4426 			if (!closing) {
4427 				tl_merror(wq, mp, EPROTO);
4428 			} else {
4429 				freemsg(mp);
4430 			}
4431 			return;
4432 		} else if (prim->type == T_OPTDATA_REQ &&
4433 		    (msz < sizeof (struct T_optdata_req) ||
4434 		    !IS_SOCKET(tep))) {
4435 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4436 			    SL_TRACE|SL_ERROR,
4437 			    "tl_data:T_OPTDATA_REQ:invalid message"));
4438 			if (!closing) {
4439 				tl_merror(wq, mp, EPROTO);
4440 			} else {
4441 				freemsg(mp);
4442 			}
4443 			return;
4444 		}
4445 	}
4446 
4447 	/*
4448 	 * connection oriented provider
4449 	 */
4450 	switch (tep->te_state) {
4451 	case TS_IDLE:
4452 		/*
4453 		 * Other end not here - do nothing.
4454 		 */
4455 		freemsg(mp);
4456 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4457 		    "tl_data:cots with endpoint idle"));
4458 		return;
4459 
4460 	case TS_DATA_XFER:
4461 		/* valid states */
4462 		if (tep->te_conp != NULL)
4463 			break;
4464 
4465 		if (tep->te_oconp == NULL) {
4466 			if (!closing) {
4467 				tl_merror(wq, mp, EPROTO);
4468 			} else {
4469 				freemsg(mp);
4470 			}
4471 			return;
4472 		}
4473 		/*
4474 		 * For a socket the T_CONN_CON is sent early thus
4475 		 * the peer might not yet have accepted the connection.
4476 		 * If we are closing queue the packet with the T_CONN_IND.
4477 		 * Otherwise defer processing the packet until the peer
4478 		 * accepts the connection.
4479 		 * Note that the queue is noenabled when we go into this
4480 		 * state.
4481 		 */
4482 		if (!closing) {
4483 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4484 			    SL_TRACE|SL_ERROR,
4485 			    "tl_data: ocon"));
4486 			TL_PUTBQ(tep, mp);
4487 			return;
4488 		}
4489 		if (DB_TYPE(mp) == M_PROTO) {
4490 			if (msz < sizeof (t_scalar_t)) {
4491 				freemsg(mp);
4492 				return;
4493 			}
4494 			/* reuse message block - just change REQ to IND */
4495 			if (prim->type == T_DATA_REQ)
4496 				prim->type = T_DATA_IND;
4497 			else
4498 				prim->type = T_OPTDATA_IND;
4499 		}
4500 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4501 		return;
4502 
4503 	case TS_WREQ_ORDREL:
4504 		if (tep->te_conp == NULL) {
4505 			/*
4506 			 * Other end closed - generate discon_ind
4507 			 * with reason 0 to cause an EPIPE but no
4508 			 * read side error on AF_UNIX sockets.
4509 			 */
4510 			freemsg(mp);
4511 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4512 			    SL_TRACE|SL_ERROR,
4513 			    "tl_data: WREQ_ORDREL and no peer"));
4514 			tl_discon_ind(tep, 0);
4515 			return;
4516 		}
4517 		break;
4518 
4519 	default:
4520 		/* invalid state for event TE_DATA_REQ */
4521 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4522 		    "tl_data:cots:out of state"));
4523 		tl_merror(wq, mp, EPROTO);
4524 		return;
4525 	}
4526 	/*
4527 	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4528 	 * (State stays same on this event)
4529 	 */
4530 
4531 	/*
4532 	 * get connected endpoint
4533 	 */
4534 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4535 		freemsg(mp);
4536 		/* Peer closed */
4537 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4538 		    "tl_data: peer gone"));
4539 		return;
4540 	}
4541 
4542 	ASSERT(tep->te_serializer == peer_tep->te_serializer);
4543 	peer_rq = peer_tep->te_rq;
4544 
4545 	/*
4546 	 * Put it back if flow controlled
4547 	 * Note: Messages already on queue when we are closing is bounded
4548 	 * so we can ignore flow control.
4549 	 */
4550 	if (!canputnext(peer_rq) && !closing) {
4551 		TL_PUTBQ(tep, mp);
4552 		return;
4553 	}
4554 
4555 	/*
4556 	 * validate peer state
4557 	 */
4558 	switch (peer_tep->te_state) {
4559 	case TS_DATA_XFER:
4560 	case TS_WIND_ORDREL:
4561 		/* valid states */
4562 		break;
4563 	default:
4564 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4565 		    "tl_data:rx side:invalid state"));
4566 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4567 		return;
4568 	}
4569 	if (DB_TYPE(mp) == M_PROTO) {
4570 		/* reuse message block - just change REQ to IND */
4571 		if (prim->type == T_DATA_REQ)
4572 			prim->type = T_DATA_IND;
4573 		else
4574 			prim->type = T_OPTDATA_IND;
4575 	}
4576 	/*
4577 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4578 	 * (peer state stays same on this event)
4579 	 */
4580 	/*
4581 	 * send data to connected peer
4582 	 */
4583 	putnext(peer_rq, mp);
4584 }
4585 
4586 
4587 
4588 static void
4589 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4590 {
4591 	queue_t			*wq = tep->te_wq;
4592 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4593 	ssize_t			msz = MBLKL(mp);
4594 	tl_endpt_t		*peer_tep;
4595 	queue_t			*peer_rq;
4596 	boolean_t		closing = tep->te_closing;
4597 
4598 	if (msz < sizeof (struct T_exdata_req)) {
4599 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4600 		    "tl_exdata:invalid message"));
4601 		if (!closing) {
4602 			tl_merror(wq, mp, EPROTO);
4603 		} else {
4604 			freemsg(mp);
4605 		}
4606 		return;
4607 	}
4608 
4609 	/*
4610 	 * If the endpoint is closing it should still forward any data to the
4611 	 * peer (if it has one). If it is not allowed to forward it can just
4612 	 * free the message.
4613 	 */
4614 	if (closing &&
4615 	    (tep->te_state != TS_DATA_XFER) &&
4616 	    (tep->te_state != TS_WREQ_ORDREL)) {
4617 		freemsg(mp);
4618 		return;
4619 	}
4620 
4621 	/*
4622 	 * validate state
4623 	 */
4624 	switch (tep->te_state) {
4625 	case TS_IDLE:
4626 		/*
4627 		 * Other end not here - do nothing.
4628 		 */
4629 		freemsg(mp);
4630 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4631 		    "tl_exdata:cots with endpoint idle"));
4632 		return;
4633 
4634 	case TS_DATA_XFER:
4635 		/* valid states */
4636 		if (tep->te_conp != NULL)
4637 			break;
4638 
4639 		if (tep->te_oconp == NULL) {
4640 			if (!closing) {
4641 				tl_merror(wq, mp, EPROTO);
4642 			} else {
4643 				freemsg(mp);
4644 			}
4645 			return;
4646 		}
4647 		/*
4648 		 * For a socket the T_CONN_CON is sent early thus
4649 		 * the peer might not yet have accepted the connection.
4650 		 * If we are closing queue the packet with the T_CONN_IND.
4651 		 * Otherwise defer processing the packet until the peer
4652 		 * accepts the connection.
4653 		 * Note that the queue is noenabled when we go into this
4654 		 * state.
4655 		 */
4656 		if (!closing) {
4657 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4658 			    SL_TRACE|SL_ERROR,
4659 			    "tl_exdata: ocon"));
4660 			TL_PUTBQ(tep, mp);
4661 			return;
4662 		}
4663 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4664 		    "tl_exdata: closing socket ocon"));
4665 		prim->type = T_EXDATA_IND;
4666 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4667 		return;
4668 
4669 	case TS_WREQ_ORDREL:
4670 		if (tep->te_conp == NULL) {
4671 			/*
4672 			 * Other end closed - generate discon_ind
4673 			 * with reason 0 to cause an EPIPE but no
4674 			 * read side error on AF_UNIX sockets.
4675 			 */
4676 			freemsg(mp);
4677 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4678 			    SL_TRACE|SL_ERROR,
4679 			    "tl_exdata: WREQ_ORDREL and no peer"));
4680 			tl_discon_ind(tep, 0);
4681 			return;
4682 		}
4683 		break;
4684 
4685 	default:
4686 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4687 		    SL_TRACE|SL_ERROR,
4688 		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4689 		    tep->te_state));
4690 		tl_merror(wq, mp, EPROTO);
4691 		return;
4692 	}
4693 	/*
4694 	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4695 	 * (state stays same on this event)
4696 	 */
4697 
4698 	/*
4699 	 * get connected endpoint
4700 	 */
4701 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4702 		freemsg(mp);
4703 		/* Peer closed */
4704 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4705 		    "tl_exdata: peer gone"));
4706 		return;
4707 	}
4708 
4709 	peer_rq = peer_tep->te_rq;
4710 
4711 	/*
4712 	 * Put it back if flow controlled
4713 	 * Note: Messages already on queue when we are closing is bounded
4714 	 * so we can ignore flow control.
4715 	 */
4716 	if (!canputnext(peer_rq) && !closing) {
4717 		TL_PUTBQ(tep, mp);
4718 		return;
4719 	}
4720 
4721 	/*
4722 	 * validate state on peer
4723 	 */
4724 	switch (peer_tep->te_state) {
4725 	case TS_DATA_XFER:
4726 	case TS_WIND_ORDREL:
4727 		/* valid states */
4728 		break;
4729 	default:
4730 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4731 		    "tl_exdata:rx side:invalid state"));
4732 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4733 		return;
4734 	}
4735 	/*
4736 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4737 	 * (peer state stays same on this event)
4738 	 */
4739 	/*
4740 	 * reuse message block
4741 	 */
4742 	prim->type = T_EXDATA_IND;
4743 
4744 	/*
4745 	 * send data to connected peer
4746 	 */
4747 	putnext(peer_rq, mp);
4748 }
4749 
4750 
4751 
4752 static void
4753 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4754 {
4755 	queue_t			*wq =  tep->te_wq;
4756 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4757 	ssize_t			msz = MBLKL(mp);
4758 	tl_endpt_t		*peer_tep;
4759 	queue_t			*peer_rq;
4760 	boolean_t		closing = tep->te_closing;
4761 
4762 	if (msz < sizeof (struct T_ordrel_req)) {
4763 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4764 		    "tl_ordrel:invalid message"));
4765 		if (!closing) {
4766 			tl_merror(wq, mp, EPROTO);
4767 		} else {
4768 			freemsg(mp);
4769 		}
4770 		return;
4771 	}
4772 
4773 	/*
4774 	 * validate state
4775 	 */
4776 	switch (tep->te_state) {
4777 	case TS_DATA_XFER:
4778 	case TS_WREQ_ORDREL:
4779 		/* valid states */
4780 		if (tep->te_conp != NULL)
4781 			break;
4782 
4783 		if (tep->te_oconp == NULL)
4784 			break;
4785 
4786 		/*
4787 		 * For a socket the T_CONN_CON is sent early thus
4788 		 * the peer might not yet have accepted the connection.
4789 		 * If we are closing queue the packet with the T_CONN_IND.
4790 		 * Otherwise defer processing the packet until the peer
4791 		 * accepts the connection.
4792 		 * Note that the queue is noenabled when we go into this
4793 		 * state.
4794 		 */
4795 		if (!closing) {
4796 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4797 			    SL_TRACE|SL_ERROR,
4798 			    "tl_ordlrel: ocon"));
4799 			TL_PUTBQ(tep, mp);
4800 			return;
4801 		}
4802 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4803 		    "tl_ordlrel: closing socket ocon"));
4804 		prim->type = T_ORDREL_IND;
4805 		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4806 		return;
4807 
4808 	default:
4809 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4810 		    SL_TRACE|SL_ERROR,
4811 		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4812 		    tep->te_state));
4813 		if (!closing) {
4814 			tl_merror(wq, mp, EPROTO);
4815 		} else {
4816 			freemsg(mp);
4817 		}
4818 		return;
4819 	}
4820 	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4821 
4822 	/*
4823 	 * get connected endpoint
4824 	 */
4825 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4826 		/* Peer closed */
4827 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4828 		    "tl_ordrel: peer gone"));
4829 		freemsg(mp);
4830 		return;
4831 	}
4832 
4833 	peer_rq = peer_tep->te_rq;
4834 
4835 	/*
4836 	 * Put it back if flow controlled except when we are closing.
4837 	 * Note: Messages already on queue when we are closing is bounded
4838 	 * so we can ignore flow control.
4839 	 */
4840 	if (! canputnext(peer_rq) && !closing) {
4841 		TL_PUTBQ(tep, mp);
4842 		return;
4843 	}
4844 
4845 	/*
4846 	 * validate state on peer
4847 	 */
4848 	switch (peer_tep->te_state) {
4849 	case TS_DATA_XFER:
4850 	case TS_WIND_ORDREL:
4851 		/* valid states */
4852 		break;
4853 	default:
4854 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4855 		    "tl_ordrel:rx side:invalid state"));
4856 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4857 		return;
4858 	}
4859 	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4860 
4861 	/*
4862 	 * reuse message block
4863 	 */
4864 	prim->type = T_ORDREL_IND;
4865 	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4866 	    "tl_ordrel: send ordrel_ind"));
4867 
4868 	/*
4869 	 * send data to connected peer
4870 	 */
4871 	putnext(peer_rq, mp);
4872 }
4873 
4874 
4875 /*
4876  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4877  */
4878 static void
4879 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4880 {
4881 	size_t			err_sz;
4882 	tl_endpt_t		*tep;
4883 	struct T_unitdata_req	*udreq;
4884 	mblk_t			*err_mp;
4885 	t_scalar_t		alen;
4886 	t_scalar_t		olen;
4887 	struct T_uderror_ind	*uderr;
4888 	uchar_t			*addr_startp;
4889 
4890 	err_sz = sizeof (struct T_uderror_ind);
4891 	tep = (tl_endpt_t *)wq->q_ptr;
4892 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4893 	alen = udreq->DEST_length;
4894 	olen = udreq->OPT_length;
4895 
4896 	if (alen > 0)
4897 		err_sz = T_ALIGN(err_sz + alen);
4898 	if (olen > 0)
4899 		err_sz += olen;
4900 
4901 	err_mp = allocb(err_sz, BPRI_MED);
4902 	if (! err_mp) {
4903 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4904 		    "tl_uderr:allocb failure"));
4905 		/*
4906 		 * Note: no rollback of state needed as it does
4907 		 * not change in connectionless transport
4908 		 */
4909 		tl_memrecover(wq, mp, err_sz);
4910 		return;
4911 	}
4912 
4913 	DB_TYPE(err_mp) = M_PROTO;
4914 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
4915 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4916 	uderr->PRIM_type = T_UDERROR_IND;
4917 	uderr->ERROR_type = err;
4918 	uderr->DEST_length = alen;
4919 	uderr->OPT_length = olen;
4920 	if (alen <= 0) {
4921 		uderr->DEST_offset = 0;
4922 	} else {
4923 		uderr->DEST_offset =
4924 		    (t_scalar_t)sizeof (struct T_uderror_ind);
4925 		addr_startp  = mp->b_rptr + udreq->DEST_offset;
4926 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4927 		    (size_t)alen);
4928 	}
4929 	if (olen <= 0) {
4930 		uderr->OPT_offset = 0;
4931 	} else {
4932 		uderr->OPT_offset =
4933 		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4934 		    uderr->DEST_length);
4935 		addr_startp  = mp->b_rptr + udreq->OPT_offset;
4936 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4937 		    (size_t)olen);
4938 	}
4939 	freemsg(mp);
4940 
4941 	/*
4942 	 * send indication message
4943 	 */
4944 	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4945 
4946 	qreply(wq, err_mp);
4947 }
4948 
4949 static void
4950 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4951 {
4952 	queue_t *wq = tep->te_wq;
4953 
4954 	if (!tep->te_closing && (wq->q_first != NULL)) {
4955 		TL_PUTQ(tep, mp);
4956 	} else if (tep->te_rq != NULL)
4957 		tl_unitdata(mp, tep);
4958 	else
4959 		freemsg(mp);
4960 
4961 	tl_serializer_exit(tep);
4962 	tl_refrele(tep);
4963 }
4964 
4965 /*
4966  * Handle T_unitdata_req.
4967  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4968  * If this is a socket pass through options unmodified.
4969  */
4970 static void
4971 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4972 {
4973 	queue_t			*wq = tep->te_wq;
4974 	soux_addr_t		ux_addr;
4975 	tl_addr_t		destaddr;
4976 	uchar_t			*addr_startp;
4977 	tl_endpt_t		*peer_tep;
4978 	struct T_unitdata_ind	*udind;
4979 	struct T_unitdata_req	*udreq;
4980 	ssize_t			msz, ui_sz;
4981 	t_scalar_t		alen, aoff, olen, ooff;
4982 	t_scalar_t		oldolen = 0;
4983 
4984 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4985 	msz = MBLKL(mp);
4986 
4987 	/*
4988 	 * validate the state
4989 	 */
4990 	if (tep->te_state != TS_IDLE) {
4991 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4992 		    SL_TRACE|SL_ERROR,
4993 		    "tl_wput:T_CONN_REQ:out of state"));
4994 		tl_merror(wq, mp, EPROTO);
4995 		return;
4996 	}
4997 	/*
4998 	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
4999 	 * (state does not change on this event)
5000 	 */
5001 
5002 	/*
5003 	 * validate the message
5004 	 * Note: dereference fields in struct inside message only
5005 	 * after validating the message length.
5006 	 */
5007 	if (msz < sizeof (struct T_unitdata_req)) {
5008 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5009 		    "tl_unitdata:invalid message length"));
5010 		tl_merror(wq, mp, EINVAL);
5011 		return;
5012 	}
5013 	alen = udreq->DEST_length;
5014 	aoff = udreq->DEST_offset;
5015 	oldolen = olen = udreq->OPT_length;
5016 	ooff = udreq->OPT_offset;
5017 	if (olen == 0)
5018 		ooff = 0;
5019 
5020 	if (IS_SOCKET(tep)) {
5021 		if ((alen != TL_SOUX_ADDRLEN) ||
5022 		    (aoff < 0) ||
5023 		    (aoff + alen > msz) ||
5024 		    (olen < 0) || (ooff < 0) ||
5025 		    ((olen > 0) && ((ooff + olen) > msz))) {
5026 			(void) (STRLOG(TL_ID, tep->te_minor,
5027 			    1, SL_TRACE|SL_ERROR,
5028 			    "tl_unitdata_req: invalid socket addr "
5029 			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5030 			    (int)msz, alen, aoff, olen, ooff));
5031 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5032 			return;
5033 		}
5034 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5035 
5036 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5037 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5038 			(void) (STRLOG(TL_ID, tep->te_minor,
5039 			    1, SL_TRACE|SL_ERROR,
5040 			    "tl_conn_req: invalid socket magic"));
5041 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5042 			return;
5043 		}
5044 	} else {
5045 		if ((alen < 0) ||
5046 		    (aoff < 0) ||
5047 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5048 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5049 		    ((aoff + alen) < 0) ||
5050 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5051 		    (olen < 0) ||
5052 		    (ooff < 0) ||
5053 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5054 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5055 				    SL_TRACE|SL_ERROR,
5056 				    "tl_unitdata:invalid unit data message"));
5057 			tl_merror(wq, mp, EINVAL);
5058 			return;
5059 		}
5060 	}
5061 
5062 	/* Options not supported unless it's a socket */
5063 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5064 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5065 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5066 		tl_uderr(wq, mp, EPROTO);
5067 		return;
5068 	}
5069 #ifdef DEBUG
5070 	/*
5071 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5072 	 * if (! assertion)
5073 	 *	log warning;
5074 	 */
5075 	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5076 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5077 		    "tl_unitdata:addr overlaps TPI message"));
5078 	}
5079 #endif
5080 	/*
5081 	 * get destination endpoint
5082 	 */
5083 	destaddr.ta_alen = alen;
5084 	destaddr.ta_abuf = mp->b_rptr + aoff;
5085 	destaddr.ta_zoneid = tep->te_zoneid;
5086 
5087 	/*
5088 	 * Check whether the destination is the same that was used previously
5089 	 * and the destination endpoint is in the right state. If something is
5090 	 * wrong, find destination again and cache it.
5091 	 */
5092 	peer_tep = tep->te_lastep;
5093 
5094 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5095 	    (peer_tep->te_state != TS_IDLE) ||
5096 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5097 		/*
5098 		 * Not the same as cached destination , need to find the right
5099 		 * destination.
5100 		 */
5101 		peer_tep = (IS_SOCKET(tep) ?
5102 		    tl_sock_find_peer(tep, &ux_addr) :
5103 		    tl_find_peer(tep, &destaddr));
5104 
5105 		if (peer_tep == NULL) {
5106 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5107 			    SL_TRACE|SL_ERROR,
5108 			    "tl_unitdata:no one at destination address"));
5109 			tl_uderr(wq, mp, ECONNRESET);
5110 			return;
5111 		}
5112 
5113 		/*
5114 		 * Cache the new peer.
5115 		 */
5116 		if (tep->te_lastep != NULL)
5117 			tl_refrele(tep->te_lastep);
5118 
5119 		tep->te_lastep = peer_tep;
5120 	}
5121 
5122 	if (peer_tep->te_state != TS_IDLE) {
5123 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5124 		    "tl_unitdata:provider in invalid state"));
5125 		tl_uderr(wq, mp, EPROTO);
5126 		return;
5127 	}
5128 
5129 	ASSERT(peer_tep->te_rq != NULL);
5130 
5131 	/*
5132 	 * Put it back if flow controlled except when we are closing.
5133 	 * Note: Messages already on queue when we are closing is bounded
5134 	 * so we can ignore flow control.
5135 	 */
5136 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5137 		/* record what we are flow controlled on */
5138 		if (tep->te_flowq != NULL) {
5139 			list_remove(&tep->te_flowq->te_flowlist, tep);
5140 		}
5141 		list_insert_head(&peer_tep->te_flowlist, tep);
5142 		tep->te_flowq = peer_tep;
5143 		TL_PUTBQ(tep, mp);
5144 		return;
5145 	}
5146 	/*
5147 	 * prepare indication message
5148 	 */
5149 
5150 	/*
5151 	 * calculate length of message
5152 	 */
5153 	if (peer_tep->te_flag & TL_SETCRED) {
5154 		ASSERT(olen == 0);
5155 		olen = (t_scalar_t)sizeof (struct opthdr) +
5156 		    OPTLEN(sizeof (tl_credopt_t));
5157 					/* 1 option only */
5158 	} else if (peer_tep->te_flag & TL_SETUCRED) {
5159 		ASSERT(olen == 0);
5160 		olen = (t_scalar_t)sizeof (struct opthdr) + OPTLEN(ucredsize);
5161 					/* 1 option only */
5162 	} else if (peer_tep->te_flag & TL_SOCKUCRED) {
5163 		/* Possibly more than one option */
5164 		olen += (t_scalar_t)sizeof (struct T_opthdr) +
5165 		    OPTLEN(ucredsize);
5166 	}
5167 
5168 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5169 	    olen;
5170 	/*
5171 	 * If the unitdata_ind fits and we are not adding options
5172 	 * reuse the udreq mblk.
5173 	 */
5174 	if (msz >= ui_sz && alen >= tep->te_alen &&
5175 	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5176 		/*
5177 		 * Reuse the original mblk. Leave options in place.
5178 		 */
5179 		udind =  (struct T_unitdata_ind *)mp->b_rptr;
5180 		udind->PRIM_type = T_UNITDATA_IND;
5181 		udind->SRC_length = tep->te_alen;
5182 		addr_startp = mp->b_rptr + udind->SRC_offset;
5183 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5184 	} else {
5185 		/* Allocate a new T_unidata_ind message */
5186 		mblk_t *ui_mp;
5187 
5188 		ui_mp = allocb(ui_sz, BPRI_MED);
5189 		if (! ui_mp) {
5190 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5191 			    "tl_unitdata:allocb failure:message queued"));
5192 			tl_memrecover(wq, mp, ui_sz);
5193 			return;
5194 		}
5195 
5196 		/*
5197 		 * fill in T_UNITDATA_IND contents
5198 		 */
5199 		DB_TYPE(ui_mp) = M_PROTO;
5200 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5201 		udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5202 		udind->PRIM_type = T_UNITDATA_IND;
5203 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5204 		udind->SRC_length = tep->te_alen;
5205 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5206 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5207 		udind->OPT_offset =
5208 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5209 		udind->OPT_length = olen;
5210 		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5211 			if (oldolen != 0) {
5212 				bcopy((void *)((uintptr_t)udreq + ooff),
5213 				    (void *)((uintptr_t)udind +
5214 				    udind->OPT_offset),
5215 				    oldolen);
5216 			}
5217 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5218 			    oldolen,
5219 			    DB_CREDDEF(mp, tep->te_credp), TLPID(mp, tep),
5220 			    peer_tep->te_flag, peer_tep->te_credp);
5221 		} else {
5222 			bcopy((void *)((uintptr_t)udreq + ooff),
5223 			    (void *)((uintptr_t)udind + udind->OPT_offset),
5224 			    olen);
5225 		}
5226 
5227 		/*
5228 		 * relink data blocks from mp to ui_mp
5229 		 */
5230 		ui_mp->b_cont = mp->b_cont;
5231 		freeb(mp);
5232 		mp = ui_mp;
5233 	}
5234 	/*
5235 	 * send indication message
5236 	 */
5237 	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5238 	putnext(peer_tep->te_rq, mp);
5239 }
5240 
5241 
5242 
5243 /*
5244  * Check if a given addr is in use.
5245  * Endpoint ptr returned or NULL if not found.
5246  * The name space is separate for each mode. This implies that
5247  * sockets get their own name space.
5248  */
5249 static tl_endpt_t *
5250 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5251 {
5252 	tl_endpt_t *peer_tep = NULL;
5253 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5254 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5255 
5256 	ASSERT(! IS_SOCKET(tep));
5257 
5258 	ASSERT(ap != NULL && ap->ta_alen > 0);
5259 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5260 	ASSERT(ap->ta_abuf != NULL);
5261 	ASSERT(EQUIV(rc == 0, peer_tep != NULL));
5262 	ASSERT(IMPLY(rc == 0,
5263 	    (tep->te_zoneid == peer_tep->te_zoneid) &&
5264 	    (tep->te_transport == peer_tep->te_transport)));
5265 
5266 	if ((rc == 0) && (peer_tep->te_closing)) {
5267 		tl_refrele(peer_tep);
5268 		peer_tep = NULL;
5269 	}
5270 
5271 	return (peer_tep);
5272 }
5273 
5274 /*
5275  * Find peer for a socket based on unix domain address.
5276  * For implicit addresses our peer can be found by minor number in ai hash. For
5277  * explici binds we look vnode address at addr_hash.
5278  */
5279 static tl_endpt_t *
5280 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5281 {
5282 	tl_endpt_t *peer_tep = NULL;
5283 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5284 	    tep->te_aihash : tep->te_addrhash;
5285 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5286 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5287 
5288 	ASSERT(IS_SOCKET(tep));
5289 	ASSERT(EQUIV(rc == 0, peer_tep != NULL));
5290 	ASSERT(IMPLY(rc == 0,
5291 	    (tep->te_zoneid == peer_tep->te_zoneid) &&
5292 	    (tep->te_transport == peer_tep->te_transport)));
5293 	/*
5294 	 * Don't attempt to use closing peer.
5295 	 */
5296 	if ((peer_tep != NULL) &&
5297 	    (peer_tep->te_closing ||
5298 	    (peer_tep->te_zoneid != tep->te_zoneid))) {
5299 		tl_refrele(peer_tep);
5300 		peer_tep = NULL;
5301 	}
5302 
5303 	return (peer_tep);
5304 }
5305 
5306 /*
5307  * Generate a free addr and return it in struct pointed by ap
5308  * but allocating space for address buffer.
5309  * The generated address will be at least 4 bytes long and, if req->ta_alen
5310  * exceeds 4 bytes, be req->ta_alen bytes long.
5311  *
5312  * If address is found it will be inserted in the hash.
5313  *
5314  * If req->ta_alen is larger than the default alen (4 bytes) the last
5315  * alen-4 bytes will always be the same as in req.
5316  *
5317  * Return 0 for failure.
5318  * Return non-zero for success.
5319  */
5320 static boolean_t
5321 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5322 {
5323 	t_scalar_t	alen;
5324 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5325 
5326 	ASSERT(tep->te_hash_hndl != NULL);
5327 	ASSERT(! IS_SOCKET(tep));
5328 
5329 	if (tep->te_hash_hndl == NULL)
5330 		return (B_FALSE);
5331 
5332 	/*
5333 	 * check if default addr is in use
5334 	 * if it is - bump it and try again
5335 	 */
5336 	if (req == NULL) {
5337 		alen = sizeof (uint32_t);
5338 	} else {
5339 		alen = max(req->ta_alen, sizeof (uint32_t));
5340 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5341 	}
5342 
5343 	if (tep->te_alen < alen) {
5344 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5345 
5346 		/*
5347 		 * Not enough space in tep->ta_ap to hold the address,
5348 		 * allocate a bigger space.
5349 		 */
5350 		if (abuf == NULL)
5351 			return (B_FALSE);
5352 
5353 		if (tep->te_alen > 0)
5354 			kmem_free(tep->te_abuf, tep->te_alen);
5355 
5356 		tep->te_alen = alen;
5357 		tep->te_abuf = abuf;
5358 	}
5359 
5360 	/* Copy in the address in req */
5361 	if (req != NULL) {
5362 		ASSERT(alen >= req->ta_alen);
5363 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5364 	}
5365 
5366 	/*
5367 	 * First try minor number then try default addresses.
5368 	 */
5369 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5370 
5371 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5372 		if (mod_hash_insert_reserve(tep->te_addrhash,
5373 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5374 		    tep->te_hash_hndl) == 0) {
5375 			/*
5376 			 * found free address
5377 			 */
5378 			tep->te_flag |= TL_ADDRHASHED;
5379 			tep->te_hash_hndl = NULL;
5380 
5381 			return (B_TRUE); /* successful return */
5382 		}
5383 		/*
5384 		 * Use default address.
5385 		 */
5386 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5387 		atomic_add_32(&tep->te_defaddr, 1);
5388 	}
5389 
5390 	/*
5391 	 * Failed to find anything.
5392 	 */
5393 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5394 	    "tl_get_any_addr:looped 2^32 times"));
5395 	return (B_FALSE);
5396 }
5397 
5398 /*
5399  * reallocb + set r/w ptrs to reflect size.
5400  */
5401 static mblk_t *
5402 tl_resizemp(mblk_t *mp, ssize_t new_size)
5403 {
5404 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5405 		return (NULL);
5406 
5407 	mp->b_rptr = DB_BASE(mp);
5408 	mp->b_wptr = mp->b_rptr + new_size;
5409 	return (mp);
5410 }
5411 
5412 static void
5413 tl_cl_backenable(tl_endpt_t *tep)
5414 {
5415 	list_t *l = &tep->te_flowlist;
5416 	tl_endpt_t *elp;
5417 
5418 	ASSERT(IS_CLTS(tep));
5419 
5420 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5421 		ASSERT(tep->te_ser == elp->te_ser);
5422 		ASSERT(elp->te_flowq == tep);
5423 		if (! elp->te_closing)
5424 			TL_QENABLE(elp);
5425 		elp->te_flowq = NULL;
5426 		list_remove(l, elp);
5427 	}
5428 }
5429 
5430 /*
5431  * Unconnect endpoints.
5432  */
5433 static void
5434 tl_co_unconnect(tl_endpt_t *tep)
5435 {
5436 	tl_endpt_t	*peer_tep = tep->te_conp;
5437 	tl_endpt_t	*srv_tep = tep->te_oconp;
5438 	list_t		*l;
5439 	tl_icon_t  	*tip;
5440 	tl_endpt_t	*cl_tep;
5441 	mblk_t		*d_mp;
5442 
5443 	ASSERT(IS_COTS(tep));
5444 	/*
5445 	 * If our peer is closing, don't use it.
5446 	 */
5447 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5448 		TL_UNCONNECT(tep->te_conp);
5449 		peer_tep = NULL;
5450 	}
5451 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5452 		TL_UNCONNECT(tep->te_oconp);
5453 		srv_tep = NULL;
5454 	}
5455 
5456 	if (tep->te_nicon > 0) {
5457 		l = &tep->te_iconp;
5458 		/*
5459 		 * If incoming requests pending, change state
5460 		 * of clients on disconnect ind event and send
5461 		 * discon_ind pdu to modules above them
5462 		 * for server: all clients get disconnect
5463 		 */
5464 
5465 		while (tep->te_nicon > 0) {
5466 			tip    = list_head(l);
5467 			cl_tep = tip->ti_tep;
5468 
5469 			if (cl_tep == NULL) {
5470 				tl_freetip(tep, tip);
5471 				continue;
5472 			}
5473 
5474 			if (cl_tep->te_oconp != NULL) {
5475 				ASSERT(cl_tep != cl_tep->te_oconp);
5476 				TL_UNCONNECT(cl_tep->te_oconp);
5477 			}
5478 
5479 			if (cl_tep->te_closing) {
5480 				tl_freetip(tep, tip);
5481 				continue;
5482 			}
5483 
5484 			enableok(cl_tep->te_wq);
5485 			TL_QENABLE(cl_tep);
5486 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5487 			if (d_mp != NULL) {
5488 				cl_tep->te_state = TS_IDLE;
5489 				putnext(cl_tep->te_rq, d_mp);
5490 			} else {
5491 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5492 				    SL_TRACE|SL_ERROR,
5493 				    "tl_co_unconnect:icmng: "
5494 				    "allocb failure"));
5495 			}
5496 			tl_freetip(tep, tip);
5497 		}
5498 	} else if (srv_tep != NULL) {
5499 		/*
5500 		 * If outgoing request pending, change state
5501 		 * of server on discon ind event
5502 		 */
5503 
5504 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5505 		    IS_COTSORD(srv_tep) &&
5506 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5507 			/*
5508 			 * Queue ordrel_ind for server to be picked up
5509 			 * when the connection is accepted.
5510 			 */
5511 			d_mp = tl_ordrel_ind_alloc();
5512 		} else {
5513 			/*
5514 			 * send discon_ind to server
5515 			 */
5516 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5517 		}
5518 		if (d_mp == NULL) {
5519 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5520 			    SL_TRACE|SL_ERROR,
5521 			    "tl_co_unconnect:outgoing:allocb failure"));
5522 			TL_UNCONNECT(tep->te_oconp);
5523 			goto discon_peer;
5524 		}
5525 
5526 		/*
5527 		 * If this is a socket the T_DISCON_IND is queued with
5528 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5529 		 * from the list of pending connections.
5530 		 * Note that when te_oconp is set the peer better have
5531 		 * a t_connind_t for the client.
5532 		 */
5533 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5534 			/*
5535 			 * Queue the disconnection message.
5536 			 */
5537 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5538 		} else {
5539 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5540 			if (tip == NULL) {
5541 				freemsg(d_mp);
5542 			} else {
5543 				ASSERT(tep == tip->ti_tep);
5544 				ASSERT(tep->te_ser == srv_tep->te_ser);
5545 				/*
5546 				 * Delete tip from the server list.
5547 				 */
5548 				if (srv_tep->te_nicon == 1) {
5549 					srv_tep->te_state =
5550 					    NEXTSTATE(TE_DISCON_IND2,
5551 					    srv_tep->te_state);
5552 				} else {
5553 					srv_tep->te_state =
5554 					    NEXTSTATE(TE_DISCON_IND3,
5555 					    srv_tep->te_state);
5556 				}
5557 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5558 				    T_DISCON_IND);
5559 				putnext(srv_tep->te_rq, d_mp);
5560 				tl_freetip(srv_tep, tip);
5561 			}
5562 			TL_UNCONNECT(tep->te_oconp);
5563 			srv_tep = NULL;
5564 		}
5565 	} else if (peer_tep != NULL) {
5566 		/*
5567 		 * unconnect existing connection
5568 		 * If connected, change state of peer on
5569 		 * discon ind event and send discon ind pdu
5570 		 * to module above it
5571 		 */
5572 
5573 		ASSERT(tep->te_ser == peer_tep->te_ser);
5574 		if (IS_COTSORD(peer_tep) &&
5575 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5576 		    peer_tep->te_state == TS_DATA_XFER)) {
5577 			/*
5578 			 * send ordrel ind
5579 			 */
5580 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5581 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5582 			    peer_tep->te_state,
5583 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5584 			d_mp = tl_ordrel_ind_alloc();
5585 			if (! d_mp) {
5586 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5587 				    SL_TRACE|SL_ERROR,
5588 				    "tl_co_unconnect:connected:"
5589 				    "allocb failure"));
5590 				/*
5591 				 * Continue with cleaning up peer as
5592 				 * this side may go away with the close
5593 				 */
5594 				TL_QENABLE(peer_tep);
5595 				goto discon_peer;
5596 			}
5597 			peer_tep->te_state =
5598 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5599 
5600 			putnext(peer_tep->te_rq, d_mp);
5601 			/*
5602 			 * Handle flow control case.  This will generate
5603 			 * a t_discon_ind message with reason 0 if there
5604 			 * is data queued on the write side.
5605 			 */
5606 			TL_QENABLE(peer_tep);
5607 		} else if (IS_COTSORD(peer_tep) &&
5608 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5609 			/*
5610 			 * Sent an ordrel_ind. We send a discon with
5611 			 * with error 0 to inform that the peer is gone.
5612 			 */
5613 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5614 			    SL_TRACE|SL_ERROR,
5615 			    "tl_co_unconnect: discon in state %d",
5616 			    tep->te_state));
5617 			tl_discon_ind(peer_tep, 0);
5618 		} else {
5619 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5620 			    SL_TRACE|SL_ERROR,
5621 			    "tl_co_unconnect: state %d", tep->te_state));
5622 			tl_discon_ind(peer_tep, ECONNRESET);
5623 		}
5624 
5625 discon_peer:
5626 		/*
5627 		 * Disconnect cross-pointers only for close
5628 		 */
5629 		if (tep->te_closing) {
5630 			peer_tep = tep->te_conp;
5631 			TL_REMOVE_PEER(peer_tep->te_conp);
5632 			TL_REMOVE_PEER(tep->te_conp);
5633 		}
5634 	}
5635 }
5636 
5637 /*
5638  * Note: The following routine does not recover from allocb()
5639  * failures
5640  * The reason should be from the <sys/errno.h> space.
5641  */
5642 static void
5643 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5644 {
5645 	mblk_t *d_mp;
5646 
5647 	if (tep->te_closing)
5648 		return;
5649 
5650 	/*
5651 	 * flush the queues.
5652 	 */
5653 	flushq(tep->te_rq, FLUSHDATA);
5654 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5655 
5656 	/*
5657 	 * send discon ind
5658 	 */
5659 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5660 	if (! d_mp) {
5661 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5662 		    "tl_discon_ind:allocb failure"));
5663 		return;
5664 	}
5665 	tep->te_state = TS_IDLE;
5666 	putnext(tep->te_rq, d_mp);
5667 }
5668 
5669 /*
5670  * Note: The following routine does not recover from allocb()
5671  * failures
5672  * The reason should be from the <sys/errno.h> space.
5673  */
5674 static mblk_t *
5675 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5676 {
5677 	mblk_t *mp;
5678 	struct T_discon_ind *tdi;
5679 
5680 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5681 		DB_TYPE(mp) = M_PROTO;
5682 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5683 		tdi = (struct T_discon_ind *)mp->b_rptr;
5684 		tdi->PRIM_type = T_DISCON_IND;
5685 		tdi->DISCON_reason = reason;
5686 		tdi->SEQ_number = seqnum;
5687 	}
5688 	return (mp);
5689 }
5690 
5691 
5692 /*
5693  * Note: The following routine does not recover from allocb()
5694  * failures
5695  */
5696 static mblk_t *
5697 tl_ordrel_ind_alloc(void)
5698 {
5699 	mblk_t *mp;
5700 	struct T_ordrel_ind *toi;
5701 
5702 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5703 		DB_TYPE(mp) = M_PROTO;
5704 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5705 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5706 		toi->PRIM_type = T_ORDREL_IND;
5707 	}
5708 	return (mp);
5709 }
5710 
5711 
5712 /*
5713  * Lookup the seqno in the list of queued connections.
5714  */
5715 static tl_icon_t *
5716 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5717 {
5718 	list_t *l = &tep->te_iconp;
5719 	tl_icon_t *tip = list_head(l);
5720 
5721 	ASSERT(seqno != 0);
5722 
5723 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5724 		;
5725 
5726 	return (tip);
5727 }
5728 
5729 /*
5730  * Queue data for a given T_CONN_IND while verifying that redundant
5731  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5732  * Used when the originator of the connection closes.
5733  */
5734 static void
5735 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5736 {
5737 	tl_icon_t		*tip;
5738 	mblk_t			**mpp, *mp;
5739 	int			prim, nprim;
5740 
5741 	if (nmp->b_datap->db_type == M_PROTO)
5742 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5743 	else
5744 		nprim = -1;	/* M_DATA */
5745 
5746 	tip = tl_icon_find(tep, seqno);
5747 	if (tip == NULL) {
5748 		freemsg(nmp);
5749 		return;
5750 	}
5751 
5752 	ASSERT(tip->ti_seqno != 0);
5753 	mpp = &tip->ti_mp;
5754 	while (*mpp != NULL) {
5755 		mp = *mpp;
5756 
5757 		if (mp->b_datap->db_type == M_PROTO)
5758 			prim = ((union T_primitives *)mp->b_rptr)->type;
5759 		else
5760 			prim = -1;	/* M_DATA */
5761 
5762 		/*
5763 		 * Allow nothing after a T_DISCON_IND
5764 		 */
5765 		if (prim == T_DISCON_IND) {
5766 			freemsg(nmp);
5767 			return;
5768 		}
5769 		/*
5770 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5771 		 */
5772 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5773 			freemsg(nmp);
5774 			return;
5775 		}
5776 		mpp = &(mp->b_next);
5777 	}
5778 	*mpp = nmp;
5779 }
5780 
5781 /*
5782  * Verify if a certain TPI primitive exists on the connind queue.
5783  * Use prim -1 for M_DATA.
5784  * Return non-zero if found.
5785  */
5786 static boolean_t
5787 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5788 {
5789 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5790 	boolean_t found = B_FALSE;
5791 
5792 	if (tip != NULL) {
5793 		mblk_t *mp;
5794 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5795 			found = (DB_TYPE(mp) == M_PROTO &&
5796 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5797 		}
5798 	}
5799 	return (found);
5800 }
5801 
5802 /*
5803  * Send the b_next mblk chain that has accumulated before the connection
5804  * was accepted. Perform the necessary state transitions.
5805  */
5806 static void
5807 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5808 {
5809 	mblk_t			*mp;
5810 	union T_primitives	*primp;
5811 
5812 	if (tep->te_closing) {
5813 		tl_icon_freemsgs(mpp);
5814 		return;
5815 	}
5816 
5817 	ASSERT(tep->te_state == TS_DATA_XFER);
5818 	ASSERT(tep->te_rq->q_first == NULL);
5819 
5820 	while ((mp = *mpp) != NULL) {
5821 		*mpp = mp->b_next;
5822 		mp->b_next = NULL;
5823 
5824 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5825 		switch (DB_TYPE(mp)) {
5826 		default:
5827 			freemsg(mp);
5828 			break;
5829 		case M_DATA:
5830 			putnext(tep->te_rq, mp);
5831 			break;
5832 		case M_PROTO:
5833 			primp = (union T_primitives *)mp->b_rptr;
5834 			switch (primp->type) {
5835 			case T_UNITDATA_IND:
5836 			case T_DATA_IND:
5837 			case T_OPTDATA_IND:
5838 			case T_EXDATA_IND:
5839 				putnext(tep->te_rq, mp);
5840 				break;
5841 			case T_ORDREL_IND:
5842 				tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5843 				    tep->te_state);
5844 				putnext(tep->te_rq, mp);
5845 				break;
5846 			case T_DISCON_IND:
5847 				tep->te_state = TS_IDLE;
5848 				putnext(tep->te_rq, mp);
5849 				break;
5850 			default:
5851 #ifdef DEBUG
5852 				cmn_err(CE_PANIC,
5853 				    "tl_icon_sendmsgs: unknown primitive");
5854 #endif /* DEBUG */
5855 				freemsg(mp);
5856 				break;
5857 			}
5858 			break;
5859 		}
5860 	}
5861 }
5862 
5863 /*
5864  * Free the b_next mblk chain that has accumulated before the connection
5865  * was accepted.
5866  */
5867 static void
5868 tl_icon_freemsgs(mblk_t **mpp)
5869 {
5870 	mblk_t *mp;
5871 
5872 	while ((mp = *mpp) != NULL) {
5873 		*mpp = mp->b_next;
5874 		mp->b_next = NULL;
5875 		freemsg(mp);
5876 	}
5877 }
5878 
5879 /*
5880  * Send M_ERROR
5881  * Note: assumes caller ensured enough space in mp or enough
5882  *	memory available. Does not attempt recovery from allocb()
5883  *	failures
5884  */
5885 
5886 static void
5887 tl_merror(queue_t *wq, mblk_t *mp, int error)
5888 {
5889 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5890 
5891 	if (tep->te_closing) {
5892 		freemsg(mp);
5893 		return;
5894 	}
5895 
5896 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
5897 	    SL_TRACE|SL_ERROR,
5898 	    "tl_merror: tep=%p, err=%d", tep, error));
5899 
5900 	/*
5901 	 * flush all messages on queue. we are shutting
5902 	 * the stream down on fatal error
5903 	 */
5904 	flushq(wq, FLUSHALL);
5905 	if (IS_COTS(tep)) {
5906 		/* connection oriented - unconnect endpoints */
5907 		tl_co_unconnect(tep);
5908 	}
5909 	if (mp->b_cont) {
5910 		freemsg(mp->b_cont);
5911 		mp->b_cont = NULL;
5912 	}
5913 
5914 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5915 		freemsg(mp);
5916 		mp = allocb(1, BPRI_HI);
5917 		if (!mp) {
5918 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5919 			    SL_TRACE|SL_ERROR,
5920 			    "tl_merror:M_PROTO: out of memory"));
5921 			return;
5922 		}
5923 	}
5924 	if (mp) {
5925 		DB_TYPE(mp) = M_ERROR;
5926 		mp->b_rptr = DB_BASE(mp);
5927 		*mp->b_rptr = (char)error;
5928 		mp->b_wptr = mp->b_rptr + sizeof (char);
5929 		qreply(wq, mp);
5930 	} else {
5931 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
5932 	}
5933 }
5934 
5935 static void
5936 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5937 {
5938 	if (flag & TL_SETCRED) {
5939 		struct opthdr *opt = (struct opthdr *)buf;
5940 		tl_credopt_t *tlcred;
5941 
5942 		opt->level = TL_PROT_LEVEL;
5943 		opt->name = TL_OPT_PEER_CRED;
5944 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5945 
5946 		tlcred = (tl_credopt_t *)(opt + 1);
5947 		tlcred->tc_uid = crgetuid(cr);
5948 		tlcred->tc_gid = crgetgid(cr);
5949 		tlcred->tc_ruid = crgetruid(cr);
5950 		tlcred->tc_rgid = crgetrgid(cr);
5951 		tlcred->tc_suid = crgetsuid(cr);
5952 		tlcred->tc_sgid = crgetsgid(cr);
5953 		tlcred->tc_ngroups = crgetngroups(cr);
5954 	} else if (flag & TL_SETUCRED) {
5955 		struct opthdr *opt = (struct opthdr *)buf;
5956 
5957 		opt->level = TL_PROT_LEVEL;
5958 		opt->name = TL_OPT_PEER_UCRED;
5959 		opt->len = (t_uscalar_t)OPTLEN(ucredsize);
5960 
5961 		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
5962 	} else {
5963 		struct T_opthdr *topt = (struct T_opthdr *)buf;
5964 		ASSERT(flag & TL_SOCKUCRED);
5965 
5966 		topt->level = SOL_SOCKET;
5967 		topt->name = SCM_UCRED;
5968 		topt->len = ucredsize + sizeof (*topt);
5969 		topt->status = 0;
5970 		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
5971 	}
5972 }
5973 
5974 /* ARGSUSED */
5975 static int
5976 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
5977 {
5978 	/* no default value processed in protocol specific code currently */
5979 	return (-1);
5980 }
5981 
5982 /* ARGSUSED */
5983 static int
5984 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
5985 {
5986 	int len;
5987 	tl_endpt_t *tep;
5988 	int *valp;
5989 
5990 	tep = (tl_endpt_t *)wq->q_ptr;
5991 
5992 	len = 0;
5993 
5994 	/*
5995 	 * Assumes: option level and name sanity check done elsewhere
5996 	 */
5997 
5998 	switch (level) {
5999 	case SOL_SOCKET:
6000 		if (! IS_SOCKET(tep))
6001 			break;
6002 		switch (name) {
6003 		case SO_RECVUCRED:
6004 			len = sizeof (int);
6005 			valp = (int *)ptr;
6006 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6007 			break;
6008 		default:
6009 			break;
6010 		}
6011 		break;
6012 	case TL_PROT_LEVEL:
6013 		switch (name) {
6014 		case TL_OPT_PEER_CRED:
6015 		case TL_OPT_PEER_UCRED:
6016 			/*
6017 			 * option not supposed to retrieved directly
6018 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6019 			 * when some internal flags set by other options
6020 			 * Direct retrieval always designed to fail(ignored)
6021 			 * for this option.
6022 			 */
6023 			break;
6024 		}
6025 	}
6026 	return (len);
6027 }
6028 
6029 /* ARGSUSED */
6030 static int
6031 tl_set_opt(
6032 	queue_t		*wq,
6033 	uint_t		mgmt_flags,
6034 	int		level,
6035 	int		name,
6036 	uint_t		inlen,
6037 	uchar_t		*invalp,
6038 	uint_t		*outlenp,
6039 	uchar_t		*outvalp,
6040 	void		*thisdg_attrs,
6041 	cred_t		*cr,
6042 	mblk_t		*mblk)
6043 {
6044 	int error;
6045 	tl_endpt_t *tep;
6046 
6047 	tep = (tl_endpt_t *)wq->q_ptr;
6048 
6049 	error = 0;		/* NOERROR */
6050 
6051 	/*
6052 	 * Assumes: option level and name sanity checks done elsewhere
6053 	 */
6054 
6055 	switch (level) {
6056 	case SOL_SOCKET:
6057 		if (! IS_SOCKET(tep)) {
6058 			error = EINVAL;
6059 			break;
6060 		}
6061 		/*
6062 		 * TBD: fill in other AF_UNIX socket options and then stop
6063 		 * returning error.
6064 		 */
6065 		switch (name) {
6066 		case SO_RECVUCRED:
6067 			/*
6068 			 * We only support this for datagram sockets;
6069 			 * getpeerucred handles the connection oriented
6070 			 * transports.
6071 			 */
6072 			if (! IS_CLTS(tep)) {
6073 				error = EINVAL;
6074 				break;
6075 			}
6076 			if (*(int *)invalp == 0)
6077 				tep->te_flag &= ~TL_SOCKUCRED;
6078 			else
6079 				tep->te_flag |= TL_SOCKUCRED;
6080 			break;
6081 		default:
6082 			error = EINVAL;
6083 			break;
6084 		}
6085 		break;
6086 	case TL_PROT_LEVEL:
6087 		switch (name) {
6088 		case TL_OPT_PEER_CRED:
6089 		case TL_OPT_PEER_UCRED:
6090 			/*
6091 			 * option not supposed to be set directly
6092 			 * Its value in initialized for each endpoint at
6093 			 * driver open time.
6094 			 * Direct setting always designed to fail for this
6095 			 * option.
6096 			 */
6097 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6098 			    SL_TRACE|SL_ERROR,
6099 			    "tl_set_opt: option is not supported"));
6100 			error = EPROTO;
6101 			break;
6102 		}
6103 	}
6104 	return (error);
6105 }
6106 
6107 
6108 static void
6109 tl_timer(void *arg)
6110 {
6111 	queue_t *wq = arg;
6112 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6113 
6114 	ASSERT(tep);
6115 
6116 	tep->te_timoutid = 0;
6117 
6118 	enableok(wq);
6119 	/*
6120 	 * Note: can call wsrv directly here and save context switch
6121 	 * Consider change when qtimeout (not timeout) is active
6122 	 */
6123 	qenable(wq);
6124 }
6125 
6126 static void
6127 tl_buffer(void *arg)
6128 {
6129 	queue_t *wq = arg;
6130 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6131 
6132 	ASSERT(tep);
6133 
6134 	tep->te_bufcid = 0;
6135 	tep->te_nowsrv = B_FALSE;
6136 
6137 	enableok(wq);
6138 	/*
6139 	 *  Note: can call wsrv directly here and save context switch
6140 	 * Consider change when qbufcall (not bufcall) is active
6141 	 */
6142 	qenable(wq);
6143 }
6144 
6145 static void
6146 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6147 {
6148 	tl_endpt_t *tep;
6149 
6150 	tep = (tl_endpt_t *)wq->q_ptr;
6151 
6152 	if (tep->te_closing) {
6153 		freemsg(mp);
6154 		return;
6155 	}
6156 	noenable(wq);
6157 
6158 	(void) insq(wq, wq->q_first, mp);
6159 
6160 	if (tep->te_bufcid || tep->te_timoutid) {
6161 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6162 		    "tl_memrecover:recover %p pending", (void *)wq));
6163 		return;
6164 	}
6165 
6166 	if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6167 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6168 		    drv_usectohz(TL_BUFWAIT));
6169 	}
6170 }
6171 
6172 static void
6173 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6174 {
6175 	ASSERT(tip->ti_seqno != 0);
6176 
6177 	if (tip->ti_mp != NULL) {
6178 		tl_icon_freemsgs(&tip->ti_mp);
6179 		tip->ti_mp = NULL;
6180 	}
6181 	if (tip->ti_tep != NULL) {
6182 		tl_refrele(tip->ti_tep);
6183 		tip->ti_tep = NULL;
6184 	}
6185 	list_remove(&tep->te_iconp, tip);
6186 	kmem_free(tip, sizeof (tl_icon_t));
6187 	tep->te_nicon--;
6188 }
6189 
6190 /*
6191  * Remove address from address hash.
6192  */
6193 static void
6194 tl_addr_unbind(tl_endpt_t *tep)
6195 {
6196 	tl_endpt_t *elp;
6197 
6198 	if (tep->te_flag & TL_ADDRHASHED) {
6199 		if (IS_SOCKET(tep)) {
6200 			(void) mod_hash_remove(tep->te_addrhash,
6201 			    (mod_hash_key_t)tep->te_vp,
6202 			    (mod_hash_val_t *)&elp);
6203 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6204 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6205 		} else {
6206 			(void) mod_hash_remove(tep->te_addrhash,
6207 			    (mod_hash_key_t)&tep->te_ap,
6208 			    (mod_hash_val_t *)&elp);
6209 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6210 			tep->te_alen = -1;
6211 			tep->te_abuf = NULL;
6212 		}
6213 		tep->te_flag &= ~TL_ADDRHASHED;
6214 	}
6215 }
6216