xref: /titanic_44/usr/src/uts/common/io/tl.c (revision bde3d612a7c090234c60e6e4578821237a5db135)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
27  * Copyright (c) 2012 by Delphix. All rights reserved.
28  */
29 
30 /*
31  * Multithreaded STREAMS Local Transport Provider.
32  *
33  * OVERVIEW
34  * ========
35  *
36  * This driver provides TLI as well as socket semantics.  It provides
37  * connectionless, connection oriented, and connection oriented with orderly
38  * release transports for TLI and sockets. Each transport type has separate name
39  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
40  * this removes any name space conflicts when binding to socket style transport
41  * addresses.
42  *
43  * NOTE: There is one exception: Socket ticots and ticotsord transports share
44  * the same namespace. In fact, sockets always use ticotsord type transport.
45  *
46  * The driver mode is specified during open() by the minor number used for
47  * open.
48  *
49  *  The sockets in addition have the following semantic differences:
50  *  No support for passing up credentials (TL_SET[U]CRED).
51  *
52  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
53  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
54  *	T_OPTDATA_IND.
55  *
56  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
57  *	a T_CONN_RES is received from the acceptor. This means that a socket
58  *	connect will complete before the peer has called accept.
59  *
60  *
61  * MULTITHREADING
62  * ==============
63  *
64  * The driver does not use STREAMS protection mechanisms. Instead it uses a
65  * generic "serializer" abstraction. Most of the operations are executed behind
66  * the serializer and are, essentially single-threaded. All functions executed
67  * behind the same serializer are strictly serialized. So if one thread calls
68  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
69  * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
70  * was called first) the actual sequence will be foo(mp1, arg1);
71  * bar(mp2, arg1) or bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will
72  * never run at the same time.
73  *
74  * Connectionless transports use a single serializer per transport type (one
75  * for TLI and one for sockets). Connection-oriented transports use
76  * finer-grained serializers.
77  *
78  * All COTS-type endpoints start their life with private serializers. During
79  * connection request processing the endpoint serializer is switched to the
80  * listener's serializer and the rest of T_CONN_REQ processing is done on the
81  * listener serializer. During T_CONN_RES processing the eager serializer is
82  * switched from listener to acceptor serializer and after that point all
83  * processing for eager and acceptor happens on this serializer. To avoid races
84  * with endpoint closes while its serializer may be changing closes are blocked
85  * while serializers are manipulated.
86  *
87  * References accounting
88  * ---------------------
89  *
90  * Endpoints are reference counted and freed when the last reference is
91  * dropped. Functions within the serializer may access an endpoint state even
92  * after an endpoint closed. The te_closing being set on the endpoint indicates
93  * that the endpoint entered its close routine.
94  *
95  * One reference is held for each opened endpoint instance. The reference
96  * counter is incremented when the endpoint is linked to another endpoint and
97  * decremented when the link disappears. It is also incremented when the
98  * endpoint is found by the hash table lookup. This increment is atomic with the
99  * lookup itself and happens while the hash table read lock is held.
100  *
101  * Close synchronization
102  * ---------------------
103  *
104  * During close the endpoint is marked as closing using te_closing flag. It is
105  * usually enough to check for te_closing flag since all other state changes
106  * happen after this flag is set and the close entered serializer. Immediately
107  * after setting te_closing flag tl_close() enters serializer and waits until
108  * the callback finishes. This allows all functions called within serializer to
109  * simply check te_closing without any locks.
110  *
111  * Serializer management.
112  * ---------------------
113  *
114  * For COTS transports serializers are created when the endpoint is constructed
115  * and destroyed when the endpoint is destructed. CLTS transports use global
116  * serializers - one for sockets and one for TLI.
117  *
118  * COTS serializers have separate reference counts to deal with several
119  * endpoints sharing the same serializer. There is a subtle problem related to
120  * the serializer destruction. The serializer should never be destroyed by any
121  * function executed inside serializer. This means that close has to wait till
122  * all serializer activity for this endpoint is finished before it can drop the
123  * last reference on the endpoint (which may as well free the serializer).  This
124  * is only relevant for COTS transports which manage serializers
125  * dynamically. For CLTS transports close may complete without waiting for all
126  * serializer activity to finish since serializer is only destroyed at driver
127  * detach time.
128  *
129  * COTS endpoints keep track of the number of outstanding requests on the
130  * serializer for the endpoint. The code handling accept() avoids changing
131  * client serializer if it has any pending messages on the serializer and
132  * instead moves acceptor to listener's serializer.
133  *
134  *
135  * Use of hash tables
136  * ------------------
137  *
138  * The driver uses modhash hash table implementation. Each transport uses two
139  * hash tables - one for finding endpoints by acceptor ID and another one for
140  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
141  * pair of hash tables since sockets only use TICOTSORD.
142  *
143  * All hash tables lookups increment a reference count for returned endpoints,
144  * so we may safely check the endpoint state even when the endpoint is removed
145  * from the hash by another thread immediately after it is found.
146  *
147  *
148  * CLOSE processing
149  * ================
150  *
151  * The driver enters serializer twice on close(). The close sequence is the
152  * following:
153  *
154  * 1) Wait until closing is safe (te_closewait becomes zero)
155  *	This step is needed to prevent close during serializer switches. In most
156  *	cases (close happening after connection establishment) te_closewait is
157  *	zero.
158  * 2) Set te_closing.
159  * 3) Call tl_close_ser() within serializer and wait for it to complete.
160  *
161  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
162  *	It also needs to clear write-side q_next pointers - this should be done
163  *	before qprocsoff().
164  *
165  *    This synchronous serializer entry during close is needed to ensure that
166  *    the queue is valid everywhere inside the serializer.
167  *
168  *    Note that in many cases close will execute tl_close_ser() synchronously,
169  *    so it will not wait at all.
170  *
171  * 4) Calls qprocsoff().
172  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
173  *	complete (for COTS transports). For CLTS transport there is no wait.
174  *
175  *	tl_close_finish_ser() finishes the close process and wakes up waiting
176  *	close if there is any.
177  *
178  *    Note that in most cases close will enter tl_close_finish_ser()
179  *    synchronously and will not wait at all.
180  *
181  *
182  * Flow Control
183  * ============
184  *
185  * The driver implements both read and write side service routines. No one calls
186  * putq() on the read queue. The read side service routine tl_rsrv() is called
187  * when the read side stream is back-enabled. It enters serializer synchronously
188  * (waits till serializer processing is complete). Within serializer it
189  * back-enables all endpoints blocked by the queue for connection-less
190  * transports and enables write side service processing for the peer for
191  * connection-oriented transports.
192  *
193  * Read and write side service routines use special mblk_sized space in the
194  * endpoint structure to enter perimeter.
195  *
196  * Write-side flow control
197  * -----------------------
198  *
199  * Write side flow control is a bit tricky. The driver needs to deal with two
200  * message queues - the explicit STREAMS message queue maintained by
201  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
202  * queues should be synchronized to preserve message ordering and should
203  * maintain a single order determined by the order in which messages enter
204  * tl_wput(). In order to maintain the ordering between these two queues the
205  * STREAMS queue is only manipulated within the serializer, so the ordering is
206  * provided by the serializer.
207  *
208  * Functions called from the tl_wsrv() sometimes may call putbq(). To
209  * immediately stop any further processing of the STREAMS message queues the
210  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
211  * side service processing stops when the flag is set.
212  *
213  * The tl_wsrv() function enters serializer synchronously and waits for it to
214  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
215  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
216  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
217  * always bounded by the amount of messages on the STREAMS queue at the time
218  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
219  * queue from another serialized entry which can't happen in parallel. This
220  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
221  * of it draining forever while writer places new messages on the STREAMS
222  * queue).
223  *
224  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
225  *
226  *
227  * Unix Domain Sockets
228  * ===================
229  *
230  * The driver knows the structure of Unix Domain sockets addresses and treats
231  * them differently from generic TLI addresses. For sockets implicit binds are
232  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
233  * instead of using address length of zero. Explicit binds specify
234  * SOU_MAGIC_EXPLICIT as magic.
235  *
236  * For implicit binds we always use minor number as soua_vp part of the address
237  * and avoid any hash table lookups. This saves two hash tables lookups per
238  * anonymous bind.
239  *
240  * For explicit address we hash the vnode pointer instead of hashing the
241  * full-scale address+zone+length. Hashing by pointer is more efficient than
242  * hashing by the full address.
243  *
244  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
245  * tep structure, so it should be never freed.
246  *
247  * Also for sockets the driver always uses minor number as acceptor id.
248  *
249  * TPI VIOLATIONS
250  * --------------
251  *
252  * This driver violates TPI in several respects for Unix Domain Sockets:
253  *
254  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
255  *	is requested and the endpoint is already in use. There is no point in
256  *	generating an unused address since this address will be rejected by
257  *	sockfs anyway. For implicit binds it always generates a new address
258  *	(sets soua_vp to its minor number).
259  *
260  * 2) It always uses minor number as acceptor ID and never uses queue
261  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
262  *	message and they do not use the queue pointer.
263  *
264  * 3) For Listener sockets the usual sequence is to issue bind() zero backlog
265  *	followed by listen(). The listen() should be issued with non-zero
266  *	backlog, so sotpi_listen() issues unbind request followed by bind
267  *	request to the same address but with a non-zero qlen value. Both
268  *	tl_bind() and tl_unbind() require write lock on the hash table to
269  *	insert/remove the address. The driver does not remove the address from
270  *	the hash for endpoints that are bound to the explicit address and have
271  *	backlog of zero. During T_BIND_REQ processing if the address requested
272  *	is equal to the address the endpoint already has it updates the backlog
273  *	without reinserting the address in the hash table. This optimization
274  *	avoids two hash table updates for each listener created. It also
275  *	avoids the problem of a "stolen" address when another listener may use
276  *	the same address between the unbind and bind and suddenly listen() fails
277  *	because address is in use even though the bind() succeeded.
278  *
279  *
280  * CONNECTIONLESS TRANSPORTS
281  * =========================
282  *
283  * Connectionless transports all share the same serializer (one for TLI and one
284  * for Sockets). Functions executing behind serializer can check or modify state
285  * of any endpoint.
286  *
287  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
288  * te_lastep field. The next time X talks to some address A it checks whether A
289  * is the same as Y's address and if it is there is no need to lookup Y. If the
290  * address is different or the state of Y is not appropriate (e.g. closed or not
291  * idle) X does a lookup using tl_find_peer() and caches the new address.
292  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
293  * on the endpoint found.
294  *
295  * During close of endpoint Y it doesn't try to remove itself from other
296  * endpoints caches. They will detect that Y is gone and will search the peer
297  * endpoint again.
298  *
299  * Flow Control Handling.
300  * ----------------------
301  *
302  * Each connectionless endpoint keeps a list of endpoints which are
303  * flow-controlled by its queue. It also keeps a pointer to the queue which
304  * flow-controls itself.  Whenever flow control releases for endpoint X it
305  * enables all queues from the list. During close it also back-enables everyone
306  * in the list. If X is flow-controlled when it is closing it removes itself
307  * from the peer's list.
308  *
309  * DATA STRUCTURES
310  * ===============
311  *
312  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
313  * endpoint state. For connection-oriented transports it keeps a list
314  * of pending connections (tl_icon_t). For connectionless transports it keeps a
315  * list of endpoints flow controlled by this one.
316  *
317  * Each transport type is represented by a per-transport data structure
318  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
319  * endpoint address hash tables for each transport. It also contains pointer to
320  * transport serializer for connectionless transports.
321  *
322  * Each endpoint keeps a link to its transport structure, so the code can find
323  * all per-transport information quickly.
324  */
325 
326 #include	<sys/types.h>
327 #include	<sys/inttypes.h>
328 #include	<sys/stream.h>
329 #include	<sys/stropts.h>
330 #define	_SUN_TPI_VERSION 2
331 #include	<sys/tihdr.h>
332 #include	<sys/strlog.h>
333 #include	<sys/debug.h>
334 #include	<sys/cred.h>
335 #include	<sys/errno.h>
336 #include	<sys/kmem.h>
337 #include	<sys/id_space.h>
338 #include	<sys/modhash.h>
339 #include	<sys/mkdev.h>
340 #include	<sys/tl.h>
341 #include	<sys/stat.h>
342 #include	<sys/conf.h>
343 #include	<sys/modctl.h>
344 #include	<sys/strsun.h>
345 #include	<sys/socket.h>
346 #include	<sys/socketvar.h>
347 #include	<sys/sysmacros.h>
348 #include	<sys/xti_xtiopt.h>
349 #include	<sys/ddi.h>
350 #include	<sys/sunddi.h>
351 #include	<sys/zone.h>
352 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
353 #include	<inet/optcom.h>
354 #include	<sys/strsubr.h>
355 #include	<sys/ucred.h>
356 #include	<sys/suntpi.h>
357 #include	<sys/list.h>
358 #include	<sys/serializer.h>
359 
360 /*
361  * TBD List
362  * 14 Eliminate state changes through table
363  * 16. AF_UNIX socket options
364  * 17. connect() for ticlts
365  * 18. support for "netstat" to show AF_UNIX plus TLI local
366  *	transport connections
367  * 21. sanity check to flushing on sending M_ERROR
368  */
369 
370 /*
371  * CONSTANT DECLARATIONS
372  * --------------------
373  */
374 
375 /*
376  * Local declarations
377  */
378 #define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]	/* next TPI state */
379 
380 #define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
381 #define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
382 #define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgsz" is unlimited (0) */
383 /*
384  * Default hash table size; initial value of tl_hash_size.
385  */
386 #define	TL_HASH_SIZE 311
387 
388 /*
389  * Definitions for module_info
390  */
391 #define		TL_ID		(104)		/* module ID number */
392 #define		TL_NAME		"tl"		/* module name */
393 #define		TL_MINPSZ	(0)		/* min packet size */
394 #define		TL_MAXPSZ	INFPSZ 		/* max packet size (unlimited) */
395 #define		TL_HIWAT	(16*1024)	/* hi water mark */
396 #define		TL_LOWAT	(256)		/* lo water mark */
397 /*
398  * Definition of minor numbers/modes for new transport provider modes.
399  * We view the socket use as a separate mode to get a separate name space.
400  */
401 #define		TL_TICOTS	0	/* connection oriented transport */
402 #define		TL_TICOTSORD 	1	/* COTS w/ orderly release */
403 #define		TL_TICLTS 	2	/* connectionless transport */
404 #define		TL_UNUSED	3	/* no transport; "undefined" slot */
405 #define		TL_SOCKET	4	/* Socket mode bit */
406 #define		TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)	/* socket COTS */
407 #define		TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD) /* socket COTSORD */
408 #define		TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)	/* socket CLTS */
409 
410 #define		TL_MINOR_MASK	0x7	/* covers every mode value above */
411 #define		TL_MINOR_START	(TL_TICLTS + 1)	/* TODO confirm: first clone minor? */
412 
413 /*
414  * LOCAL MACROS
415  */
416 #define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t)) /* round up p */
417 
418 /*
419  * EXTERNAL VARIABLE DECLARATIONS
420  * -----------------------------
421  */
422 /*
423  * state table defined in the OS space.c
424  */
425 extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES]; /* [event][state] */
426 
427 /*
428  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
429  */
430 static int tl_open(queue_t *, dev_t *, int, int, cred_t *); /* qi_qopen */
431 static int tl_close(queue_t *, int, cred_t *);	/* qi_qclose */
432 static void tl_wput(queue_t *, mblk_t *);	/* write-side qi_putp */
433 static void tl_wsrv(queue_t *);			/* write-side qi_srvp */
434 static void tl_rsrv(queue_t *);			/* read-side qi_srvp */
435 
436 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
437 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
438 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
439 
440 
441 /*
442  * GLOBAL DATA STRUCTURES AND VARIABLES
443  * -----------------------------------
444  */
445 
446 /*
447  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
448  * For now, we only manage the SO_RECVUCRED option but we also have
449  * harmless dummy options to make things work with some common code we access.
450  */
451 opdes_t	tl_opt_arr[] = {	/* positional; field order per opdes_t */
452 	/* The SO_TYPE is needed for the hack below */
453 	{
454 		SO_TYPE,		/* option name */
455 		SOL_SOCKET,		/* option level */
456 		OA_R,			/* access: read */
457 		OA_R,			/* access: read */
458 		OP_NP,
459 		0,
460 		sizeof (t_scalar_t),	/* size of the option value */
461 		0
462 	},
463 	{
464 		SO_RECVUCRED,		/* option name */
465 		SOL_SOCKET,		/* option level */
466 		OA_RW,			/* access: read/write */
467 		OA_RW,			/* access: read/write */
468 		OP_NP,
469 		0,
470 		sizeof (int),		/* size of the option value */
471 		0
472 	}
473 };
474 
475 /*
476  * Table of all supported levels
477  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
478  * any supported options so we need this info separately.
479  *
480  * This is needed only for topmost tpi providers.
481  */
482 optlevel_t	tl_valid_levels_arr[] = {
483 	XTI_GENERIC,	/* valid level, but no entry in tl_opt_arr */
484 	SOL_SOCKET,	/* level of both tl_opt_arr entries */
485 	TL_PROT_LEVEL
486 };
487 
488 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr) /* # of levels */
489 /*
490  * Current upper bound on the amount of space needed to return all options.
491  * Additional options with data size of sizeof(long) are handled automatically.
492  * Others need to be accounted for by hand.
493  */
494 #define	TL_MAX_OPT_BUF_LEN						\
495 		((A_CNT(tl_opt_arr) << 2) +				\
496 		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
497 		64 + sizeof (struct T_optmgmt_ack))
498 
499 #define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)	/* # of entries in tl_opt_arr */
500 
501 /*
502  * Transport address: a zone-scoped buffer (ta_abuf) of length ta_alen.
503  */
504 typedef struct tl_addr {
505 	zoneid_t	ta_zoneid;		/* Zone scope of address */
506 	t_scalar_t	ta_alen;		/* length of ta_abuf */
507 	void		*ta_abuf;		/* the addr itself */
508 } tl_addr_t;
509 
510 /*
511  * Refcounted version of serializer; it may be shared by several endpoints.
512  */
513 typedef struct tl_serializer {
514 	uint_t		ts_refcnt;	/* references held on this serializer */
515 	serializer_t	*ts_serializer;	/* the underlying serializer */
516 } tl_serializer_t;
517 
518 /*
519  * Each transport type has a separate state.
520  * Per-transport state; one entry per row of tl_transports[].
521  */
522 typedef struct tl_transport_state {
523 	char		*tr_name;	/* transport name */
524 	minor_t		tr_minor;	/* mode (TL_TICOTS, ...) */
525 	uint32_t	tr_defaddr;	/* initialized to TL_DFADDR */
526 	mod_hash_t	*tr_ai_hash;	/* acceptor ID hash */
527 	mod_hash_t	*tr_addr_hash;	/* endpoint address hash */
528 	tl_serializer_t	*tr_serializer;	/* serializer for CLTS transports */
529 } tl_transport_state_t;
530 
531 #define	TL_DFADDR 0x1000	/* initial tr_defaddr of every transport */
532 
533 static tl_transport_state_t tl_transports[] = {	/* row i has tr_minor i */
534 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
535 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
536 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
537 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
538 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
539 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
540 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
541 };
542 
543 #define	TL_MAXTRANSPORT A_CNT(tl_transports)	/* # of rows above */
544 
545 struct tl_endpt;			/* defined below */
546 typedef struct tl_endpt tl_endpt_t;
547 
548 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);	/* msg handler type */
549 
550 /*
551  * Data structure used to represent pending connects.
552  * Records enough information so that the connecting peer can close
553  * before the connection gets accepted.
554  */
555 typedef struct tl_icon {
556 	list_node_t	ti_node;	/* list linkage (see te_iconp) */
557 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
558 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
559 	t_scalar_t	ti_seqno;	/* Sequence number */
560 } tl_icon_t;
561 
562 typedef struct so_ux_addr soux_addr_t;	/* Unix domain socket address */
563 #define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)	/* and its size */
564 
565 /*
566  * Maximum number of unaccepted connection indications allowed per listener.
567  */
568 #define	TL_MAXQLEN	4096	/* initial value of tl_maxqlen */
569 int tl_maxqlen = TL_MAXQLEN;	/* presumably a tunable - confirm */
570 
571 /*
572  *	transport endpoint structure
573  */
574 struct tl_endpt {
575 	queue_t		*te_rq;		/* stream read queue */
576 	queue_t		*te_wq;		/* stream write queue */
577 	uint32_t	te_refcnt;	/* reference count */
578 	int32_t 	te_state;	/* TPI state of endpoint */
579 	minor_t		te_minor;	/* minor number */
580 #define	te_seqno	te_minor
581 	uint_t		te_flag;	/* mode bits and TL_* flags */
582 	boolean_t	te_nowsrv;	/* set by TL_PUTBQ to stop wsrv */
583 	tl_serializer_t	*te_ser;	/* Serializer to use */
584 #define	te_serializer	te_ser->ts_serializer
585 
586 	soux_addr_t	te_uxaddr;	/* Socket address */
587 #define	te_magic	te_uxaddr.soua_magic
588 #define	te_vp		te_uxaddr.soua_vp
589 	tl_addr_t	te_ap;		/* addr bound to this endpt */
590 #define	te_zoneid te_ap.ta_zoneid
591 #define	te_alen	te_ap.ta_alen
592 #define	te_abuf	te_ap.ta_abuf
593 
594 	tl_transport_state_t *te_transport;	/* per-transport state */
595 #define	te_addrhash	te_transport->tr_addr_hash
596 #define	te_aihash	te_transport->tr_ai_hash
597 #define	te_defaddr	te_transport->tr_defaddr
598 	cred_t		*te_credp;	/* endpoint user credentials */
599 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
600 
601 	/*
602 	 * State specific for connection-oriented and connectionless transports.
603 	 */
604 	union {
605 		/* Connection-oriented state. */
606 		struct {
607 			t_uscalar_t _te_nicon;	/* count of conn requests */
608 			t_uscalar_t _te_qlen;	/* max conn requests */
609 			tl_endpt_t  *_te_oconp;	/* conn request pending */
610 			tl_endpt_t  *_te_conp;	/* connected endpt */
611 #ifndef _ILP32
612 			void	    *_te_pad;	/* padding, non-ILP32 only */
613 #endif
614 			list_t	_te_iconp;	/* list of conn ind. pending */
615 		} _te_cots_state;
616 		/* Connection-less state. */
617 		struct {
618 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
619 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
620 			list_node_t _te_flows;	/* lists of connections */
621 			list_t  _te_flowlist;	/* Who flowcontrols on me */
622 		} _te_clts_state;
623 	} _te_transport_state;
624 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
625 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
626 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
627 #define	te_conp		_te_transport_state._te_cots_state._te_conp
628 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
629 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
630 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
631 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
632 #define	te_flows	_te_transport_state._te_clts_state._te_flows
633 
634 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
635 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
636 	pid_t		te_cpid;	/* cached pid of endpoint */
637 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
638 	/*
639 	 * Pieces of the endpoint state needed for closing.
640 	 */
641 	kmutex_t	te_closelock;
642 	kcondvar_t	te_closecv;
643 	uint8_t		te_closing;	/* The endpoint started closing */
644 	uint8_t		te_closewait;	/* Wait in close until zero */
645 	mblk_t		te_closemp;	/* for entering serializer on close */
646 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
647 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
648 	kmutex_t	te_srv_lock;
649 	kcondvar_t	te_srv_cv;
650 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
651 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
652 	/*
653 	 * Pieces of the endpoint state needed for serializer transitions.
654 	 */
655 	kmutex_t	te_ser_lock;	/* Protects the count below */
656 	uint_t		te_ser_count;	/* Number of messages on serializer */
657 };
658 
659 /*
660  * Flag values. Lower 4 bits specify the transport used.
661  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only;
662  * they make it easier to identify the endpoint.
663  */
664 #define	TL_LISTENER	0x00010	/* the listener endpoint */
665 #define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
666 #define	TL_EAGER	0x00040	/* connecting endpoint */
667 #define	TL_ACCEPTED	0x00080	/* accepted connection */
668 #define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
669 #define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
670 #define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
671 #define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
672 #define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
673 /*
674  * Boolean checks for the endpoint type, based on the mode bits in te_flag.
675  */
676 #define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
677 #define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
678 #define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
679 #define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
680 
681 /*
682  * Operations that always go together, wrapped so that no step is missed. Each
683  * expands to a bare { } block (not do/while), so mind unbraced if/else use.
684  */
685 #define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } /* drop ref, clear ptr */
686 #define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } /* NULL-safe */
687 
688 #define	TL_PUTBQ(x, mp) {		\
689 	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
690 	(x)->te_nowsrv = B_TRUE;	\
691 	(void) putbq((x)->te_wq, mp);	\
692 }
693 
694 #define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
695 #define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
696 
697 /*
698  * STREAMS driver glue data structures.
699  */
700 static	struct	module_info	tl_minfo = {
701 	TL_ID,			/* mi_idnum */
702 	TL_NAME,		/* mi_idname */
703 	TL_MINPSZ,		/* mi_minpsz */
704 	TL_MAXPSZ,		/* mi_maxpsz */
705 	TL_HIWAT,		/* mi_hiwat */
706 	TL_LOWAT		/* mi_lowat */
707 };
708 
709 static	struct	qinit	tl_rinit = {
710 	NULL,			/* qi_putp - no read-side put procedure */
711 	(int (*)())tl_rsrv,	/* qi_srvp - cast: tl_rsrv returns void */
712 	tl_open,		/* qi_qopen */
713 	tl_close,		/* qi_qclose */
714 	NULL,			/* qi_qadmin */
715 	&tl_minfo,		/* qi_minfo */
716 	NULL			/* qi_mstat */
717 };
718 
719 static	struct	qinit	tl_winit = {
720 	(int (*)())tl_wput,	/* qi_putp - cast: tl_wput returns void */
721 	(int (*)())tl_wsrv,	/* qi_srvp - cast: tl_wsrv returns void */
722 	NULL,			/* qi_qopen */
723 	NULL,			/* qi_qclose */
724 	NULL,			/* qi_qadmin */
725 	&tl_minfo,		/* qi_minfo - shared with the read side */
726 	NULL			/* qi_mstat */
727 };
728 
729 static	struct streamtab	tlinfo = {
730 	&tl_rinit,		/* st_rdinit */
731 	&tl_winit,		/* st_wrinit */
732 	NULL,			/* st_muxrinit */
733 	NULL			/* st_muxwrinit */
734 };
735 
736 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
737     nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
738 
739 static struct modldrv modldrv = {
740 	&mod_driverops,		/* Type of module -- pseudo driver here */
741 	"TPI Local Transport (tl)",	/* description string */
742 	&tl_devops,		/* driver ops */
743 };
744 
745 /*
746  * Module linkage information for the kernel.
747  */
748 static struct modlinkage modlinkage = {
749 	MODREV_1,		/* linkage revision */
750 	&modldrv,		/* the driver linkage defined above */
751 	NULL			/* end of the linkage list */
752 };
753 
754 /*
755  * Templates for response to info request
756  * Check sanity of unlimited connect data etc.
757  */
758 
759 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
760 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
761 
762 static struct T_info_ack tl_cots_info_ack =
763 	{
764 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
765 		T_INFINITE,	/* TSDU_size */
766 		T_INFINITE,	/* ETSDU_size */
767 		T_INFINITE,	/* CDATA_size */
768 		T_INFINITE,	/* DDATA_size */
769 		T_INFINITE,	/* ADDR_size  */
770 		T_INFINITE,	/* OPT_size */
771 		0,		/* TIDU_size - fill at run time */
772 		T_COTS,		/* SERV_type */
773 		-1,		/* CURRENT_state - presumably set at run time */
774 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
775 	};
776 
777 static struct T_info_ack tl_clts_info_ack =
778 	{
779 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
780 		0,		/* TSDU_size - fill at run time */
781 		T_INVALID,	/* ETSDU_size - not supported */
782 		T_INVALID,	/* CDATA_size - not supported */
783 		T_INVALID,	/* DDATA_size - not supported */
784 		T_INFINITE,	/* ADDR_size - infinite */
785 		T_INFINITE,	/* OPT_size - infinite */
786 		0,		/* TIDU_size - fill at run time */
787 		T_CLTS,		/* SERV_type */
788 		-1,		/* CURRENT_state - presumably set at run time */
789 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
790 	};
791 
792 /*
793  * private copy of devinfo pointer used in tl_info
794  */
795 static dev_info_t *tl_dip;
796 
797 /*
798  * Endpoints cache (kmem cache).
799  */
800 static kmem_cache_t *tl_cache;
801 /*
802  * Minor number space.
803  */
804 static id_space_t *tl_minors;
805 
806 /*
807  * Default Data Unit size (see also TL_TIDUSZ).
808  */
809 static t_scalar_t tl_tidusz;
810 
811 /*
812  * Size of hash tables; starts out as TL_HASH_SIZE.
813  */
814 static size_t tl_hash_size = TL_HASH_SIZE;
815 
816 /*
817  * Debug and test variable ONLY. Turn off T_CONN_IND queueing
818  * for sockets.
819  */
820 static int tl_disable_early_connect = 0;
821 static int tl_client_closing_when_accepting; /* TODO confirm: debug counter? */
822 
823 static int tl_serializer_noswitch;	/* TODO confirm: debug counter? */
824 
825 /*
826  * LOCAL FUNCTION PROTOTYPES
827  * -------------------------
828  */
829 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
830 static void tl_do_proto(mblk_t *, tl_endpt_t *);
831 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
832 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
833 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
834 	t_scalar_t);
835 static void tl_bind(mblk_t *, tl_endpt_t *);
836 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
837 static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
838 static void tl_unbind(mblk_t *, tl_endpt_t *);
839 static void tl_optmgmt(queue_t *, mblk_t *);
840 static void tl_conn_req(queue_t *, mblk_t *);
841 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
842 static void tl_conn_res(mblk_t *, tl_endpt_t *);
843 static void tl_discon_req(mblk_t *, tl_endpt_t *);
844 static void tl_capability_req(mblk_t *, tl_endpt_t *);
845 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
846 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
847 static void tl_info_req(mblk_t *, tl_endpt_t *);
848 static void tl_addr_req(mblk_t *, tl_endpt_t *);
849 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
850 static void tl_data(mblk_t  *, tl_endpt_t *);
851 static void tl_exdata(mblk_t *, tl_endpt_t *);
852 static void tl_ordrel(mblk_t *, tl_endpt_t *);
853 static void tl_unitdata(mblk_t *, tl_endpt_t *);
854 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
855 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
856 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
857 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
858 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
859 static void tl_cl_backenable(tl_endpt_t *);
860 static void tl_co_unconnect(tl_endpt_t *);
861 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
862 static void tl_discon_ind(tl_endpt_t *, uint32_t);
863 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
864 static mblk_t *tl_ordrel_ind_alloc(void);
865 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
866 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
867 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
868 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
869 static void tl_icon_freemsgs(mblk_t **);
870 static void tl_merror(queue_t *, mblk_t *, int);
871 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
872 static int tl_default_opt(queue_t *, int, int, uchar_t *);
873 static int tl_get_opt(queue_t *, int, int, uchar_t *);
874 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
875     uchar_t *, void *, cred_t *);
876 static void tl_memrecover(queue_t *, mblk_t *, size_t);
877 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
878 static void tl_free(tl_endpt_t *);
879 static int  tl_constructor(void *, void *, int);
880 static void tl_destructor(void *, void *);
881 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
882 static tl_serializer_t *tl_serializer_alloc(int);
883 static void tl_serializer_refhold(tl_serializer_t *);
884 static void tl_serializer_refrele(tl_serializer_t *);
885 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
886 static void tl_serializer_exit(tl_endpt_t *);
887 static boolean_t tl_noclose(tl_endpt_t *);
888 static void tl_closeok(tl_endpt_t *);
889 static void tl_refhold(tl_endpt_t *);
890 static void tl_refrele(tl_endpt_t *);
891 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
892 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
893 static void tl_close_ser(mblk_t *, tl_endpt_t *);
894 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
895 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
896 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
897 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
898 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
899 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
900 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
901 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
902 static void tl_addr_unbind(tl_endpt_t *);
903 
904 /*
905  * Initialize option database object for TL
 *
 * Bundles the option callbacks defined in this file (tl_default_opt,
 * tl_get_opt, tl_set_opt) with the option and level tables.  The object has
 * external linkage, so it may be referenced from outside this file.
906  */
907 
908 optdb_obj_t tl_opt_obj = {
909 	tl_default_opt,		/* TL default value function pointer */
910 	tl_get_opt,		/* TL get function pointer */
911 	tl_set_opt,		/* TL set function pointer */
912 	TL_OPT_ARR_CNT,		/* TL option database count of entries */
913 	tl_opt_arr,		/* TL option database */
914 	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
915 	tl_valid_levels_arr	/* TL valid level array */
916 };
917 
918 /*
919  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
920  * ---------------------------------------
921  */
922 
923 /*
924  * Loadable module routines
925  */
926 int
927 _init(void)
928 {
929 	return (mod_install(&modlinkage));
930 }
931 
932 int
933 _fini(void)
934 {
935 	return (mod_remove(&modlinkage));
936 }
937 
938 int
939 _info(struct modinfo *modinfop)
940 {
941 	return (mod_info(&modlinkage, modinfop));
942 }
943 
944 /*
945  * Driver Entry Points and Other routines
946  */
/*
 * Attach entry point.
 *
 * DDI_RESUME succeeds without doing any work and any command other than
 * DDI_ATTACH fails.  For DDI_ATTACH the routine:
 *   - picks the TIDU size,
 *   - creates a minor node for each of the first TL_UNUSED entries of
 *     tl_transports[],
 *   - creates the endpoint kmem cache and the minor-number ID space,
 *   - creates the acceptor-ID and address hash tables of every transport
 *     and a serializer for each connectionless transport,
 *   - records devi in tl_dip for tl_info().
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
947 static int
948 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
949 {
950 	int i;
951 	char name[32];
952 
953 	/*
954 	 * Resume from a checkpoint state.
955 	 */
956 	if (cmd == DDI_RESUME)
957 		return (DDI_SUCCESS);
958 
959 	if (cmd != DDI_ATTACH)
960 		return (DDI_FAILURE);
961 
962 	/*
963 	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
964 	 * streams message sizes can be unlimited. We use a defined constant
965 	 * instead.
966 	 */
967 	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
968 
969 	/*
970 	 * Create subdevices for each transport.
	 * On failure every minor node created so far is removed again.
971 	 */
972 	for (i = 0; i < TL_UNUSED; i++) {
973 		if (ddi_create_minor_node(devi,
974 		    tl_transports[i].tr_name,
975 		    S_IFCHR, tl_transports[i].tr_minor,
976 		    DDI_PSEUDO, NULL) == DDI_FAILURE) {
977 			ddi_remove_minor_node(devi, NULL);
978 			return (DDI_FAILURE);
979 		}
980 	}
981 
982 	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
983 	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
984 
985 	if (tl_cache == NULL) {
986 		ddi_remove_minor_node(devi, NULL);
987 		return (DDI_FAILURE);
988 	}
989 
	/*
	 * Create ID space for minor numbers
	 */
990 	tl_minors = id_space_create("tl_minor_space",
991 	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
992 
993 	/*
994 	 * Set up hash tables and serializers for every transport
995 	 */
996 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
997 		tl_transport_state_t *t = &tl_transports[i];
998 
999 		if (i == TL_UNUSED)
1000 			continue;
1001 
1002 		/* Socket COTSORD shares namespace with COTS */
		/*
		 * The ASSERTs below rely on the TL_SOCK_COTS tables having
		 * been created by an earlier iteration of this loop.
		 */
1003 		if (i == TL_SOCK_COTSORD) {
1004 			t->tr_ai_hash =
1005 			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
1006 			ASSERT(t->tr_ai_hash != NULL);
1007 			t->tr_addr_hash =
1008 			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
1009 			ASSERT(t->tr_addr_hash != NULL);
1010 			continue;
1011 		}
1012 
1013 		/*
1014 		 * Create hash tables.
		 *
		 * The acceptor-ID hash is an ID hash, except that on ILP32
		 * non-socket transports use a pointer hash; this matches
		 * tl_open(), which uses the read queue pointer as acceptor
		 * ID in that configuration.
1015 		 */
1016 		(void) snprintf(name, sizeof (name), "%s_ai_hash",
1017 		    t->tr_name);
1018 #ifdef _ILP32
1019 		if (i & TL_SOCKET)
1020 			t->tr_ai_hash =
1021 			    mod_hash_create_idhash(name, tl_hash_size - 1,
1022 			    mod_hash_null_valdtor);
1023 		else
1024 			t->tr_ai_hash =
1025 			    mod_hash_create_ptrhash(name, tl_hash_size,
1026 			    mod_hash_null_valdtor, sizeof (queue_t));
1027 #else
1028 		t->tr_ai_hash =
1029 		    mod_hash_create_idhash(name, tl_hash_size - 1,
1030 		    mod_hash_null_valdtor);
1031 #endif /* _ILP32 */
1032 
		/*
		 * Address hash: pointer hash for socket transports, extended
		 * hash keyed by tl_addr_t (tl_hash_by_addr/tl_hash_cmp_addr)
		 * for the others.
		 */
1033 		if (i & TL_SOCKET) {
1034 			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1035 			    t->tr_name);
1036 			t->tr_addr_hash = mod_hash_create_ptrhash(name,
1037 			    tl_hash_size, mod_hash_null_valdtor,
1038 			    sizeof (uintptr_t));
1039 		} else {
1040 			(void) snprintf(name, sizeof (name), "%s_addr_hash",
1041 			    t->tr_name);
1042 			t->tr_addr_hash = mod_hash_create_extended(name,
1043 			    tl_hash_size, mod_hash_null_keydtor,
1044 			    mod_hash_null_valdtor,
1045 			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1046 		}
1047 
1048 		/* Create serializer for connectionless transports. */
1049 		if (i & TL_TICLTS)
1050 			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1051 	}
1052 
1053 	tl_dip = devi;
1054 
1055 	return (DDI_SUCCESS);
1056 }
1057 
1058 static int
1059 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1060 {
1061 	int i;
1062 
1063 	if (cmd == DDI_SUSPEND)
1064 		return (DDI_SUCCESS);
1065 
1066 	if (cmd != DDI_DETACH)
1067 		return (DDI_FAILURE);
1068 
1069 	/*
1070 	 * Destroy arenas and hash tables.
1071 	 */
1072 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1073 		tl_transport_state_t *t = &tl_transports[i];
1074 
1075 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1076 			continue;
1077 
1078 		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1079 		if (t->tr_serializer != NULL) {
1080 			tl_serializer_refrele(t->tr_serializer);
1081 			t->tr_serializer = NULL;
1082 		}
1083 
1084 #ifdef _ILP32
1085 		if (i & TL_SOCKET)
1086 			mod_hash_destroy_idhash(t->tr_ai_hash);
1087 		else
1088 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1089 #else
1090 		mod_hash_destroy_idhash(t->tr_ai_hash);
1091 #endif /* _ILP32 */
1092 		t->tr_ai_hash = NULL;
1093 		if (i & TL_SOCKET)
1094 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1095 		else
1096 			mod_hash_destroy_hash(t->tr_addr_hash);
1097 		t->tr_addr_hash = NULL;
1098 	}
1099 
1100 	kmem_cache_destroy(tl_cache);
1101 	tl_cache = NULL;
1102 	id_space_destroy(tl_minors);
1103 	tl_minors = NULL;
1104 	ddi_remove_minor_node(devi, NULL);
1105 	return (DDI_SUCCESS);
1106 }
1107 
1108 /* ARGSUSED */
1109 static int
1110 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1111 {
1112 
1113 	int retcode = DDI_FAILURE;
1114 
1115 	switch (infocmd) {
1116 
1117 	case DDI_INFO_DEVT2DEVINFO:
1118 		if (tl_dip != NULL) {
1119 			*result = (void *)tl_dip;
1120 			retcode = DDI_SUCCESS;
1121 		}
1122 		break;
1123 
1124 	case DDI_INFO_DEVT2INSTANCE:
1125 		*result = (void *)0;
1126 		retcode = DDI_SUCCESS;
1127 		break;
1128 
1129 	default:
1130 		break;
1131 	}
1132 	return (retcode);
1133 }
1134 
1135 /*
1136  * Endpoint reference management.
1137  */
1138 static void
1139 tl_refhold(tl_endpt_t *tep)
1140 {
1141 	atomic_inc_32(&tep->te_refcnt);
1142 }
1143 
1144 static void
1145 tl_refrele(tl_endpt_t *tep)
1146 {
1147 	ASSERT(tep->te_refcnt != 0);
1148 
1149 	if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
1150 		tl_free(tep);
1151 }
1152 
1153 /*ARGSUSED*/
1154 static int
1155 tl_constructor(void *buf, void *cdrarg, int kmflags)
1156 {
1157 	tl_endpt_t *tep = buf;
1158 
1159 	bzero(tep, sizeof (tl_endpt_t));
1160 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1161 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1162 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1163 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1164 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1165 
1166 	return (0);
1167 }
1168 
1169 /*ARGSUSED*/
1170 static void
1171 tl_destructor(void *buf, void *cdrarg)
1172 {
1173 	tl_endpt_t *tep = buf;
1174 
1175 	mutex_destroy(&tep->te_closelock);
1176 	cv_destroy(&tep->te_closecv);
1177 	mutex_destroy(&tep->te_srv_lock);
1178 	cv_destroy(&tep->te_srv_cv);
1179 	mutex_destroy(&tep->te_ser_lock);
1180 }
1181 
/*
 * Final teardown of an endpoint; called by tl_refrele() when the last
 * reference is dropped.
 *
 * The ASSERTs document the state the endpoint must already be in: no
 * references, both queue pointers cleared, no pending serializer requests,
 * no address-hash membership, credentials released, and no outstanding
 * bufcall or timeout.  The routine then frees the address buffer of a
 * non-socket endpoint, gives the minor number back to tl_minors, cancels a
 * hash reservation that is still held, releases the private serializer of a
 * COTS endpoint, resets a handful of fields and hands the endpoint back to
 * tl_cache.
 */
1182 static void
1183 tl_free(tl_endpt_t *tep)
1184 {
1185 	ASSERT(tep->te_refcnt == 0);
1186 	ASSERT(tep->te_transport != NULL);
1187 	ASSERT(tep->te_rq == NULL);
1188 	ASSERT(tep->te_wq == NULL);
1189 	ASSERT(tep->te_ser != NULL);
1190 	ASSERT(tep->te_ser_count == 0);
1191 	ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1192 
	/*
	 * Socket endpoints keep their address inside the endpoint itself
	 * (see tl_open()), so only non-socket endpoints have a buffer to
	 * free here.
	 */
1193 	if (IS_SOCKET(tep)) {
1194 		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1195 		ASSERT(tep->te_abuf == &tep->te_uxaddr);
1196 		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1197 		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1198 	} else if (tep->te_abuf != NULL) {
1199 		kmem_free(tep->te_abuf, tep->te_alen);
1200 		tep->te_alen = -1; /* uninitialized */
1201 		tep->te_abuf = NULL;
1202 	} else {
1203 		ASSERT(tep->te_alen == -1);
1204 	}
1205 
1206 	id_free(tl_minors, tep->te_minor);
1207 	ASSERT(tep->te_credp == NULL);
1208 
	/* Give back the hash handle reserved in tl_open() if still held. */
1209 	if (tep->te_hash_hndl != NULL)
1210 		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1211 
	/*
	 * COTS endpoints own a private serializer (allocated in tl_open())
	 * and may still point at peers.  CLTS endpoints use the transport's
	 * shared serializer, which is not released here.
	 */
1212 	if (IS_COTS(tep)) {
1213 		TL_REMOVE_PEER(tep->te_conp);
1214 		TL_REMOVE_PEER(tep->te_oconp);
1215 		tl_serializer_refrele(tep->te_ser);
1216 		tep->te_ser = NULL;
1217 		ASSERT(tep->te_nicon == 0);
1218 		ASSERT(list_head(&tep->te_iconp) == NULL);
1219 	} else {
1220 		ASSERT(tep->te_lastep == NULL);
1221 		ASSERT(list_head(&tep->te_flowlist) == NULL);
1222 		ASSERT(tep->te_flowq == NULL);
1223 	}
1224 
1225 	ASSERT(tep->te_bufcid == 0);
1226 	ASSERT(tep->te_timoutid == 0);
1227 	bzero(&tep->te_ap, sizeof (tep->te_ap));
1228 	tep->te_acceptor_id = 0;
1229 
1230 	ASSERT(tep->te_closewait == 0);
1231 	ASSERT(!tep->te_rsrv_active);
1232 	ASSERT(!tep->te_wsrv_active);
	/* Reset state flags before the object goes back to the cache. */
1233 	tep->te_closing = 0;
1234 	tep->te_nowsrv = B_FALSE;
1235 	tep->te_flag = 0;
1236 
1237 	kmem_cache_free(tl_cache, tep);
1238 }
1239 
1240 /*
1241  * Allocate/free reference-counted wrappers for serializers.
1242  */
1243 static tl_serializer_t *
1244 tl_serializer_alloc(int flags)
1245 {
1246 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1247 	serializer_t *ser;
1248 
1249 	if (s == NULL)
1250 		return (NULL);
1251 
1252 	ser = serializer_create(flags);
1253 
1254 	if (ser == NULL) {
1255 		kmem_free(s, sizeof (tl_serializer_t));
1256 		return (NULL);
1257 	}
1258 
1259 	s->ts_refcnt = 1;
1260 	s->ts_serializer = ser;
1261 	return (s);
1262 }
1263 
1264 static void
1265 tl_serializer_refhold(tl_serializer_t *s)
1266 {
1267 	atomic_inc_32(&s->ts_refcnt);
1268 }
1269 
1270 static void
1271 tl_serializer_refrele(tl_serializer_t *s)
1272 {
1273 	if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
1274 		serializer_destroy(s->ts_serializer);
1275 		kmem_free(s, sizeof (tl_serializer_t));
1276 	}
1277 }
1278 
1279 /*
1280  * Post a request on the endpoint serializer. For COTS transports keep track of
1281  * the number of pending requests.
1282  */
1283 static void
1284 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1285 {
1286 	if (IS_COTS(tep)) {
1287 		mutex_enter(&tep->te_ser_lock);
1288 		tep->te_ser_count++;
1289 		mutex_exit(&tep->te_ser_lock);
1290 	}
1291 	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1292 }
1293 
1294 /*
1295  * Complete processing the request on the serializer. Decrement the counter for
1296  * pending requests for COTS transports.
1297  */
1298 static void
1299 tl_serializer_exit(tl_endpt_t *tep)
1300 {
1301 	if (IS_COTS(tep)) {
1302 		mutex_enter(&tep->te_ser_lock);
1303 		ASSERT(tep->te_ser_count != 0);
1304 		tep->te_ser_count--;
1305 		mutex_exit(&tep->te_ser_lock);
1306 	}
1307 }
1308 
1309 /*
1310  * Hash management functions.
1311  */
1312 
1313 /*
1314  * Return TRUE if two addresses are equal, false otherwise.
1315  */
1316 static boolean_t
1317 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1318 {
1319 	return ((ap1->ta_alen > 0) &&
1320 	    (ap1->ta_alen == ap2->ta_alen) &&
1321 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1322 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1323 }
1324 
1325 /*
1326  * This function is called whenever an endpoint is found in the hash table.
1327  */
1328 /* ARGSUSED0 */
1329 static void
1330 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1331 {
1332 	tl_refhold((tl_endpt_t *)val);
1333 }
1334 
1335 /*
1336  * Address hash function.
1337  */
1338 /* ARGSUSED */
1339 static uint_t
1340 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1341 {
1342 	tl_addr_t *ap = (tl_addr_t *)key;
1343 	size_t	len = ap->ta_alen;
1344 	uchar_t *p = ap->ta_abuf;
1345 	uint_t i, g;
1346 
1347 	ASSERT((len > 0) && (p != NULL));
1348 
1349 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1350 		i = (i << 4) + (*p);
1351 		if ((g = (i & 0xf0000000U)) != 0) {
1352 			i ^= (g >> 24);
1353 			i ^= g;
1354 		}
1355 	}
1356 	return (i);
1357 }
1358 
1359 /*
1360  * This function is used by hash lookups. It compares two generic addresses.
1361  */
1362 static int
1363 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1364 {
1365 #ifdef 	DEBUG
1366 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1367 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1368 
1369 	ASSERT(key1 != NULL);
1370 	ASSERT(key2 != NULL);
1371 
1372 	ASSERT(ap1->ta_abuf != NULL);
1373 	ASSERT(ap2->ta_abuf != NULL);
1374 	ASSERT(ap1->ta_alen > 0);
1375 	ASSERT(ap2->ta_alen > 0);
1376 #endif
1377 
1378 	return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1379 }
1380 
1381 /*
1382  * Prevent endpoint from closing if possible.
1383  * Return B_TRUE on success, B_FALSE on failure.
1384  */
1385 static boolean_t
1386 tl_noclose(tl_endpt_t *tep)
1387 {
1388 	boolean_t rc = B_FALSE;
1389 
1390 	mutex_enter(&tep->te_closelock);
1391 	if (! tep->te_closing) {
1392 		ASSERT(tep->te_closewait == 0);
1393 		tep->te_closewait++;
1394 		rc = B_TRUE;
1395 	}
1396 	mutex_exit(&tep->te_closelock);
1397 	return (rc);
1398 }
1399 
1400 /*
1401  * Allow endpoint to close if needed.
1402  */
1403 static void
1404 tl_closeok(tl_endpt_t *tep)
1405 {
1406 	ASSERT(tep->te_closewait > 0);
1407 	mutex_enter(&tep->te_closelock);
1408 	ASSERT(tep->te_closewait == 1);
1409 	tep->te_closewait--;
1410 	cv_signal(&tep->te_closecv);
1411 	mutex_exit(&tep->te_closelock);
1412 }
1413 
1414 /*
1415  * STREAMS open entry point.
 *
 * The minor number of *devp selects the transport; SO_SOCKSTR in oflag
 * additionally selects socket mode.  A new endpoint is allocated from
 * tl_cache, given a unique minor number (returned to the caller through
 * *devp) and entered into the transport's acceptor-ID hash.
 *
 * Returns 0 on success (including re-open of an already set up queue) and
 * ENXIO for clone/module opens or an out-of-range minor number.
1416  */
1417 /* ARGSUSED */
1418 static int
1419 tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
1420 {
1421 	tl_endpt_t *tep;
1422 	minor_t	    minor = getminor(*devp);
1423 
1424 	/*
1425 	 * Driver is called directly. Both CLONEOPEN and MODOPEN
1426 	 * are illegal
1427 	 */
1428 	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1429 		return (ENXIO);
1430 
	/* Queue already has an endpoint attached: nothing more to do. */
1431 	if (rq->q_ptr != NULL)
1432 		return (0);
1433 
1434 	/* Minor number should specify the mode used for the driver. */
1435 	if ((minor >= TL_UNUSED))
1436 		return (ENXIO);
1437 
	/* From here on "minor" is the index into tl_transports[]. */
1438 	if (oflag & SO_SOCKSTR) {
1439 		minor |= TL_SOCKET;
1440 	}
1441 
	/* The creator holds the initial reference; tl_close() drops it. */
1442 	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1443 	tep->te_refcnt = 1;
1444 	tep->te_cpid = curproc->p_pid;
1445 	rq->q_ptr = WR(rq)->q_ptr = tep;
1446 	tep->te_state = TS_UNBND;
1447 	tep->te_credp = credp;
1448 	crhold(credp);
1449 	tep->te_zoneid = getzoneid();
1450 
1451 	tep->te_flag = minor & TL_MINOR_MASK;
1452 	tep->te_transport = &tl_transports[minor];
1453 
1454 	/* Allocate a unique minor number for this instance. */
1455 	tep->te_minor = (minor_t)id_alloc(tl_minors);
1456 
1457 	/* Reserve hash handle for bind(). */
1458 	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1459 
1460 	/* Transport-specific initialization */
1461 	if (IS_COTS(tep)) {
1462 		/* Use private serializer */
1463 		tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1464 
1465 		/* Create list for pending connections */
1466 		list_create(&tep->te_iconp, sizeof (tl_icon_t),
1467 		    offsetof(tl_icon_t, ti_node));
1468 		tep->te_qlen = 0;
1469 		tep->te_nicon = 0;
1470 		tep->te_oconp = NULL;
1471 		tep->te_conp = NULL;
1472 	} else {
1473 		/* Use shared serializer */
1474 		tep->te_ser = tep->te_transport->tr_serializer;
1475 		bzero(&tep->te_flows, sizeof (list_node_t));
1476 		/* Create list for flow control */
1477 		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1478 		    offsetof(tl_endpt_t, te_flows));
1479 		tep->te_flowq = NULL;
1480 		tep->te_lastep = NULL;
1481 
1482 	}
1483 
1484 	/* Initialize endpoint address */
1485 	if (IS_SOCKET(tep)) {
1486 		/* Socket-specific address handling. */
		/* The address lives inside the endpoint (te_uxaddr). */
1487 		tep->te_alen = TL_SOUX_ADDRLEN;
1488 		tep->te_abuf = &tep->te_uxaddr;
1489 		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1490 		tep->te_magic = SOU_MAGIC_IMPLICIT;
1491 	} else {
		/* No address yet; -1 marks the length as uninitialized. */
1492 		tep->te_alen = -1;
1493 		tep->te_abuf = NULL;
1494 	}
1495 
1496 	/* clone the driver */
1497 	*devp = makedevice(getmajor(*devp), tep->te_minor);
1498 
1499 	tep->te_rq = rq;
1500 	tep->te_wq = WR(rq);
1501 
	/*
	 * Pick the acceptor ID: the minor number, except that on ILP32
	 * non-socket endpoints use the read queue pointer instead.
	 */
1502 #ifdef	_ILP32
1503 	if (IS_SOCKET(tep))
1504 		tep->te_acceptor_id = tep->te_minor;
1505 	else
1506 		tep->te_acceptor_id = (t_uscalar_t)rq;
1507 #else
1508 	tep->te_acceptor_id = tep->te_minor;
1509 #endif	/* _ILP32 */
1510 
1511 
1512 	qprocson(rq);
1513 
1514 	/*
1515 	 * Insert acceptor ID in the hash. The AI hash always sleeps on
1516 	 * insertion so insertion can't fail.
1517 	 */
1518 	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1519 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1520 	    (mod_hash_val_t)tep);
1521 
1522 	return (0);
1523 }
1524 
/*
 * STREAMS close entry point.
 *
 * Close runs in stages:
 *   1. remove the endpoint from the acceptor-ID hash;
 *   2. wait until nobody holds off close (te_closewait), then mark the
 *      endpoint as closing;
 *   3. run tl_close_ser() behind the serializer and wait for it;
 *   4. qprocsoff() and cancel any outstanding bufcall/timeout;
 *   5. run tl_close_finish_ser() behind the serializer - COTS endpoints
 *      wait for it, CLTS endpoints give it its own reference instead;
 *   6. release the credentials and drop the reference taken in tl_open().
 *
 * Always returns 0.
 */
1525 /* ARGSUSED1 */
1526 static int
1527 tl_close(queue_t *rq, int flag,	cred_t *credp)
1528 {
1529 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1530 	tl_endpt_t *elp = NULL;
1531 	queue_t *wq = tep->te_wq;
1532 	int rc;
1533 
1534 	ASSERT(wq == WR(rq));
1535 
1536 	/*
1537 	 * Remove the endpoint from acceptor hash.
1538 	 */
1539 	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1540 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1541 	    (mod_hash_val_t *)&elp);
1542 	ASSERT(rc == 0 && tep == elp);
	/* Non-DEBUG kernels only log the inconsistency and carry on. */
1543 	if ((rc != 0) || (tep != elp)) {
1544 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
1545 		    SL_TRACE|SL_ERROR,
1546 		    "tl_close:inconsistency in AI hash"));
1547 	}
1548 
1549 	/*
1550 	 * Wait till close is safe, then mark endpoint as closing.
1551 	 */
1552 	mutex_enter(&tep->te_closelock);
1553 	while (tep->te_closewait)
1554 		cv_wait(&tep->te_closecv, &tep->te_closelock);
1555 	tep->te_closing = B_TRUE;
1556 	/*
1557 	 * Will wait for the serializer part of the close to finish, so set
1558 	 * te_closewait now.
1559 	 */
1560 	tep->te_closewait = 1;
1561 	tep->te_nowsrv = B_FALSE;
1562 	mutex_exit(&tep->te_closelock);
1563 
1564 	/*
1565 	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1566 	 * It is safe because close will wait for tl_close_ser to finish.
1567 	 */
1568 	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1569 
1570 	/*
1571 	 * Wait for the first phase of close to complete before qprocsoff().
	 * tl_close_ser() clears te_closewait through tl_closeok().
1572 	 */
1573 	mutex_enter(&tep->te_closelock);
1574 	while (tep->te_closewait)
1575 		cv_wait(&tep->te_closecv, &tep->te_closelock);
1576 	mutex_exit(&tep->te_closelock);
1577 
1578 	qprocsoff(rq);
1579 
	/* Cancel an outstanding bufcall and timeout, if any. */
1580 	if (tep->te_bufcid) {
1581 		qunbufcall(rq, tep->te_bufcid);
1582 		tep->te_bufcid = 0;
1583 	}
1584 	if (tep->te_timoutid) {
1585 		(void) quntimeout(rq, tep->te_timoutid);
1586 		tep->te_timoutid = 0;
1587 	}
1588 
1589 	/*
1590 	 * Finish close behind serializer.
1591 	 *
1592 	 * For a CLTS endpoint increase a refcount and continue close processing
1593 	 * with serializer protection. This processing may happen asynchronously
1594 	 * with the completion of tl_close().
1595 	 *
1596 	 * For a COTS endpoint wait before destroying tep since the serializer
1597 	 * may go away together with tep and we need to destroy serializer
1598 	 * outside of serializer context.
1599 	 */
1600 	ASSERT(tep->te_closewait == 0);
1601 	if (IS_COTS(tep))
1602 		tep->te_closewait = 1;
1603 	else
1604 		tl_refhold(tep);
1605 
1606 	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1607 
1608 	/*
1609 	 * For connection-oriented transports wait for all serializer activity
1610 	 * to settle down.
1611 	 */
1612 	if (IS_COTS(tep)) {
1613 		mutex_enter(&tep->te_closelock);
1614 		while (tep->te_closewait)
1615 			cv_wait(&tep->te_closecv, &tep->te_closelock);
1616 		mutex_exit(&tep->te_closelock);
1617 	}
1618 
	/* Release the credentials held since tl_open(). */
1619 	crfree(tep->te_credp);
1620 	tep->te_credp = NULL;
1621 	tep->te_wq = NULL;
1622 	tl_refrele(tep);
1623 	/*
1624 	 * tep is likely to be destroyed now, so can't reference it any more.
1625 	 */
1626 
1627 	rq->q_ptr = wq->q_ptr = NULL;
1628 	return (0);
1629 }
1630 
1631 /*
1632  * First phase of close processing done behind the serializer.
1633  *
1634  * Do not drop the reference in the end - tl_close() wants this reference to
1635  * stay.
 *
 * mp is not used here; tl_close() passes the endpoint's te_closemp.
1636  */
1637 /* ARGSUSED0 */
1638 static void
1639 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1640 {
1641 	ASSERT(tep->te_closing);
1642 	ASSERT(tep->te_closewait == 1);
1643 	ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1644 
	/* Record that this phase has run; the ASSERT above checks for reruns. */
1645 	tep->te_flag |= TL_CLOSE_SER;
1646 
1647 	/*
1648 	 * Drain out all messages on queue except for TL_TICOTS where the
1649 	 * abortive release semantics permit discarding of data on close
1650 	 */
1651 	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1652 		tl_wsrv_ser(NULL, tep);
1653 	}
1654 
1655 	/* Remove address from hash table. */
1656 	tl_addr_unbind(tep);
1657 	/*
1658 	 * qprocsoff() gets confused when q->q_next is not NULL on the write
1659 	 * queue of the driver, so clear these before qprocsoff() is called.
1660 	 * Also clear q_next for the peer since this queue is going away.
1661 	 */
1662 	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1663 		tl_endpt_t *peer_tep = tep->te_conp;
1664 
1665 		tep->te_wq->q_next = NULL;
1666 		if ((peer_tep != NULL) && !peer_tep->te_closing)
1667 			peer_tep->te_wq->q_next = NULL;
1668 	}
1669 
	/* The read queue must not be used through tep from here on. */
1670 	tep->te_rq = NULL;
1671 
1672 	/* wake up tl_close() */
1673 	tl_closeok(tep);
1674 	tl_serializer_exit(tep);
1675 }
1676 
1677 /*
1678  * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1679  * the reference for CLTS.
1680  *
1681  * Called from serializer. Should drop reference count for CLTS only.
 *
 * mp is not used here; tl_close() passes the endpoint's te_closemp.
1682  */
1683 /* ARGSUSED0 */
1684 static void
1685 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1686 {
1687 	ASSERT(tep->te_closing);
	/* tl_close() sets te_closewait only for COTS before this phase. */
1688 	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1689 	IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1690 
1691 	tep->te_state = -1;	/* Uninitialized */
1692 	if (IS_COTS(tep)) {
1693 		tl_co_unconnect(tep);
1694 	} else {
1695 		/* Connectionless specific cleanup */
1696 		TL_REMOVE_PEER(tep->te_lastep);
1697 		/*
1698 		 * Backenable anybody that is flow controlled waiting for
1699 		 * this endpoint.
1700 		 */
1701 		tl_cl_backenable(tep);
		/* Leave the flow-control list of the endpoint we wait on. */
1702 		if (tep->te_flowq != NULL) {
1703 			list_remove(&(tep->te_flowq->te_flowlist), tep);
1704 			tep->te_flowq = NULL;
1705 		}
1706 	}
1707 
1708 	tl_serializer_exit(tep);
	/*
	 * COTS: tl_close() is waiting on te_closewait, wake it up.
	 * CLTS: drop the reference tl_close() took for this phase; tep may
	 * be freed by this call.
	 */
1709 	if (IS_COTS(tep))
1710 		tl_closeok(tep);
1711 	else
1712 		tl_refrele(tep);
1713 }
1714 
1715 /*
1716  * STREAMS write-side put procedure.
1717  * Enter serializer for most of the processing.
1718  *
1719  * The T_CONN_REQ is processed outside of serializer.
1720  */
1721 static void
1722 tl_wput(queue_t *wq, mblk_t *mp)
1723 {
1724 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
1725 	ssize_t			msz = MBLKL(mp);
1726 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
1727 	tlproc_t		*tl_proc = NULL;
1728 
1729 	switch (DB_TYPE(mp)) {
1730 	case M_DATA:
1731 		/* Only valid for connection-oriented transports */
1732 		if (IS_CLTS(tep)) {
1733 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1734 			    SL_TRACE|SL_ERROR,
1735 			    "tl_wput:M_DATA invalid for ticlts driver"));
1736 			tl_merror(wq, mp, EPROTO);
1737 			return;
1738 		}
1739 		tl_proc = tl_wput_data_ser;
1740 		break;
1741 
1742 	case M_IOCTL:
1743 		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1744 		case TL_IOC_CREDOPT:
1745 			/* FALLTHROUGH */
1746 		case TL_IOC_UCREDOPT:
1747 			/*
1748 			 * Serialize endpoint state change.
1749 			 */
1750 			tl_proc = tl_do_ioctl_ser;
1751 			break;
1752 
1753 		default:
1754 			miocnak(wq, mp, 0, EINVAL);
1755 			return;
1756 		}
1757 		break;
1758 
1759 	case M_FLUSH:
1760 		/*
1761 		 * do canonical M_FLUSH processing
1762 		 */
1763 		if (*mp->b_rptr & FLUSHW) {
1764 			flushq(wq, FLUSHALL);
1765 			*mp->b_rptr &= ~FLUSHW;
1766 		}
1767 		if (*mp->b_rptr & FLUSHR) {
1768 			flushq(RD(wq), FLUSHALL);
1769 			qreply(wq, mp);
1770 		} else {
1771 			freemsg(mp);
1772 		}
1773 		return;
1774 
1775 	case M_PROTO:
1776 		if (msz < sizeof (prim->type)) {
1777 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1778 			    SL_TRACE|SL_ERROR,
1779 			    "tl_wput:M_PROTO data too short"));
1780 			tl_merror(wq, mp, EPROTO);
1781 			return;
1782 		}
1783 		switch (prim->type) {
1784 		case T_OPTMGMT_REQ:
1785 		case T_SVR4_OPTMGMT_REQ:
1786 			/*
1787 			 * Process TPI option management requests immediately
1788 			 * in put procedure regardless of in-order processing
1789 			 * of already queued messages.
1790 			 * (Note: This driver supports AF_UNIX socket
1791 			 * implementation.  Unless we implement this processing,
1792 			 * setsockopt() on socket endpoint will block on flow
1793 			 * controlled endpoints which it should not. That is
1794 			 * required for successful execution of VSU socket tests
1795 			 * and is consistent with BSD socket behavior).
1796 			 */
1797 			tl_optmgmt(wq, mp);
1798 			return;
1799 		case O_T_BIND_REQ:
1800 		case T_BIND_REQ:
1801 			tl_proc = tl_bind_ser;
1802 			break;
1803 		case T_CONN_REQ:
1804 			if (IS_CLTS(tep)) {
1805 				tl_merror(wq, mp, EPROTO);
1806 				return;
1807 			}
1808 			tl_conn_req(wq, mp);
1809 			return;
1810 		case T_DATA_REQ:
1811 		case T_OPTDATA_REQ:
1812 		case T_EXDATA_REQ:
1813 		case T_ORDREL_REQ:
1814 			tl_proc = tl_putq_ser;
1815 			break;
1816 		case T_UNITDATA_REQ:
1817 			if (IS_COTS(tep) ||
1818 			    (msz < sizeof (struct T_unitdata_req))) {
1819 				tl_merror(wq, mp, EPROTO);
1820 				return;
1821 			}
1822 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1823 				tl_proc = tl_unitdata_ser;
1824 			} else {
1825 				tl_proc = tl_putq_ser;
1826 			}
1827 			break;
1828 		default:
1829 			/*
1830 			 * process in service procedure if message already
1831 			 * queued (maintain in-order processing)
1832 			 */
1833 			if (wq->q_first != NULL) {
1834 				tl_proc = tl_putq_ser;
1835 			} else {
1836 				tl_proc = tl_wput_ser;
1837 			}
1838 			break;
1839 		}
1840 		break;
1841 
1842 	case M_PCPROTO:
1843 		/*
1844 		 * Check that the message has enough data to figure out TPI
1845 		 * primitive.
1846 		 */
1847 		if (msz < sizeof (prim->type)) {
1848 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1849 			    SL_TRACE|SL_ERROR,
1850 			    "tl_wput:M_PCROTO data too short"));
1851 			tl_merror(wq, mp, EPROTO);
1852 			return;
1853 		}
1854 		switch (prim->type) {
1855 		case T_CAPABILITY_REQ:
1856 			tl_capability_req(mp, tep);
1857 			return;
1858 		case T_INFO_REQ:
1859 			tl_proc = tl_info_req_ser;
1860 			break;
1861 		case T_ADDR_REQ:
1862 			tl_proc = tl_addr_req_ser;
1863 			break;
1864 
1865 		default:
1866 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1867 			    SL_TRACE|SL_ERROR,
1868 			    "tl_wput:unknown TPI msg primitive"));
1869 			tl_merror(wq, mp, EPROTO);
1870 			return;
1871 		}
1872 		break;
1873 	default:
1874 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1875 		    "tl_wput:default:unexpected Streams message"));
1876 		freemsg(mp);
1877 		return;
1878 	}
1879 
1880 	/*
1881 	 * Continue processing via serializer.
1882 	 */
1883 	ASSERT(tl_proc != NULL);
1884 	tl_refhold(tep);
1885 	tl_serializer_enter(tep, tl_proc, mp);
1886 }
1887 
1888 /*
1889  * Place message on the queue while preserving order.
1890  */
1891 static void
1892 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1893 {
1894 	if (tep->te_closing) {
1895 		tl_wput_ser(mp, tep);
1896 	} else {
1897 		TL_PUTQ(tep, mp);
1898 		tl_serializer_exit(tep);
1899 		tl_refrele(tep);
1900 	}
1901 
1902 }
1903 
1904 static void
1905 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1906 {
1907 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1908 
1909 	switch (DB_TYPE(mp)) {
1910 	case M_DATA:
1911 		tl_data(mp, tep);
1912 		break;
1913 	case M_PROTO:
1914 		tl_do_proto(mp, tep);
1915 		break;
1916 	default:
1917 		freemsg(mp);
1918 		break;
1919 	}
1920 }
1921 
1922 /*
1923  * Write side put procedure called from serializer.
1924  */
1925 static void
1926 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1927 {
1928 	tl_wput_common_ser(mp, tep);
1929 	tl_serializer_exit(tep);
1930 	tl_refrele(tep);
1931 }
1932 
1933 /*
1934  * M_DATA processing. Called from serializer.
1935  */
1936 static void
1937 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1938 {
1939 	tl_endpt_t	*peer_tep = tep->te_conp;
1940 	queue_t		*peer_rq;
1941 
1942 	ASSERT(DB_TYPE(mp) == M_DATA);
1943 	ASSERT(IS_COTS(tep));
1944 
1945 	IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1946 
1947 	/*
1948 	 * fastpath for data. Ignore flow control if tep is closing.
1949 	 */
1950 	if ((peer_tep != NULL) &&
1951 	    !peer_tep->te_closing &&
1952 	    ((tep->te_state == TS_DATA_XFER) ||
1953 	    (tep->te_state == TS_WREQ_ORDREL)) &&
1954 	    (tep->te_wq != NULL) &&
1955 	    (tep->te_wq->q_first == NULL) &&
1956 	    ((peer_tep->te_state == TS_DATA_XFER) ||
1957 	    (peer_tep->te_state == TS_WREQ_ORDREL))	&&
1958 	    ((peer_rq = peer_tep->te_rq) != NULL) &&
1959 	    (canputnext(peer_rq) || tep->te_closing)) {
1960 		putnext(peer_rq, mp);
1961 	} else if (tep->te_closing) {
1962 		/*
1963 		 * It is possible that by the time we got here tep started to
1964 		 * close. If the write queue is not empty, and the state is
1965 		 * TS_DATA_XFER the data should be delivered in order, so we
1966 		 * call putq() instead of freeing the data.
1967 		 */
1968 		if ((tep->te_wq != NULL) &&
1969 		    ((tep->te_state == TS_DATA_XFER) ||
1970 		    (tep->te_state == TS_WREQ_ORDREL))) {
1971 			TL_PUTQ(tep, mp);
1972 		} else {
1973 			freemsg(mp);
1974 		}
1975 	} else {
1976 		TL_PUTQ(tep, mp);
1977 	}
1978 
1979 	tl_serializer_exit(tep);
1980 	tl_refrele(tep);
1981 }
1982 
1983 /*
1984  * Write side service routine.
1985  *
1986  * All actual processing happens within serializer which is entered
1987  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1988  * messages that need processing may have arrived, so tl_wsrv repeats until
1989  * queue is empty or te_nowsrv is set.
1990  */
1991 static void
1992 tl_wsrv(queue_t *wq)
1993 {
1994 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1995 
1996 	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1997 		mutex_enter(&tep->te_srv_lock);
1998 		ASSERT(tep->te_wsrv_active == B_FALSE);
1999 		tep->te_wsrv_active = B_TRUE;
2000 		mutex_exit(&tep->te_srv_lock);
2001 
2002 		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2003 
2004 		/*
2005 		 * Wait for serializer job to complete.
2006 		 */
2007 		mutex_enter(&tep->te_srv_lock);
2008 		while (tep->te_wsrv_active) {
2009 			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2010 		}
2011 		cv_signal(&tep->te_srv_cv);
2012 		mutex_exit(&tep->te_srv_lock);
2013 	}
2014 }
2015 
2016 /*
2017  * Serialized write side processing of the STREAMS queue.
2018  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2019  * is NULL.
2020  */
2021 static void
2022 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2023 {
2024 	mblk_t *mp;
2025 	queue_t *wq = tep->te_wq;
2026 
2027 	ASSERT(wq != NULL);
2028 	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2029 		tl_wput_common_ser(mp, tep);
2030 	}
2031 
2032 	/*
2033 	 * Wakeup service routine unless called from close.
2034 	 * If ser_mp is specified, the caller is tl_wsrv().
2035 	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2036 	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2037 	 * be no matching tl_serializer_exit() in this case.
2038 	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2039 	 * waiting on te_srv_cv.
2040 	 */
2041 	if (ser_mp != NULL) {
2042 		/*
2043 		 * We are called from tl_wsrv.
2044 		 */
2045 		mutex_enter(&tep->te_srv_lock);
2046 		ASSERT(tep->te_wsrv_active);
2047 		tep->te_wsrv_active = B_FALSE;
2048 		cv_signal(&tep->te_srv_cv);
2049 		mutex_exit(&tep->te_srv_lock);
2050 		tl_serializer_exit(tep);
2051 	}
2052 }
2053 
2054 /*
2055  * Called when the stream is backenabled. Enter serializer and qenable everyone
2056  * flow controlled by tep.
2057  *
2058  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2059  * is possible that two instances of tl_rsrv will be running reusing the same
2060  * rsrv mblk.
2061  */
2062 static void
2063 tl_rsrv(queue_t *rq)
2064 {
2065 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2066 
2067 	ASSERT(rq->q_first == NULL);
2068 	ASSERT(tep->te_rsrv_active == 0);
2069 
2070 	tep->te_rsrv_active = B_TRUE;
2071 	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2072 	/*
2073 	 * Wait for serializer job to complete.
2074 	 */
2075 	mutex_enter(&tep->te_srv_lock);
2076 	while (tep->te_rsrv_active) {
2077 		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2078 	}
2079 	cv_signal(&tep->te_srv_cv);
2080 	mutex_exit(&tep->te_srv_lock);
2081 }
2082 
2083 /* ARGSUSED */
2084 static void
2085 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2086 {
2087 	tl_endpt_t *peer_tep;
2088 
2089 	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2090 		tl_cl_backenable(tep);
2091 	} else if (
2092 	    IS_COTS(tep) &&
2093 	    ((peer_tep = tep->te_conp) != NULL) &&
2094 	    !peer_tep->te_closing &&
2095 	    ((tep->te_state == TS_DATA_XFER) ||
2096 	    (tep->te_state == TS_WIND_ORDREL)||
2097 	    (tep->te_state == TS_WREQ_ORDREL))) {
2098 		TL_QENABLE(peer_tep);
2099 	}
2100 
2101 	/*
2102 	 * Wakeup read side service routine.
2103 	 */
2104 	mutex_enter(&tep->te_srv_lock);
2105 	ASSERT(tep->te_rsrv_active);
2106 	tep->te_rsrv_active = B_FALSE;
2107 	cv_signal(&tep->te_srv_cv);
2108 	mutex_exit(&tep->te_srv_lock);
2109 	tl_serializer_exit(tep);
2110 }
2111 
2112 /*
2113  * process M_PROTO messages. Always called from serializer.
2114  */
2115 static void
2116 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2117 {
2118 	ssize_t			msz = MBLKL(mp);
2119 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2120 
2121 	/* Message size was validated by tl_wput(). */
2122 	ASSERT(msz >= sizeof (prim->type));
2123 
2124 	switch (prim->type) {
2125 	case T_UNBIND_REQ:
2126 		tl_unbind(mp, tep);
2127 		break;
2128 
2129 	case T_ADDR_REQ:
2130 		tl_addr_req(mp, tep);
2131 		break;
2132 
2133 	case O_T_CONN_RES:
2134 	case T_CONN_RES:
2135 		if (IS_CLTS(tep)) {
2136 			tl_merror(tep->te_wq, mp, EPROTO);
2137 			break;
2138 		}
2139 		tl_conn_res(mp, tep);
2140 		break;
2141 
2142 	case T_DISCON_REQ:
2143 		if (IS_CLTS(tep)) {
2144 			tl_merror(tep->te_wq, mp, EPROTO);
2145 			break;
2146 		}
2147 		tl_discon_req(mp, tep);
2148 		break;
2149 
2150 	case T_DATA_REQ:
2151 		if (IS_CLTS(tep)) {
2152 			tl_merror(tep->te_wq, mp, EPROTO);
2153 			break;
2154 		}
2155 		tl_data(mp, tep);
2156 		break;
2157 
2158 	case T_OPTDATA_REQ:
2159 		if (IS_CLTS(tep)) {
2160 			tl_merror(tep->te_wq, mp, EPROTO);
2161 			break;
2162 		}
2163 		tl_data(mp, tep);
2164 		break;
2165 
2166 	case T_EXDATA_REQ:
2167 		if (IS_CLTS(tep)) {
2168 			tl_merror(tep->te_wq, mp, EPROTO);
2169 			break;
2170 		}
2171 		tl_exdata(mp, tep);
2172 		break;
2173 
2174 	case T_ORDREL_REQ:
2175 		if (! IS_COTSORD(tep)) {
2176 			tl_merror(tep->te_wq, mp, EPROTO);
2177 			break;
2178 		}
2179 		tl_ordrel(mp, tep);
2180 		break;
2181 
2182 	case T_UNITDATA_REQ:
2183 		if (IS_COTS(tep)) {
2184 			tl_merror(tep->te_wq, mp, EPROTO);
2185 			break;
2186 		}
2187 		tl_unitdata(mp, tep);
2188 		break;
2189 
2190 	default:
2191 		tl_merror(tep->te_wq, mp, EPROTO);
2192 		break;
2193 	}
2194 }
2195 
2196 /*
2197  * Process ioctl from serializer.
2198  * This is a wrapper around tl_do_ioctl().
2199  */
2200 static void
2201 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2202 {
2203 	if (! tep->te_closing)
2204 		tl_do_ioctl(mp, tep);
2205 	else
2206 		freemsg(mp);
2207 
2208 	tl_serializer_exit(tep);
2209 	tl_refrele(tep);
2210 }
2211 
2212 static void
2213 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2214 {
2215 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2216 	int cmd = iocbp->ioc_cmd;
2217 	queue_t *wq = tep->te_wq;
2218 	int error;
2219 	int thisopt, otheropt;
2220 
2221 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2222 
2223 	switch (cmd) {
2224 	case TL_IOC_CREDOPT:
2225 		if (cmd == TL_IOC_CREDOPT) {
2226 			thisopt = TL_SETCRED;
2227 			otheropt = TL_SETUCRED;
2228 		} else {
2229 			/* FALLTHROUGH */
2230 	case TL_IOC_UCREDOPT:
2231 			thisopt = TL_SETUCRED;
2232 			otheropt = TL_SETCRED;
2233 		}
2234 		/*
2235 		 * The credentials passing does not apply to sockets.
2236 		 * Only one of the cred options can be set at a given time.
2237 		 */
2238 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2239 			miocnak(wq, mp, 0, EINVAL);
2240 			return;
2241 		}
2242 
2243 		/*
2244 		 * Turn on generation of credential options for
2245 		 * T_conn_req, T_conn_con, T_unidata_ind.
2246 		 */
2247 		error = miocpullup(mp, sizeof (uint32_t));
2248 		if (error != 0) {
2249 			miocnak(wq, mp, 0, error);
2250 			return;
2251 		}
2252 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2253 			miocnak(wq, mp, 0, EINVAL);
2254 			return;
2255 		}
2256 
2257 		if (*(uint32_t *)mp->b_cont->b_rptr)
2258 			tep->te_flag |= thisopt;
2259 		else
2260 			tep->te_flag &= ~thisopt;
2261 
2262 		miocack(wq, mp, 0, 0);
2263 		break;
2264 
2265 	default:
2266 		/* Should not be here */
2267 		miocnak(wq, mp, 0, EINVAL);
2268 		break;
2269 	}
2270 }
2271 
2272 
2273 /*
2274  * send T_ERROR_ACK
2275  * Note: assumes enough memory or caller passed big enough mp
2276  *	- no recovery from allocb failures
2277  */
2278 
2279 static void
2280 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2281     t_scalar_t unix_err, t_scalar_t type)
2282 {
2283 	struct T_error_ack *err_ack;
2284 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2285 	    M_PCPROTO, T_ERROR_ACK);
2286 
2287 	if (ackmp == NULL) {
2288 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2289 		    "tl_error_ack:out of mblk memory"));
2290 		tl_merror(wq, NULL, ENOSR);
2291 		return;
2292 	}
2293 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2294 	err_ack->ERROR_prim = type;
2295 	err_ack->TLI_error = tli_err;
2296 	err_ack->UNIX_error = unix_err;
2297 
2298 	/*
2299 	 * send error ack message
2300 	 */
2301 	qreply(wq, ackmp);
2302 }
2303 
2304 
2305 
2306 /*
2307  * send T_OK_ACK
2308  * Note: assumes enough memory or caller passed big enough mp
2309  *	- no recovery from allocb failures
2310  */
2311 static void
2312 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2313 {
2314 	struct T_ok_ack *ok_ack;
2315 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2316 	    M_PCPROTO, T_OK_ACK);
2317 
2318 	if (ackmp == NULL) {
2319 		tl_merror(wq, NULL, ENOMEM);
2320 		return;
2321 	}
2322 
2323 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2324 	ok_ack->CORRECT_prim = type;
2325 
2326 	(void) qreply(wq, ackmp);
2327 }
2328 
2329 /*
2330  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2331  * This is a wrapper around tl_bind().
2332  */
2333 static void
2334 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2335 {
2336 	if (! tep->te_closing)
2337 		tl_bind(mp, tep);
2338 	else
2339 		freemsg(mp);
2340 
2341 	tl_serializer_exit(tep);
2342 	tl_refrele(tep);
2343 }
2344 
2345 /*
2346  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2347  * Assumes that the endpoint is in the unbound.
2348  */
2349 static void
2350 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2351 {
2352 	queue_t			*wq = tep->te_wq;
2353 	struct T_bind_ack	*b_ack;
2354 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2355 	mblk_t			*ackmp, *bamp;
2356 	soux_addr_t		ux_addr;
2357 	t_uscalar_t		qlen = 0;
2358 	t_scalar_t		alen, aoff;
2359 	tl_addr_t		addr_req;
2360 	void			*addr_startp;
2361 	ssize_t			msz = MBLKL(mp), basize;
2362 	t_scalar_t		tli_err = 0, unix_err = 0;
2363 	t_scalar_t		save_prim_type = bind->PRIM_type;
2364 	t_scalar_t		save_state = tep->te_state;
2365 
2366 	if (tep->te_state != TS_UNBND) {
2367 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2368 		    SL_TRACE|SL_ERROR,
2369 		    "tl_wput:bind_request:out of state, state=%d",
2370 		    tep->te_state));
2371 		tli_err = TOUTSTATE;
2372 		goto error;
2373 	}
2374 
2375 	if (msz < sizeof (struct T_bind_req)) {
2376 		tli_err = TSYSERR; unix_err = EINVAL;
2377 		goto error;
2378 	}
2379 
2380 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2381 
2382 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2383 	    (bind->PRIM_type == T_BIND_REQ));
2384 
2385 	alen = bind->ADDR_length;
2386 	aoff = bind->ADDR_offset;
2387 
2388 	/* negotiate max conn req pending */
2389 	if (IS_COTS(tep)) {
2390 		qlen = bind->CONIND_number;
2391 		if (qlen > tl_maxqlen)
2392 			qlen = tl_maxqlen;
2393 	}
2394 
2395 	/*
2396 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2397 	 * and bound again.
2398 	 */
2399 	if ((tep->te_hash_hndl == NULL) &&
2400 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2401 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2402 	    &tep->te_hash_hndl) != 0) {
2403 		tli_err = TSYSERR; unix_err = ENOSR;
2404 		goto error;
2405 	}
2406 
2407 	/*
2408 	 * Verify address correctness.
2409 	 */
2410 	if (IS_SOCKET(tep)) {
2411 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2412 
2413 		if ((alen != TL_SOUX_ADDRLEN) ||
2414 		    (aoff < 0) ||
2415 		    (aoff + alen > msz)) {
2416 			(void) (STRLOG(TL_ID, tep->te_minor,
2417 			    1, SL_TRACE|SL_ERROR,
2418 			    "tl_bind: invalid socket addr"));
2419 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2420 			tli_err = TSYSERR; unix_err = EINVAL;
2421 			goto error;
2422 		}
2423 		/* Copy address from message to local buffer. */
2424 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2425 		/*
2426 		 * Check that we got correct address from sockets
2427 		 */
2428 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2429 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2430 			(void) (STRLOG(TL_ID, tep->te_minor,
2431 			    1, SL_TRACE|SL_ERROR,
2432 			    "tl_bind: invalid socket magic"));
2433 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2434 			tli_err = TSYSERR; unix_err = EINVAL;
2435 			goto error;
2436 		}
2437 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2438 		    (ux_addr.soua_vp != NULL)) {
2439 			(void) (STRLOG(TL_ID, tep->te_minor,
2440 			    1, SL_TRACE|SL_ERROR,
2441 			    "tl_bind: implicit addr non-empty"));
2442 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2443 			tli_err = TSYSERR; unix_err = EINVAL;
2444 			goto error;
2445 		}
2446 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2447 		    (ux_addr.soua_vp == NULL)) {
2448 			(void) (STRLOG(TL_ID, tep->te_minor,
2449 			    1, SL_TRACE|SL_ERROR,
2450 			    "tl_bind: explicit addr empty"));
2451 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2452 			tli_err = TSYSERR; unix_err = EINVAL;
2453 			goto error;
2454 		}
2455 	} else {
2456 		if ((alen > 0) && ((aoff < 0) ||
2457 		    ((ssize_t)(aoff + alen) > msz) ||
2458 		    ((aoff + alen) < 0))) {
2459 			(void) (STRLOG(TL_ID, tep->te_minor,
2460 			    1, SL_TRACE|SL_ERROR,
2461 			    "tl_bind: invalid message"));
2462 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2463 			tli_err = TSYSERR; unix_err = EINVAL;
2464 			goto error;
2465 		}
2466 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2467 			(void) (STRLOG(TL_ID, tep->te_minor,
2468 			    1, SL_TRACE|SL_ERROR,
2469 			    "tl_bind: bad addr in  message"));
2470 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2471 			tli_err = TBADADDR;
2472 			goto error;
2473 		}
2474 #ifdef DEBUG
2475 		/*
2476 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2477 		 * if (! assertion)
2478 		 *	log warning;
2479 		 */
2480 		if (! ((alen == 0 && aoff == 0) ||
2481 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2482 			(void) (STRLOG(TL_ID, tep->te_minor,
2483 				    3, SL_TRACE|SL_ERROR,
2484 				    "tl_bind: addr overlaps TPI message"));
2485 		}
2486 #endif
2487 	}
2488 
2489 	/*
2490 	 * Bind the address provided or allocate one if requested.
2491 	 * Allow rebinds with a new qlen value.
2492 	 */
2493 	if (IS_SOCKET(tep)) {
2494 		/*
2495 		 * For anonymous requests the te_ap is already set up properly
2496 		 * so use minor number as an address.
2497 		 * For explicit requests need to check whether the address is
2498 		 * already in use.
2499 		 */
2500 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2501 			int rc;
2502 
2503 			if (tep->te_flag & TL_ADDRHASHED) {
2504 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2505 				if (tep->te_vp == ux_addr.soua_vp)
2506 					goto skip_addr_bind;
2507 				else /* Rebind to a new address. */
2508 					tl_addr_unbind(tep);
2509 			}
2510 			/*
2511 			 * Insert address in the hash if it is not already
2512 			 * there.  Since we use preallocated handle, the insert
2513 			 * can fail only if the key is already present.
2514 			 */
2515 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2516 			    (mod_hash_key_t)ux_addr.soua_vp,
2517 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2518 
2519 			if (rc != 0) {
2520 				ASSERT(rc == MH_ERR_DUPLICATE);
2521 				/*
2522 				 * Violate O_T_BIND_REQ semantics and fail with
2523 				 * TADDRBUSY - sockets will not use any address
2524 				 * other than supplied one for explicit binds.
2525 				 */
2526 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2527 				    SL_TRACE|SL_ERROR,
2528 				    "tl_bind:requested addr %p is busy",
2529 				    ux_addr.soua_vp));
2530 				tli_err = TADDRBUSY; unix_err = 0;
2531 				goto error;
2532 			}
2533 			tep->te_uxaddr = ux_addr;
2534 			tep->te_flag |= TL_ADDRHASHED;
2535 			tep->te_hash_hndl = NULL;
2536 		}
2537 	} else if (alen == 0) {
2538 		/*
2539 		 * assign any free address
2540 		 */
2541 		if (! tl_get_any_addr(tep, NULL)) {
2542 			(void) (STRLOG(TL_ID, tep->te_minor,
2543 			    1, SL_TRACE|SL_ERROR,
2544 			    "tl_bind:failed to get buffer for any "
2545 			    "address"));
2546 			tli_err = TSYSERR; unix_err = ENOSR;
2547 			goto error;
2548 		}
2549 	} else {
2550 		addr_req.ta_alen = alen;
2551 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2552 		addr_req.ta_zoneid = tep->te_zoneid;
2553 
2554 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2555 		if (tep->te_abuf == NULL) {
2556 			tli_err = TSYSERR; unix_err = ENOSR;
2557 			goto error;
2558 		}
2559 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2560 		tep->te_alen = alen;
2561 
2562 		if (mod_hash_insert_reserve(tep->te_addrhash,
2563 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2564 		    tep->te_hash_hndl) != 0) {
2565 			if (save_prim_type == T_BIND_REQ) {
2566 				/*
2567 				 * The bind semantics for this primitive
2568 				 * require a failure if the exact address
2569 				 * requested is busy
2570 				 */
2571 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2572 				    SL_TRACE|SL_ERROR,
2573 				    "tl_bind:requested addr is busy"));
2574 				tli_err = TADDRBUSY; unix_err = 0;
2575 				goto error;
2576 			}
2577 
2578 			/*
2579 			 * O_T_BIND_REQ semantics say if address if requested
2580 			 * address is busy, bind to any available free address
2581 			 */
2582 			if (! tl_get_any_addr(tep, &addr_req)) {
2583 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2584 				    SL_TRACE|SL_ERROR,
2585 				    "tl_bind:unable to get any addr buf"));
2586 				tli_err = TSYSERR; unix_err = ENOMEM;
2587 				goto error;
2588 			}
2589 		} else {
2590 			tep->te_flag |= TL_ADDRHASHED;
2591 			tep->te_hash_hndl = NULL;
2592 		}
2593 	}
2594 
2595 	ASSERT(tep->te_alen >= 0);
2596 
2597 skip_addr_bind:
2598 	/*
2599 	 * prepare T_BIND_ACK TPI message
2600 	 */
2601 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2602 	bamp = reallocb(mp, basize, 0);
2603 	if (bamp == NULL) {
2604 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2605 		    "tl_wput:tl_bind: allocb failed"));
2606 		/*
2607 		 * roll back state changes
2608 		 */
2609 		tl_addr_unbind(tep);
2610 		tep->te_state = TS_UNBND;
2611 		tl_memrecover(wq, mp, basize);
2612 		return;
2613 	}
2614 
2615 	DB_TYPE(bamp) = M_PCPROTO;
2616 	bamp->b_wptr = bamp->b_rptr + basize;
2617 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2618 	b_ack->PRIM_type = T_BIND_ACK;
2619 	b_ack->CONIND_number = qlen;
2620 	b_ack->ADDR_length = tep->te_alen;
2621 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2622 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2623 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2624 
2625 	if (IS_COTS(tep)) {
2626 		tep->te_qlen = qlen;
2627 		if (qlen > 0)
2628 			tep->te_flag |= TL_LISTENER;
2629 	}
2630 
2631 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2632 	/*
2633 	 * send T_BIND_ACK message
2634 	 */
2635 	(void) qreply(wq, bamp);
2636 	return;
2637 
2638 error:
2639 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2640 	if (ackmp == NULL) {
2641 		/*
2642 		 * roll back state changes
2643 		 */
2644 		tep->te_state = save_state;
2645 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2646 		return;
2647 	}
2648 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2649 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2650 }
2651 
2652 /*
2653  * Process T_UNBIND_REQ.
2654  * Called from serializer.
2655  */
2656 static void
2657 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2658 {
2659 	queue_t *wq;
2660 	mblk_t *ackmp;
2661 
2662 	if (tep->te_closing) {
2663 		freemsg(mp);
2664 		return;
2665 	}
2666 
2667 	wq = tep->te_wq;
2668 
2669 	/*
2670 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2671 	 * ==> allocate for T_ERROR_ACK (known max)
2672 	 */
2673 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2674 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2675 		return;
2676 	}
2677 	/*
2678 	 * memory resources committed
2679 	 * Note: no message validation. T_UNBIND_REQ message is
2680 	 * same size as PRIM_type field so already verified earlier.
2681 	 */
2682 
2683 	/*
2684 	 * validate state
2685 	 */
2686 	if (tep->te_state != TS_IDLE) {
2687 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2688 		    SL_TRACE|SL_ERROR,
2689 		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2690 		    tep->te_state));
2691 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2692 		return;
2693 	}
2694 	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2695 
2696 	/*
2697 	 * TPI says on T_UNBIND_REQ:
2698 	 *    send up a M_FLUSH to flush both
2699 	 *    read and write queues
2700 	 */
2701 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2702 
2703 	if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2704 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2705 
2706 		/*
2707 		 * Sockets use bind with qlen==0 followed by bind() to
2708 		 * the same address with qlen > 0 for listeners.
2709 		 * We allow rebind with a new qlen value.
2710 		 */
2711 		tl_addr_unbind(tep);
2712 	}
2713 
2714 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2715 	/*
2716 	 * send  T_OK_ACK
2717 	 */
2718 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2719 }
2720 
2721 
2722 /*
2723  * Option management code from drv/ip is used here
2724  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2725  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2726  *	However, that is what we want as that option is 'unorthodox'
2727  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2728  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2729  * Note2: use of optcom_req means this routine is an exception to
2730  *	 recovery from allocb() failures.
2731  */
2732 
2733 static void
2734 tl_optmgmt(queue_t *wq, mblk_t *mp)
2735 {
2736 	tl_endpt_t *tep;
2737 	mblk_t *ackmp;
2738 	union T_primitives *prim;
2739 	cred_t *cr;
2740 
2741 	tep = (tl_endpt_t *)wq->q_ptr;
2742 	prim = (union T_primitives *)mp->b_rptr;
2743 
2744 	/*
2745 	 * All Solaris components should pass a db_credp
2746 	 * for this TPI message, hence we ASSERT.
2747 	 * But in case there is some other M_PROTO that looks
2748 	 * like a TPI message sent by some other kernel
2749 	 * component, we check and return an error.
2750 	 */
2751 	cr = msg_getcred(mp, NULL);
2752 	ASSERT(cr != NULL);
2753 	if (cr == NULL) {
2754 		tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2755 		return;
2756 	}
2757 
2758 	/*  all states OK for AF_UNIX options ? */
2759 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2760 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2761 		/*
2762 		 * Broken TLI semantics that options can only be managed
2763 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2764 		 * tests this TLI (mis)feature using this device driver.
2765 		 */
2766 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2767 		    SL_TRACE|SL_ERROR,
2768 		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2769 		    tep->te_state));
2770 		/*
2771 		 * preallocate memory for T_ERROR_ACK
2772 		 */
2773 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2774 		if (! ackmp) {
2775 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2776 			return;
2777 		}
2778 
2779 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2780 		freemsg(mp);
2781 		return;
2782 	}
2783 
2784 	/*
2785 	 * call common option management routine from drv/ip
2786 	 */
2787 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2788 		svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2789 	} else {
2790 		ASSERT(prim->type == T_OPTMGMT_REQ);
2791 		tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2792 	}
2793 }
2794 
2795 /*
2796  * Handle T_conn_req - the driver part of accept().
2797  * If TL_SET[U]CRED generate the credentials options.
2798  * If this is a socket pass through options unmodified.
2799  * For sockets generate the T_CONN_CON here instead of
2800  * waiting for the T_CONN_RES.
2801  */
2802 static void
2803 tl_conn_req(queue_t *wq, mblk_t *mp)
2804 {
2805 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2806 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2807 	ssize_t			msz = MBLKL(mp);
2808 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2809 	tl_endpt_t		*peer_tep = NULL;
2810 	mblk_t			*ackmp;
2811 	mblk_t			*dimp;
2812 	struct T_discon_ind	*di;
2813 	soux_addr_t		ux_addr;
2814 	tl_addr_t		dst;
2815 
2816 	ASSERT(IS_COTS(tep));
2817 
2818 	if (tep->te_closing) {
2819 		freemsg(mp);
2820 		return;
2821 	}
2822 
2823 	/*
2824 	 * preallocate memory for:
2825 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2826 	 *	==> known max T_ERROR_ACK
2827 	 * 2. max of T_DISCON_IND and T_CONN_IND
2828 	 */
2829 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2830 	if (! ackmp) {
2831 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2832 		return;
2833 	}
2834 	/*
2835 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2836 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2837 	 */
2838 
2839 	if (tep->te_state != TS_IDLE) {
2840 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2841 		    SL_TRACE|SL_ERROR,
2842 		    "tl_wput:T_CONN_REQ:out of state, state=%d",
2843 		    tep->te_state));
2844 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2845 		freemsg(mp);
2846 		return;
2847 	}
2848 
2849 	/*
2850 	 * validate the message
2851 	 * Note: dereference fields in struct inside message only
2852 	 * after validating the message length.
2853 	 */
2854 	if (msz < sizeof (struct T_conn_req)) {
2855 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2856 		    "tl_conn_req:invalid message length"));
2857 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2858 		freemsg(mp);
2859 		return;
2860 	}
2861 	alen = creq->DEST_length;
2862 	aoff = creq->DEST_offset;
2863 	olen = creq->OPT_length;
2864 	ooff = creq->OPT_offset;
2865 	if (olen == 0)
2866 		ooff = 0;
2867 
2868 	if (IS_SOCKET(tep)) {
2869 		if ((alen != TL_SOUX_ADDRLEN) ||
2870 		    (aoff < 0) ||
2871 		    (aoff + alen > msz) ||
2872 		    (alen > msz - sizeof (struct T_conn_req))) {
2873 			(void) (STRLOG(TL_ID, tep->te_minor,
2874 				    1, SL_TRACE|SL_ERROR,
2875 				    "tl_conn_req: invalid socket addr"));
2876 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2877 			freemsg(mp);
2878 			return;
2879 		}
2880 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2881 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2882 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2883 			(void) (STRLOG(TL_ID, tep->te_minor,
2884 			    1, SL_TRACE|SL_ERROR,
2885 			    "tl_conn_req: invalid socket magic"));
2886 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2887 			freemsg(mp);
2888 			return;
2889 		}
2890 	} else {
2891 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2892 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2893 		    ooff + olen < 0)) ||
2894 		    olen < 0 || ooff < 0) {
2895 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2896 			    SL_TRACE|SL_ERROR,
2897 			    "tl_conn_req:invalid message"));
2898 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2899 			freemsg(mp);
2900 			return;
2901 		}
2902 
2903 		if (alen <= 0 || aoff < 0 ||
2904 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2905 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2906 				    SL_TRACE|SL_ERROR,
2907 				    "tl_conn_req:bad addr in message, "
2908 				    "alen=%d, msz=%ld",
2909 				    alen, msz));
2910 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2911 			freemsg(mp);
2912 			return;
2913 		}
2914 #ifdef DEBUG
2915 		/*
2916 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2917 		 * if (! assertion)
2918 		 *	log warning;
2919 		 */
2920 		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2921 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
2922 			    SL_TRACE|SL_ERROR,
2923 			    "tl_conn_req: addr overlaps TPI message"));
2924 		}
2925 #endif
2926 		if (olen) {
2927 			/*
2928 			 * no opts in connect req
2929 			 * supported in this provider except for sockets.
2930 			 */
2931 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2932 			    SL_TRACE|SL_ERROR,
2933 			    "tl_conn_req:options not supported "
2934 			    "in message"));
2935 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2936 			freemsg(mp);
2937 			return;
2938 		}
2939 	}
2940 
2941 	/*
2942 	 * Prevent tep from closing on us.
2943 	 */
2944 	if (! tl_noclose(tep)) {
2945 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2946 		    "tl_conn_req:endpoint is closing"));
2947 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2948 		freemsg(mp);
2949 		return;
2950 	}
2951 
2952 	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2953 	/*
2954 	 * get endpoint to connect to
2955 	 * check that peer with DEST addr is bound to addr
2956 	 * and has CONIND_number > 0
2957 	 */
2958 	dst.ta_alen = alen;
2959 	dst.ta_abuf = mp->b_rptr + aoff;
2960 	dst.ta_zoneid = tep->te_zoneid;
2961 
2962 	/*
2963 	 * Verify if remote addr is in use
2964 	 */
2965 	peer_tep = (IS_SOCKET(tep) ?
2966 	    tl_sock_find_peer(tep, &ux_addr) :
2967 	    tl_find_peer(tep, &dst));
2968 
2969 	if (peer_tep == NULL) {
2970 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2971 		    "tl_conn_req:no one at connect address"));
2972 		err = ECONNREFUSED;
2973 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2974 		/*
2975 		 * validate that number of incoming connection is
2976 		 * not to capacity on destination endpoint
2977 		 */
2978 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2979 		    "tl_conn_req: qlen overflow connection refused"));
2980 			err = ECONNREFUSED;
2981 	}
2982 
2983 	/*
2984 	 * Send T_DISCON_IND in case of error
2985 	 */
2986 	if (err != 0) {
2987 		if (peer_tep != NULL)
2988 			tl_refrele(peer_tep);
2989 		/* We are still expected to send T_OK_ACK */
2990 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2991 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2992 		tl_closeok(tep);
2993 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2994 		    M_PROTO, T_DISCON_IND);
2995 		if (dimp == NULL) {
2996 			tl_merror(wq, NULL, ENOSR);
2997 			return;
2998 		}
2999 		di = (struct T_discon_ind *)dimp->b_rptr;
3000 		di->DISCON_reason = err;
3001 		di->SEQ_number = BADSEQNUM;
3002 
3003 		tep->te_state = TS_IDLE;
3004 		/*
3005 		 * send T_DISCON_IND message
3006 		 */
3007 		putnext(tep->te_rq, dimp);
3008 		return;
3009 	}
3010 
3011 	ASSERT(IS_COTS(peer_tep));
3012 
3013 	/*
3014 	 * Found the listener. At this point processing will continue on
3015 	 * listener serializer. Close of the endpoint should be blocked while we
3016 	 * switch serializers.
3017 	 */
3018 	tl_serializer_refhold(peer_tep->te_ser);
3019 	tl_serializer_refrele(tep->te_ser);
3020 	tep->te_ser = peer_tep->te_ser;
3021 	ASSERT(tep->te_oconp == NULL);
3022 	tep->te_oconp = peer_tep;
3023 
3024 	/*
3025 	 * It is safe to close now. Close may continue on listener serializer.
3026 	 */
3027 	tl_closeok(tep);
3028 
3029 	/*
3030 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3031 	 * data, so we link mp to ackmp.
3032 	 */
3033 	ackmp->b_cont = mp;
3034 	mp = ackmp;
3035 
3036 	tl_refhold(tep);
3037 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3038 }
3039 
3040 /*
3041  * Finish T_CONN_REQ processing on listener serializer.
 *
 * mp is the preallocated ack mblk with the original T_CONN_REQ message
 * linked on its b_cont; tep is the connecting endpoint, and the endpoint
 * being connected to is read from tep->te_oconp.
 *
 * On success tl_ok_ack() is called for the request, a tl_icon_t for tep is
 * appended to the peer's te_iconp list, a T_CONN_IND is sent up the peer's
 * read queue and, for sockets with early connect enabled, a T_CONN_CON is
 * sent up tep's read queue.
 *
 * Every return path calls tl_serializer_exit(tep). Every failure path also
 * does TL_UNCONNECT(tep->te_oconp) and tl_refrele(tep); the success path
 * does neither (see the comment at the end of the function).
3042  */
3043 static void
3044 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3045 {
3046 	queue_t		*wq;
3047 	tl_endpt_t	*peer_tep = tep->te_oconp;
3048 	mblk_t		*confmp, *cimp, *indmp;
3049 	void		*opts = NULL;
3050 	mblk_t		*ackmp = mp;
3051 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3052 	struct T_conn_ind	*ci;
3053 	tl_icon_t	*tip;
3054 	void		*addr_startp;
3055 	t_scalar_t	olen = creq->OPT_length;
3056 	t_scalar_t	ooff = creq->OPT_offset;
3057 	size_t 		ci_msz;
3058 	size_t		size;
3059 	cred_t		*cr = NULL;
3060 	pid_t		cpid;
3061 
	/*
	 * The connecting endpoint started closing: drop the whole chain
	 * (ack mblk plus request) without sending any ack.
	 */
3062 	if (tep->te_closing) {
3063 		TL_UNCONNECT(tep->te_oconp);
3064 		tl_serializer_exit(tep);
3065 		tl_refrele(tep);
3066 		freemsg(mp);
3067 		return;
3068 	}
3069 
3070 	wq = tep->te_wq;
3071 	tep->te_flag |= TL_EAGER;
3072 
3073 	/*
3074 	 * Extract preallocated ackmp from mp.
	 * From here on mp is the T_CONN_REQ itself and ackmp stands alone.
3075 	 */
3076 	mp = mp->b_cont;
3077 	ackmp->b_cont = NULL;
3078 
	/* With no options present the offset in the request is ignored. */
3079 	if (olen == 0)
3080 		ooff = 0;
3081 
	/*
	 * The peer must not be closing and must be in TS_IDLE or
	 * TS_WRES_CIND. Otherwise the request is refused; mp is passed to
	 * tl_error_ack() and only ackmp is freed here (presumably
	 * tl_error_ack() consumes mp - confirm).
	 */
3082 	if (peer_tep->te_closing ||
3083 	    !((peer_tep->te_state == TS_IDLE) ||
3084 	    (peer_tep->te_state == TS_WRES_CIND))) {
3085 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3086 		    "tl_conn_req:peer in bad state (%d)",
3087 		    peer_tep->te_state));
3088 		TL_UNCONNECT(tep->te_oconp);
3089 		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3090 		freemsg(ackmp);
3091 		tl_serializer_exit(tep);
3092 		tl_refrele(tep);
3093 		return;
3094 	}
3095 
3096 	/*
3097 	 * preallocate now for T_DISCON_IND or T_CONN_IND
3098 	 */
3099 	/*
3100 	 * calculate length of T_CONN_IND message
	 *
	 * If the peer has TL_SETCRED or TL_SETUCRED set, olen/ooff taken
	 * from the request are overwritten: olen becomes the size of the
	 * single credential option and ooff becomes 0, so options from the
	 * T_CONN_REQ are not saved or copied below.
3101 	 */
3102 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3103 		cr = msg_getcred(mp, &cpid);
3104 		ASSERT(cr != NULL);
3105 		if (peer_tep->te_flag & TL_SETCRED) {
3106 			ooff = 0;
3107 			olen = (t_scalar_t) sizeof (struct opthdr) +
3108 			    OPTLEN(sizeof (tl_credopt_t));
3109 			/* 1 option only */
3110 		} else {
3111 			ooff = 0;
3112 			olen = (t_scalar_t)sizeof (struct opthdr) +
3113 			    OPTLEN(ucredminsize(cr));
3114 			/* 1 option only */
3115 		}
3116 	}
	/*
	 * T_CONN_IND layout: header, source address (tep's bound address),
	 * then options at the next T_ALIGN boundary. size is the larger of
	 * that and a T_DISCON_IND.
	 */
3117 	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3118 	ci_msz = T_ALIGN(ci_msz) + olen;
3119 	size = max(ci_msz, sizeof (struct T_discon_ind));
3120 
3121 	/*
3122 	 * Save options from mp - we'll need them for T_CONN_IND.
	 * A private copy is taken because mp is handed to reallocb() below.
3123 	 */
3124 	if (ooff != 0) {
3125 		opts = kmem_alloc(olen, KM_NOSLEEP);
3126 		if (opts == NULL) {
3127 			/*
3128 			 * roll back state changes
			 * (mp goes to tl_memrecover(); presumably it is
			 * retried once memory is available - confirm)
3129 			 */
3130 			tep->te_state = TS_IDLE;
3131 			tl_memrecover(wq, mp, size);
3132 			freemsg(ackmp);
3133 			TL_UNCONNECT(tep->te_oconp);
3134 			tl_serializer_exit(tep);
3135 			tl_refrele(tep);
3136 			return;
3137 		}
3138 		/* Copy options to a temp buffer */
3139 		bcopy(mp->b_rptr + ooff, opts, olen);
3140 	}
3141 
3142 	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3143 		/*
3144 		 * Generate a T_CONN_CON that has the identical address
3145 		 * (and options) as the T_CONN_REQ.
3146 		 * NOTE: assumes that the T_conn_req and T_conn_con structures
3147 		 * are isomorphic.
3148 		 */
3149 		confmp = copyb(mp);
3150 		if (! confmp) {
3151 			/*
3152 			 * roll back state changes
3153 			 */
3154 			tep->te_state = TS_IDLE;
3155 			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3156 			freemsg(ackmp);
3157 			if (opts != NULL)
3158 				kmem_free(opts, olen);
3159 			TL_UNCONNECT(tep->te_oconp);
3160 			tl_serializer_exit(tep);
3161 			tl_refrele(tep);
3162 			return;
3163 		}
		/* Only the primitive type is rewritten in the copy. */
3164 		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3165 		    T_CONN_CON;
3166 	} else {
3167 		confmp = NULL;
3168 	}
	/*
	 * Grow the request to 'size'. On success indmp is used in place of
	 * mp for the rest of the function.
	 */
3169 	if ((indmp = reallocb(mp, size, 0)) == NULL) {
3170 		/*
3171 		 * roll back state changes
3172 		 */
3173 		tep->te_state = TS_IDLE;
3174 		tl_memrecover(wq, mp, size);
3175 		freemsg(ackmp);
3176 		if (opts != NULL)
3177 			kmem_free(opts, olen);
3178 		freemsg(confmp);
3179 		TL_UNCONNECT(tep->te_oconp);
3180 		tl_serializer_exit(tep);
3181 		tl_refrele(tep);
3182 		return;
3183 	}
3184 
3185 	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3186 	if (tip == NULL) {
3187 		/*
3188 		 * roll back state changes
		 * (indmp, not mp, is handed to tl_memrecover() here because
		 * reallocb() has already succeeded)
3189 		 */
3190 		tep->te_state = TS_IDLE;
3191 		tl_memrecover(wq, indmp, sizeof (*tip));
3192 		freemsg(ackmp);
3193 		if (opts != NULL)
3194 			kmem_free(opts, olen);
3195 		freemsg(confmp);
3196 		TL_UNCONNECT(tep->te_oconp);
3197 		tl_serializer_exit(tep);
3198 		tl_refrele(tep);
3199 		return;
3200 	}
	/* Explicit store; tip came from kmem_zalloc() just above. */
3201 	tip->ti_mp = NULL;
3202 
3203 	/*
3204 	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3205 	 * and tl_icon_t cell.
3206 	 */
3207 
3208 	/*
3209 	 * ack validity of request and send the peer credential in the ACK.
3210 	 */
3211 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3212 
	/*
	 * NOTE(review): peer_tep has already been dereferenced above, so the
	 * NULL test in this condition cannot be what protects those earlier
	 * uses.
	 */
3213 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3214 	    confmp != NULL) {
3215 		mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3216 	}
3217 
3218 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3219 
3220 	/*
3221 	 * prepare message to send T_CONN_IND
3222 	 */
3223 	/*
3224 	 * allocate the message - original data blocks retained
3225 	 * in the returned mblk
3226 	 */
3227 	cimp = tl_resizemp(indmp, size);
3228 	if (! cimp) {
		/*
		 * The request was already acked above, so this failure is
		 * reported through tl_merror() instead of an error ack.
		 */
3229 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3230 		    "tl_conn_req:con_ind:allocb failure"));
3231 		tl_merror(wq, indmp, ENOMEM);
3232 		TL_UNCONNECT(tep->te_oconp);
3233 		tl_serializer_exit(tep);
3234 		tl_refrele(tep);
3235 		if (opts != NULL)
3236 			kmem_free(opts, olen);
3237 		freemsg(confmp);
3238 		ASSERT(tip->ti_mp == NULL);
3239 		kmem_free(tip, sizeof (*tip));
3240 		return;
3241 	}
3242 
	/* Build the T_CONN_IND in place; the source is tep's bound address. */
3243 	DB_TYPE(cimp) = M_PROTO;
3244 	ci = (struct T_conn_ind *)cimp->b_rptr;
3245 	ci->PRIM_type  = T_CONN_IND;
3246 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3247 	ci->SRC_length = tep->te_alen;
3248 	ci->SEQ_number = tep->te_seqno;
3249 
3250 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3251 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	/*
	 * Options: a generated credential option if the peer asked for one,
	 * else the options saved from the request, else none.
	 */
3252 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3253 
3254 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3255 		    ci->SRC_length);
3256 		ci->OPT_length = olen; /* because only 1 option */
3257 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3258 		    cr, cpid,
3259 		    peer_tep->te_flag, peer_tep->te_credp);
3260 	} else if (ooff != 0) {
3261 		/* Copy option from T_CONN_REQ */
3262 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3263 		    ci->SRC_length);
3264 		ci->OPT_length = olen;
3265 		ASSERT(opts != NULL);
3266 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3267 	} else {
3268 		ci->OPT_offset = 0;
3269 		ci->OPT_length = 0;
3270 	}
3271 	if (opts != NULL)
3272 		kmem_free(opts, olen);
3273 
3274 	/*
3275 	 * register connection request with server peer
3276 	 * append to list of incoming connections
3277 	 * increment references for both peer_tep and tep: peer_tep is placed on
3278 	 * te_oconp and tep is placed on listeners queue.
3279 	 */
3280 	tip->ti_tep = tep;
3281 	tip->ti_seqno = tep->te_seqno;
3282 	list_insert_tail(&peer_tep->te_iconp, tip);
3283 	peer_tep->te_nicon++;
3284 
3285 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3286 	/*
3287 	 * send the T_CONN_IND message
3288 	 */
3289 	putnext(peer_tep->te_rq, cimp);
3290 
3291 	/*
3292 	 * Send a T_CONN_CON message for sockets.
3293 	 * Disable the queues until we have reached the correct state!
	 * (tl_conn_res() calls enableok() on the client's write queue when
	 * the connection is accepted.)
3294 	 */
3295 	if (confmp != NULL) {
3296 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3297 		noenable(wq);
3298 		putnext(tep->te_rq, confmp);
3299 	}
3300 	/*
3301 	 * Now we need to increment tep reference because tep is referenced by
3302 	 * server list of pending connections. We also need to decrement
3303 	 * reference before exiting serializer. Two operations void each other
3304 	 * so we don't modify reference at all.
3305 	 */
3306 	ASSERT(tep->te_refcnt >= 2);
3307 	ASSERT(peer_tep->te_refcnt >= 2);
3308 	tl_serializer_exit(tep);
3309 }
3310 
3311 
3312 
3313 /*
3314  * Handle T_conn_res on listener stream. Called on listener serializer.
3315  * tl_conn_req has already generated the T_CONN_CON.
3316  * tl_conn_res is called on listener serializer.
3317  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3318  * Switch eager serializer to acceptor's.
3319  *
3320  * If TL_SET[U]CRED generate the credentials options.
3321  * For sockets tl_conn_req has already generated the T_CONN_CON.
 *
 * mp is the T_CONN_RES / O_T_CONN_RES message and tep is the listener.
 * Three endpoints are involved:
 *	tep	- the listener that received the message,
 *	acc_ep	- the acceptor looked up by ACCEPTOR_id (may equal tep),
 *	cl_ep	- the client recorded in the pending-connection entry (tip)
 *		  matching SEQ_number; NULL if it has closed or is ignored.
 *
 * Validation failures are answered with tl_error_ack() using TOUTSTATE,
 * TSYSERR/EINVAL, TBADOPT, TBADSEQ or TBADF. On success T_OK_ACK is sent,
 * cl_ep and acc_ep are linked through te_conp and the pending entry is
 * freed.
 *
 * Once tl_noclose(acc_ep) has succeeded, every return path calls
 * tl_closeok(acc_ep) and tl_refrele(acc_ep), except the final success path
 * which only calls tl_closeok(); tl_closeok(cl_ep) is called only when
 * client_noclose_set is true.
3322  */
3323 static void
3324 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3325 {
3326 	queue_t			*wq;
3327 	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
3328 	ssize_t			msz = MBLKL(mp);
3329 	t_scalar_t		olen, ooff, err = 0;
	/*
	 * NOTE(review): PRIM_type is read here, before the msz check further
	 * down; presumably the caller has already validated at least this
	 * much of the message - confirm.
	 */
3330 	t_scalar_t		prim = cres->PRIM_type;
3331 	uchar_t			*addr_startp;
3332 	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
3333 	tl_icon_t		*tip;
3334 	size_t			size;
3335 	mblk_t			*ackmp, *respmp;
3336 	mblk_t			*dimp, *ccmp = NULL;
3337 	struct T_discon_ind	*di;
3338 	struct T_conn_con	*cc;
	/* B_TRUE once tl_noclose(cl_ep) succeeded and must be undone. */
3339 	boolean_t		client_noclose_set = B_FALSE;
	/* B_FALSE forces the acceptor onto the client's serializer instead. */
3340 	boolean_t		switch_client_serializer = B_TRUE;
3341 
3342 	ASSERT(IS_COTS(tep));
3343 
	/* Listener is closing: drop the message without any ack. */
3344 	if (tep->te_closing) {
3345 		freemsg(mp);
3346 		return;
3347 	}
3348 
3349 	wq = tep->te_wq;
3350 
3351 	/*
3352 	 * preallocate memory for:
3353 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3354 	 *	==> known max T_ERROR_ACK
3355 	 * 2. max of T_DISCON_IND and T_CONN_CON
3356 	 */
3357 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3358 	if (! ackmp) {
3359 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3360 		return;
3361 	}
3362 	/*
3363 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3364 	 * will be committed for T_DISCON_IND/T_CONN_CON later
3365 	 */
3366 
3367 
3368 	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3369 
3370 	/*
3371 	 * validate state
3372 	 */
3373 	if (tep->te_state != TS_WRES_CIND) {
3374 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3375 		    SL_TRACE|SL_ERROR,
3376 		    "tl_wput:T_CONN_RES:out of state, state=%d",
3377 		    tep->te_state));
3378 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3379 		freemsg(mp);
3380 		return;
3381 	}
3382 
3383 	/*
3384 	 * validate the message
3385 	 * Note: dereference fields in struct inside message only
3386 	 * after validating the message length.
3387 	 */
3388 	if (msz < sizeof (struct T_conn_res)) {
3389 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3390 		    "tl_conn_res:invalid message length"));
3391 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3392 		freemsg(mp);
3393 		return;
3394 	}
3395 	olen = cres->OPT_length;
3396 	ooff = cres->OPT_offset;
	/* Options, if any, must lie entirely inside the message block. */
3397 	if (((olen > 0) && ((ooff + olen) > msz))) {
3398 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3399 		    "tl_conn_res:invalid message"));
3400 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3401 		freemsg(mp);
3402 		return;
3403 	}
3404 	if (olen) {
3405 		/*
3406 		 * no opts in connect res
3407 		 * supported in this provider
3408 		 */
3409 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3410 		    "tl_conn_res:options not supported in message"));
3411 		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3412 		freemsg(mp);
3413 		return;
3414 	}
3415 
	/*
	 * The listener state is advanced here; each later validation failure
	 * applies TE_ERROR_ACK to it before sending the error ack.
	 */
3416 	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3417 	ASSERT(tep->te_state == TS_WACK_CRES);
3418 
	/*
	 * NOTE(review): because the two comparisons are joined with &&, this
	 * rejects only SEQ_number values in [BADSEQNUM, TL_MINOR_START).
	 * Confirm that is the intended range.
	 */
3419 	if (cres->SEQ_number < TL_MINOR_START &&
3420 	    cres->SEQ_number >= BADSEQNUM) {
3421 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3422 		    "tl_conn_res:remote endpoint sequence number bad"));
3423 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3424 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3425 		freemsg(mp);
3426 		return;
3427 	}
3428 
3429 	/*
3430 	 * find accepting endpoint. Will have extra reference if found.
3431 	 */
3432 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3433 	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3434 	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3435 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3436 		    "tl_conn_res:bad accepting endpoint"));
3437 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3438 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3439 		freemsg(mp);
3440 		return;
3441 	}
3442 
3443 	/*
3444 	 * Prevent acceptor from closing.
	 * On failure only the lookup reference is dropped; tl_closeok() is
	 * not called because tl_noclose() did not succeed.
3445 	 */
3446 	if (! tl_noclose(acc_ep)) {
3447 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3448 		    "tl_conn_res:bad accepting endpoint"));
3449 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3450 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3451 		tl_refrele(acc_ep);
3452 		freemsg(mp);
3453 		return;
3454 	}
3455 
	/* The flag is not cleared on the error returns that follow. */
3456 	acc_ep->te_flag |= TL_ACCEPTOR;
3457 
3458 	/*
3459 	 * validate that accepting endpoint, if different from listening
3460 	 * has address bound => state is TS_IDLE
3461 	 * TROUBLE in XPG4 !!?
3462 	 */
3463 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3464 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3465 		    "tl_conn_res:accepting endpoint has no address bound,"
3466 		    "state=%d", acc_ep->te_state));
3467 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3468 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3469 		freemsg(mp);
3470 		tl_closeok(acc_ep);
3471 		tl_refrele(acc_ep);
3472 		return;
3473 	}
3474 
3475 	/*
3476 	 * validate if accepting endpt same as listening, then
3477 	 * no other incoming connection should be on the queue
3478 	 */
3479 
3480 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3481 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3482 		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3483 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3484 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3485 		freemsg(mp);
3486 		tl_closeok(acc_ep);
3487 		tl_refrele(acc_ep);
3488 		return;
3489 	}
3490 
3491 	/*
3492 	 * Mark for deletion, the entry corresponding to client
3493 	 * on list of pending connections made by the listener
3494 	 *  search list to see if client is one of the
3495 	 * recorded as a listener.
3496 	 */
3497 	tip = tl_icon_find(tep, cres->SEQ_number);
3498 	if (tip == NULL) {
3499 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3500 		    "tl_conn_res:no client in listener list"));
3501 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3502 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3503 		freemsg(mp);
3504 		tl_closeok(acc_ep);
3505 		tl_refrele(acc_ep);
3506 		return;
3507 	}
3508 
3509 	/*
3510 	 * If ti_tep is NULL the client has already closed. In this case
3511 	 * the code below will avoid any action on the client side
3512 	 * but complete the server and acceptor state transitions.
3513 	 */
3514 	ASSERT(tip->ti_tep == NULL ||
3515 	    tip->ti_tep->te_seqno == cres->SEQ_number);
3516 	cl_ep = tip->ti_tep;
3517 
3518 	/*
3519 	 * If the client is present it is switched from listener's to acceptor's
3520 	 * serializer. We should block client closes while serializers are
3521 	 * being switched.
3522 	 *
3523 	 * It is possible that the client is present but is currently being
3524 	 * closed. There are two possible cases:
3525 	 *
3526 	 * 1) The client has already entered tl_close_finish_ser() and sent
3527 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3528 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3529 	 *
3530 	 * 2) The client started the close but has not entered
3531 	 *    tl_close_finish_ser() yet. In this case, the client is already
3532 	 *    proceeding asynchronously on the listener's serializer, so we're
3533 	 *    forced to change the acceptor to use the listener's serializer to
3534 	 *    ensure that any operations on the acceptor are serialized with
3535 	 *    respect to the close that's in-progress.
3536 	 */
3537 	if (cl_ep != NULL) {
3538 		if (tl_noclose(cl_ep)) {
3539 			client_noclose_set = B_TRUE;
3540 		} else {
3541 			/*
3542 			 * Client is closing. If it has sent the
3543 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3544 			 * we have to let the client continue until it is
3545 			 * sent.
3546 			 *
3547 			 * If we do continue using the client, acceptor will
3548 			 * switch to client's serializer which is used by client
3549 			 * for its close.
			 *
			 * NOTE(review): the state is compared against a
			 * literal -1 below; which state that value stands
			 * for is not visible in this function.
3550 			 */
3551 			tl_client_closing_when_accepting++;
3552 			switch_client_serializer = B_FALSE;
3553 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3554 			    cl_ep->te_state == -1)
3555 				cl_ep = NULL;
3556 		}
3557 	}
3558 
3559 	if (cl_ep != NULL) {
3560 		/*
3561 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3562 		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, err is set only when cl_ep is a
		 * socket whose state is neither TS_WCON_CREQ nor
		 * TS_DATA_XFER. A non-socket client in any other state is
		 * not rejected by this test. Confirm this matches the
		 * intent described above.
3563 		 */
3564 		if (cl_ep->te_state != TS_WCON_CREQ &&
3565 		    (cl_ep->te_state != TS_DATA_XFER &&
3566 		    IS_SOCKET(cl_ep))) {
3567 			err = ECONNREFUSED;
3568 			/*
3569 			 * T_DISCON_IND sent later after committing memory
3570 			 * and acking validity of request
3571 			 */
3572 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3573 			    "tl_conn_res:peer in bad state"));
3574 		}
3575 
3576 		/*
3577 		 * preallocate now for T_DISCON_IND or T_CONN_CON
3578 		 * ack validity of request (T_OK_ACK) after memory committed
3579 		 */
3580 
3581 		if (err)
3582 			size = sizeof (struct T_discon_ind);
3583 		else {
3584 			/*
3585 			 * calculate length of T_CONN_CON message
			 * (olen is reused later as the T_CONN_CON option
			 * length)
3586 			 */
3587 			olen = 0;
3588 			if (cl_ep->te_flag & TL_SETCRED) {
3589 				olen = (t_scalar_t)sizeof (struct opthdr) +
3590 				    OPTLEN(sizeof (tl_credopt_t));
3591 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3592 				olen = (t_scalar_t)sizeof (struct opthdr) +
3593 				    OPTLEN(ucredminsize(acc_ep->te_credp));
3594 			}
3595 			size = T_ALIGN(sizeof (struct T_conn_con) +
3596 			    acc_ep->te_alen) + olen;
3597 		}
3598 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3599 			/*
3600 			 * roll back state changes
3601 			 */
3602 			tep->te_state = TS_WRES_CIND;
3603 			tl_memrecover(wq, mp, size);
3604 			freemsg(ackmp);
3605 			if (client_noclose_set)
3606 				tl_closeok(cl_ep);
3607 			tl_closeok(acc_ep);
3608 			tl_refrele(acc_ep);
3609 			return;
3610 		}
		/*
		 * respmp is used from here on. The only later freemsg(mp) is
		 * on the cl_ep == NULL path, where this block was skipped
		 * and mp is still the original message.
		 */
3611 		mp = NULL;
3612 	}
3613 
3614 	/*
3615 	 * Now ack validity of request
	 * The event depends on how many connections are pending and on
	 * whether the listener is accepting on itself.
3616 	 */
3617 	if (tep->te_nicon == 1) {
3618 		if (tep == acc_ep)
3619 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3620 		else
3621 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3622 	} else
3623 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3624 
3625 	/*
3626 	 * send T_DISCON_IND now if client state validation failed earlier
	 * (err is only set inside the cl_ep != NULL block above, so respmp
	 * and size are valid here).
	 *
	 * NOTE(review): this path returns without calling tl_freetip(), and
	 * it overwrites the listener state with TS_IDLE after the NEXTSTATE
	 * transition just above.
3627 	 */
3628 	if (err) {
3629 		tl_ok_ack(wq, ackmp, prim);
3630 		/*
3631 		 * flush the queues - why always ?
3632 		 */
3633 		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3634 
3635 		dimp = tl_resizemp(respmp, size);
3636 		if (! dimp) {
3637 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3638 			    SL_TRACE|SL_ERROR,
3639 			    "tl_conn_res:con_ind:allocb failure"));
3640 			tl_merror(wq, respmp, ENOMEM);
3641 			tl_closeok(acc_ep);
3642 			if (client_noclose_set)
3643 				tl_closeok(cl_ep);
3644 			tl_refrele(acc_ep);
3645 			return;
3646 		}
3647 		if (dimp->b_cont) {
3648 			/* no user data in provider generated discon ind */
3649 			freemsg(dimp->b_cont);
3650 			dimp->b_cont = NULL;
3651 		}
3652 
3653 		DB_TYPE(dimp) = M_PROTO;
3654 		di = (struct T_discon_ind *)dimp->b_rptr;
3655 		di->PRIM_type  = T_DISCON_IND;
3656 		di->DISCON_reason = err;
3657 		di->SEQ_number = BADSEQNUM;
3658 
3659 		tep->te_state = TS_IDLE;
3660 		/*
3661 		 * send T_DISCON_IND message
		 * (it goes up the acceptor's read queue)
3662 		 */
3663 		putnext(acc_ep->te_rq, dimp);
3664 		if (client_noclose_set)
3665 			tl_closeok(cl_ep);
3666 		tl_closeok(acc_ep);
3667 		tl_refrele(acc_ep);
3668 		return;
3669 	}
3670 
3671 	/*
3672 	 * now start connecting the accepting endpoint
3673 	 */
3674 	if (tep != acc_ep)
3675 		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3676 
3677 	if (cl_ep == NULL) {
3678 		/*
3679 		 * The client has already closed. Send up any queued messages
3680 		 * and change the state accordingly.
3681 		 */
3682 		tl_ok_ack(wq, ackmp, prim);
3683 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3684 
3685 		/*
3686 		 * remove endpoint from incoming connection
3687 		 * delete client from list of incoming connections
3688 		 */
3689 		tl_freetip(tep, tip);
3690 		freemsg(mp);
3691 		tl_closeok(acc_ep);
3692 		tl_refrele(acc_ep);
3693 		return;
3694 	} else if (tip->ti_mp != NULL) {
3695 		/*
3696 		 * The client could have queued a T_DISCON_IND which needs
3697 		 * to be sent up.
3698 		 * Note that t_discon_req can not operate the same as
3699 		 * t_data_req since it is not possible for it to putbq
3700 		 * the message and return -1 due to the use of qwriter.
3701 		 */
3702 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3703 	}
3704 
3705 	/*
3706 	 * prepare connect confirm T_CONN_CON message
3707 	 */
3708 
3709 	/*
3710 	 * allocate the message - original data blocks
3711 	 * retained in the returned mblk
	 *
	 * A T_CONN_CON is built only for non-socket clients or when early
	 * connect is disabled; otherwise respmp is simply freed.
3712 	 */
3713 	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3714 		ccmp = tl_resizemp(respmp, size);
3715 		if (ccmp == NULL) {
3716 			tl_ok_ack(wq, ackmp, prim);
3717 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3718 			    SL_TRACE|SL_ERROR,
3719 			    "tl_conn_res:conn_con:allocb failure"));
3720 			tl_merror(wq, respmp, ENOMEM);
3721 			tl_closeok(acc_ep);
3722 			if (client_noclose_set)
3723 				tl_closeok(cl_ep);
3724 			tl_refrele(acc_ep);
3725 			return;
3726 		}
3727 
		/* The responding address is the acceptor's bound address. */
3728 		DB_TYPE(ccmp) = M_PROTO;
3729 		cc = (struct T_conn_con *)ccmp->b_rptr;
3730 		cc->PRIM_type  = T_CONN_CON;
3731 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3732 		cc->RES_length = acc_ep->te_alen;
3733 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3734 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3735 		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3736 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3737 			    cc->RES_length);
3738 			cc->OPT_length = olen;
3739 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3740 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3741 			    cl_ep->te_credp);
3742 		} else {
3743 			cc->OPT_offset = 0;
3744 			cc->OPT_length = 0;
3745 		}
3746 		/*
3747 		 * Forward the credential in the packet so it can be picked up
3748 		 * at the higher layers for more complete credential processing
3749 		 */
3750 		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3751 	} else {
3752 		freemsg(respmp);
3753 		respmp = NULL;
3754 	}
3755 
3756 	/*
3757 	 * make connection linking
3758 	 * accepting and client endpoints
3759 	 * No need to increment references:
3760 	 *	on client: it should already have one from tip->ti_tep linkage.
3761 	 *	on acceptor: it should already have one from the table lookup.
3762 	 *
3763 	 * At this point both client and acceptor can't close. Set client
3764 	 * serializer to acceptor's.
3765 	 */
3766 	ASSERT(cl_ep->te_refcnt >= 2);
3767 	ASSERT(acc_ep->te_refcnt >= 2);
3768 	ASSERT(cl_ep->te_conp == NULL);
3769 	ASSERT(acc_ep->te_conp == NULL);
3770 	cl_ep->te_conp = acc_ep;
3771 	acc_ep->te_conp = cl_ep;
3772 	ASSERT(cl_ep->te_ser == tep->te_ser);
3773 	if (switch_client_serializer) {
		/*
		 * The client is moved only if te_ser_count is zero under
		 * te_ser_lock; otherwise fall through to moving the acceptor.
		 */
3774 		mutex_enter(&cl_ep->te_ser_lock);
3775 		if (cl_ep->te_ser_count > 0) {
3776 			switch_client_serializer = B_FALSE;
3777 			tl_serializer_noswitch++;
3778 		} else {
3779 			/*
3780 			 * Move client to the acceptor's serializer.
3781 			 */
3782 			tl_serializer_refhold(acc_ep->te_ser);
3783 			tl_serializer_refrele(cl_ep->te_ser);
3784 			cl_ep->te_ser = acc_ep->te_ser;
3785 		}
3786 		mutex_exit(&cl_ep->te_ser_lock);
3787 	}
3788 	if (!switch_client_serializer) {
3789 		/*
3790 		 * It is not possible to switch client to use acceptor's.
3791 		 * Move acceptor to client's serializer (which is the same as
3792 		 * listener's).
3793 		 */
3794 		tl_serializer_refhold(cl_ep->te_ser);
3795 		tl_serializer_refrele(acc_ep->te_ser);
3796 		acc_ep->te_ser = cl_ep->te_ser;
3797 	}
3798 
3799 	TL_REMOVE_PEER(cl_ep->te_oconp);
3800 	TL_REMOVE_PEER(acc_ep->te_oconp);
3801 
3802 	/*
3803 	 * remove endpoint from incoming connection
3804 	 * delete client from list of incoming connections
	 * (ti_tep is cleared first; the client stays referenced through the
	 * te_conp link made above)
3805 	 */
3806 	tip->ti_tep = NULL;
3807 	tl_freetip(tep, tip);
3808 	tl_ok_ack(wq, ackmp, prim);
3809 
3810 	/*
3811 	 * data blocks already linked in reallocb()
3812 	 */
3813 
3814 	/*
3815 	 * link queues so that I_SENDFD will work
3816 	 */
3817 	if (! IS_SOCKET(tep)) {
3818 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3819 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3820 	}
3821 
3822 	/*
3823 	 * send T_CONN_CON up on client side unless it was already
3824 	 * done (for a socket). In cases any data or ordrel req has been
3825 	 * queued make sure that the service procedure runs.
	 *
	 * This condition is the negation of the one that built ccmp above,
	 * so ccmp is expected to be NULL in the first branch (assuming
	 * tl_disable_early_connect did not change in between).
3826 	 */
3827 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3828 		enableok(cl_ep->te_wq);
3829 		TL_QENABLE(cl_ep);
3830 		if (ccmp != NULL)
3831 			freemsg(ccmp);
3832 	} else {
3833 		/*
3834 		 * change client state on TE_CONN_CON event
3835 		 */
3836 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3837 		putnext(cl_ep->te_rq, ccmp);
3838 	}
3839 
3840 	/* Mark the both endpoints as accepted */
3841 	cl_ep->te_flag |= TL_ACCEPTED;
3842 	acc_ep->te_flag |= TL_ACCEPTED;
3843 
3844 	/*
3845 	 * Allow client and acceptor to close.
	 * No tl_refrele(acc_ep) here, consistent with the "no need to
	 * increment references" note above.
3846 	 */
3847 	tl_closeok(acc_ep);
3848 	if (client_noclose_set)
3849 		tl_closeok(cl_ep);
3850 }
3851 
3852 
3853 
3854 
3855 static void
3856 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3857 {
3858 	queue_t			*wq;
3859 	struct T_discon_req	*dr;
3860 	ssize_t			msz;
3861 	tl_endpt_t		*peer_tep = tep->te_conp;
3862 	tl_endpt_t		*srv_tep = tep->te_oconp;
3863 	tl_icon_t		*tip;
3864 	size_t			size;
3865 	mblk_t			*ackmp, *dimp, *respmp;
3866 	struct T_discon_ind	*di;
3867 	t_scalar_t		save_state, new_state;
3868 
3869 	if (tep->te_closing) {
3870 		freemsg(mp);
3871 		return;
3872 	}
3873 
3874 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3875 		TL_UNCONNECT(tep->te_conp);
3876 		peer_tep = NULL;
3877 	}
3878 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3879 		TL_UNCONNECT(tep->te_oconp);
3880 		srv_tep = NULL;
3881 	}
3882 
3883 	wq = tep->te_wq;
3884 
3885 	/*
3886 	 * preallocate memory for:
3887 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3888 	 *	==> known max T_ERROR_ACK
3889 	 * 2. for  T_DISCON_IND
3890 	 */
3891 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3892 	if (! ackmp) {
3893 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3894 		return;
3895 	}
3896 	/*
3897 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3898 	 * will be committed for T_DISCON_IND  later
3899 	 */
3900 
3901 	dr = (struct T_discon_req *)mp->b_rptr;
3902 	msz = MBLKL(mp);
3903 
3904 	/*
3905 	 * validate the state
3906 	 */
3907 	save_state = new_state = tep->te_state;
3908 	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3909 	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3910 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3911 		    SL_TRACE|SL_ERROR,
3912 		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
3913 		    tep->te_state));
3914 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3915 		freemsg(mp);
3916 		return;
3917 	}
3918 	/*
3919 	 * Defer committing the state change until it is determined if
3920 	 * the message will be queued with the tl_icon or not.
3921 	 */
3922 	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3923 
3924 	/* validate the message */
3925 	if (msz < sizeof (struct T_discon_req)) {
3926 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3927 		    "tl_discon_req:invalid message"));
3928 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3929 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3930 		freemsg(mp);
3931 		return;
3932 	}
3933 
3934 	/*
3935 	 * if server, then validate that client exists
3936 	 * by connection sequence number etc.
3937 	 */
3938 	if (tep->te_nicon > 0) { /* server */
3939 
3940 		/*
3941 		 * search server list for disconnect client
3942 		 */
3943 		tip = tl_icon_find(tep, dr->SEQ_number);
3944 		if (tip == NULL) {
3945 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
3946 			    SL_TRACE|SL_ERROR,
3947 			    "tl_discon_req:no disconnect endpoint"));
3948 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3949 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3950 			freemsg(mp);
3951 			return;
3952 		}
3953 		/*
3954 		 * If ti_tep is NULL the client has already closed. In this case
3955 		 * the code below will avoid any action on the client side.
3956 		 */
3957 
3958 		IMPLY(tip->ti_tep != NULL,
3959 		    tip->ti_tep->te_seqno == dr->SEQ_number);
3960 		peer_tep = tip->ti_tep;
3961 	}
3962 
3963 	/*
3964 	 * preallocate now for T_DISCON_IND
3965 	 * ack validity of request (T_OK_ACK) after memory committed
3966 	 */
3967 	size = sizeof (struct T_discon_ind);
3968 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
3969 		tl_memrecover(wq, mp, size);
3970 		freemsg(ackmp);
3971 		return;
3972 	}
3973 
3974 	/*
3975 	 * prepare message to ack validity of request
3976 	 */
3977 	if (tep->te_nicon == 0)
3978 		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3979 	else
3980 		if (tep->te_nicon == 1)
3981 			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3982 		else
3983 			new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3984 
3985 	/*
3986 	 * Flushing queues according to TPI. Using the old state.
3987 	 */
3988 	if ((tep->te_nicon <= 1) &&
3989 	    ((save_state == TS_DATA_XFER) ||
3990 	    (save_state == TS_WIND_ORDREL) ||
3991 	    (save_state == TS_WREQ_ORDREL)))
3992 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3993 
3994 	/* send T_OK_ACK up  */
3995 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3996 
3997 	/*
3998 	 * now do disconnect business
3999 	 */
4000 	if (tep->te_nicon > 0) { /* listener */
4001 		if (peer_tep != NULL && !peer_tep->te_closing) {
4002 			/*
4003 			 * disconnect incoming connect request pending to tep
4004 			 */
4005 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4006 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
4007 				    SL_TRACE|SL_ERROR,
4008 				    "tl_discon_req: reallocb failed"));
4009 				tep->te_state = new_state;
4010 				tl_merror(wq, respmp, ENOMEM);
4011 				return;
4012 			}
4013 			di = (struct T_discon_ind *)dimp->b_rptr;
4014 			di->SEQ_number = BADSEQNUM;
4015 			save_state = peer_tep->te_state;
4016 			peer_tep->te_state = TS_IDLE;
4017 
4018 			TL_REMOVE_PEER(peer_tep->te_oconp);
4019 			enableok(peer_tep->te_wq);
4020 			TL_QENABLE(peer_tep);
4021 		} else {
4022 			freemsg(respmp);
4023 			dimp = NULL;
4024 		}
4025 
4026 		/*
4027 		 * remove endpoint from incoming connection list
4028 		 * - remove disconnect client from list on server
4029 		 */
4030 		tl_freetip(tep, tip);
4031 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4032 		/*
4033 		 * disconnect an outgoing request pending from tep
4034 		 */
4035 
4036 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4037 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4038 			    SL_TRACE|SL_ERROR,
4039 			    "tl_discon_req: reallocb failed"));
4040 			tep->te_state = new_state;
4041 			tl_merror(wq, respmp, ENOMEM);
4042 			return;
4043 		}
4044 		di = (struct T_discon_ind *)dimp->b_rptr;
4045 		DB_TYPE(dimp) = M_PROTO;
4046 		di->PRIM_type  = T_DISCON_IND;
4047 		di->DISCON_reason = ECONNRESET;
4048 		di->SEQ_number = tep->te_seqno;
4049 
4050 		/*
4051 		 * If this is a socket the T_DISCON_IND is queued with
4052 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4053 		 * from the list of pending connections.
4054 		 * Note that when te_oconp is set the peer better have
4055 		 * a t_connind_t for the client.
4056 		 */
4057 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4058 			/*
4059 			 * No need to check that
4060 			 * ti_tep == NULL since the T_DISCON_IND
4061 			 * takes precedence over other queued
4062 			 * messages.
4063 			 */
4064 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4065 			peer_tep = NULL;
4066 			dimp = NULL;
4067 			/*
4068 			 * Can't clear te_oconp since tl_co_unconnect needs
4069 			 * it as a hint not to free the tep.
4070 			 * Keep the state unchanged since tl_conn_res inspects
4071 			 * it.
4072 			 */
4073 			new_state = tep->te_state;
4074 		} else {
4075 			/* Found - delete it */
4076 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4077 			if (tip != NULL) {
4078 				ASSERT(tep == tip->ti_tep);
4079 				save_state = peer_tep->te_state;
4080 				if (peer_tep->te_nicon == 1)
4081 					peer_tep->te_state =
4082 					    NEXTSTATE(TE_DISCON_IND2,
4083 					    peer_tep->te_state);
4084 				else
4085 					peer_tep->te_state =
4086 					    NEXTSTATE(TE_DISCON_IND3,
4087 					    peer_tep->te_state);
4088 				tl_freetip(peer_tep, tip);
4089 			}
4090 			ASSERT(tep->te_oconp != NULL);
4091 			TL_UNCONNECT(tep->te_oconp);
4092 		}
4093 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4094 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4095 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4096 			    SL_TRACE|SL_ERROR,
4097 			    "tl_discon_req: reallocb failed"));
4098 			tep->te_state = new_state;
4099 			tl_merror(wq, respmp, ENOMEM);
4100 			return;
4101 		}
4102 		di = (struct T_discon_ind *)dimp->b_rptr;
4103 		di->SEQ_number = BADSEQNUM;
4104 
4105 		save_state = peer_tep->te_state;
4106 		peer_tep->te_state = TS_IDLE;
4107 	} else {
4108 		/* Not connected */
4109 		tep->te_state = new_state;
4110 		freemsg(respmp);
4111 		return;
4112 	}
4113 
4114 	/* Commit state changes */
4115 	tep->te_state = new_state;
4116 
4117 	if (peer_tep == NULL) {
4118 		ASSERT(dimp == NULL);
4119 		goto done;
4120 	}
4121 	/*
4122 	 * Flush queues on peer before sending up
4123 	 * T_DISCON_IND according to TPI
4124 	 */
4125 
4126 	if ((save_state == TS_DATA_XFER) ||
4127 	    (save_state == TS_WIND_ORDREL) ||
4128 	    (save_state == TS_WREQ_ORDREL))
4129 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4130 
4131 	DB_TYPE(dimp) = M_PROTO;
4132 	di->PRIM_type  = T_DISCON_IND;
4133 	di->DISCON_reason = ECONNRESET;
4134 
4135 	/*
4136 	 * data blocks already linked into dimp by reallocb()
4137 	 */
4138 	/*
4139 	 * send indication message to peer user module
4140 	 */
4141 	ASSERT(dimp != NULL);
4142 	putnext(peer_tep->te_rq, dimp);
4143 done:
4144 	if (tep->te_conp) {	/* disconnect pointers if connected */
4145 		ASSERT(! peer_tep->te_closing);
4146 
4147 		/*
4148 		 * Messages may be queued on peer's write queue
4149 		 * waiting to be processed by its write service
4150 		 * procedure. Before the pointer to the peer transport
4151 		 * structure is set to NULL, qenable the peer's write
4152 		 * queue so that the queued up messages are processed.
4153 		 */
4154 		if ((save_state == TS_DATA_XFER) ||
4155 		    (save_state == TS_WIND_ORDREL) ||
4156 		    (save_state == TS_WREQ_ORDREL))
4157 			TL_QENABLE(peer_tep);
4158 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4159 		TL_UNCONNECT(peer_tep->te_conp);
4160 		if (! IS_SOCKET(tep)) {
4161 			/*
4162 			 * unlink the streams
4163 			 */
4164 			tep->te_wq->q_next = NULL;
4165 			peer_tep->te_wq->q_next = NULL;
4166 		}
4167 		TL_UNCONNECT(tep->te_conp);
4168 	}
4169 }
4170 
4171 static void
4172 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4173 {
4174 	if (!tep->te_closing)
4175 		tl_addr_req(mp, tep);
4176 	else
4177 		freemsg(mp);
4178 
4179 	tl_serializer_exit(tep);
4180 	tl_refrele(tep);
4181 }
4182 
4183 static void
4184 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4185 {
4186 	queue_t			*wq;
4187 	size_t			ack_sz;
4188 	mblk_t			*ackmp;
4189 	struct T_addr_ack	*taa;
4190 
4191 	if (tep->te_closing) {
4192 		freemsg(mp);
4193 		return;
4194 	}
4195 
4196 	wq = tep->te_wq;
4197 
4198 	/*
4199 	 * Note: T_ADDR_REQ message has only PRIM_type field
4200 	 * so it is already validated earlier.
4201 	 */
4202 
4203 	if (IS_CLTS(tep) ||
4204 	    (tep->te_state > TS_WREQ_ORDREL) ||
4205 	    (tep->te_state < TS_DATA_XFER)) {
4206 		/*
4207 		 * Either connectionless or connection oriented but not
4208 		 * in connected data transfer state or half-closed states.
4209 		 */
4210 		ack_sz = sizeof (struct T_addr_ack);
4211 		if (tep->te_state >= TS_IDLE)
4212 			/* is bound */
4213 			ack_sz += tep->te_alen;
4214 		ackmp = reallocb(mp, ack_sz, 0);
4215 		if (ackmp == NULL) {
4216 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4217 			    SL_TRACE|SL_ERROR,
4218 			    "tl_addr_req: reallocb failed"));
4219 			tl_memrecover(wq, mp, ack_sz);
4220 			return;
4221 		}
4222 
4223 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4224 
4225 		bzero(taa, sizeof (struct T_addr_ack));
4226 
4227 		taa->PRIM_type = T_ADDR_ACK;
4228 		ackmp->b_datap->db_type = M_PCPROTO;
4229 		ackmp->b_wptr = (uchar_t *)&taa[1];
4230 
4231 		if (tep->te_state >= TS_IDLE) {
4232 			/* endpoint is bound */
4233 			taa->LOCADDR_length = tep->te_alen;
4234 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4235 
4236 			bcopy(tep->te_abuf, ackmp->b_wptr,
4237 			    tep->te_alen);
4238 			ackmp->b_wptr += tep->te_alen;
4239 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4240 		}
4241 
4242 		(void) qreply(wq, ackmp);
4243 	} else {
4244 		ASSERT(tep->te_state == TS_DATA_XFER ||
4245 		    tep->te_state == TS_WIND_ORDREL ||
4246 		    tep->te_state == TS_WREQ_ORDREL);
4247 		/* connection oriented in data transfer */
4248 		tl_connected_cots_addr_req(mp, tep);
4249 	}
4250 }
4251 
4252 
4253 static void
4254 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4255 {
4256 	tl_endpt_t		*peer_tep = tep->te_conp;
4257 	size_t			ack_sz;
4258 	mblk_t			*ackmp;
4259 	struct T_addr_ack	*taa;
4260 	uchar_t			*addr_startp;
4261 
4262 	if (tep->te_closing) {
4263 		freemsg(mp);
4264 		return;
4265 	}
4266 
4267 	if (peer_tep == NULL || peer_tep->te_closing) {
4268 		tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4269 		return;
4270 	}
4271 
4272 	ASSERT(tep->te_state >= TS_IDLE);
4273 
4274 	ack_sz = sizeof (struct T_addr_ack);
4275 	ack_sz += T_ALIGN(tep->te_alen);
4276 	ack_sz += peer_tep->te_alen;
4277 
4278 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4279 	if (ackmp == NULL) {
4280 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4281 		    "tl_connected_cots_addr_req: reallocb failed"));
4282 		tl_memrecover(tep->te_wq, mp, ack_sz);
4283 		return;
4284 	}
4285 
4286 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4287 
4288 	/* endpoint is bound */
4289 	taa->LOCADDR_length = tep->te_alen;
4290 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4291 
4292 	addr_startp = (uchar_t *)&taa[1];
4293 
4294 	bcopy(tep->te_abuf, addr_startp,
4295 	    tep->te_alen);
4296 
4297 	taa->REMADDR_length = peer_tep->te_alen;
4298 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4299 	    taa->LOCADDR_length);
4300 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4301 	bcopy(peer_tep->te_abuf, addr_startp,
4302 	    peer_tep->te_alen);
4303 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4304 	    taa->REMADDR_offset + peer_tep->te_alen;
4305 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4306 
4307 	putnext(tep->te_rq, ackmp);
4308 }
4309 
4310 static void
4311 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4312 {
4313 	if (IS_CLTS(tep)) {
4314 		*ia = tl_clts_info_ack;
4315 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4316 	} else {
4317 		*ia = tl_cots_info_ack;
4318 		if (IS_COTSORD(tep))
4319 			ia->SERV_type = T_COTS_ORD;
4320 	}
4321 	ia->TIDU_size = tl_tidusz;
4322 	ia->CURRENT_state = tep->te_state;
4323 }
4324 
4325 /*
4326  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4327  * tl_wput.
4328  */
4329 static void
4330 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4331 {
4332 	mblk_t			*ackmp;
4333 	t_uscalar_t		cap_bits1;
4334 	struct T_capability_ack	*tcap;
4335 
4336 	if (tep->te_closing) {
4337 		freemsg(mp);
4338 		return;
4339 	}
4340 
4341 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4342 
4343 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4344 	    M_PCPROTO, T_CAPABILITY_ACK);
4345 	if (ackmp == NULL) {
4346 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4347 		    "tl_capability_req: reallocb failed"));
4348 		tl_memrecover(tep->te_wq, mp,
4349 		    sizeof (struct T_capability_ack));
4350 		return;
4351 	}
4352 
4353 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4354 	tcap->CAP_bits1 = 0;
4355 
4356 	if (cap_bits1 & TC1_INFO) {
4357 		tl_copy_info(&tcap->INFO_ack, tep);
4358 		tcap->CAP_bits1 |= TC1_INFO;
4359 	}
4360 
4361 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4362 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4363 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4364 	}
4365 
4366 	putnext(tep->te_rq, ackmp);
4367 }
4368 
4369 static void
4370 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4371 {
4372 	if (! tep->te_closing)
4373 		tl_info_req(mp, tep);
4374 	else
4375 		freemsg(mp);
4376 
4377 	tl_serializer_exit(tep);
4378 	tl_refrele(tep);
4379 }
4380 
4381 static void
4382 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4383 {
4384 	mblk_t *ackmp;
4385 
4386 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4387 	    M_PCPROTO, T_INFO_ACK);
4388 	if (ackmp == NULL) {
4389 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4390 		    "tl_info_req: reallocb failed"));
4391 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4392 		return;
4393 	}
4394 
4395 	/*
4396 	 * fill in T_INFO_ACK contents
4397 	 */
4398 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4399 
4400 	/*
4401 	 * send ack message
4402 	 */
4403 	putnext(tep->te_rq, ackmp);
4404 }
4405 
4406 /*
4407  * Handle M_DATA, T_data_req and T_optdata_req.
4408  * If this is a socket pass through T_optdata_req options unmodified.
4409  */
4410 static void
4411 tl_data(mblk_t *mp, tl_endpt_t *tep)
4412 {
4413 	queue_t			*wq = tep->te_wq;
4414 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4415 	ssize_t			msz = MBLKL(mp);
4416 	tl_endpt_t		*peer_tep;
4417 	queue_t			*peer_rq;
4418 	boolean_t		closing = tep->te_closing;
4419 
4420 	if (IS_CLTS(tep)) {
4421 		(void) (STRLOG(TL_ID, tep->te_minor, 2,
4422 		    SL_TRACE|SL_ERROR,
4423 		    "tl_wput:clts:unattached M_DATA"));
4424 		if (!closing) {
4425 			tl_merror(wq, mp, EPROTO);
4426 		} else {
4427 			freemsg(mp);
4428 		}
4429 		return;
4430 	}
4431 
4432 	/*
4433 	 * If the endpoint is closing it should still forward any data to the
4434 	 * peer (if it has one). If it is not allowed to forward it can just
4435 	 * free the message.
4436 	 */
4437 	if (closing &&
4438 	    (tep->te_state != TS_DATA_XFER) &&
4439 	    (tep->te_state != TS_WREQ_ORDREL)) {
4440 		freemsg(mp);
4441 		return;
4442 	}
4443 
4444 	if (DB_TYPE(mp) == M_PROTO) {
4445 		if (prim->type == T_DATA_REQ &&
4446 		    msz < sizeof (struct T_data_req)) {
4447 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4448 				SL_TRACE|SL_ERROR,
4449 				"tl_data:T_DATA_REQ:invalid message"));
4450 			if (!closing) {
4451 				tl_merror(wq, mp, EPROTO);
4452 			} else {
4453 				freemsg(mp);
4454 			}
4455 			return;
4456 		} else if (prim->type == T_OPTDATA_REQ &&
4457 		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4458 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4459 			    SL_TRACE|SL_ERROR,
4460 			    "tl_data:T_OPTDATA_REQ:invalid message"));
4461 			if (!closing) {
4462 				tl_merror(wq, mp, EPROTO);
4463 			} else {
4464 				freemsg(mp);
4465 			}
4466 			return;
4467 		}
4468 	}
4469 
4470 	/*
4471 	 * connection oriented provider
4472 	 */
4473 	switch (tep->te_state) {
4474 	case TS_IDLE:
4475 		/*
4476 		 * Other end not here - do nothing.
4477 		 */
4478 		freemsg(mp);
4479 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4480 		    "tl_data:cots with endpoint idle"));
4481 		return;
4482 
4483 	case TS_DATA_XFER:
4484 		/* valid states */
4485 		if (tep->te_conp != NULL)
4486 			break;
4487 
4488 		if (tep->te_oconp == NULL) {
4489 			if (!closing) {
4490 				tl_merror(wq, mp, EPROTO);
4491 			} else {
4492 				freemsg(mp);
4493 			}
4494 			return;
4495 		}
4496 		/*
4497 		 * For a socket the T_CONN_CON is sent early thus
4498 		 * the peer might not yet have accepted the connection.
4499 		 * If we are closing queue the packet with the T_CONN_IND.
4500 		 * Otherwise defer processing the packet until the peer
4501 		 * accepts the connection.
4502 		 * Note that the queue is noenabled when we go into this
4503 		 * state.
4504 		 */
4505 		if (!closing) {
4506 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4507 			    SL_TRACE|SL_ERROR,
4508 			    "tl_data: ocon"));
4509 			TL_PUTBQ(tep, mp);
4510 			return;
4511 		}
4512 		if (DB_TYPE(mp) == M_PROTO) {
4513 			if (msz < sizeof (t_scalar_t)) {
4514 				freemsg(mp);
4515 				return;
4516 			}
4517 			/* reuse message block - just change REQ to IND */
4518 			if (prim->type == T_DATA_REQ)
4519 				prim->type = T_DATA_IND;
4520 			else
4521 				prim->type = T_OPTDATA_IND;
4522 		}
4523 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4524 		return;
4525 
4526 	case TS_WREQ_ORDREL:
4527 		if (tep->te_conp == NULL) {
4528 			/*
4529 			 * Other end closed - generate discon_ind
4530 			 * with reason 0 to cause an EPIPE but no
4531 			 * read side error on AF_UNIX sockets.
4532 			 */
4533 			freemsg(mp);
4534 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4535 			    SL_TRACE|SL_ERROR,
4536 			    "tl_data: WREQ_ORDREL and no peer"));
4537 			tl_discon_ind(tep, 0);
4538 			return;
4539 		}
4540 		break;
4541 
4542 	default:
4543 		/* invalid state for event TE_DATA_REQ */
4544 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4545 		    "tl_data:cots:out of state"));
4546 		tl_merror(wq, mp, EPROTO);
4547 		return;
4548 	}
4549 	/*
4550 	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4551 	 * (State stays same on this event)
4552 	 */
4553 
4554 	/*
4555 	 * get connected endpoint
4556 	 */
4557 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4558 		freemsg(mp);
4559 		/* Peer closed */
4560 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4561 		    "tl_data: peer gone"));
4562 		return;
4563 	}
4564 
4565 	ASSERT(tep->te_serializer == peer_tep->te_serializer);
4566 	peer_rq = peer_tep->te_rq;
4567 
4568 	/*
4569 	 * Put it back if flow controlled
4570 	 * Note: Messages already on queue when we are closing is bounded
4571 	 * so we can ignore flow control.
4572 	 */
4573 	if (!canputnext(peer_rq) && !closing) {
4574 		TL_PUTBQ(tep, mp);
4575 		return;
4576 	}
4577 
4578 	/*
4579 	 * validate peer state
4580 	 */
4581 	switch (peer_tep->te_state) {
4582 	case TS_DATA_XFER:
4583 	case TS_WIND_ORDREL:
4584 		/* valid states */
4585 		break;
4586 	default:
4587 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4588 		    "tl_data:rx side:invalid state"));
4589 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4590 		return;
4591 	}
4592 	if (DB_TYPE(mp) == M_PROTO) {
4593 		/* reuse message block - just change REQ to IND */
4594 		if (prim->type == T_DATA_REQ)
4595 			prim->type = T_DATA_IND;
4596 		else
4597 			prim->type = T_OPTDATA_IND;
4598 	}
4599 	/*
4600 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4601 	 * (peer state stays same on this event)
4602 	 */
4603 	/*
4604 	 * send data to connected peer
4605 	 */
4606 	putnext(peer_rq, mp);
4607 }
4608 
4609 
4610 
4611 static void
4612 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4613 {
4614 	queue_t			*wq = tep->te_wq;
4615 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4616 	ssize_t			msz = MBLKL(mp);
4617 	tl_endpt_t		*peer_tep;
4618 	queue_t			*peer_rq;
4619 	boolean_t		closing = tep->te_closing;
4620 
4621 	if (msz < sizeof (struct T_exdata_req)) {
4622 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4623 		    "tl_exdata:invalid message"));
4624 		if (!closing) {
4625 			tl_merror(wq, mp, EPROTO);
4626 		} else {
4627 			freemsg(mp);
4628 		}
4629 		return;
4630 	}
4631 
4632 	/*
4633 	 * If the endpoint is closing it should still forward any data to the
4634 	 * peer (if it has one). If it is not allowed to forward it can just
4635 	 * free the message.
4636 	 */
4637 	if (closing &&
4638 	    (tep->te_state != TS_DATA_XFER) &&
4639 	    (tep->te_state != TS_WREQ_ORDREL)) {
4640 		freemsg(mp);
4641 		return;
4642 	}
4643 
4644 	/*
4645 	 * validate state
4646 	 */
4647 	switch (tep->te_state) {
4648 	case TS_IDLE:
4649 		/*
4650 		 * Other end not here - do nothing.
4651 		 */
4652 		freemsg(mp);
4653 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4654 		    "tl_exdata:cots with endpoint idle"));
4655 		return;
4656 
4657 	case TS_DATA_XFER:
4658 		/* valid states */
4659 		if (tep->te_conp != NULL)
4660 			break;
4661 
4662 		if (tep->te_oconp == NULL) {
4663 			if (!closing) {
4664 				tl_merror(wq, mp, EPROTO);
4665 			} else {
4666 				freemsg(mp);
4667 			}
4668 			return;
4669 		}
4670 		/*
4671 		 * For a socket the T_CONN_CON is sent early thus
4672 		 * the peer might not yet have accepted the connection.
4673 		 * If we are closing queue the packet with the T_CONN_IND.
4674 		 * Otherwise defer processing the packet until the peer
4675 		 * accepts the connection.
4676 		 * Note that the queue is noenabled when we go into this
4677 		 * state.
4678 		 */
4679 		if (!closing) {
4680 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4681 			    SL_TRACE|SL_ERROR,
4682 			    "tl_exdata: ocon"));
4683 			TL_PUTBQ(tep, mp);
4684 			return;
4685 		}
4686 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4687 		    "tl_exdata: closing socket ocon"));
4688 		prim->type = T_EXDATA_IND;
4689 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4690 		return;
4691 
4692 	case TS_WREQ_ORDREL:
4693 		if (tep->te_conp == NULL) {
4694 			/*
4695 			 * Other end closed - generate discon_ind
4696 			 * with reason 0 to cause an EPIPE but no
4697 			 * read side error on AF_UNIX sockets.
4698 			 */
4699 			freemsg(mp);
4700 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4701 			    SL_TRACE|SL_ERROR,
4702 			    "tl_exdata: WREQ_ORDREL and no peer"));
4703 			tl_discon_ind(tep, 0);
4704 			return;
4705 		}
4706 		break;
4707 
4708 	default:
4709 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4710 		    SL_TRACE|SL_ERROR,
4711 		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4712 		    tep->te_state));
4713 		tl_merror(wq, mp, EPROTO);
4714 		return;
4715 	}
4716 	/*
4717 	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4718 	 * (state stays same on this event)
4719 	 */
4720 
4721 	/*
4722 	 * get connected endpoint
4723 	 */
4724 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4725 		freemsg(mp);
4726 		/* Peer closed */
4727 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4728 		    "tl_exdata: peer gone"));
4729 		return;
4730 	}
4731 
4732 	peer_rq = peer_tep->te_rq;
4733 
4734 	/*
4735 	 * Put it back if flow controlled
4736 	 * Note: Messages already on queue when we are closing is bounded
4737 	 * so we can ignore flow control.
4738 	 */
4739 	if (!canputnext(peer_rq) && !closing) {
4740 		TL_PUTBQ(tep, mp);
4741 		return;
4742 	}
4743 
4744 	/*
4745 	 * validate state on peer
4746 	 */
4747 	switch (peer_tep->te_state) {
4748 	case TS_DATA_XFER:
4749 	case TS_WIND_ORDREL:
4750 		/* valid states */
4751 		break;
4752 	default:
4753 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4754 		    "tl_exdata:rx side:invalid state"));
4755 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4756 		return;
4757 	}
4758 	/*
4759 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4760 	 * (peer state stays same on this event)
4761 	 */
4762 	/*
4763 	 * reuse message block
4764 	 */
4765 	prim->type = T_EXDATA_IND;
4766 
4767 	/*
4768 	 * send data to connected peer
4769 	 */
4770 	putnext(peer_rq, mp);
4771 }
4772 
4773 
4774 
4775 static void
4776 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4777 {
4778 	queue_t			*wq =  tep->te_wq;
4779 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4780 	ssize_t			msz = MBLKL(mp);
4781 	tl_endpt_t		*peer_tep;
4782 	queue_t			*peer_rq;
4783 	boolean_t		closing = tep->te_closing;
4784 
4785 	if (msz < sizeof (struct T_ordrel_req)) {
4786 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4787 		    "tl_ordrel:invalid message"));
4788 		if (!closing) {
4789 			tl_merror(wq, mp, EPROTO);
4790 		} else {
4791 			freemsg(mp);
4792 		}
4793 		return;
4794 	}
4795 
4796 	/*
4797 	 * validate state
4798 	 */
4799 	switch (tep->te_state) {
4800 	case TS_DATA_XFER:
4801 	case TS_WREQ_ORDREL:
4802 		/* valid states */
4803 		if (tep->te_conp != NULL)
4804 			break;
4805 
4806 		if (tep->te_oconp == NULL)
4807 			break;
4808 
4809 		/*
4810 		 * For a socket the T_CONN_CON is sent early thus
4811 		 * the peer might not yet have accepted the connection.
4812 		 * If we are closing queue the packet with the T_CONN_IND.
4813 		 * Otherwise defer processing the packet until the peer
4814 		 * accepts the connection.
4815 		 * Note that the queue is noenabled when we go into this
4816 		 * state.
4817 		 */
4818 		if (!closing) {
4819 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4820 			    SL_TRACE|SL_ERROR,
4821 			    "tl_ordlrel: ocon"));
4822 			TL_PUTBQ(tep, mp);
4823 			return;
4824 		}
4825 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4826 		    "tl_ordlrel: closing socket ocon"));
4827 		prim->type = T_ORDREL_IND;
4828 		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4829 		return;
4830 
4831 	default:
4832 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4833 		    SL_TRACE|SL_ERROR,
4834 		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4835 		    tep->te_state));
4836 		if (!closing) {
4837 			tl_merror(wq, mp, EPROTO);
4838 		} else {
4839 			freemsg(mp);
4840 		}
4841 		return;
4842 	}
4843 	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4844 
4845 	/*
4846 	 * get connected endpoint
4847 	 */
4848 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4849 		/* Peer closed */
4850 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4851 		    "tl_ordrel: peer gone"));
4852 		freemsg(mp);
4853 		return;
4854 	}
4855 
4856 	peer_rq = peer_tep->te_rq;
4857 
4858 	/*
4859 	 * Put it back if flow controlled except when we are closing.
4860 	 * Note: Messages already on queue when we are closing is bounded
4861 	 * so we can ignore flow control.
4862 	 */
4863 	if (! canputnext(peer_rq) && !closing) {
4864 		TL_PUTBQ(tep, mp);
4865 		return;
4866 	}
4867 
4868 	/*
4869 	 * validate state on peer
4870 	 */
4871 	switch (peer_tep->te_state) {
4872 	case TS_DATA_XFER:
4873 	case TS_WIND_ORDREL:
4874 		/* valid states */
4875 		break;
4876 	default:
4877 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4878 		    "tl_ordrel:rx side:invalid state"));
4879 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4880 		return;
4881 	}
4882 	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4883 
4884 	/*
4885 	 * reuse message block
4886 	 */
4887 	prim->type = T_ORDREL_IND;
4888 	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4889 	    "tl_ordrel: send ordrel_ind"));
4890 
4891 	/*
4892 	 * send data to connected peer
4893 	 */
4894 	putnext(peer_rq, mp);
4895 }
4896 
4897 
4898 /*
4899  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4900  */
4901 static void
4902 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4903 {
4904 	size_t			err_sz;
4905 	tl_endpt_t		*tep;
4906 	struct T_unitdata_req	*udreq;
4907 	mblk_t			*err_mp;
4908 	t_scalar_t		alen;
4909 	t_scalar_t		olen;
4910 	struct T_uderror_ind	*uderr;
4911 	uchar_t			*addr_startp;
4912 
4913 	err_sz = sizeof (struct T_uderror_ind);
4914 	tep = (tl_endpt_t *)wq->q_ptr;
4915 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4916 	alen = udreq->DEST_length;
4917 	olen = udreq->OPT_length;
4918 
4919 	if (alen > 0)
4920 		err_sz = T_ALIGN(err_sz + alen);
4921 	if (olen > 0)
4922 		err_sz += olen;
4923 
4924 	err_mp = allocb(err_sz, BPRI_MED);
4925 	if (! err_mp) {
4926 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4927 		    "tl_uderr:allocb failure"));
4928 		/*
4929 		 * Note: no rollback of state needed as it does
4930 		 * not change in connectionless transport
4931 		 */
4932 		tl_memrecover(wq, mp, err_sz);
4933 		return;
4934 	}
4935 
4936 	DB_TYPE(err_mp) = M_PROTO;
4937 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
4938 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4939 	uderr->PRIM_type = T_UDERROR_IND;
4940 	uderr->ERROR_type = err;
4941 	uderr->DEST_length = alen;
4942 	uderr->OPT_length = olen;
4943 	if (alen <= 0) {
4944 		uderr->DEST_offset = 0;
4945 	} else {
4946 		uderr->DEST_offset =
4947 		    (t_scalar_t)sizeof (struct T_uderror_ind);
4948 		addr_startp  = mp->b_rptr + udreq->DEST_offset;
4949 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4950 		    (size_t)alen);
4951 	}
4952 	if (olen <= 0) {
4953 		uderr->OPT_offset = 0;
4954 	} else {
4955 		uderr->OPT_offset =
4956 		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4957 		    uderr->DEST_length);
4958 		addr_startp  = mp->b_rptr + udreq->OPT_offset;
4959 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4960 		    (size_t)olen);
4961 	}
4962 	freemsg(mp);
4963 
4964 	/*
4965 	 * send indication message
4966 	 */
4967 	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4968 
4969 	qreply(wq, err_mp);
4970 }
4971 
4972 static void
4973 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4974 {
4975 	queue_t *wq = tep->te_wq;
4976 
4977 	if (!tep->te_closing && (wq->q_first != NULL)) {
4978 		TL_PUTQ(tep, mp);
4979 	} else if (tep->te_rq != NULL)
4980 		tl_unitdata(mp, tep);
4981 	else
4982 		freemsg(mp);
4983 
4984 	tl_serializer_exit(tep);
4985 	tl_refrele(tep);
4986 }
4987 
4988 /*
4989  * Handle T_unitdata_req.
4990  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4991  * If this is a socket pass through options unmodified.
4992  */
4993 static void
4994 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4995 {
4996 	queue_t			*wq = tep->te_wq;
4997 	soux_addr_t		ux_addr;
4998 	tl_addr_t		destaddr;
4999 	uchar_t			*addr_startp;
5000 	tl_endpt_t		*peer_tep;
5001 	struct T_unitdata_ind	*udind;
5002 	struct T_unitdata_req	*udreq;
5003 	ssize_t			msz, ui_sz;
5004 	t_scalar_t		alen, aoff, olen, ooff;
5005 	t_scalar_t		oldolen = 0;
5006 	cred_t			*cr = NULL;
5007 	pid_t			cpid;
5008 
5009 	udreq = (struct T_unitdata_req *)mp->b_rptr;
5010 	msz = MBLKL(mp);
5011 
5012 	/*
5013 	 * validate the state
5014 	 */
5015 	if (tep->te_state != TS_IDLE) {
5016 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
5017 		    SL_TRACE|SL_ERROR,
5018 		    "tl_wput:T_CONN_REQ:out of state"));
5019 		tl_merror(wq, mp, EPROTO);
5020 		return;
5021 	}
5022 	/*
5023 	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5024 	 * (state does not change on this event)
5025 	 */
5026 
5027 	/*
5028 	 * validate the message
5029 	 * Note: dereference fields in struct inside message only
5030 	 * after validating the message length.
5031 	 */
5032 	if (msz < sizeof (struct T_unitdata_req)) {
5033 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5034 		    "tl_unitdata:invalid message length"));
5035 		tl_merror(wq, mp, EINVAL);
5036 		return;
5037 	}
5038 	alen = udreq->DEST_length;
5039 	aoff = udreq->DEST_offset;
5040 	oldolen = olen = udreq->OPT_length;
5041 	ooff = udreq->OPT_offset;
5042 	if (olen == 0)
5043 		ooff = 0;
5044 
5045 	if (IS_SOCKET(tep)) {
5046 		if ((alen != TL_SOUX_ADDRLEN) ||
5047 		    (aoff < 0) ||
5048 		    (aoff + alen > msz) ||
5049 		    (olen < 0) || (ooff < 0) ||
5050 		    ((olen > 0) && ((ooff + olen) > msz))) {
5051 			(void) (STRLOG(TL_ID, tep->te_minor,
5052 			    1, SL_TRACE|SL_ERROR,
5053 			    "tl_unitdata_req: invalid socket addr "
5054 			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5055 			    (int)msz, alen, aoff, olen, ooff));
5056 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5057 			return;
5058 		}
5059 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5060 
5061 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5062 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5063 			(void) (STRLOG(TL_ID, tep->te_minor,
5064 			    1, SL_TRACE|SL_ERROR,
5065 			    "tl_conn_req: invalid socket magic"));
5066 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5067 			return;
5068 		}
5069 	} else {
5070 		if ((alen < 0) ||
5071 		    (aoff < 0) ||
5072 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5073 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5074 		    ((aoff + alen) < 0) ||
5075 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5076 		    (olen < 0) ||
5077 		    (ooff < 0) ||
5078 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5079 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5080 				    SL_TRACE|SL_ERROR,
5081 				    "tl_unitdata:invalid unit data message"));
5082 			tl_merror(wq, mp, EINVAL);
5083 			return;
5084 		}
5085 	}
5086 
5087 	/* Options not supported unless it's a socket */
5088 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5089 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5090 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5091 		tl_uderr(wq, mp, EPROTO);
5092 		return;
5093 	}
5094 #ifdef DEBUG
5095 	/*
5096 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5097 	 * if (! assertion)
5098 	 *	log warning;
5099 	 */
5100 	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5101 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5102 		    "tl_unitdata:addr overlaps TPI message"));
5103 	}
5104 #endif
5105 	/*
5106 	 * get destination endpoint
5107 	 */
5108 	destaddr.ta_alen = alen;
5109 	destaddr.ta_abuf = mp->b_rptr + aoff;
5110 	destaddr.ta_zoneid = tep->te_zoneid;
5111 
5112 	/*
5113 	 * Check whether the destination is the same that was used previously
5114 	 * and the destination endpoint is in the right state. If something is
5115 	 * wrong, find destination again and cache it.
5116 	 */
5117 	peer_tep = tep->te_lastep;
5118 
5119 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5120 	    (peer_tep->te_state != TS_IDLE) ||
5121 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5122 		/*
5123 		 * Not the same as cached destination , need to find the right
5124 		 * destination.
5125 		 */
5126 		peer_tep = (IS_SOCKET(tep) ?
5127 		    tl_sock_find_peer(tep, &ux_addr) :
5128 		    tl_find_peer(tep, &destaddr));
5129 
5130 		if (peer_tep == NULL) {
5131 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5132 			    SL_TRACE|SL_ERROR,
5133 			    "tl_unitdata:no one at destination address"));
5134 			tl_uderr(wq, mp, ECONNRESET);
5135 			return;
5136 		}
5137 
5138 		/*
5139 		 * Cache the new peer.
5140 		 */
5141 		if (tep->te_lastep != NULL)
5142 			tl_refrele(tep->te_lastep);
5143 
5144 		tep->te_lastep = peer_tep;
5145 	}
5146 
5147 	if (peer_tep->te_state != TS_IDLE) {
5148 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5149 		    "tl_unitdata:provider in invalid state"));
5150 		tl_uderr(wq, mp, EPROTO);
5151 		return;
5152 	}
5153 
5154 	ASSERT(peer_tep->te_rq != NULL);
5155 
5156 	/*
5157 	 * Put it back if flow controlled except when we are closing.
5158 	 * Note: Messages already on queue when we are closing is bounded
5159 	 * so we can ignore flow control.
5160 	 */
5161 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5162 		/* record what we are flow controlled on */
5163 		if (tep->te_flowq != NULL) {
5164 			list_remove(&tep->te_flowq->te_flowlist, tep);
5165 		}
5166 		list_insert_head(&peer_tep->te_flowlist, tep);
5167 		tep->te_flowq = peer_tep;
5168 		TL_PUTBQ(tep, mp);
5169 		return;
5170 	}
5171 	/*
5172 	 * prepare indication message
5173 	 */
5174 
5175 	/*
5176 	 * calculate length of message
5177 	 */
5178 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5179 		cr = msg_getcred(mp, &cpid);
5180 		ASSERT(cr != NULL);
5181 
5182 		if (peer_tep->te_flag & TL_SETCRED) {
5183 			ASSERT(olen == 0);
5184 			olen = (t_scalar_t)sizeof (struct opthdr) +
5185 			    OPTLEN(sizeof (tl_credopt_t));
5186 						/* 1 option only */
5187 		} else if (peer_tep->te_flag & TL_SETUCRED) {
5188 			ASSERT(olen == 0);
5189 			olen = (t_scalar_t)sizeof (struct opthdr) +
5190 			    OPTLEN(ucredminsize(cr));
5191 						/* 1 option only */
5192 		} else {
5193 			/* Possibly more than one option */
5194 			olen += (t_scalar_t)sizeof (struct T_opthdr) +
5195 			    OPTLEN(ucredminsize(cr));
5196 		}
5197 	}
5198 
5199 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5200 	    olen;
5201 	/*
5202 	 * If the unitdata_ind fits and we are not adding options
5203 	 * reuse the udreq mblk.
5204 	 */
5205 	if (msz >= ui_sz && alen >= tep->te_alen &&
5206 	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5207 		/*
5208 		 * Reuse the original mblk. Leave options in place.
5209 		 */
5210 		udind =  (struct T_unitdata_ind *)mp->b_rptr;
5211 		udind->PRIM_type = T_UNITDATA_IND;
5212 		udind->SRC_length = tep->te_alen;
5213 		addr_startp = mp->b_rptr + udind->SRC_offset;
5214 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5215 	} else {
5216 		/* Allocate a new T_unidata_ind message */
5217 		mblk_t *ui_mp;
5218 
5219 		ui_mp = allocb(ui_sz, BPRI_MED);
5220 		if (! ui_mp) {
5221 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5222 			    "tl_unitdata:allocb failure:message queued"));
5223 			tl_memrecover(wq, mp, ui_sz);
5224 			return;
5225 		}
5226 
5227 		/*
5228 		 * fill in T_UNITDATA_IND contents
5229 		 */
5230 		DB_TYPE(ui_mp) = M_PROTO;
5231 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5232 		udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5233 		udind->PRIM_type = T_UNITDATA_IND;
5234 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5235 		udind->SRC_length = tep->te_alen;
5236 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5237 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5238 		udind->OPT_offset =
5239 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5240 		udind->OPT_length = olen;
5241 		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5242 
5243 			if (oldolen != 0) {
5244 				bcopy((void *)((uintptr_t)udreq + ooff),
5245 				    (void *)((uintptr_t)udind +
5246 				    udind->OPT_offset),
5247 				    oldolen);
5248 			}
5249 			ASSERT(cr != NULL);
5250 
5251 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5252 			    oldolen, cr, cpid,
5253 			    peer_tep->te_flag, peer_tep->te_credp);
5254 		} else {
5255 			bcopy((void *)((uintptr_t)udreq + ooff),
5256 			    (void *)((uintptr_t)udind + udind->OPT_offset),
5257 			    olen);
5258 		}
5259 
5260 		/*
5261 		 * relink data blocks from mp to ui_mp
5262 		 */
5263 		ui_mp->b_cont = mp->b_cont;
5264 		freeb(mp);
5265 		mp = ui_mp;
5266 	}
5267 	/*
5268 	 * send indication message
5269 	 */
5270 	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5271 	putnext(peer_tep->te_rq, mp);
5272 }
5273 
5274 
5275 
5276 /*
5277  * Check if a given addr is in use.
5278  * Endpoint ptr returned or NULL if not found.
5279  * The name space is separate for each mode. This implies that
5280  * sockets get their own name space.
5281  */
5282 static tl_endpt_t *
5283 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5284 {
5285 	tl_endpt_t *peer_tep = NULL;
5286 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5287 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5288 
5289 	ASSERT(! IS_SOCKET(tep));
5290 
5291 	ASSERT(ap != NULL && ap->ta_alen > 0);
5292 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5293 	ASSERT(ap->ta_abuf != NULL);
5294 	EQUIV(rc == 0, peer_tep != NULL);
5295 	IMPLY(rc == 0,
5296 	    (tep->te_zoneid == peer_tep->te_zoneid) &&
5297 	    (tep->te_transport == peer_tep->te_transport));
5298 
5299 	if ((rc == 0) && (peer_tep->te_closing)) {
5300 		tl_refrele(peer_tep);
5301 		peer_tep = NULL;
5302 	}
5303 
5304 	return (peer_tep);
5305 }
5306 
5307 /*
5308  * Find peer for a socket based on unix domain address.
5309  * For implicit addresses our peer can be found by minor number in ai hash. For
5310  * explicit binds we look vnode address at addr_hash.
5311  */
5312 static tl_endpt_t *
5313 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5314 {
5315 	tl_endpt_t *peer_tep = NULL;
5316 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5317 	    tep->te_aihash : tep->te_addrhash;
5318 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5319 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5320 
5321 	ASSERT(IS_SOCKET(tep));
5322 	EQUIV(rc == 0, peer_tep != NULL);
5323 	IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5324 
5325 	if (peer_tep != NULL) {
5326 		/* Don't attempt to use closing peer. */
5327 		if (peer_tep->te_closing)
5328 			goto errout;
5329 
5330 		/*
5331 		 * Cross-zone unix sockets are permitted, but for Trusted
5332 		 * Extensions only, the "server" for these must be in the
5333 		 * global zone.
5334 		 */
5335 		if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5336 		    is_system_labeled() &&
5337 		    (peer_tep->te_zoneid != GLOBAL_ZONEID))
5338 			goto errout;
5339 	}
5340 
5341 	return (peer_tep);
5342 
5343 errout:
5344 	tl_refrele(peer_tep);
5345 	return (NULL);
5346 }
5347 
5348 /*
5349  * Generate a free addr and return it in struct pointed by ap
5350  * but allocating space for address buffer.
5351  * The generated address will be at least 4 bytes long and, if req->ta_alen
5352  * exceeds 4 bytes, be req->ta_alen bytes long.
5353  *
5354  * If address is found it will be inserted in the hash.
5355  *
5356  * If req->ta_alen is larger than the default alen (4 bytes) the last
5357  * alen-4 bytes will always be the same as in req.
5358  *
5359  * Return 0 for failure.
5360  * Return non-zero for success.
5361  */
5362 static boolean_t
5363 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5364 {
5365 	t_scalar_t	alen;
5366 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5367 
5368 	ASSERT(tep->te_hash_hndl != NULL);
5369 	ASSERT(! IS_SOCKET(tep));
5370 
5371 	if (tep->te_hash_hndl == NULL)
5372 		return (B_FALSE);
5373 
5374 	/*
5375 	 * check if default addr is in use
5376 	 * if it is - bump it and try again
5377 	 */
5378 	if (req == NULL) {
5379 		alen = sizeof (uint32_t);
5380 	} else {
5381 		alen = max(req->ta_alen, sizeof (uint32_t));
5382 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5383 	}
5384 
5385 	if (tep->te_alen < alen) {
5386 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5387 
5388 		/*
5389 		 * Not enough space in tep->ta_ap to hold the address,
5390 		 * allocate a bigger space.
5391 		 */
5392 		if (abuf == NULL)
5393 			return (B_FALSE);
5394 
5395 		if (tep->te_alen > 0)
5396 			kmem_free(tep->te_abuf, tep->te_alen);
5397 
5398 		tep->te_alen = alen;
5399 		tep->te_abuf = abuf;
5400 	}
5401 
5402 	/* Copy in the address in req */
5403 	if (req != NULL) {
5404 		ASSERT(alen >= req->ta_alen);
5405 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5406 	}
5407 
5408 	/*
5409 	 * First try minor number then try default addresses.
5410 	 */
5411 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5412 
5413 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5414 		if (mod_hash_insert_reserve(tep->te_addrhash,
5415 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5416 		    tep->te_hash_hndl) == 0) {
5417 			/*
5418 			 * found free address
5419 			 */
5420 			tep->te_flag |= TL_ADDRHASHED;
5421 			tep->te_hash_hndl = NULL;
5422 
5423 			return (B_TRUE); /* successful return */
5424 		}
5425 		/*
5426 		 * Use default address.
5427 		 */
5428 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5429 		atomic_inc_32(&tep->te_defaddr);
5430 	}
5431 
5432 	/*
5433 	 * Failed to find anything.
5434 	 */
5435 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5436 	    "tl_get_any_addr:looped 2^32 times"));
5437 	return (B_FALSE);
5438 }
5439 
5440 /*
5441  * reallocb + set r/w ptrs to reflect size.
5442  */
5443 static mblk_t *
5444 tl_resizemp(mblk_t *mp, ssize_t new_size)
5445 {
5446 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5447 		return (NULL);
5448 
5449 	mp->b_rptr = DB_BASE(mp);
5450 	mp->b_wptr = mp->b_rptr + new_size;
5451 	return (mp);
5452 }
5453 
5454 static void
5455 tl_cl_backenable(tl_endpt_t *tep)
5456 {
5457 	list_t *l = &tep->te_flowlist;
5458 	tl_endpt_t *elp;
5459 
5460 	ASSERT(IS_CLTS(tep));
5461 
5462 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5463 		ASSERT(tep->te_ser == elp->te_ser);
5464 		ASSERT(elp->te_flowq == tep);
5465 		if (! elp->te_closing)
5466 			TL_QENABLE(elp);
5467 		elp->te_flowq = NULL;
5468 		list_remove(l, elp);
5469 	}
5470 }
5471 
5472 /*
5473  * Unconnect endpoints.
5474  */
5475 static void
5476 tl_co_unconnect(tl_endpt_t *tep)
5477 {
5478 	tl_endpt_t	*peer_tep = tep->te_conp;
5479 	tl_endpt_t	*srv_tep = tep->te_oconp;
5480 	list_t		*l;
5481 	tl_icon_t  	*tip;
5482 	tl_endpt_t	*cl_tep;
5483 	mblk_t		*d_mp;
5484 
5485 	ASSERT(IS_COTS(tep));
5486 	/*
5487 	 * If our peer is closing, don't use it.
5488 	 */
5489 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5490 		TL_UNCONNECT(tep->te_conp);
5491 		peer_tep = NULL;
5492 	}
5493 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5494 		TL_UNCONNECT(tep->te_oconp);
5495 		srv_tep = NULL;
5496 	}
5497 
5498 	if (tep->te_nicon > 0) {
5499 		l = &tep->te_iconp;
5500 		/*
5501 		 * If incoming requests pending, change state
5502 		 * of clients on disconnect ind event and send
5503 		 * discon_ind pdu to modules above them
5504 		 * for server: all clients get disconnect
5505 		 */
5506 
5507 		while (tep->te_nicon > 0) {
5508 			tip    = list_head(l);
5509 			cl_tep = tip->ti_tep;
5510 
5511 			if (cl_tep == NULL) {
5512 				tl_freetip(tep, tip);
5513 				continue;
5514 			}
5515 
5516 			if (cl_tep->te_oconp != NULL) {
5517 				ASSERT(cl_tep != cl_tep->te_oconp);
5518 				TL_UNCONNECT(cl_tep->te_oconp);
5519 			}
5520 
5521 			if (cl_tep->te_closing) {
5522 				tl_freetip(tep, tip);
5523 				continue;
5524 			}
5525 
5526 			enableok(cl_tep->te_wq);
5527 			TL_QENABLE(cl_tep);
5528 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5529 			if (d_mp != NULL) {
5530 				cl_tep->te_state = TS_IDLE;
5531 				putnext(cl_tep->te_rq, d_mp);
5532 			} else {
5533 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5534 				    SL_TRACE|SL_ERROR,
5535 				    "tl_co_unconnect:icmng: "
5536 				    "allocb failure"));
5537 			}
5538 			tl_freetip(tep, tip);
5539 		}
5540 	} else if (srv_tep != NULL) {
5541 		/*
5542 		 * If outgoing request pending, change state
5543 		 * of server on discon ind event
5544 		 */
5545 
5546 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5547 		    IS_COTSORD(srv_tep) &&
5548 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5549 			/*
5550 			 * Queue ordrel_ind for server to be picked up
5551 			 * when the connection is accepted.
5552 			 */
5553 			d_mp = tl_ordrel_ind_alloc();
5554 		} else {
5555 			/*
5556 			 * send discon_ind to server
5557 			 */
5558 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5559 		}
5560 		if (d_mp == NULL) {
5561 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5562 			    SL_TRACE|SL_ERROR,
5563 			    "tl_co_unconnect:outgoing:allocb failure"));
5564 			TL_UNCONNECT(tep->te_oconp);
5565 			goto discon_peer;
5566 		}
5567 
5568 		/*
5569 		 * If this is a socket the T_DISCON_IND is queued with
5570 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5571 		 * from the list of pending connections.
5572 		 * Note that when te_oconp is set the peer better have
5573 		 * a t_connind_t for the client.
5574 		 */
5575 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5576 			/*
5577 			 * Queue the disconnection message.
5578 			 */
5579 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5580 		} else {
5581 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5582 			if (tip == NULL) {
5583 				freemsg(d_mp);
5584 			} else {
5585 				ASSERT(tep == tip->ti_tep);
5586 				ASSERT(tep->te_ser == srv_tep->te_ser);
5587 				/*
5588 				 * Delete tip from the server list.
5589 				 */
5590 				if (srv_tep->te_nicon == 1) {
5591 					srv_tep->te_state =
5592 					    NEXTSTATE(TE_DISCON_IND2,
5593 					    srv_tep->te_state);
5594 				} else {
5595 					srv_tep->te_state =
5596 					    NEXTSTATE(TE_DISCON_IND3,
5597 					    srv_tep->te_state);
5598 				}
5599 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5600 				    T_DISCON_IND);
5601 				putnext(srv_tep->te_rq, d_mp);
5602 				tl_freetip(srv_tep, tip);
5603 			}
5604 			TL_UNCONNECT(tep->te_oconp);
5605 			srv_tep = NULL;
5606 		}
5607 	} else if (peer_tep != NULL) {
5608 		/*
5609 		 * unconnect existing connection
5610 		 * If connected, change state of peer on
5611 		 * discon ind event and send discon ind pdu
5612 		 * to module above it
5613 		 */
5614 
5615 		ASSERT(tep->te_ser == peer_tep->te_ser);
5616 		if (IS_COTSORD(peer_tep) &&
5617 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5618 		    peer_tep->te_state == TS_DATA_XFER)) {
5619 			/*
5620 			 * send ordrel ind
5621 			 */
5622 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5623 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5624 			    peer_tep->te_state,
5625 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5626 			d_mp = tl_ordrel_ind_alloc();
5627 			if (! d_mp) {
5628 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5629 				    SL_TRACE|SL_ERROR,
5630 				    "tl_co_unconnect:connected:"
5631 				    "allocb failure"));
5632 				/*
5633 				 * Continue with cleaning up peer as
5634 				 * this side may go away with the close
5635 				 */
5636 				TL_QENABLE(peer_tep);
5637 				goto discon_peer;
5638 			}
5639 			peer_tep->te_state =
5640 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5641 
5642 			putnext(peer_tep->te_rq, d_mp);
5643 			/*
5644 			 * Handle flow control case.  This will generate
5645 			 * a t_discon_ind message with reason 0 if there
5646 			 * is data queued on the write side.
5647 			 */
5648 			TL_QENABLE(peer_tep);
5649 		} else if (IS_COTSORD(peer_tep) &&
5650 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5651 			/*
5652 			 * Sent an ordrel_ind. We send a discon with
5653 			 * with error 0 to inform that the peer is gone.
5654 			 */
5655 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5656 			    SL_TRACE|SL_ERROR,
5657 			    "tl_co_unconnect: discon in state %d",
5658 			    tep->te_state));
5659 			tl_discon_ind(peer_tep, 0);
5660 		} else {
5661 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5662 			    SL_TRACE|SL_ERROR,
5663 			    "tl_co_unconnect: state %d", tep->te_state));
5664 			tl_discon_ind(peer_tep, ECONNRESET);
5665 		}
5666 
5667 discon_peer:
5668 		/*
5669 		 * Disconnect cross-pointers only for close
5670 		 */
5671 		if (tep->te_closing) {
5672 			peer_tep = tep->te_conp;
5673 			TL_REMOVE_PEER(peer_tep->te_conp);
5674 			TL_REMOVE_PEER(tep->te_conp);
5675 		}
5676 	}
5677 }
5678 
5679 /*
5680  * Note: The following routine does not recover from allocb()
5681  * failures
5682  * The reason should be from the <sys/errno.h> space.
5683  */
5684 static void
5685 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5686 {
5687 	mblk_t *d_mp;
5688 
5689 	if (tep->te_closing)
5690 		return;
5691 
5692 	/*
5693 	 * flush the queues.
5694 	 */
5695 	flushq(tep->te_rq, FLUSHDATA);
5696 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5697 
5698 	/*
5699 	 * send discon ind
5700 	 */
5701 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5702 	if (! d_mp) {
5703 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5704 		    "tl_discon_ind:allocb failure"));
5705 		return;
5706 	}
5707 	tep->te_state = TS_IDLE;
5708 	putnext(tep->te_rq, d_mp);
5709 }
5710 
5711 /*
5712  * Note: The following routine does not recover from allocb()
5713  * failures
5714  * The reason should be from the <sys/errno.h> space.
5715  */
5716 static mblk_t *
5717 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5718 {
5719 	mblk_t *mp;
5720 	struct T_discon_ind *tdi;
5721 
5722 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5723 		DB_TYPE(mp) = M_PROTO;
5724 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5725 		tdi = (struct T_discon_ind *)mp->b_rptr;
5726 		tdi->PRIM_type = T_DISCON_IND;
5727 		tdi->DISCON_reason = reason;
5728 		tdi->SEQ_number = seqnum;
5729 	}
5730 	return (mp);
5731 }
5732 
5733 
5734 /*
5735  * Note: The following routine does not recover from allocb()
5736  * failures
5737  */
5738 static mblk_t *
5739 tl_ordrel_ind_alloc(void)
5740 {
5741 	mblk_t *mp;
5742 	struct T_ordrel_ind *toi;
5743 
5744 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5745 		DB_TYPE(mp) = M_PROTO;
5746 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5747 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5748 		toi->PRIM_type = T_ORDREL_IND;
5749 	}
5750 	return (mp);
5751 }
5752 
5753 
5754 /*
5755  * Lookup the seqno in the list of queued connections.
5756  */
5757 static tl_icon_t *
5758 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5759 {
5760 	list_t *l = &tep->te_iconp;
5761 	tl_icon_t *tip = list_head(l);
5762 
5763 	ASSERT(seqno != 0);
5764 
5765 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5766 		;
5767 
5768 	return (tip);
5769 }
5770 
5771 /*
5772  * Queue data for a given T_CONN_IND while verifying that redundant
5773  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5774  * Used when the originator of the connection closes.
5775  */
5776 static void
5777 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5778 {
5779 	tl_icon_t		*tip;
5780 	mblk_t			**mpp, *mp;
5781 	int			prim, nprim;
5782 
5783 	if (nmp->b_datap->db_type == M_PROTO)
5784 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5785 	else
5786 		nprim = -1;	/* M_DATA */
5787 
5788 	tip = tl_icon_find(tep, seqno);
5789 	if (tip == NULL) {
5790 		freemsg(nmp);
5791 		return;
5792 	}
5793 
5794 	ASSERT(tip->ti_seqno != 0);
5795 	mpp = &tip->ti_mp;
5796 	while (*mpp != NULL) {
5797 		mp = *mpp;
5798 
5799 		if (mp->b_datap->db_type == M_PROTO)
5800 			prim = ((union T_primitives *)mp->b_rptr)->type;
5801 		else
5802 			prim = -1;	/* M_DATA */
5803 
5804 		/*
5805 		 * Allow nothing after a T_DISCON_IND
5806 		 */
5807 		if (prim == T_DISCON_IND) {
5808 			freemsg(nmp);
5809 			return;
5810 		}
5811 		/*
5812 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5813 		 */
5814 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5815 			freemsg(nmp);
5816 			return;
5817 		}
5818 		mpp = &(mp->b_next);
5819 	}
5820 	*mpp = nmp;
5821 }
5822 
5823 /*
5824  * Verify if a certain TPI primitive exists on the connind queue.
5825  * Use prim -1 for M_DATA.
5826  * Return non-zero if found.
5827  */
5828 static boolean_t
5829 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5830 {
5831 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5832 	boolean_t found = B_FALSE;
5833 
5834 	if (tip != NULL) {
5835 		mblk_t *mp;
5836 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5837 			found = (DB_TYPE(mp) == M_PROTO &&
5838 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5839 		}
5840 	}
5841 	return (found);
5842 }
5843 
5844 /*
5845  * Send the b_next mblk chain that has accumulated before the connection
5846  * was accepted. Perform the necessary state transitions.
5847  */
5848 static void
5849 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5850 {
5851 	mblk_t			*mp;
5852 	union T_primitives	*primp;
5853 
5854 	if (tep->te_closing) {
5855 		tl_icon_freemsgs(mpp);
5856 		return;
5857 	}
5858 
5859 	ASSERT(tep->te_state == TS_DATA_XFER);
5860 	ASSERT(tep->te_rq->q_first == NULL);
5861 
5862 	while ((mp = *mpp) != NULL) {
5863 		*mpp = mp->b_next;
5864 		mp->b_next = NULL;
5865 
5866 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5867 		switch (DB_TYPE(mp)) {
5868 		default:
5869 			freemsg(mp);
5870 			break;
5871 		case M_DATA:
5872 			putnext(tep->te_rq, mp);
5873 			break;
5874 		case M_PROTO:
5875 			primp = (union T_primitives *)mp->b_rptr;
5876 			switch (primp->type) {
5877 			case T_UNITDATA_IND:
5878 			case T_DATA_IND:
5879 			case T_OPTDATA_IND:
5880 			case T_EXDATA_IND:
5881 				putnext(tep->te_rq, mp);
5882 				break;
5883 			case T_ORDREL_IND:
5884 				tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5885 				    tep->te_state);
5886 				putnext(tep->te_rq, mp);
5887 				break;
5888 			case T_DISCON_IND:
5889 				tep->te_state = TS_IDLE;
5890 				putnext(tep->te_rq, mp);
5891 				break;
5892 			default:
5893 #ifdef DEBUG
5894 				cmn_err(CE_PANIC,
5895 				    "tl_icon_sendmsgs: unknown primitive");
5896 #endif /* DEBUG */
5897 				freemsg(mp);
5898 				break;
5899 			}
5900 			break;
5901 		}
5902 	}
5903 }
5904 
5905 /*
5906  * Free the b_next mblk chain that has accumulated before the connection
5907  * was accepted.
5908  */
5909 static void
5910 tl_icon_freemsgs(mblk_t **mpp)
5911 {
5912 	mblk_t *mp;
5913 
5914 	while ((mp = *mpp) != NULL) {
5915 		*mpp = mp->b_next;
5916 		mp->b_next = NULL;
5917 		freemsg(mp);
5918 	}
5919 }
5920 
5921 /*
5922  * Send M_ERROR
5923  * Note: assumes caller ensured enough space in mp or enough
5924  *	memory available. Does not attempt recovery from allocb()
5925  *	failures
5926  */
5927 
5928 static void
5929 tl_merror(queue_t *wq, mblk_t *mp, int error)
5930 {
5931 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5932 
5933 	if (tep->te_closing) {
5934 		freemsg(mp);
5935 		return;
5936 	}
5937 
5938 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
5939 	    SL_TRACE|SL_ERROR,
5940 	    "tl_merror: tep=%p, err=%d", (void *)tep, error));
5941 
5942 	/*
5943 	 * flush all messages on queue. we are shutting
5944 	 * the stream down on fatal error
5945 	 */
5946 	flushq(wq, FLUSHALL);
5947 	if (IS_COTS(tep)) {
5948 		/* connection oriented - unconnect endpoints */
5949 		tl_co_unconnect(tep);
5950 	}
5951 	if (mp->b_cont) {
5952 		freemsg(mp->b_cont);
5953 		mp->b_cont = NULL;
5954 	}
5955 
5956 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5957 		freemsg(mp);
5958 		mp = allocb(1, BPRI_HI);
5959 		if (!mp) {
5960 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5961 			    SL_TRACE|SL_ERROR,
5962 			    "tl_merror:M_PROTO: out of memory"));
5963 			return;
5964 		}
5965 	}
5966 	if (mp) {
5967 		DB_TYPE(mp) = M_ERROR;
5968 		mp->b_rptr = DB_BASE(mp);
5969 		*mp->b_rptr = (char)error;
5970 		mp->b_wptr = mp->b_rptr + sizeof (char);
5971 		qreply(wq, mp);
5972 	} else {
5973 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
5974 	}
5975 }
5976 
5977 static void
5978 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5979 {
5980 	ASSERT(cr != NULL);
5981 
5982 	if (flag & TL_SETCRED) {
5983 		struct opthdr *opt = (struct opthdr *)buf;
5984 		tl_credopt_t *tlcred;
5985 
5986 		opt->level = TL_PROT_LEVEL;
5987 		opt->name = TL_OPT_PEER_CRED;
5988 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5989 
5990 		tlcred = (tl_credopt_t *)(opt + 1);
5991 		tlcred->tc_uid = crgetuid(cr);
5992 		tlcred->tc_gid = crgetgid(cr);
5993 		tlcred->tc_ruid = crgetruid(cr);
5994 		tlcred->tc_rgid = crgetrgid(cr);
5995 		tlcred->tc_suid = crgetsuid(cr);
5996 		tlcred->tc_sgid = crgetsgid(cr);
5997 		tlcred->tc_ngroups = crgetngroups(cr);
5998 	} else if (flag & TL_SETUCRED) {
5999 		struct opthdr *opt = (struct opthdr *)buf;
6000 
6001 		opt->level = TL_PROT_LEVEL;
6002 		opt->name = TL_OPT_PEER_UCRED;
6003 		opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6004 
6005 		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6006 	} else {
6007 		struct T_opthdr *topt = (struct T_opthdr *)buf;
6008 		ASSERT(flag & TL_SOCKUCRED);
6009 
6010 		topt->level = SOL_SOCKET;
6011 		topt->name = SCM_UCRED;
6012 		topt->len = ucredminsize(cr) + sizeof (*topt);
6013 		topt->status = 0;
6014 		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6015 	}
6016 }
6017 
6018 /* ARGSUSED */
6019 static int
6020 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6021 {
6022 	/* no default value processed in protocol specific code currently */
6023 	return (-1);
6024 }
6025 
6026 /* ARGSUSED */
6027 static int
6028 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6029 {
6030 	int len;
6031 	tl_endpt_t *tep;
6032 	int *valp;
6033 
6034 	tep = (tl_endpt_t *)wq->q_ptr;
6035 
6036 	len = 0;
6037 
6038 	/*
6039 	 * Assumes: option level and name sanity check done elsewhere
6040 	 */
6041 
6042 	switch (level) {
6043 	case SOL_SOCKET:
6044 		if (! IS_SOCKET(tep))
6045 			break;
6046 		switch (name) {
6047 		case SO_RECVUCRED:
6048 			len = sizeof (int);
6049 			valp = (int *)ptr;
6050 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6051 			break;
6052 		default:
6053 			break;
6054 		}
6055 		break;
6056 	case TL_PROT_LEVEL:
6057 		switch (name) {
6058 		case TL_OPT_PEER_CRED:
6059 		case TL_OPT_PEER_UCRED:
6060 			/*
6061 			 * option not supposed to retrieved directly
6062 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6063 			 * when some internal flags set by other options
6064 			 * Direct retrieval always designed to fail(ignored)
6065 			 * for this option.
6066 			 */
6067 			break;
6068 		}
6069 	}
6070 	return (len);
6071 }
6072 
6073 /* ARGSUSED */
6074 static int
6075 tl_set_opt(
6076 	queue_t		*wq,
6077 	uint_t		mgmt_flags,
6078 	int		level,
6079 	int		name,
6080 	uint_t		inlen,
6081 	uchar_t		*invalp,
6082 	uint_t		*outlenp,
6083 	uchar_t		*outvalp,
6084 	void		*thisdg_attrs,
6085 	cred_t		*cr)
6086 {
6087 	int error;
6088 	tl_endpt_t *tep;
6089 
6090 	tep = (tl_endpt_t *)wq->q_ptr;
6091 
6092 	error = 0;		/* NOERROR */
6093 
6094 	/*
6095 	 * Assumes: option level and name sanity checks done elsewhere
6096 	 */
6097 
6098 	switch (level) {
6099 	case SOL_SOCKET:
6100 		if (! IS_SOCKET(tep)) {
6101 			error = EINVAL;
6102 			break;
6103 		}
6104 		/*
6105 		 * TBD: fill in other AF_UNIX socket options and then stop
6106 		 * returning error.
6107 		 */
6108 		switch (name) {
6109 		case SO_RECVUCRED:
6110 			/*
6111 			 * We only support this for datagram sockets;
6112 			 * getpeerucred handles the connection oriented
6113 			 * transports.
6114 			 */
6115 			if (! IS_CLTS(tep)) {
6116 				error = EINVAL;
6117 				break;
6118 			}
6119 			if (*(int *)invalp == 0)
6120 				tep->te_flag &= ~TL_SOCKUCRED;
6121 			else
6122 				tep->te_flag |= TL_SOCKUCRED;
6123 			break;
6124 		default:
6125 			error = EINVAL;
6126 			break;
6127 		}
6128 		break;
6129 	case TL_PROT_LEVEL:
6130 		switch (name) {
6131 		case TL_OPT_PEER_CRED:
6132 		case TL_OPT_PEER_UCRED:
6133 			/*
6134 			 * option not supposed to be set directly
6135 			 * Its value in initialized for each endpoint at
6136 			 * driver open time.
6137 			 * Direct setting always designed to fail for this
6138 			 * option.
6139 			 */
6140 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6141 			    SL_TRACE|SL_ERROR,
6142 			    "tl_set_opt: option is not supported"));
6143 			error = EPROTO;
6144 			break;
6145 		}
6146 	}
6147 	return (error);
6148 }
6149 
6150 
6151 static void
6152 tl_timer(void *arg)
6153 {
6154 	queue_t *wq = arg;
6155 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6156 
6157 	ASSERT(tep);
6158 
6159 	tep->te_timoutid = 0;
6160 
6161 	enableok(wq);
6162 	/*
6163 	 * Note: can call wsrv directly here and save context switch
6164 	 * Consider change when qtimeout (not timeout) is active
6165 	 */
6166 	qenable(wq);
6167 }
6168 
6169 static void
6170 tl_buffer(void *arg)
6171 {
6172 	queue_t *wq = arg;
6173 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6174 
6175 	ASSERT(tep);
6176 
6177 	tep->te_bufcid = 0;
6178 	tep->te_nowsrv = B_FALSE;
6179 
6180 	enableok(wq);
6181 	/*
6182 	 *  Note: can call wsrv directly here and save context switch
6183 	 * Consider change when qbufcall (not bufcall) is active
6184 	 */
6185 	qenable(wq);
6186 }
6187 
6188 static void
6189 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6190 {
6191 	tl_endpt_t *tep;
6192 
6193 	tep = (tl_endpt_t *)wq->q_ptr;
6194 
6195 	if (tep->te_closing) {
6196 		freemsg(mp);
6197 		return;
6198 	}
6199 	noenable(wq);
6200 
6201 	(void) insq(wq, wq->q_first, mp);
6202 
6203 	if (tep->te_bufcid || tep->te_timoutid) {
6204 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6205 		    "tl_memrecover:recover %p pending", (void *)wq));
6206 		return;
6207 	}
6208 
6209 	if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6210 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6211 		    drv_usectohz(TL_BUFWAIT));
6212 	}
6213 }
6214 
6215 static void
6216 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6217 {
6218 	ASSERT(tip->ti_seqno != 0);
6219 
6220 	if (tip->ti_mp != NULL) {
6221 		tl_icon_freemsgs(&tip->ti_mp);
6222 		tip->ti_mp = NULL;
6223 	}
6224 	if (tip->ti_tep != NULL) {
6225 		tl_refrele(tip->ti_tep);
6226 		tip->ti_tep = NULL;
6227 	}
6228 	list_remove(&tep->te_iconp, tip);
6229 	kmem_free(tip, sizeof (tl_icon_t));
6230 	tep->te_nicon--;
6231 }
6232 
6233 /*
6234  * Remove address from address hash.
6235  */
6236 static void
6237 tl_addr_unbind(tl_endpt_t *tep)
6238 {
6239 	tl_endpt_t *elp;
6240 
6241 	if (tep->te_flag & TL_ADDRHASHED) {
6242 		if (IS_SOCKET(tep)) {
6243 			(void) mod_hash_remove(tep->te_addrhash,
6244 			    (mod_hash_key_t)tep->te_vp,
6245 			    (mod_hash_val_t *)&elp);
6246 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6247 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6248 		} else {
6249 			(void) mod_hash_remove(tep->te_addrhash,
6250 			    (mod_hash_key_t)&tep->te_ap,
6251 			    (mod_hash_val_t *)&elp);
6252 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6253 			tep->te_alen = -1;
6254 			tep->te_abuf = NULL;
6255 		}
6256 		tep->te_flag &= ~TL_ADDRHASHED;
6257 	}
6258 }
6259