xref: /titanic_41/usr/src/uts/common/io/tl.c (revision a970c705050f9e90c4b6d7982a9b3211719353fb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
27  */
28 
29 /*
30  * Multithreaded STREAMS Local Transport Provider.
31  *
32  * OVERVIEW
33  * ========
34  *
35  * This driver provides TLI as well as socket semantics.  It provides
36  * connectionless, connection oriented, and connection oriented with orderly
37  * release transports for TLI and sockets. Each transport type has separate name
38  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
39  * this removes any name space conflicts when binding to socket style transport
40  * addresses.
41  *
42  * NOTE: There is one exception: Socket ticots and ticotsord transports share
43  * the same namespace. In fact, sockets always use ticotsord type transport.
44  *
45  * The driver mode is specified during open() by the minor number used for
46  * open.
47  *
48  *  The sockets in addition have the following semantic differences:
49  *  No support for passing up credentials (TL_SET[U]CRED).
50  *
51  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
52  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
53  *	T_OPTDATA_IND.
54  *
55  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
56  *	a T_CONN_RES is received from the acceptor. This means that a socket
57  *	connect will complete before the peer has called accept.
58  *
59  *
60  * MULTITHREADING
61  * ==============
62  *
63  * The driver does not use STREAMS protection mechanisms. Instead it uses a
64  * generic "serializer" abstraction. Most of the operations are executed behind
65  * the serializer and are, essentially single-threaded. All functions executed
66  * behind the same serializer are strictly serialized. So if one thread calls
67  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
68  * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
69  * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2)
70  * or bar(mp2, arg2); foo(mp1, arg1); But foo() and bar() will never run at the
71  * same time.
72  *
73  * Connectionless transports use a single serializer per transport type (one
74  * for TLI and one for sockets). Connection-oriented transports use
75  * finer-grained serializers.
76  *
77  * All COTS-type endpoints start their life with private serializers. During
78  * connection request processing the endpoint serializer is switched to the
79  * listener's serializer and the rest of T_CONN_REQ processing is done on the
80  * listener serializer. During T_CONN_RES processing the eager serializer is
81  * switched from listener to acceptor serializer and after that point all
82  * processing for eager and acceptor happens on this serializer. To avoid races
83  * with endpoint closes while its serializer may be changing closes are blocked
84  * while serializers are manipulated.
85  *
86  * References accounting
87  * ---------------------
88  *
89  * Endpoints are reference counted and freed when the last reference is
90  * dropped. Functions within the serializer may access an endpoint state even
91  * after an endpoint closed. The te_closing being set on the endpoint indicates
92  * that the endpoint entered its close routine.
93  *
94  * One reference is held for each opened endpoint instance. The reference
95  * counter is incremented when the endpoint is linked to another endpoint and
96  * decremented when the link disappears. It is also incremented when the
97  * endpoint is found by the hash table lookup. This increment is atomic with the
98  * lookup itself and happens while the hash table read lock is held.
99  *
100  * Close synchronization
101  * ---------------------
102  *
103  * During close the endpoint is marked as closing using te_closing flag. It is
104  * usually enough to check for te_closing flag since all other state changes
105  * happen after this flag is set and the close entered serializer. Immediately
106  * after setting te_closing flag tl_close() enters serializer and waits until
107  * the callback finishes. This allows all functions called within serializer to
108  * simply check te_closing without any locks.
109  *
110  * Serializer management.
111  * ---------------------
112  *
113  * For COTS transports serializers are created when the endpoint is constructed
114  * and destroyed when the endpoint is destructed. CLTS transports use global
115  * serializers - one for sockets and one for TLI.
116  *
117  * COTS serializers have separate reference counts to deal with several
118  * endpoints sharing the same serializer. There is a subtle problem related to
119  * the serializer destruction. The serializer should never be destroyed by any
120  * function executed inside serializer. This means that close has to wait till
121  * all serializer activity for this endpoint is finished before it can drop the
122  * last reference on the endpoint (which may as well free the serializer).  This
123  * is only relevant for COTS transports which manage serializers
124  * dynamically. For CLTS transports close may complete without waiting for all
125  * serializer activity to finish since serializer is only destroyed at driver
126  * detach time.
127  *
128  * COTS endpoints keep track of the number of outstanding requests on the
129  * serializer for the endpoint. The code handling accept() avoids changing
130  * client serializer if it has any pending messages on the serializer and
131  * instead moves acceptor to listener's serializer.
132  *
133  *
134  * Use of hash tables
135  * ------------------
136  *
137  * The driver uses modhash hash table implementation. Each transport uses two
138  * hash tables - one for finding endpoints by acceptor ID and another one for
139  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
140  * pair of hash tables since sockets only use TICOTSORD.
141  *
142  * All hash tables lookups increment a reference count for returned endpoints,
143  * so we may safely check the endpoint state even when the endpoint is removed
144  * from the hash by another thread immediately after it is found.
145  *
146  *
147  * CLOSE processing
148  * ================
149  *
150  * The driver enters serializer twice on close(). The close sequence is the
151  * following:
152  *
153  * 1) Wait until closing is safe (te_closewait becomes zero)
154  *	This step is needed to prevent close during serializer switches. In most
155  *	cases (close happening after connection establishment) te_closewait is
156  *	zero.
157  * 2) Set te_closing.
158  * 3) Call tl_close_ser() within serializer and wait for it to complete.
159  *
160  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
161  *	It also needs to clear write-side q_next pointers - this should be done
162  *	before qprocsoff().
163  *
164  *    This synchronous serializer entry during close is needed to ensure that
165  *    the queue is valid everywhere inside the serializer.
166  *
167  *    Note that in many cases close will execute tl_close_ser() synchronously,
168  *    so it will not wait at all.
169  *
170  * 4) Call qprocsoff().
171  * 5) Call tl_close_finish_ser() within the serializer and wait for it to
172  *	complete (for COTS transports). For CLTS transport there is no wait.
173  *
174  *	tl_close_finish_ser() finishes the close process and wakes up waiting
175  *	close if there is any.
176  *
177  *    Note that in most cases close will enter tl_close_finish_ser()
178  *    synchronously and will not wait at all.
179  *
180  *
181  * Flow Control
182  * ============
183  *
184  * The driver implements both read and write side service routines. No one calls
185  * putq() on the read queue. The read side service routine tl_rsrv() is called
186  * when the read side stream is back-enabled. It enters serializer synchronously
187  * (waits till serializer processing is complete). Within serializer it
188  * back-enables all endpoints blocked by the queue for connection-less
189  * transports and enables write side service processing for the peer for
190  * connection-oriented transports.
191  *
192  * Read and write side service routines use special mblk_sized space in the
193  * endpoint structure to enter perimeter.
194  *
195  * Write-side flow control
196  * -----------------------
197  *
198  * Write side flow control is a bit tricky. The driver needs to deal with two
199  * message queues - the explicit STREAMS message queue maintained by
200  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
201  * queues should be synchronized to preserve message ordering and should
202  * maintain a single order determined by the order in which messages enter
203  * tl_wput(). In order to maintain the ordering between these two queues the
204  * STREAMS queue is only manipulated within the serializer, so the ordering is
205  * provided by the serializer.
206  *
207  * Functions called from the tl_wsrv() sometimes may call putbq(). To
208  * immediately stop any further processing of the STREAMS message queues the
209  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
210  * side service processing stops when the flag is set.
211  *
212  * The tl_wsrv() function enters serializer synchronously and waits for it to
213  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
214  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
215  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
216  * always bounded by the amount of messages on the STREAMS queue at the time
217  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
218  * queue from another serialized entry which can't happen in parallel. This
219  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
220  * of it draining forever while writer places new messages on the STREAMS
221  * queue).
222  *
223  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
224  *
225  *
226  * Unix Domain Sockets
227  * ===================
228  *
229  * The driver knows the structure of Unix Domain sockets addresses and treats
230  * them differently from generic TLI addresses. For sockets implicit binds are
231  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
232  * instead of using address length of zero. Explicit binds specify
233  * SOU_MAGIC_EXPLICIT as magic.
234  *
235  * For implicit binds we always use minor number as soua_vp part of the address
236  * and avoid any hash table lookups. This saves two hash tables lookups per
237  * anonymous bind.
238  *
239  * For explicit address we hash the vnode pointer instead of hashing the
240  * full-scale address+zone+length. Hashing by pointer is more efficient than
241  * hashing by the full address.
242  *
243  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
244  * tep structure, so it should be never freed.
245  *
246  * Also for sockets the driver always uses minor number as acceptor id.
247  *
248  * TPI VIOLATIONS
249  * --------------
250  *
251  * This driver violates TPI in several respects for Unix Domain Sockets:
252  *
253  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
254  *	is requested and the endpoint is already in use. There is no point in
255  *	generating an unused address since this address will be rejected by
256  *	sockfs anyway. For implicit binds it always generates a new address
257  *	(sets soua_vp to its minor number).
258  *
259  * 2) It always uses minor number as acceptor ID and never uses queue
260  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
261  *	message and they do not use the queue pointer.
262  *
263  * 3) For Listener sockets the usual sequence is to issue bind() zero backlog
264  *	followed by listen(). The listen() should be issued with non-zero
265  *	backlog, so sotpi_listen() issues unbind request followed by bind
266  *	request to the same address but with a non-zero qlen value. Both
267  *	tl_bind() and tl_unbind() require write lock on the hash table to
268  *	insert/remove the address. The driver does not remove the address from
269  *	the hash for endpoints that are bound to the explicit address and have
270  *	backlog of zero. During T_BIND_REQ processing if the address requested
271  *	is equal to the address the endpoint already has it updates the backlog
272  *	without reinserting the address in the hash table. This optimization
273  *	avoids two hash table updates for each listener created. It always
274  *	avoids the problem of a "stolen" address when another listener may use
275  *	the same address between the unbind and bind and suddenly listen() fails
276  *	because address is in use even though the bind() succeeded.
277  *
278  *
279  * CONNECTIONLESS TRANSPORTS
280  * =========================
281  *
282  * Connectionless transports all share the same serializer (one for TLI and one
283  * for Sockets). Functions executing behind serializer can check or modify state
284  * of any endpoint.
285  *
286  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
287  * te_lastep field. The next time X talks to some address A it checks whether A
288  * is the same as Y's address and if it is there is no need to lookup Y. If the
289  * address is different or the state of Y is not appropriate (e.g. closed or not
290  * idle) X does a lookup using tl_find_peer() and caches the new address.
291  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
292  * on the endpoint found.
293  *
294  * During close of endpoint Y it doesn't try to remove itself from other
295  * endpoints caches. They will detect that Y is gone and will search the peer
296  * endpoint again.
297  *
298  * Flow Control Handling.
299  * ----------------------
300  *
301  * Each connectionless endpoint keeps a list of endpoints which are
302  * flow-controlled by its queue. It also keeps a pointer to the queue which
303  * flow-controls itself.  Whenever flow control releases for endpoint X it
304  * enables all queues from the list. During close it also back-enables everyone
305  * in the list. If X is flow-controlled when it is closing it removes itself
306  * from the peer's list.
307  *
308  * DATA STRUCTURES
309  * ===============
310  *
311  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
312  * endpoint state. For connection-oriented transports it keeps a list
313  * of pending connections (tl_icon_t). For connectionless transports it keeps a
314  * list of endpoints flow controlled by this one.
315  *
316  * Each transport type is represented by a per-transport data structure
317  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
318  * endpoint address hash tables for each transport. It also contains pointer to
319  * transport serializer for connectionless transports.
320  *
321  * Each endpoint keeps a link to its transport structure, so the code can find
322  * all per-transport information quickly.
323  */
324 
325 #include	<sys/types.h>
326 #include	<sys/inttypes.h>
327 #include	<sys/stream.h>
328 #include	<sys/stropts.h>
329 #define	_SUN_TPI_VERSION 2
330 #include	<sys/tihdr.h>
331 #include	<sys/strlog.h>
332 #include	<sys/debug.h>
333 #include	<sys/cred.h>
334 #include	<sys/errno.h>
335 #include	<sys/kmem.h>
336 #include	<sys/id_space.h>
337 #include	<sys/modhash.h>
338 #include	<sys/mkdev.h>
339 #include	<sys/tl.h>
340 #include	<sys/stat.h>
341 #include	<sys/conf.h>
342 #include	<sys/modctl.h>
343 #include	<sys/strsun.h>
344 #include	<sys/socket.h>
345 #include	<sys/socketvar.h>
346 #include	<sys/sysmacros.h>
347 #include	<sys/xti_xtiopt.h>
348 #include	<sys/ddi.h>
349 #include	<sys/sunddi.h>
350 #include	<sys/zone.h>
351 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
352 #include	<inet/optcom.h>
353 #include	<sys/strsubr.h>
354 #include	<sys/ucred.h>
355 #include	<sys/suntpi.h>
356 #include	<sys/list.h>
357 #include	<sys/serializer.h>
358 
359 /*
360  * TBD List
361  * 14 Eliminate state changes through table
362  * 16. AF_UNIX socket options
363  * 17. connect() for ticlts
364  * 18. support for "netstat" to show AF_UNIX plus TLI local
365  *	transport connections
366  * 21. sanity check to flushing on sending M_ERROR
367  */
368 
369 /*
370  * CONSTANT DECLARATIONS
371  * --------------------
372  */
373 
374 /*
375  * Local declarations
376  */
377 #define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]
378 
379 #define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
380 #define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
381 #define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgz" is unlimited (0) */
382 /*
383  * Hash tables size.
384  */
385 #define	TL_HASH_SIZE 311
386 
387 /*
388  * Definitions for module_info
389  */
390 #define		TL_ID		(104)		/* module ID number */
391 #define		TL_NAME		"tl"		/* module name */
392 #define		TL_MINPSZ	(0)		/* min packet size */
393 #define		TL_MAXPSZ	INFPSZ 		/* max packet size ZZZ */
394 #define		TL_HIWAT	(16*1024)	/* hi water mark */
395 #define		TL_LOWAT	(256)		/* lo water mark */
396 /*
397  * Definition of minor numbers/modes for new transport provider modes.
398  * We view the socket use as a separate mode to get a separate name space.
399  */
400 #define		TL_TICOTS	0	/* connection oriented transport */
401 #define		TL_TICOTSORD 	1	/* COTS w/ orderly release */
402 #define		TL_TICLTS 	2	/* connectionless transport */
403 #define		TL_UNUSED	3
404 #define		TL_SOCKET	4	/* Socket */
405 #define		TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
406 #define		TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
407 #define		TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)
408 
409 #define		TL_MINOR_MASK	0x7
410 #define		TL_MINOR_START	(TL_TICLTS + 1)
411 
412 /*
413  * LOCAL MACROS
414  */
415 #define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
416 
417 /*
418  * EXTERNAL VARIABLE DECLARATIONS
419  * -----------------------------
420  */
421 /*
422  * state table defined in the OS space.c
423  */
424 extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
425 
426 /*
427  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
428  */
429 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
430 static int tl_close(queue_t *, int, cred_t *);
431 static void tl_wput(queue_t *, mblk_t *);
432 static void tl_wsrv(queue_t *);
433 static void tl_rsrv(queue_t *);
434 
435 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
436 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
437 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
438 
439 
440 /*
441  * GLOBAL DATA STRUCTURES AND VARIABLES
442  * -----------------------------------
443  */
444 
445 /*
446  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
447  * For now, we only manage the SO_RECVUCRED option but we also have
448  * harmless dummy options to make things work with some common code we access.
449  */
450 opdes_t	tl_opt_arr[] = {
451 	/* The SO_TYPE is needed for the hack below */
452 	{
453 		SO_TYPE,
454 		SOL_SOCKET,
455 		OA_R,
456 		OA_R,
457 		OP_NP,
458 		0,
459 		sizeof (t_scalar_t),
460 		0
461 	},
462 	{
463 		SO_RECVUCRED,
464 		SOL_SOCKET,
465 		OA_RW,
466 		OA_RW,
467 		OP_NP,
468 		0,
469 		sizeof (int),
470 		0
471 	}
472 };
473 
474 /*
475  * Table of all supported levels
476  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
477  * any supported options so we need this info separately.
478  *
479  * This is needed only for topmost tpi providers.
480  */
481 optlevel_t	tl_valid_levels_arr[] = {
482 	XTI_GENERIC,
483 	SOL_SOCKET,
484 	TL_PROT_LEVEL
485 };
486 
487 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
488 /*
489  * Current upper bound on the amount of space needed to return all options.
490  * Additional options with data size of sizeof(long) are handled automatically.
491  * Others need to be accounted for by hand.
 *
 * The bound is, for every entry of tl_opt_arr, four bytes of option data plus
 * one struct opthdr, and on top of that 64 bytes plus a struct T_optmgmt_ack.
492  */
493 #define	TL_MAX_OPT_BUF_LEN						\
494 		((A_CNT(tl_opt_arr) << 2) +				\
495 		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
496 		64 + sizeof (struct T_optmgmt_ack))
497 
498 #define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)
499 
500 /*
501  *	transport addr structure
502  */
503 typedef struct tl_addr {
504 	zoneid_t	ta_zoneid;		/* Zone scope of address */
505 	t_scalar_t	ta_alen;		/* length of abuf */
506 	void		*ta_abuf;		/* the addr itself */
507 } tl_addr_t;
508 
509 /*
510  * Refcounted version of serializer.
511  */
512 typedef struct tl_serializer {
513 	uint_t		ts_refcnt;	/* references held on this serializer */
514 	serializer_t	*ts_serializer;	/* the underlying serializer */
515 } tl_serializer_t;
516 
517 /*
518  * Each transport type has a separate state.
519  * Per-transport state.
520  */
521 typedef struct tl_transport_state {
522 	char		*tr_name;	/* transport name */
523 	minor_t		tr_minor;	/* transport mode (TL_* value) */
524 	uint32_t	tr_defaddr;	/* set to TL_DFADDR in tl_transports */
525 	mod_hash_t	*tr_ai_hash;	/* acceptor ID hash table */
526 	mod_hash_t	*tr_addr_hash;	/* endpoint address hash table */
527 	tl_serializer_t	*tr_serializer;	/* serializer for CLTS transports */
528 } tl_transport_state_t;
529 
530 #define	TL_DFADDR 0x1000
531 
/*
 * Per-transport state table. Entries are listed in TL_* mode value order
 * (0 through 6); the "undefined" entry occupies the TL_UNUSED value. Every
 * entry spells out all six members, with the two hash table pointers and the
 * serializer pointer initially NULL.
 */
532 static tl_transport_state_t tl_transports[] = {
533 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
534 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
535 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
536 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
537 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
538 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
539 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
540 };
541 
542 #define	TL_MAXTRANSPORT A_CNT(tl_transports)
543 
544 struct tl_endpt;
545 typedef struct tl_endpt tl_endpt_t;
546 
547 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
548 
549 /*
550  * Data structure used to represent pending connects.
551  * Records enough information so that the connecting peer can close
552  * before the connection gets accepted.
553  */
554 typedef struct tl_icon {
555 	list_node_t	ti_node;	/* list linkage */
556 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
557 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
558 	t_scalar_t	ti_seqno;	/* Sequence number */
559 } tl_icon_t;
560 
561 typedef struct so_ux_addr soux_addr_t;
562 #define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)
563 
564 /*
565  * Maximum number of unaccepted connection indications allowed per listener.
566  */
567 #define	TL_MAXQLEN	4096
568 int tl_maxqlen = TL_MAXQLEN;
569 
570 /*
571  *	transport endpoint structure
572  */
573 struct tl_endpt {
574 	queue_t		*te_rq;		/* stream read queue */
575 	queue_t		*te_wq;		/* stream write queue */
576 	uint32_t	te_refcnt;	/* reference count */
577 	int32_t 	te_state;	/* TPI state of endpoint */
578 	minor_t		te_minor;	/* minor number */
579 #define	te_seqno	te_minor	/* seqno aliases the minor number */
580 	uint_t		te_flag;	/* flag field */
581 	boolean_t	te_nowsrv;	/* set to stop write-side service */
582 	tl_serializer_t	*te_ser;	/* Serializer to use */
583 #define	te_serializer	te_ser->ts_serializer
584 
585 	soux_addr_t	te_uxaddr;	/* Socket address */
586 #define	te_magic	te_uxaddr.soua_magic
587 #define	te_vp		te_uxaddr.soua_vp
588 	tl_addr_t	te_ap;		/* addr bound to this endpt */
589 #define	te_zoneid te_ap.ta_zoneid
590 #define	te_alen	te_ap.ta_alen
591 #define	te_abuf	te_ap.ta_abuf
592 
593 	tl_transport_state_t *te_transport;	/* per-transport state */
594 #define	te_addrhash	te_transport->tr_addr_hash
595 #define	te_aihash	te_transport->tr_ai_hash
596 #define	te_defaddr	te_transport->tr_defaddr
597 	cred_t		*te_credp;	/* endpoint user credentials */
598 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
599 
600 	/*
601 	 * State specific for connection-oriented and connectionless transports.
602 	 */
603 	union {
604 		/* Connection-oriented state. */
605 		struct {
606 			t_uscalar_t _te_nicon;	/* count of conn requests */
607 			t_uscalar_t _te_qlen;	/* max conn requests */
608 			tl_endpt_t  *_te_oconp;	/* conn request pending */
609 			tl_endpt_t  *_te_conp;	/* connected endpt */
610 #ifndef _ILP32
611 			void	    *_te_pad;
612 #endif
613 			list_t	_te_iconp;	/* list of conn ind. pending */
614 		} _te_cots_state;
615 		/* Connection-less state. */
616 		struct {
617 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
618 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
619 			list_node_t _te_flows;	/* lists of connections */
620 			list_t  _te_flowlist;	/* Who flowcontrols on me */
621 		} _te_clts_state;
622 	} _te_transport_state;
623 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
624 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
625 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
626 #define	te_conp		_te_transport_state._te_cots_state._te_conp
627 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
628 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
629 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
630 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
631 #define	te_flows	_te_transport_state._te_clts_state._te_flows
632 
633 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
634 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
635 	pid_t		te_cpid;	/* cached pid of endpoint */
636 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
637 	/*
638 	 * Pieces of the endpoint state needed for closing.
639 	 */
640 	kmutex_t	te_closelock;
641 	kcondvar_t	te_closecv;
642 	uint8_t		te_closing;	/* The endpoint started closing */
643 	uint8_t		te_closewait;	/* Wait in close until zero */
644 	mblk_t		te_closemp;	/* for entering serializer on close */
645 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
646 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
647 	kmutex_t	te_srv_lock;
648 	kcondvar_t	te_srv_cv;
649 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
650 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
651 	/*
652 	 * Pieces of the endpoint state needed for serializer transitions.
653 	 */
654 	kmutex_t	te_ser_lock;	/* Protects the count below */
655 	uint_t		te_ser_count;	/* Number of messages on serializer */
656 };
657 
658 /*
659  * Flag values. Lower 4 bits specify the transport used.
660  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
661  * they allow to identify the endpoint more easily.
662  */
663 #define	TL_LISTENER	0x00010	/* the listener endpoint */
664 #define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
665 #define	TL_EAGER	0x00040	/* connecting endpoint */
666 #define	TL_ACCEPTED	0x00080	/* accepted connection */
667 #define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
668 #define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
669 #define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
670 #define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
671 #define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
672 /*
673  * Boolean checks for the endpoint type.
674  */
675 #define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
676 #define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
677 #define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
678 #define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
679 
680 /*
681  * Certain operations are always used together. These macros reduce the chance
682  * of missing a part of a combination.
683  */
684 #define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
685 #define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
686 
687 #define	TL_PUTBQ(x, mp) {		\
688 	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
689 	(x)->te_nowsrv = B_TRUE;	\
690 	(void) putbq((x)->te_wq, mp);	\
691 }
692 
693 #define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
694 #define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
695 
696 /*
697  * STREAMS driver glue data structures.
698  */
699 static	struct	module_info	tl_minfo = {
700 	TL_ID,			/* mi_idnum */
701 	TL_NAME,		/* mi_idname */
702 	TL_MINPSZ,		/* mi_minpsz */
703 	TL_MAXPSZ,		/* mi_maxpsz */
704 	TL_HIWAT,		/* mi_hiwat */
705 	TL_LOWAT		/* mi_lowat */
706 };
707 
708 static	struct	qinit	tl_rinit = {
709 	NULL,			/* qi_putp */
710 	(int (*)())tl_rsrv,	/* qi_srvp */
711 	tl_open,		/* qi_qopen */
712 	tl_close,		/* qi_qclose */
713 	NULL,			/* qi_qadmin */
714 	&tl_minfo,		/* qi_minfo */
715 	NULL			/* qi_mstat */
716 };
717 
718 static	struct	qinit	tl_winit = {
719 	(int (*)())tl_wput,	/* qi_putp */
720 	(int (*)())tl_wsrv,	/* qi_srvp */
721 	NULL,			/* qi_qopen */
722 	NULL,			/* qi_qclose */
723 	NULL,			/* qi_qadmin */
724 	&tl_minfo,		/* qi_minfo */
725 	NULL			/* qi_mstat */
726 };
727 
728 static	struct streamtab	tlinfo = {
729 	&tl_rinit,		/* st_rdinit */
730 	&tl_winit,		/* st_wrinit */
731 	NULL,			/* st_muxrinit */
732 	NULL			/* st_muxwrinit */
733 };
734 
735 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
736     nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
737 
738 static struct modldrv modldrv = {
739 	&mod_driverops,		/* Type of module -- pseudo driver here */
740 	"TPI Local Transport (tl)",
741 	&tl_devops,		/* driver ops */
742 };
743 
744 /*
745  * Module linkage information for the kernel.
746  */
747 static struct modlinkage modlinkage = {
748 	MODREV_1,
749 	&modldrv,
750 	NULL
751 };
752 
753 /*
754  * Templates for response to info request
755  * Check sanity of unlimited connect data etc.
756  */
757 
758 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
759 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
760 
761 static struct T_info_ack tl_cots_info_ack =
762 	{
763 		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
764 		T_INFINITE,	/* TSDU size */
765 		T_INFINITE,	/* ETSDU size */
766 		T_INFINITE,	/* CDATA_size */
767 		T_INFINITE,	/* DDATA_size */
768 		T_INFINITE,	/* ADDR_size  */
769 		T_INFINITE,	/* OPT_size */
770 		0,		/* TIDU_size - fill at run time */
771 		T_COTS,		/* SERV_type */
772 		-1,		/* CURRENT_state */
773 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
774 	};
775 
776 static struct T_info_ack tl_clts_info_ack =
777 	{
778 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
779 		0,		/* TSDU_size - fill at run time */
780 		-2,		/* ETSDU_size -2 => not supported */
781 		-2,		/* CDATA_size -2 => not supported */
782 		-2,		/* DDATA_size  -2 => not supported */
783 		-1,		/* ADDR_size -1 => infinite */
784 		-1,		/* OPT_size */
785 		0,		/* TIDU_size - fill at run time */
786 		T_CLTS,		/* SERV_type */
787 		-1,		/* CURRENT_state */
788 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
789 	};
790 
/*
 * Private copy of the devinfo pointer.  Recorded by tl_attach() and returned
 * by tl_info() for DDI_INFO_DEVT2DEVINFO requests.
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache: kmem cache of tl_endpt_t objects, created in tl_attach()
 * and destroyed in tl_detach().
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.  tl_open() allocates a unique minor for every endpoint
 * and tl_free() returns it.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.  Computed in tl_attach() from strmsgsz, falling
 * back to TL_TIDUSZ when strmsgsz is 0.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables created in tl_attach().
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
/* NOTE(review): not referenced in this part of the file - confirm its use. */
static int tl_client_closing_when_accepting;

/* NOTE(review): not referenced in this part of the file - confirm its use. */
static int tl_serializer_noswitch;
823 
/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 *
 * Routines with the (mblk_t *, tl_endpt_t *) signature and a "_ser" suffix
 * are the ones handed to tl_serializer_enter() as serializer callbacks
 * (see tl_wput() and tl_close()).
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
	t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t  *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);
902 
/*
 * Initialize option database object for TL.
 *
 * Bundles the driver's default/get/set option handlers with the option
 * table (tl_opt_arr) and the table of valid option levels.
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};
916 
917 /*
918  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
919  * ---------------------------------------
920  */
921 
922 /*
923  * Loadable module routines
924  */
925 int
926 _init(void)
927 {
928 	return (mod_install(&modlinkage));
929 }
930 
931 int
932 _fini(void)
933 {
934 	return (mod_remove(&modlinkage));
935 }
936 
937 int
938 _info(struct modinfo *modinfop)
939 {
940 	return (mod_info(&modlinkage, modinfop));
941 }
942 
943 /*
944  * Driver Entry Points and Other routines
945  */
946 static int
947 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
948 {
949 	int i;
950 	char name[32];
951 
952 	/*
953 	 * Resume from a checkpoint state.
954 	 */
955 	if (cmd == DDI_RESUME)
956 		return (DDI_SUCCESS);
957 
958 	if (cmd != DDI_ATTACH)
959 		return (DDI_FAILURE);
960 
961 	/*
962 	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
963 	 * streams message sizes can be unlimited. We use a defined constant
964 	 * instead.
965 	 */
966 	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
967 
968 	/*
969 	 * Create subdevices for each transport.
970 	 */
971 	for (i = 0; i < TL_UNUSED; i++) {
972 		if (ddi_create_minor_node(devi,
973 		    tl_transports[i].tr_name,
974 		    S_IFCHR, tl_transports[i].tr_minor,
975 		    DDI_PSEUDO, NULL) == DDI_FAILURE) {
976 			ddi_remove_minor_node(devi, NULL);
977 			return (DDI_FAILURE);
978 		}
979 	}
980 
981 	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
982 	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
983 
984 	if (tl_cache == NULL) {
985 		ddi_remove_minor_node(devi, NULL);
986 		return (DDI_FAILURE);
987 	}
988 
989 	tl_minors = id_space_create("tl_minor_space",
990 	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
991 
992 	/*
993 	 * Create ID space for minor numbers
994 	 */
995 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
996 		tl_transport_state_t *t = &tl_transports[i];
997 
998 		if (i == TL_UNUSED)
999 			continue;
1000 
1001 		/* Socket COTSORD shares namespace with COTS */
1002 		if (i == TL_SOCK_COTSORD) {
1003 			t->tr_ai_hash =
1004 			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
1005 			ASSERT(t->tr_ai_hash != NULL);
1006 			t->tr_addr_hash =
1007 			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
1008 			ASSERT(t->tr_addr_hash != NULL);
1009 			continue;
1010 		}
1011 
1012 		/*
1013 		 * Create hash tables.
1014 		 */
1015 		(void) snprintf(name, sizeof (name), "%s_ai_hash",
1016 		    t->tr_name);
1017 #ifdef _ILP32
1018 		if (i & TL_SOCKET)
1019 			t->tr_ai_hash =
1020 			    mod_hash_create_idhash(name, tl_hash_size - 1,
1021 			    mod_hash_null_valdtor);
1022 		else
1023 			t->tr_ai_hash =
1024 			    mod_hash_create_ptrhash(name, tl_hash_size,
1025 			    mod_hash_null_valdtor, sizeof (queue_t));
1026 #else
1027 		t->tr_ai_hash =
1028 		    mod_hash_create_idhash(name, tl_hash_size - 1,
1029 		    mod_hash_null_valdtor);
1030 #endif /* _ILP32 */
1031 
1032 		if (i & TL_SOCKET) {
1033 			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1034 			    t->tr_name);
1035 			t->tr_addr_hash = mod_hash_create_ptrhash(name,
1036 			    tl_hash_size, mod_hash_null_valdtor,
1037 			    sizeof (uintptr_t));
1038 		} else {
1039 			(void) snprintf(name, sizeof (name), "%s_addr_hash",
1040 			    t->tr_name);
1041 			t->tr_addr_hash = mod_hash_create_extended(name,
1042 			    tl_hash_size, mod_hash_null_keydtor,
1043 			    mod_hash_null_valdtor,
1044 			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1045 		}
1046 
1047 		/* Create serializer for connectionless transports. */
1048 		if (i & TL_TICLTS)
1049 			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1050 	}
1051 
1052 	tl_dip = devi;
1053 
1054 	return (DDI_SUCCESS);
1055 }
1056 
1057 static int
1058 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1059 {
1060 	int i;
1061 
1062 	if (cmd == DDI_SUSPEND)
1063 		return (DDI_SUCCESS);
1064 
1065 	if (cmd != DDI_DETACH)
1066 		return (DDI_FAILURE);
1067 
1068 	/*
1069 	 * Destroy arenas and hash tables.
1070 	 */
1071 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1072 		tl_transport_state_t *t = &tl_transports[i];
1073 
1074 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1075 			continue;
1076 
1077 		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1078 		if (t->tr_serializer != NULL) {
1079 			tl_serializer_refrele(t->tr_serializer);
1080 			t->tr_serializer = NULL;
1081 		}
1082 
1083 #ifdef _ILP32
1084 		if (i & TL_SOCKET)
1085 			mod_hash_destroy_idhash(t->tr_ai_hash);
1086 		else
1087 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1088 #else
1089 		mod_hash_destroy_idhash(t->tr_ai_hash);
1090 #endif /* _ILP32 */
1091 		t->tr_ai_hash = NULL;
1092 		if (i & TL_SOCKET)
1093 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1094 		else
1095 			mod_hash_destroy_hash(t->tr_addr_hash);
1096 		t->tr_addr_hash = NULL;
1097 	}
1098 
1099 	kmem_cache_destroy(tl_cache);
1100 	tl_cache = NULL;
1101 	id_space_destroy(tl_minors);
1102 	tl_minors = NULL;
1103 	ddi_remove_minor_node(devi, NULL);
1104 	return (DDI_SUCCESS);
1105 }
1106 
1107 /* ARGSUSED */
1108 static int
1109 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1110 {
1111 
1112 	int retcode = DDI_FAILURE;
1113 
1114 	switch (infocmd) {
1115 
1116 	case DDI_INFO_DEVT2DEVINFO:
1117 		if (tl_dip != NULL) {
1118 			*result = (void *)tl_dip;
1119 			retcode = DDI_SUCCESS;
1120 		}
1121 		break;
1122 
1123 	case DDI_INFO_DEVT2INSTANCE:
1124 		*result = (void *)0;
1125 		retcode = DDI_SUCCESS;
1126 		break;
1127 
1128 	default:
1129 		break;
1130 	}
1131 	return (retcode);
1132 }
1133 
1134 /*
1135  * Endpoint reference management.
1136  */
1137 static void
1138 tl_refhold(tl_endpt_t *tep)
1139 {
1140 	atomic_add_32(&tep->te_refcnt, 1);
1141 }
1142 
1143 static void
1144 tl_refrele(tl_endpt_t *tep)
1145 {
1146 	ASSERT(tep->te_refcnt != 0);
1147 
1148 	if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1149 		tl_free(tep);
1150 }
1151 
1152 /*ARGSUSED*/
1153 static int
1154 tl_constructor(void *buf, void *cdrarg, int kmflags)
1155 {
1156 	tl_endpt_t *tep = buf;
1157 
1158 	bzero(tep, sizeof (tl_endpt_t));
1159 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1160 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1161 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1162 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1163 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1164 
1165 	return (0);
1166 }
1167 
1168 /*ARGSUSED*/
1169 static void
1170 tl_destructor(void *buf, void *cdrarg)
1171 {
1172 	tl_endpt_t *tep = buf;
1173 
1174 	mutex_destroy(&tep->te_closelock);
1175 	cv_destroy(&tep->te_closecv);
1176 	mutex_destroy(&tep->te_srv_lock);
1177 	cv_destroy(&tep->te_srv_cv);
1178 	mutex_destroy(&tep->te_ser_lock);
1179 }
1180 
1181 static void
1182 tl_free(tl_endpt_t *tep)
1183 {
1184 	ASSERT(tep->te_refcnt == 0);
1185 	ASSERT(tep->te_transport != NULL);
1186 	ASSERT(tep->te_rq == NULL);
1187 	ASSERT(tep->te_wq == NULL);
1188 	ASSERT(tep->te_ser != NULL);
1189 	ASSERT(tep->te_ser_count == 0);
1190 	ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1191 
1192 	if (IS_SOCKET(tep)) {
1193 		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1194 		ASSERT(tep->te_abuf == &tep->te_uxaddr);
1195 		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1196 		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1197 	} else if (tep->te_abuf != NULL) {
1198 		kmem_free(tep->te_abuf, tep->te_alen);
1199 		tep->te_alen = -1; /* uninitialized */
1200 		tep->te_abuf = NULL;
1201 	} else {
1202 		ASSERT(tep->te_alen == -1);
1203 	}
1204 
1205 	id_free(tl_minors, tep->te_minor);
1206 	ASSERT(tep->te_credp == NULL);
1207 
1208 	if (tep->te_hash_hndl != NULL)
1209 		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1210 
1211 	if (IS_COTS(tep)) {
1212 		TL_REMOVE_PEER(tep->te_conp);
1213 		TL_REMOVE_PEER(tep->te_oconp);
1214 		tl_serializer_refrele(tep->te_ser);
1215 		tep->te_ser = NULL;
1216 		ASSERT(tep->te_nicon == 0);
1217 		ASSERT(list_head(&tep->te_iconp) == NULL);
1218 	} else {
1219 		ASSERT(tep->te_lastep == NULL);
1220 		ASSERT(list_head(&tep->te_flowlist) == NULL);
1221 		ASSERT(tep->te_flowq == NULL);
1222 	}
1223 
1224 	ASSERT(tep->te_bufcid == 0);
1225 	ASSERT(tep->te_timoutid == 0);
1226 	bzero(&tep->te_ap, sizeof (tep->te_ap));
1227 	tep->te_acceptor_id = 0;
1228 
1229 	ASSERT(tep->te_closewait == 0);
1230 	ASSERT(!tep->te_rsrv_active);
1231 	ASSERT(!tep->te_wsrv_active);
1232 	tep->te_closing = 0;
1233 	tep->te_nowsrv = B_FALSE;
1234 	tep->te_flag = 0;
1235 
1236 	kmem_cache_free(tl_cache, tep);
1237 }
1238 
1239 /*
1240  * Allocate/free reference-counted wrappers for serializers.
1241  */
1242 static tl_serializer_t *
1243 tl_serializer_alloc(int flags)
1244 {
1245 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1246 	serializer_t *ser;
1247 
1248 	if (s == NULL)
1249 		return (NULL);
1250 
1251 	ser = serializer_create(flags);
1252 
1253 	if (ser == NULL) {
1254 		kmem_free(s, sizeof (tl_serializer_t));
1255 		return (NULL);
1256 	}
1257 
1258 	s->ts_refcnt = 1;
1259 	s->ts_serializer = ser;
1260 	return (s);
1261 }
1262 
1263 static void
1264 tl_serializer_refhold(tl_serializer_t *s)
1265 {
1266 	atomic_add_32(&s->ts_refcnt, 1);
1267 }
1268 
1269 static void
1270 tl_serializer_refrele(tl_serializer_t *s)
1271 {
1272 	if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1273 		serializer_destroy(s->ts_serializer);
1274 		kmem_free(s, sizeof (tl_serializer_t));
1275 	}
1276 }
1277 
1278 /*
1279  * Post a request on the endpoint serializer. For COTS transports keep track of
1280  * the number of pending requests.
1281  */
1282 static void
1283 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1284 {
1285 	if (IS_COTS(tep)) {
1286 		mutex_enter(&tep->te_ser_lock);
1287 		tep->te_ser_count++;
1288 		mutex_exit(&tep->te_ser_lock);
1289 	}
1290 	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1291 }
1292 
1293 /*
1294  * Complete processing the request on the serializer. Decrement the counter for
1295  * pending requests for COTS transports.
1296  */
1297 static void
1298 tl_serializer_exit(tl_endpt_t *tep)
1299 {
1300 	if (IS_COTS(tep)) {
1301 		mutex_enter(&tep->te_ser_lock);
1302 		ASSERT(tep->te_ser_count != 0);
1303 		tep->te_ser_count--;
1304 		mutex_exit(&tep->te_ser_lock);
1305 	}
1306 }
1307 
1308 /*
1309  * Hash management functions.
1310  */
1311 
1312 /*
1313  * Return TRUE if two addresses are equal, false otherwise.
1314  */
1315 static boolean_t
1316 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1317 {
1318 	return ((ap1->ta_alen > 0) &&
1319 	    (ap1->ta_alen == ap2->ta_alen) &&
1320 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1321 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1322 }
1323 
1324 /*
1325  * This function is called whenever an endpoint is found in the hash table.
1326  */
1327 /* ARGSUSED0 */
1328 static void
1329 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1330 {
1331 	tl_refhold((tl_endpt_t *)val);
1332 }
1333 
1334 /*
1335  * Address hash function.
1336  */
1337 /* ARGSUSED */
1338 static uint_t
1339 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1340 {
1341 	tl_addr_t *ap = (tl_addr_t *)key;
1342 	size_t	len = ap->ta_alen;
1343 	uchar_t *p = ap->ta_abuf;
1344 	uint_t i, g;
1345 
1346 	ASSERT((len > 0) && (p != NULL));
1347 
1348 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1349 		i = (i << 4) + (*p);
1350 		if ((g = (i & 0xf0000000U)) != 0) {
1351 			i ^= (g >> 24);
1352 			i ^= g;
1353 		}
1354 	}
1355 	return (i);
1356 }
1357 
1358 /*
1359  * This function is used by hash lookups. It compares two generic addresses.
1360  */
1361 static int
1362 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1363 {
1364 #ifdef 	DEBUG
1365 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1366 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1367 
1368 	ASSERT(key1 != NULL);
1369 	ASSERT(key2 != NULL);
1370 
1371 	ASSERT(ap1->ta_abuf != NULL);
1372 	ASSERT(ap2->ta_abuf != NULL);
1373 	ASSERT(ap1->ta_alen > 0);
1374 	ASSERT(ap2->ta_alen > 0);
1375 #endif
1376 
1377 	return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1378 }
1379 
1380 /*
1381  * Prevent endpoint from closing if possible.
1382  * Return B_TRUE on success, B_FALSE on failure.
1383  */
1384 static boolean_t
1385 tl_noclose(tl_endpt_t *tep)
1386 {
1387 	boolean_t rc = B_FALSE;
1388 
1389 	mutex_enter(&tep->te_closelock);
1390 	if (! tep->te_closing) {
1391 		ASSERT(tep->te_closewait == 0);
1392 		tep->te_closewait++;
1393 		rc = B_TRUE;
1394 	}
1395 	mutex_exit(&tep->te_closelock);
1396 	return (rc);
1397 }
1398 
1399 /*
1400  * Allow endpoint to close if needed.
1401  */
1402 static void
1403 tl_closeok(tl_endpt_t *tep)
1404 {
1405 	ASSERT(tep->te_closewait > 0);
1406 	mutex_enter(&tep->te_closelock);
1407 	ASSERT(tep->te_closewait == 1);
1408 	tep->te_closewait--;
1409 	cv_signal(&tep->te_closecv);
1410 	mutex_exit(&tep->te_closelock);
1411 }
1412 
/*
 * STREAMS open entry point.
 *
 * Allocates and initializes a new endpoint for the transport selected by the
 * minor number in *devp, gives it a unique minor number (*devp is rewritten
 * to refer to it), enables the queue procedures and registers the endpoint
 * in the transport's acceptor-ID hash.
 *
 * Returns 0 on success, including the case where the queue already has an
 * endpoint attached, and ENXIO for CLONEOPEN/MODOPEN opens or when the minor
 * number does not name a transport.
 */
/* ARGSUSED */
static int
tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	/* Queue already has an endpoint: nothing more to do. */
	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	/* Socket opens select the socket flavour of the transport. */
	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	/* "minor" still encodes the transport here; it indexes tl_transports */
	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;

	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/* Socket-specific address handling. */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		/* No address yet; -1 marks the length as uninitialized. */
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

	/*
	 * Choose the acceptor ID: the minor number, except for non-socket
	 * endpoints on ILP32 kernels where the read queue pointer is used.
	 */
#ifdef	_ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif	/* _ILP32 */


	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}
1523 
/*
 * STREAMS close entry point.
 *
 * Close proceeds in steps:
 *  1. Remove the endpoint from the acceptor-ID hash.
 *  2. Wait until nobody holds off close (te_closewait), then mark the
 *     endpoint as closing.
 *  3. Run tl_close_ser() behind the serializer and wait for it to finish.
 *  4. Turn off queue processing and cancel pending bufcall/timeout requests.
 *  5. Run tl_close_finish_ser() behind the serializer; connection-oriented
 *     endpoints wait for it, connectionless ones let it finish
 *     asynchronously under an extra reference.
 *  6. Release the credentials and drop the reference held since open.
 *
 * Always returns 0.
 */
/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag,	cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	/* Cancel any outstanding bufcall and timeout requests. */
	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close processing
	 * with serializer protection. This processing may happen asynchronously
	 * with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}
1629 
1630 /*
1631  * First phase of close processing done behind the serializer.
1632  *
1633  * Do not drop the reference in the end - tl_close() wants this reference to
1634  * stay.
1635  */
1636 /* ARGSUSED0 */
1637 static void
1638 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1639 {
1640 	ASSERT(tep->te_closing);
1641 	ASSERT(tep->te_closewait == 1);
1642 	ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1643 
1644 	tep->te_flag |= TL_CLOSE_SER;
1645 
1646 	/*
1647 	 * Drain out all messages on queue except for TL_TICOTS where the
1648 	 * abortive release semantics permit discarding of data on close
1649 	 */
1650 	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1651 		tl_wsrv_ser(NULL, tep);
1652 	}
1653 
1654 	/* Remove address from hash table. */
1655 	tl_addr_unbind(tep);
1656 	/*
1657 	 * qprocsoff() gets confused when q->q_next is not NULL on the write
1658 	 * queue of the driver, so clear these before qprocsoff() is called.
1659 	 * Also clear q_next for the peer since this queue is going away.
1660 	 */
1661 	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1662 		tl_endpt_t *peer_tep = tep->te_conp;
1663 
1664 		tep->te_wq->q_next = NULL;
1665 		if ((peer_tep != NULL) && !peer_tep->te_closing)
1666 			peer_tep->te_wq->q_next = NULL;
1667 	}
1668 
1669 	tep->te_rq = NULL;
1670 
1671 	/* wake up tl_close() */
1672 	tl_closeok(tep);
1673 	tl_serializer_exit(tep);
1674 }
1675 
1676 /*
1677  * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1678  * the reference for CLTS.
1679  *
1680  * Called from serializer. Should drop reference count for CLTS only.
1681  */
1682 /* ARGSUSED0 */
1683 static void
1684 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1685 {
1686 	ASSERT(tep->te_closing);
1687 	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1688 	IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1689 
1690 	tep->te_state = -1;	/* Uninitialized */
1691 	if (IS_COTS(tep)) {
1692 		tl_co_unconnect(tep);
1693 	} else {
1694 		/* Connectionless specific cleanup */
1695 		TL_REMOVE_PEER(tep->te_lastep);
1696 		/*
1697 		 * Backenable anybody that is flow controlled waiting for
1698 		 * this endpoint.
1699 		 */
1700 		tl_cl_backenable(tep);
1701 		if (tep->te_flowq != NULL) {
1702 			list_remove(&(tep->te_flowq->te_flowlist), tep);
1703 			tep->te_flowq = NULL;
1704 		}
1705 	}
1706 
1707 	tl_serializer_exit(tep);
1708 	if (IS_COTS(tep))
1709 		tl_closeok(tep);
1710 	else
1711 		tl_refrele(tep);
1712 }
1713 
1714 /*
1715  * STREAMS write-side put procedure.
1716  * Enter serializer for most of the processing.
1717  *
1718  * The T_CONN_REQ is processed outside of serializer.
1719  */
1720 static void
1721 tl_wput(queue_t *wq, mblk_t *mp)
1722 {
1723 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
1724 	ssize_t			msz = MBLKL(mp);
1725 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
1726 	tlproc_t		*tl_proc = NULL;
1727 
1728 	switch (DB_TYPE(mp)) {
1729 	case M_DATA:
1730 		/* Only valid for connection-oriented transports */
1731 		if (IS_CLTS(tep)) {
1732 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1733 			    SL_TRACE|SL_ERROR,
1734 			    "tl_wput:M_DATA invalid for ticlts driver"));
1735 			tl_merror(wq, mp, EPROTO);
1736 			return;
1737 		}
1738 		tl_proc = tl_wput_data_ser;
1739 		break;
1740 
1741 	case M_IOCTL:
1742 		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1743 		case TL_IOC_CREDOPT:
1744 			/* FALLTHROUGH */
1745 		case TL_IOC_UCREDOPT:
1746 			/*
1747 			 * Serialize endpoint state change.
1748 			 */
1749 			tl_proc = tl_do_ioctl_ser;
1750 			break;
1751 
1752 		default:
1753 			miocnak(wq, mp, 0, EINVAL);
1754 			return;
1755 		}
1756 		break;
1757 
1758 	case M_FLUSH:
1759 		/*
1760 		 * do canonical M_FLUSH processing
1761 		 */
1762 		if (*mp->b_rptr & FLUSHW) {
1763 			flushq(wq, FLUSHALL);
1764 			*mp->b_rptr &= ~FLUSHW;
1765 		}
1766 		if (*mp->b_rptr & FLUSHR) {
1767 			flushq(RD(wq), FLUSHALL);
1768 			qreply(wq, mp);
1769 		} else {
1770 			freemsg(mp);
1771 		}
1772 		return;
1773 
1774 	case M_PROTO:
1775 		if (msz < sizeof (prim->type)) {
1776 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1777 			    SL_TRACE|SL_ERROR,
1778 			    "tl_wput:M_PROTO data too short"));
1779 			tl_merror(wq, mp, EPROTO);
1780 			return;
1781 		}
1782 		switch (prim->type) {
1783 		case T_OPTMGMT_REQ:
1784 		case T_SVR4_OPTMGMT_REQ:
1785 			/*
1786 			 * Process TPI option management requests immediately
1787 			 * in put procedure regardless of in-order processing
1788 			 * of already queued messages.
1789 			 * (Note: This driver supports AF_UNIX socket
1790 			 * implementation.  Unless we implement this processing,
1791 			 * setsockopt() on socket endpoint will block on flow
1792 			 * controlled endpoints which it should not. That is
1793 			 * required for successful execution of VSU socket tests
1794 			 * and is consistent with BSD socket behavior).
1795 			 */
1796 			tl_optmgmt(wq, mp);
1797 			return;
1798 		case O_T_BIND_REQ:
1799 		case T_BIND_REQ:
1800 			tl_proc = tl_bind_ser;
1801 			break;
1802 		case T_CONN_REQ:
1803 			if (IS_CLTS(tep)) {
1804 				tl_merror(wq, mp, EPROTO);
1805 				return;
1806 			}
1807 			tl_conn_req(wq, mp);
1808 			return;
1809 		case T_DATA_REQ:
1810 		case T_OPTDATA_REQ:
1811 		case T_EXDATA_REQ:
1812 		case T_ORDREL_REQ:
1813 			tl_proc = tl_putq_ser;
1814 			break;
1815 		case T_UNITDATA_REQ:
1816 			if (IS_COTS(tep) ||
1817 			    (msz < sizeof (struct T_unitdata_req))) {
1818 				tl_merror(wq, mp, EPROTO);
1819 				return;
1820 			}
1821 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1822 				tl_proc = tl_unitdata_ser;
1823 			} else {
1824 				tl_proc = tl_putq_ser;
1825 			}
1826 			break;
1827 		default:
1828 			/*
1829 			 * process in service procedure if message already
1830 			 * queued (maintain in-order processing)
1831 			 */
1832 			if (wq->q_first != NULL) {
1833 				tl_proc = tl_putq_ser;
1834 			} else {
1835 				tl_proc = tl_wput_ser;
1836 			}
1837 			break;
1838 		}
1839 		break;
1840 
1841 	case M_PCPROTO:
1842 		/*
1843 		 * Check that the message has enough data to figure out TPI
1844 		 * primitive.
1845 		 */
1846 		if (msz < sizeof (prim->type)) {
1847 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1848 			    SL_TRACE|SL_ERROR,
1849 			    "tl_wput:M_PCROTO data too short"));
1850 			tl_merror(wq, mp, EPROTO);
1851 			return;
1852 		}
1853 		switch (prim->type) {
1854 		case T_CAPABILITY_REQ:
1855 			tl_capability_req(mp, tep);
1856 			return;
1857 		case T_INFO_REQ:
1858 			tl_proc = tl_info_req_ser;
1859 			break;
1860 		case T_ADDR_REQ:
1861 			tl_proc = tl_addr_req_ser;
1862 			break;
1863 
1864 		default:
1865 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1866 			    SL_TRACE|SL_ERROR,
1867 			    "tl_wput:unknown TPI msg primitive"));
1868 			tl_merror(wq, mp, EPROTO);
1869 			return;
1870 		}
1871 		break;
1872 	default:
1873 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1874 		    "tl_wput:default:unexpected Streams message"));
1875 		freemsg(mp);
1876 		return;
1877 	}
1878 
1879 	/*
1880 	 * Continue processing via serializer.
1881 	 */
1882 	ASSERT(tl_proc != NULL);
1883 	tl_refhold(tep);
1884 	tl_serializer_enter(tep, tl_proc, mp);
1885 }
1886 
1887 /*
1888  * Place message on the queue while preserving order.
1889  */
1890 static void
1891 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1892 {
1893 	if (tep->te_closing) {
1894 		tl_wput_ser(mp, tep);
1895 	} else {
1896 		TL_PUTQ(tep, mp);
1897 		tl_serializer_exit(tep);
1898 		tl_refrele(tep);
1899 	}
1900 
1901 }
1902 
1903 static void
1904 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1905 {
1906 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1907 
1908 	switch (DB_TYPE(mp)) {
1909 	case M_DATA:
1910 		tl_data(mp, tep);
1911 		break;
1912 	case M_PROTO:
1913 		tl_do_proto(mp, tep);
1914 		break;
1915 	default:
1916 		freemsg(mp);
1917 		break;
1918 	}
1919 }
1920 
1921 /*
1922  * Write side put procedure called from serializer.
1923  */
1924 static void
1925 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1926 {
1927 	tl_wput_common_ser(mp, tep);
1928 	tl_serializer_exit(tep);
1929 	tl_refrele(tep);
1930 }
1931 
1932 /*
1933  * M_DATA processing. Called from serializer.
1934  */
1935 static void
1936 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1937 {
1938 	tl_endpt_t	*peer_tep = tep->te_conp;
1939 	queue_t		*peer_rq;
1940 
1941 	ASSERT(DB_TYPE(mp) == M_DATA);
1942 	ASSERT(IS_COTS(tep));
1943 
1944 	IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1945 
1946 	/*
1947 	 * fastpath for data. Ignore flow control if tep is closing.
1948 	 */
1949 	if ((peer_tep != NULL) &&
1950 	    !peer_tep->te_closing &&
1951 	    ((tep->te_state == TS_DATA_XFER) ||
1952 	    (tep->te_state == TS_WREQ_ORDREL)) &&
1953 	    (tep->te_wq != NULL) &&
1954 	    (tep->te_wq->q_first == NULL) &&
1955 	    ((peer_tep->te_state == TS_DATA_XFER) ||
1956 	    (peer_tep->te_state == TS_WREQ_ORDREL))	&&
1957 	    ((peer_rq = peer_tep->te_rq) != NULL) &&
1958 	    (canputnext(peer_rq) || tep->te_closing)) {
1959 		putnext(peer_rq, mp);
1960 	} else if (tep->te_closing) {
1961 		/*
1962 		 * It is possible that by the time we got here tep started to
1963 		 * close. If the write queue is not empty, and the state is
1964 		 * TS_DATA_XFER the data should be delivered in order, so we
1965 		 * call putq() instead of freeing the data.
1966 		 */
1967 		if ((tep->te_wq != NULL) &&
1968 		    ((tep->te_state == TS_DATA_XFER) ||
1969 		    (tep->te_state == TS_WREQ_ORDREL))) {
1970 			TL_PUTQ(tep, mp);
1971 		} else {
1972 			freemsg(mp);
1973 		}
1974 	} else {
1975 		TL_PUTQ(tep, mp);
1976 	}
1977 
1978 	tl_serializer_exit(tep);
1979 	tl_refrele(tep);
1980 }
1981 
1982 /*
1983  * Write side service routine.
1984  *
1985  * All actual processing happens within serializer which is entered
1986  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1987  * messages that need processing may have arrived, so tl_wsrv repeats until
1988  * queue is empty or te_nowsrv is set.
1989  */
1990 static void
1991 tl_wsrv(queue_t *wq)
1992 {
1993 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1994 
1995 	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1996 		mutex_enter(&tep->te_srv_lock);
1997 		ASSERT(tep->te_wsrv_active == B_FALSE);
1998 		tep->te_wsrv_active = B_TRUE;
1999 		mutex_exit(&tep->te_srv_lock);
2000 
2001 		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2002 
2003 		/*
2004 		 * Wait for serializer job to complete.
2005 		 */
2006 		mutex_enter(&tep->te_srv_lock);
2007 		while (tep->te_wsrv_active) {
2008 			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2009 		}
2010 		cv_signal(&tep->te_srv_cv);
2011 		mutex_exit(&tep->te_srv_lock);
2012 	}
2013 }
2014 
2015 /*
2016  * Serialized write side processing of the STREAMS queue.
2017  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2018  * is NULL.
2019  */
2020 static void
2021 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2022 {
2023 	mblk_t *mp;
2024 	queue_t *wq = tep->te_wq;
2025 
2026 	ASSERT(wq != NULL);
2027 	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2028 		tl_wput_common_ser(mp, tep);
2029 	}
2030 
2031 	/*
2032 	 * Wakeup service routine unless called from close.
2033 	 * If ser_mp is specified, the caller is tl_wsrv().
2034 	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2035 	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2036 	 * be no matching tl_serializer_exit() in this case.
2037 	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2038 	 * waiting on te_srv_cv.
2039 	 */
2040 	if (ser_mp != NULL) {
2041 		/*
2042 		 * We are called from tl_wsrv.
2043 		 */
2044 		mutex_enter(&tep->te_srv_lock);
2045 		ASSERT(tep->te_wsrv_active);
2046 		tep->te_wsrv_active = B_FALSE;
2047 		cv_signal(&tep->te_srv_cv);
2048 		mutex_exit(&tep->te_srv_lock);
2049 		tl_serializer_exit(tep);
2050 	}
2051 }
2052 
2053 /*
2054  * Called when the stream is backenabled. Enter serializer and qenable everyone
2055  * flow controlled by tep.
2056  *
2057  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2058  * is possible that two instances of tl_rsrv will be running reusing the same
2059  * rsrv mblk.
2060  */
2061 static void
2062 tl_rsrv(queue_t *rq)
2063 {
2064 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2065 
2066 	ASSERT(rq->q_first == NULL);
2067 	ASSERT(tep->te_rsrv_active == 0);
2068 
2069 	tep->te_rsrv_active = B_TRUE;
2070 	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2071 	/*
2072 	 * Wait for serializer job to complete.
2073 	 */
2074 	mutex_enter(&tep->te_srv_lock);
2075 	while (tep->te_rsrv_active) {
2076 		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2077 	}
2078 	cv_signal(&tep->te_srv_cv);
2079 	mutex_exit(&tep->te_srv_lock);
2080 }
2081 
2082 /* ARGSUSED */
2083 static void
2084 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2085 {
2086 	tl_endpt_t *peer_tep;
2087 
2088 	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2089 		tl_cl_backenable(tep);
2090 	} else if (
2091 	    IS_COTS(tep) &&
2092 	    ((peer_tep = tep->te_conp) != NULL) &&
2093 	    !peer_tep->te_closing &&
2094 	    ((tep->te_state == TS_DATA_XFER) ||
2095 	    (tep->te_state == TS_WIND_ORDREL)||
2096 	    (tep->te_state == TS_WREQ_ORDREL))) {
2097 		TL_QENABLE(peer_tep);
2098 	}
2099 
2100 	/*
2101 	 * Wakeup read side service routine.
2102 	 */
2103 	mutex_enter(&tep->te_srv_lock);
2104 	ASSERT(tep->te_rsrv_active);
2105 	tep->te_rsrv_active = B_FALSE;
2106 	cv_signal(&tep->te_srv_cv);
2107 	mutex_exit(&tep->te_srv_lock);
2108 	tl_serializer_exit(tep);
2109 }
2110 
2111 /*
2112  * process M_PROTO messages. Always called from serializer.
2113  */
2114 static void
2115 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2116 {
2117 	ssize_t			msz = MBLKL(mp);
2118 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2119 
2120 	/* Message size was validated by tl_wput(). */
2121 	ASSERT(msz >= sizeof (prim->type));
2122 
2123 	switch (prim->type) {
2124 	case T_UNBIND_REQ:
2125 		tl_unbind(mp, tep);
2126 		break;
2127 
2128 	case T_ADDR_REQ:
2129 		tl_addr_req(mp, tep);
2130 		break;
2131 
2132 	case O_T_CONN_RES:
2133 	case T_CONN_RES:
2134 		if (IS_CLTS(tep)) {
2135 			tl_merror(tep->te_wq, mp, EPROTO);
2136 			break;
2137 		}
2138 		tl_conn_res(mp, tep);
2139 		break;
2140 
2141 	case T_DISCON_REQ:
2142 		if (IS_CLTS(tep)) {
2143 			tl_merror(tep->te_wq, mp, EPROTO);
2144 			break;
2145 		}
2146 		tl_discon_req(mp, tep);
2147 		break;
2148 
2149 	case T_DATA_REQ:
2150 		if (IS_CLTS(tep)) {
2151 			tl_merror(tep->te_wq, mp, EPROTO);
2152 			break;
2153 		}
2154 		tl_data(mp, tep);
2155 		break;
2156 
2157 	case T_OPTDATA_REQ:
2158 		if (IS_CLTS(tep)) {
2159 			tl_merror(tep->te_wq, mp, EPROTO);
2160 			break;
2161 		}
2162 		tl_data(mp, tep);
2163 		break;
2164 
2165 	case T_EXDATA_REQ:
2166 		if (IS_CLTS(tep)) {
2167 			tl_merror(tep->te_wq, mp, EPROTO);
2168 			break;
2169 		}
2170 		tl_exdata(mp, tep);
2171 		break;
2172 
2173 	case T_ORDREL_REQ:
2174 		if (! IS_COTSORD(tep)) {
2175 			tl_merror(tep->te_wq, mp, EPROTO);
2176 			break;
2177 		}
2178 		tl_ordrel(mp, tep);
2179 		break;
2180 
2181 	case T_UNITDATA_REQ:
2182 		if (IS_COTS(tep)) {
2183 			tl_merror(tep->te_wq, mp, EPROTO);
2184 			break;
2185 		}
2186 		tl_unitdata(mp, tep);
2187 		break;
2188 
2189 	default:
2190 		tl_merror(tep->te_wq, mp, EPROTO);
2191 		break;
2192 	}
2193 }
2194 
2195 /*
2196  * Process ioctl from serializer.
2197  * This is a wrapper around tl_do_ioctl().
2198  */
2199 static void
2200 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2201 {
2202 	if (! tep->te_closing)
2203 		tl_do_ioctl(mp, tep);
2204 	else
2205 		freemsg(mp);
2206 
2207 	tl_serializer_exit(tep);
2208 	tl_refrele(tep);
2209 }
2210 
2211 static void
2212 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2213 {
2214 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2215 	int cmd = iocbp->ioc_cmd;
2216 	queue_t *wq = tep->te_wq;
2217 	int error;
2218 	int thisopt, otheropt;
2219 
2220 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2221 
2222 	switch (cmd) {
2223 	case TL_IOC_CREDOPT:
2224 		if (cmd == TL_IOC_CREDOPT) {
2225 			thisopt = TL_SETCRED;
2226 			otheropt = TL_SETUCRED;
2227 		} else {
2228 			/* FALLTHROUGH */
2229 	case TL_IOC_UCREDOPT:
2230 			thisopt = TL_SETUCRED;
2231 			otheropt = TL_SETCRED;
2232 		}
2233 		/*
2234 		 * The credentials passing does not apply to sockets.
2235 		 * Only one of the cred options can be set at a given time.
2236 		 */
2237 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2238 			miocnak(wq, mp, 0, EINVAL);
2239 			return;
2240 		}
2241 
2242 		/*
2243 		 * Turn on generation of credential options for
2244 		 * T_conn_req, T_conn_con, T_unidata_ind.
2245 		 */
2246 		error = miocpullup(mp, sizeof (uint32_t));
2247 		if (error != 0) {
2248 			miocnak(wq, mp, 0, error);
2249 			return;
2250 		}
2251 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2252 			miocnak(wq, mp, 0, EINVAL);
2253 			return;
2254 		}
2255 
2256 		if (*(uint32_t *)mp->b_cont->b_rptr)
2257 			tep->te_flag |= thisopt;
2258 		else
2259 			tep->te_flag &= ~thisopt;
2260 
2261 		miocack(wq, mp, 0, 0);
2262 		break;
2263 
2264 	default:
2265 		/* Should not be here */
2266 		miocnak(wq, mp, 0, EINVAL);
2267 		break;
2268 	}
2269 }
2270 
2271 
2272 /*
2273  * send T_ERROR_ACK
2274  * Note: assumes enough memory or caller passed big enough mp
2275  *	- no recovery from allocb failures
2276  */
2277 
2278 static void
2279 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2280     t_scalar_t unix_err, t_scalar_t type)
2281 {
2282 	struct T_error_ack *err_ack;
2283 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2284 	    M_PCPROTO, T_ERROR_ACK);
2285 
2286 	if (ackmp == NULL) {
2287 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2288 		    "tl_error_ack:out of mblk memory"));
2289 		tl_merror(wq, NULL, ENOSR);
2290 		return;
2291 	}
2292 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2293 	err_ack->ERROR_prim = type;
2294 	err_ack->TLI_error = tli_err;
2295 	err_ack->UNIX_error = unix_err;
2296 
2297 	/*
2298 	 * send error ack message
2299 	 */
2300 	qreply(wq, ackmp);
2301 }
2302 
2303 
2304 
2305 /*
2306  * send T_OK_ACK
2307  * Note: assumes enough memory or caller passed big enough mp
2308  *	- no recovery from allocb failures
2309  */
2310 static void
2311 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2312 {
2313 	struct T_ok_ack *ok_ack;
2314 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2315 	    M_PCPROTO, T_OK_ACK);
2316 
2317 	if (ackmp == NULL) {
2318 		tl_merror(wq, NULL, ENOMEM);
2319 		return;
2320 	}
2321 
2322 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2323 	ok_ack->CORRECT_prim = type;
2324 
2325 	(void) qreply(wq, ackmp);
2326 }
2327 
2328 /*
2329  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2330  * This is a wrapper around tl_bind().
2331  */
2332 static void
2333 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2334 {
2335 	if (! tep->te_closing)
2336 		tl_bind(mp, tep);
2337 	else
2338 		freemsg(mp);
2339 
2340 	tl_serializer_exit(tep);
2341 	tl_refrele(tep);
2342 }
2343 
2344 /*
2345  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2346  * Assumes that the endpoint is in the unbound.
2347  */
2348 static void
2349 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2350 {
2351 	queue_t			*wq = tep->te_wq;
2352 	struct T_bind_ack	*b_ack;
2353 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2354 	mblk_t			*ackmp, *bamp;
2355 	soux_addr_t		ux_addr;
2356 	t_uscalar_t		qlen = 0;
2357 	t_scalar_t		alen, aoff;
2358 	tl_addr_t		addr_req;
2359 	void			*addr_startp;
2360 	ssize_t			msz = MBLKL(mp), basize;
2361 	t_scalar_t		tli_err = 0, unix_err = 0;
2362 	t_scalar_t		save_prim_type = bind->PRIM_type;
2363 	t_scalar_t		save_state = tep->te_state;
2364 
2365 	if (tep->te_state != TS_UNBND) {
2366 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2367 		    SL_TRACE|SL_ERROR,
2368 		    "tl_wput:bind_request:out of state, state=%d",
2369 		    tep->te_state));
2370 		tli_err = TOUTSTATE;
2371 		goto error;
2372 	}
2373 
2374 	if (msz < sizeof (struct T_bind_req)) {
2375 		tli_err = TSYSERR; unix_err = EINVAL;
2376 		goto error;
2377 	}
2378 
2379 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2380 
2381 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2382 	    (bind->PRIM_type == T_BIND_REQ));
2383 
2384 	alen = bind->ADDR_length;
2385 	aoff = bind->ADDR_offset;
2386 
2387 	/* negotiate max conn req pending */
2388 	if (IS_COTS(tep)) {
2389 		qlen = bind->CONIND_number;
2390 		if (qlen > tl_maxqlen)
2391 			qlen = tl_maxqlen;
2392 	}
2393 
2394 	/*
2395 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2396 	 * and bound again.
2397 	 */
2398 	if ((tep->te_hash_hndl == NULL) &&
2399 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2400 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2401 	    &tep->te_hash_hndl) != 0) {
2402 		tli_err = TSYSERR; unix_err = ENOSR;
2403 		goto error;
2404 	}
2405 
2406 	/*
2407 	 * Verify address correctness.
2408 	 */
2409 	if (IS_SOCKET(tep)) {
2410 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2411 
2412 		if ((alen != TL_SOUX_ADDRLEN) ||
2413 		    (aoff < 0) ||
2414 		    (aoff + alen > msz)) {
2415 			(void) (STRLOG(TL_ID, tep->te_minor,
2416 			    1, SL_TRACE|SL_ERROR,
2417 			    "tl_bind: invalid socket addr"));
2418 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2419 			tli_err = TSYSERR; unix_err = EINVAL;
2420 			goto error;
2421 		}
2422 		/* Copy address from message to local buffer. */
2423 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2424 		/*
2425 		 * Check that we got correct address from sockets
2426 		 */
2427 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2428 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2429 			(void) (STRLOG(TL_ID, tep->te_minor,
2430 			    1, SL_TRACE|SL_ERROR,
2431 			    "tl_bind: invalid socket magic"));
2432 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2433 			tli_err = TSYSERR; unix_err = EINVAL;
2434 			goto error;
2435 		}
2436 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2437 		    (ux_addr.soua_vp != NULL)) {
2438 			(void) (STRLOG(TL_ID, tep->te_minor,
2439 			    1, SL_TRACE|SL_ERROR,
2440 			    "tl_bind: implicit addr non-empty"));
2441 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2442 			tli_err = TSYSERR; unix_err = EINVAL;
2443 			goto error;
2444 		}
2445 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2446 		    (ux_addr.soua_vp == NULL)) {
2447 			(void) (STRLOG(TL_ID, tep->te_minor,
2448 			    1, SL_TRACE|SL_ERROR,
2449 			    "tl_bind: explicit addr empty"));
2450 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2451 			tli_err = TSYSERR; unix_err = EINVAL;
2452 			goto error;
2453 		}
2454 	} else {
2455 		if ((alen > 0) && ((aoff < 0) ||
2456 		    ((ssize_t)(aoff + alen) > msz) ||
2457 		    ((aoff + alen) < 0))) {
2458 			(void) (STRLOG(TL_ID, tep->te_minor,
2459 			    1, SL_TRACE|SL_ERROR,
2460 			    "tl_bind: invalid message"));
2461 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2462 			tli_err = TSYSERR; unix_err = EINVAL;
2463 			goto error;
2464 		}
2465 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2466 			(void) (STRLOG(TL_ID, tep->te_minor,
2467 			    1, SL_TRACE|SL_ERROR,
2468 			    "tl_bind: bad addr in  message"));
2469 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2470 			tli_err = TBADADDR;
2471 			goto error;
2472 		}
2473 #ifdef DEBUG
2474 		/*
2475 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2476 		 * if (! assertion)
2477 		 *	log warning;
2478 		 */
2479 		if (! ((alen == 0 && aoff == 0) ||
2480 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2481 			(void) (STRLOG(TL_ID, tep->te_minor,
2482 				    3, SL_TRACE|SL_ERROR,
2483 				    "tl_bind: addr overlaps TPI message"));
2484 		}
2485 #endif
2486 	}
2487 
2488 	/*
2489 	 * Bind the address provided or allocate one if requested.
2490 	 * Allow rebinds with a new qlen value.
2491 	 */
2492 	if (IS_SOCKET(tep)) {
2493 		/*
2494 		 * For anonymous requests the te_ap is already set up properly
2495 		 * so use minor number as an address.
2496 		 * For explicit requests need to check whether the address is
2497 		 * already in use.
2498 		 */
2499 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2500 			int rc;
2501 
2502 			if (tep->te_flag & TL_ADDRHASHED) {
2503 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2504 				if (tep->te_vp == ux_addr.soua_vp)
2505 					goto skip_addr_bind;
2506 				else /* Rebind to a new address. */
2507 					tl_addr_unbind(tep);
2508 			}
2509 			/*
2510 			 * Insert address in the hash if it is not already
2511 			 * there.  Since we use preallocated handle, the insert
2512 			 * can fail only if the key is already present.
2513 			 */
2514 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2515 			    (mod_hash_key_t)ux_addr.soua_vp,
2516 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2517 
2518 			if (rc != 0) {
2519 				ASSERT(rc == MH_ERR_DUPLICATE);
2520 				/*
2521 				 * Violate O_T_BIND_REQ semantics and fail with
2522 				 * TADDRBUSY - sockets will not use any address
2523 				 * other than supplied one for explicit binds.
2524 				 */
2525 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2526 				    SL_TRACE|SL_ERROR,
2527 				    "tl_bind:requested addr %p is busy",
2528 				    ux_addr.soua_vp));
2529 				tli_err = TADDRBUSY; unix_err = 0;
2530 				goto error;
2531 			}
2532 			tep->te_uxaddr = ux_addr;
2533 			tep->te_flag |= TL_ADDRHASHED;
2534 			tep->te_hash_hndl = NULL;
2535 		}
2536 	} else if (alen == 0) {
2537 		/*
2538 		 * assign any free address
2539 		 */
2540 		if (! tl_get_any_addr(tep, NULL)) {
2541 			(void) (STRLOG(TL_ID, tep->te_minor,
2542 			    1, SL_TRACE|SL_ERROR,
2543 			    "tl_bind:failed to get buffer for any "
2544 			    "address"));
2545 			tli_err = TSYSERR; unix_err = ENOSR;
2546 			goto error;
2547 		}
2548 	} else {
2549 		addr_req.ta_alen = alen;
2550 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2551 		addr_req.ta_zoneid = tep->te_zoneid;
2552 
2553 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2554 		if (tep->te_abuf == NULL) {
2555 			tli_err = TSYSERR; unix_err = ENOSR;
2556 			goto error;
2557 		}
2558 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2559 		tep->te_alen = alen;
2560 
2561 		if (mod_hash_insert_reserve(tep->te_addrhash,
2562 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2563 		    tep->te_hash_hndl) != 0) {
2564 			if (save_prim_type == T_BIND_REQ) {
2565 				/*
2566 				 * The bind semantics for this primitive
2567 				 * require a failure if the exact address
2568 				 * requested is busy
2569 				 */
2570 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2571 				    SL_TRACE|SL_ERROR,
2572 				    "tl_bind:requested addr is busy"));
2573 				tli_err = TADDRBUSY; unix_err = 0;
2574 				goto error;
2575 			}
2576 
2577 			/*
2578 			 * O_T_BIND_REQ semantics say if address if requested
2579 			 * address is busy, bind to any available free address
2580 			 */
2581 			if (! tl_get_any_addr(tep, &addr_req)) {
2582 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2583 				    SL_TRACE|SL_ERROR,
2584 				    "tl_bind:unable to get any addr buf"));
2585 				tli_err = TSYSERR; unix_err = ENOMEM;
2586 				goto error;
2587 			}
2588 		} else {
2589 			tep->te_flag |= TL_ADDRHASHED;
2590 			tep->te_hash_hndl = NULL;
2591 		}
2592 	}
2593 
2594 	ASSERT(tep->te_alen >= 0);
2595 
2596 skip_addr_bind:
2597 	/*
2598 	 * prepare T_BIND_ACK TPI message
2599 	 */
2600 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2601 	bamp = reallocb(mp, basize, 0);
2602 	if (bamp == NULL) {
2603 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2604 		    "tl_wput:tl_bind: allocb failed"));
2605 		/*
2606 		 * roll back state changes
2607 		 */
2608 		tl_addr_unbind(tep);
2609 		tep->te_state = TS_UNBND;
2610 		tl_memrecover(wq, mp, basize);
2611 		return;
2612 	}
2613 
2614 	DB_TYPE(bamp) = M_PCPROTO;
2615 	bamp->b_wptr = bamp->b_rptr + basize;
2616 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2617 	b_ack->PRIM_type = T_BIND_ACK;
2618 	b_ack->CONIND_number = qlen;
2619 	b_ack->ADDR_length = tep->te_alen;
2620 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2621 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2622 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2623 
2624 	if (IS_COTS(tep)) {
2625 		tep->te_qlen = qlen;
2626 		if (qlen > 0)
2627 			tep->te_flag |= TL_LISTENER;
2628 	}
2629 
2630 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2631 	/*
2632 	 * send T_BIND_ACK message
2633 	 */
2634 	(void) qreply(wq, bamp);
2635 	return;
2636 
2637 error:
2638 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2639 	if (ackmp == NULL) {
2640 		/*
2641 		 * roll back state changes
2642 		 */
2643 		tep->te_state = save_state;
2644 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2645 		return;
2646 	}
2647 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2648 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2649 }
2650 
2651 /*
2652  * Process T_UNBIND_REQ.
2653  * Called from serializer.
2654  */
2655 static void
2656 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2657 {
2658 	queue_t *wq;
2659 	mblk_t *ackmp;
2660 
2661 	if (tep->te_closing) {
2662 		freemsg(mp);
2663 		return;
2664 	}
2665 
2666 	wq = tep->te_wq;
2667 
2668 	/*
2669 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2670 	 * ==> allocate for T_ERROR_ACK (known max)
2671 	 */
2672 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2673 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2674 		return;
2675 	}
2676 	/*
2677 	 * memory resources committed
2678 	 * Note: no message validation. T_UNBIND_REQ message is
2679 	 * same size as PRIM_type field so already verified earlier.
2680 	 */
2681 
2682 	/*
2683 	 * validate state
2684 	 */
2685 	if (tep->te_state != TS_IDLE) {
2686 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2687 		    SL_TRACE|SL_ERROR,
2688 		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2689 		    tep->te_state));
2690 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2691 		return;
2692 	}
2693 	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2694 
2695 	/*
2696 	 * TPI says on T_UNBIND_REQ:
2697 	 *    send up a M_FLUSH to flush both
2698 	 *    read and write queues
2699 	 */
2700 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2701 
2702 	if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2703 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2704 
2705 		/*
2706 		 * Sockets use bind with qlen==0 followed by bind() to
2707 		 * the same address with qlen > 0 for listeners.
2708 		 * We allow rebind with a new qlen value.
2709 		 */
2710 		tl_addr_unbind(tep);
2711 	}
2712 
2713 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2714 	/*
2715 	 * send  T_OK_ACK
2716 	 */
2717 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2718 }
2719 
2720 
2721 /*
2722  * Option management code from drv/ip is used here
2723  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2724  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2725  *	However, that is what we want as that option is 'unorthodox'
2726  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2727  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2728  * Note2: use of optcom_req means this routine is an exception to
2729  *	 recovery from allocb() failures.
2730  */
2731 
2732 static void
2733 tl_optmgmt(queue_t *wq, mblk_t *mp)
2734 {
2735 	tl_endpt_t *tep;
2736 	mblk_t *ackmp;
2737 	union T_primitives *prim;
2738 	cred_t *cr;
2739 
2740 	tep = (tl_endpt_t *)wq->q_ptr;
2741 	prim = (union T_primitives *)mp->b_rptr;
2742 
2743 	/*
2744 	 * All Solaris components should pass a db_credp
2745 	 * for this TPI message, hence we ASSERT.
2746 	 * But in case there is some other M_PROTO that looks
2747 	 * like a TPI message sent by some other kernel
2748 	 * component, we check and return an error.
2749 	 */
2750 	cr = msg_getcred(mp, NULL);
2751 	ASSERT(cr != NULL);
2752 	if (cr == NULL) {
2753 		tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2754 		return;
2755 	}
2756 
2757 	/*  all states OK for AF_UNIX options ? */
2758 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2759 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2760 		/*
2761 		 * Broken TLI semantics that options can only be managed
2762 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2763 		 * tests this TLI (mis)feature using this device driver.
2764 		 */
2765 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2766 		    SL_TRACE|SL_ERROR,
2767 		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2768 		    tep->te_state));
2769 		/*
2770 		 * preallocate memory for T_ERROR_ACK
2771 		 */
2772 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2773 		if (! ackmp) {
2774 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2775 			return;
2776 		}
2777 
2778 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2779 		freemsg(mp);
2780 		return;
2781 	}
2782 
2783 	/*
2784 	 * call common option management routine from drv/ip
2785 	 */
2786 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2787 		svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2788 	} else {
2789 		ASSERT(prim->type == T_OPTMGMT_REQ);
2790 		tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2791 	}
2792 }
2793 
2794 /*
2795  * Handle T_conn_req - the driver part of accept().
2796  * If TL_SET[U]CRED generate the credentials options.
2797  * If this is a socket pass through options unmodified.
2798  * For sockets generate the T_CONN_CON here instead of
2799  * waiting for the T_CONN_RES.
2800  */
2801 static void
2802 tl_conn_req(queue_t *wq, mblk_t *mp)
2803 {
2804 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2805 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2806 	ssize_t			msz = MBLKL(mp);
2807 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2808 	tl_endpt_t		*peer_tep = NULL;
2809 	mblk_t			*ackmp;
2810 	mblk_t			*dimp;
2811 	struct T_discon_ind	*di;
2812 	soux_addr_t		ux_addr;
2813 	tl_addr_t		dst;
2814 
2815 	ASSERT(IS_COTS(tep));
2816 
2817 	if (tep->te_closing) {
2818 		freemsg(mp);
2819 		return;
2820 	}
2821 
2822 	/*
2823 	 * preallocate memory for:
2824 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2825 	 *	==> known max T_ERROR_ACK
2826 	 * 2. max of T_DISCON_IND and T_CONN_IND
2827 	 */
2828 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2829 	if (! ackmp) {
2830 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2831 		return;
2832 	}
2833 	/*
2834 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2835 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2836 	 */
2837 
2838 	if (tep->te_state != TS_IDLE) {
2839 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2840 		    SL_TRACE|SL_ERROR,
2841 		    "tl_wput:T_CONN_REQ:out of state, state=%d",
2842 		    tep->te_state));
2843 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2844 		freemsg(mp);
2845 		return;
2846 	}
2847 
2848 	/*
2849 	 * validate the message
2850 	 * Note: dereference fields in struct inside message only
2851 	 * after validating the message length.
2852 	 */
2853 	if (msz < sizeof (struct T_conn_req)) {
2854 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2855 		    "tl_conn_req:invalid message length"));
2856 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2857 		freemsg(mp);
2858 		return;
2859 	}
2860 	alen = creq->DEST_length;
2861 	aoff = creq->DEST_offset;
2862 	olen = creq->OPT_length;
2863 	ooff = creq->OPT_offset;
2864 	if (olen == 0)
2865 		ooff = 0;
2866 
2867 	if (IS_SOCKET(tep)) {
2868 		if ((alen != TL_SOUX_ADDRLEN) ||
2869 		    (aoff < 0) ||
2870 		    (aoff + alen > msz) ||
2871 		    (alen > msz - sizeof (struct T_conn_req))) {
2872 			(void) (STRLOG(TL_ID, tep->te_minor,
2873 				    1, SL_TRACE|SL_ERROR,
2874 				    "tl_conn_req: invalid socket addr"));
2875 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2876 			freemsg(mp);
2877 			return;
2878 		}
2879 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2880 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2881 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2882 			(void) (STRLOG(TL_ID, tep->te_minor,
2883 			    1, SL_TRACE|SL_ERROR,
2884 			    "tl_conn_req: invalid socket magic"));
2885 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2886 			freemsg(mp);
2887 			return;
2888 		}
2889 	} else {
2890 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2891 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2892 		    ooff + olen < 0)) ||
2893 		    olen < 0 || ooff < 0) {
2894 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2895 			    SL_TRACE|SL_ERROR,
2896 			    "tl_conn_req:invalid message"));
2897 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2898 			freemsg(mp);
2899 			return;
2900 		}
2901 
2902 		if (alen <= 0 || aoff < 0 ||
2903 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2904 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2905 				    SL_TRACE|SL_ERROR,
2906 				    "tl_conn_req:bad addr in message, "
2907 				    "alen=%d, msz=%ld",
2908 				    alen, msz));
2909 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2910 			freemsg(mp);
2911 			return;
2912 		}
2913 #ifdef DEBUG
2914 		/*
2915 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2916 		 * if (! assertion)
2917 		 *	log warning;
2918 		 */
2919 		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2920 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
2921 			    SL_TRACE|SL_ERROR,
2922 			    "tl_conn_req: addr overlaps TPI message"));
2923 		}
2924 #endif
2925 		if (olen) {
2926 			/*
2927 			 * no opts in connect req
2928 			 * supported in this provider except for sockets.
2929 			 */
2930 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2931 			    SL_TRACE|SL_ERROR,
2932 			    "tl_conn_req:options not supported "
2933 			    "in message"));
2934 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2935 			freemsg(mp);
2936 			return;
2937 		}
2938 	}
2939 
2940 	/*
2941 	 * Prevent tep from closing on us.
2942 	 */
2943 	if (! tl_noclose(tep)) {
2944 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2945 		    "tl_conn_req:endpoint is closing"));
2946 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2947 		freemsg(mp);
2948 		return;
2949 	}
2950 
2951 	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2952 	/*
2953 	 * get endpoint to connect to
2954 	 * check that peer with DEST addr is bound to addr
2955 	 * and has CONIND_number > 0
2956 	 */
2957 	dst.ta_alen = alen;
2958 	dst.ta_abuf = mp->b_rptr + aoff;
2959 	dst.ta_zoneid = tep->te_zoneid;
2960 
2961 	/*
2962 	 * Verify if remote addr is in use
2963 	 */
2964 	peer_tep = (IS_SOCKET(tep) ?
2965 	    tl_sock_find_peer(tep, &ux_addr) :
2966 	    tl_find_peer(tep, &dst));
2967 
2968 	if (peer_tep == NULL) {
2969 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2970 		    "tl_conn_req:no one at connect address"));
2971 		err = ECONNREFUSED;
2972 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2973 		/*
2974 		 * validate that number of incoming connection is
2975 		 * not to capacity on destination endpoint
2976 		 */
2977 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2978 		    "tl_conn_req: qlen overflow connection refused"));
2979 			err = ECONNREFUSED;
2980 	}
2981 
2982 	/*
2983 	 * Send T_DISCON_IND in case of error
2984 	 */
2985 	if (err != 0) {
2986 		if (peer_tep != NULL)
2987 			tl_refrele(peer_tep);
2988 		/* We are still expected to send T_OK_ACK */
2989 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2990 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2991 		tl_closeok(tep);
2992 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2993 		    M_PROTO, T_DISCON_IND);
2994 		if (dimp == NULL) {
2995 			tl_merror(wq, NULL, ENOSR);
2996 			return;
2997 		}
2998 		di = (struct T_discon_ind *)dimp->b_rptr;
2999 		di->DISCON_reason = err;
3000 		di->SEQ_number = BADSEQNUM;
3001 
3002 		tep->te_state = TS_IDLE;
3003 		/*
3004 		 * send T_DISCON_IND message
3005 		 */
3006 		putnext(tep->te_rq, dimp);
3007 		return;
3008 	}
3009 
3010 	ASSERT(IS_COTS(peer_tep));
3011 
3012 	/*
3013 	 * Found the listener. At this point processing will continue on
3014 	 * listener serializer. Close of the endpoint should be blocked while we
3015 	 * switch serializers.
3016 	 */
3017 	tl_serializer_refhold(peer_tep->te_ser);
3018 	tl_serializer_refrele(tep->te_ser);
3019 	tep->te_ser = peer_tep->te_ser;
3020 	ASSERT(tep->te_oconp == NULL);
3021 	tep->te_oconp = peer_tep;
3022 
3023 	/*
3024 	 * It is safe to close now. Close may continue on listener serializer.
3025 	 */
3026 	tl_closeok(tep);
3027 
3028 	/*
3029 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3030 	 * data, so we link mp to ackmp.
3031 	 */
3032 	ackmp->b_cont = mp;
3033 	mp = ackmp;
3034 
3035 	tl_refhold(tep);
3036 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3037 }
3038 
3039 /*
3040  * Finish T_CONN_REQ processing on listener serializer.
 *
 * mp is the preallocated ack message with the original T_CONN_REQ linked on
 * b_cont; the two are separated again below.  tep is the connecting endpoint
 * and tep->te_oconp is the listener it is trying to connect to.
 *
 * Every return path leaves the serializer with tl_serializer_exit().  All
 * failure paths also drop one reference on tep with tl_refrele(); the success
 * path keeps it, because tep is then referenced from the listener's list of
 * pending connections (see the comment at the end of the function).
 *
 * Allocation failures that happen before the T_OK_ACK is sent set te_state
 * back to TS_IDLE and hand the request to tl_memrecover(); the one failure
 * that can happen after the T_OK_ACK (tl_resizemp) is reported with
 * tl_merror().
3041  */
3042 static void
3043 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3044 {
3045 	queue_t		*wq;
3046 	tl_endpt_t	*peer_tep = tep->te_oconp;
3047 	mblk_t		*confmp, *cimp, *indmp;
3048 	void		*opts = NULL;
3049 	mblk_t		*ackmp = mp;
3050 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3051 	struct T_conn_ind	*ci;
3052 	tl_icon_t	*tip;
3053 	void		*addr_startp;
3054 	t_scalar_t	olen = creq->OPT_length;
3055 	t_scalar_t	ooff = creq->OPT_offset;
3056 	size_t 		ci_msz;
3057 	size_t		size;
3058 	cred_t		*cr = NULL;
3059 	pid_t		cpid;
3060 
	/*
	 * The connecting endpoint started closing while we were switching
	 * serializers: undo the link to the listener and discard the message.
	 * mp still chains the ack and the request here, so one freemsg()
	 * releases both.
	 */
3061 	if (tep->te_closing) {
3062 		TL_UNCONNECT(tep->te_oconp);
3063 		tl_serializer_exit(tep);
3064 		tl_refrele(tep);
3065 		freemsg(mp);
3066 		return;
3067 	}
3068 
3069 	wq = tep->te_wq;
3070 	tep->te_flag |= TL_EAGER;
3071 
3072 	/*
3073 	 * Extract preallocated ackmp from mp.
	 * From here on mp is the T_CONN_REQ and ackmp is the separate,
	 * preallocated ack buffer.
3074 	 */
3075 	mp = mp->b_cont;
3076 	ackmp->b_cont = NULL;
3077 
	/*
	 * Without options force ooff to 0, so that "ooff != 0" below means
	 * "the request carries options that have to be copied".
	 */
3078 	if (olen == 0)
3079 		ooff = 0;
3080 
	/*
	 * The listener must not be closing and must be in TS_IDLE or
	 * TS_WRES_CIND; otherwise the request is refused.  The request
	 * message itself is handed to tl_error_ack() and the preallocated
	 * ackmp is freed.
	 */
3081 	if (peer_tep->te_closing ||
3082 	    !((peer_tep->te_state == TS_IDLE) ||
3083 	    (peer_tep->te_state == TS_WRES_CIND))) {
3084 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3085 		    "tl_conn_req:peer in bad state (%d)",
3086 		    peer_tep->te_state));
3087 		TL_UNCONNECT(tep->te_oconp);
3088 		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3089 		freemsg(ackmp);
3090 		tl_serializer_exit(tep);
3091 		tl_refrele(tep);
3092 		return;
3093 	}
3094 
3095 	/*
3096 	 * preallocate now for T_DISCON_IND or T_CONN_IND
3097 	 */
3098 	/*
3099 	 * calculate length of T_CONN_IND message
	 *
	 * When the listener has TL_SETCRED or TL_SETUCRED set, the T_CONN_IND
	 * carries a single credential option instead of the options from the
	 * request: olen becomes the size of that option and ooff is cleared so
	 * that nothing is copied from the request.
3100 	 */
3101 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3102 		cr = msg_getcred(mp, &cpid);
3103 		ASSERT(cr != NULL);
3104 		if (peer_tep->te_flag & TL_SETCRED) {
3105 			ooff = 0;
3106 			olen = (t_scalar_t) sizeof (struct opthdr) +
3107 			    OPTLEN(sizeof (tl_credopt_t));
3108 			/* 1 option only */
3109 		} else {
3110 			ooff = 0;
3111 			olen = (t_scalar_t)sizeof (struct opthdr) +
3112 			    OPTLEN(ucredminsize(cr));
3113 			/* 1 option only */
3114 		}
3115 	}
	/*
	 * T_CONN_IND layout: header, source address (tep's bound address),
	 * then the options at the next T_ALIGN boundary.  The same buffer
	 * must also be able to hold a T_DISCON_IND.
	 */
3116 	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3117 	ci_msz = T_ALIGN(ci_msz) + olen;
3118 	size = max(ci_msz, sizeof (struct T_discon_ind));
3119 
3120 	/*
3121 	 * Save options from mp - we'll need them for T_CONN_IND.
	 * (mp is passed to reallocb() below and rebuilt as the T_CONN_IND,
	 * so the options are copied aside first.)
3122 	 */
3123 	if (ooff != 0) {
3124 		opts = kmem_alloc(olen, KM_NOSLEEP);
3125 		if (opts == NULL) {
3126 			/*
3127 			 * roll back state changes
3128 			 */
3129 			tep->te_state = TS_IDLE;
3130 			tl_memrecover(wq, mp, size);
3131 			freemsg(ackmp);
3132 			TL_UNCONNECT(tep->te_oconp);
3133 			tl_serializer_exit(tep);
3134 			tl_refrele(tep);
3135 			return;
3136 		}
3137 		/* Copy options to a temp buffer */
3138 		bcopy(mp->b_rptr + ooff, opts, olen);
3139 	}
3140 
3141 	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3142 		/*
3143 		 * Generate a T_CONN_CON that has the identical address
3144 		 * (and options) as the T_CONN_REQ.
3145 		 * NOTE: assumes that the T_conn_req and T_conn_con structures
3146 		 * are isomorphic.
3147 		 */
3148 		confmp = copyb(mp);
3149 		if (! confmp) {
3150 			/*
3151 			 * roll back state changes
3152 			 */
3153 			tep->te_state = TS_IDLE;
3154 			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3155 			freemsg(ackmp);
3156 			if (opts != NULL)
3157 				kmem_free(opts, olen);
3158 			TL_UNCONNECT(tep->te_oconp);
3159 			tl_serializer_exit(tep);
3160 			tl_refrele(tep);
3161 			return;
3162 		}
3163 		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3164 		    T_CONN_CON;
3165 	} else {
3166 		confmp = NULL;
3167 	}
	/*
	 * Get a message large enough for the T_CONN_IND (or T_DISCON_IND).
	 * On failure the original mp is still what gets passed to
	 * tl_memrecover().
	 */
3168 	if ((indmp = reallocb(mp, size, 0)) == NULL) {
3169 		/*
3170 		 * roll back state changes
3171 		 */
3172 		tep->te_state = TS_IDLE;
3173 		tl_memrecover(wq, mp, size);
3174 		freemsg(ackmp);
3175 		if (opts != NULL)
3176 			kmem_free(opts, olen);
3177 		freemsg(confmp);
3178 		TL_UNCONNECT(tep->te_oconp);
3179 		tl_serializer_exit(tep);
3180 		tl_refrele(tep);
3181 		return;
3182 	}
3183 
	/*
	 * Cell that will represent this pending connection on the listener's
	 * te_iconp list.
	 */
3184 	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3185 	if (tip == NULL) {
3186 		/*
3187 		 * roll back state changes
3188 		 */
3189 		tep->te_state = TS_IDLE;
3190 		tl_memrecover(wq, indmp, sizeof (*tip));
3191 		freemsg(ackmp);
3192 		if (opts != NULL)
3193 			kmem_free(opts, olen);
3194 		freemsg(confmp);
3195 		TL_UNCONNECT(tep->te_oconp);
3196 		tl_serializer_exit(tep);
3197 		tl_refrele(tep);
3198 		return;
3199 	}
	/* The cell was zeroed by kmem_zalloc(); this assignment is explicit only. */
3200 	tip->ti_mp = NULL;
3201 
3202 	/*
3203 	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3204 	 * and tl_icon_t cell.
3205 	 */
3206 
3207 	/*
3208 	 * ack validity of request and send the peer credential in the ACK.
3209 	 */
3210 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3211 
	/*
	 * Attach the listener's credential to the early T_CONN_CON, if one was
	 * built.  NOTE(review): peer_tep has already been dereferenced above,
	 * so the NULL test on it here cannot fail.
	 */
3212 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3213 	    confmp != NULL) {
3214 		mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3215 	}
3216 
3217 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3218 
3219 	/*
3220 	 * prepare message to send T_CONN_IND
3221 	 */
3222 	/*
3223 	 * allocate the message - original data blocks retained
3224 	 * in the returned mblk
3225 	 */
3226 	cimp = tl_resizemp(indmp, size);
3227 	if (! cimp) {
		/*
		 * The T_OK_ACK has already gone up, so there is no state
		 * rollback here; the failure is reported with tl_merror().
		 */
3228 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3229 		    "tl_conn_req:con_ind:allocb failure"));
3230 		tl_merror(wq, indmp, ENOMEM);
3231 		TL_UNCONNECT(tep->te_oconp);
3232 		tl_serializer_exit(tep);
3233 		tl_refrele(tep);
3234 		if (opts != NULL)
3235 			kmem_free(opts, olen);
3236 		freemsg(confmp);
3237 		ASSERT(tip->ti_mp == NULL);
3238 		kmem_free(tip, sizeof (*tip));
3239 		return;
3240 	}
3241 
	/*
	 * Fill in the T_CONN_IND: the source address is the connecting
	 * endpoint's bound address and the sequence number is its te_seqno.
	 */
3242 	DB_TYPE(cimp) = M_PROTO;
3243 	ci = (struct T_conn_ind *)cimp->b_rptr;
3244 	ci->PRIM_type  = T_CONN_IND;
3245 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3246 	ci->SRC_length = tep->te_alen;
3247 	ci->SEQ_number = tep->te_seqno;
3248 
3249 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3250 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	/*
	 * Options: either the credential option requested by the listener,
	 * or the options saved from the T_CONN_REQ, or none.
	 */
3251 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3252 
3253 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3254 		    ci->SRC_length);
3255 		ci->OPT_length = olen; /* because only 1 option */
3256 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3257 		    cr, cpid,
3258 		    peer_tep->te_flag, peer_tep->te_credp);
3259 	} else if (ooff != 0) {
3260 		/* Copy option from T_CONN_REQ */
3261 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3262 		    ci->SRC_length);
3263 		ci->OPT_length = olen;
3264 		ASSERT(opts != NULL);
3265 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3266 	} else {
3267 		ci->OPT_offset = 0;
3268 		ci->OPT_length = 0;
3269 	}
3270 	if (opts != NULL)
3271 		kmem_free(opts, olen);
3272 
3273 	/*
3274 	 * register connection request with server peer
3275 	 * append to list of incoming connections
3276 	 * increment references for both peer_tep and tep: peer_tep is placed on
3277 	 * te_oconp and tep is placed on listeners queue.
3278 	 */
3279 	tip->ti_tep = tep;
3280 	tip->ti_seqno = tep->te_seqno;
3281 	list_insert_tail(&peer_tep->te_iconp, tip);
3282 	peer_tep->te_nicon++;
3283 
3284 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3285 	/*
3286 	 * send the T_CONN_IND message
3287 	 */
3288 	putnext(peer_tep->te_rq, cimp);
3289 
3290 	/*
3291 	 * Send a T_CONN_CON message for sockets.
3292 	 * Disable the queues until we have reached the correct state!
	 * (noenable() is applied to the client's write queue here.)
3293 	 */
3294 	if (confmp != NULL) {
3295 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3296 		noenable(wq);
3297 		putnext(tep->te_rq, confmp);
3298 	}
3299 	/*
3300 	 * Now we need to increment tep reference because tep is referenced by
3301 	 * server list of pending connections. We also need to decrement
3302 	 * reference before exiting serializer. Two operations void each other
3303 	 * so we don't modify reference at all.
3304 	 */
3305 	ASSERT(tep->te_refcnt >= 2);
3306 	ASSERT(peer_tep->te_refcnt >= 2);
3307 	tl_serializer_exit(tep);
3308 }
3309 
3310 
3311 
3312 /*
3313  * Handle T_CONN_RES / O_T_CONN_RES on the listener stream.
3314  * Called on the listener serializer.
3315  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3316  * The client (eager) is switched to the acceptor's serializer when that is
3317  * possible; otherwise the acceptor is moved to the client's serializer.
3318  *
3319  * If the client has TL_SET[U]CRED set, generate the credentials option.
3320  * For sockets (unless tl_disable_early_connect is set) tl_conn_req_ser has
 * already sent the T_CONN_CON, so none is generated here.
 *
 * mp is the T_CONN_RES message and tep is the listener.  The message is
 * dropped without any reply if the listener is closing.
 *
 * The acceptor is looked up by ACCEPTOR_id and comes back with an extra
 * reference.  That reference is released with tl_refrele() on every early
 * return taken after the lookup; on success it is kept for the te_conp
 * linkage between client and acceptor.  Closes of the acceptor (and of the
 * client, when tl_noclose() succeeded on it) are blocked while the two
 * endpoints are being linked and are allowed again before returning.
3321  */
3322 static void
3323 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3324 {
3325 	queue_t			*wq;
3326 	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
3327 	ssize_t			msz = MBLKL(mp);
3328 	t_scalar_t		olen, ooff, err = 0;
	/*
	 * NOTE(review): PRIM_type is read here, before the message length is
	 * checked below; presumably the caller has already looked at it in
	 * order to dispatch to this function - confirm.
	 */
3329 	t_scalar_t		prim = cres->PRIM_type;
3330 	uchar_t			*addr_startp;
3331 	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
3332 	tl_icon_t		*tip;
3333 	size_t			size;
3334 	mblk_t			*ackmp, *respmp;
3335 	mblk_t			*dimp, *ccmp = NULL;
3336 	struct T_discon_ind	*di;
3337 	struct T_conn_con	*cc;
	/* B_TRUE once tl_noclose(cl_ep) succeeded and must be undone. */
3338 	boolean_t		client_noclose_set = B_FALSE;
	/* B_FALSE when the acceptor has to move to the client's serializer. */
3339 	boolean_t		switch_client_serializer = B_TRUE;
3340 
3341 	ASSERT(IS_COTS(tep));
3342 
3343 	if (tep->te_closing) {
3344 		freemsg(mp);
3345 		return;
3346 	}
3347 
3348 	wq = tep->te_wq;
3349 
3350 	/*
3351 	 * preallocate memory for:
3352 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3353 	 *	==> known max T_ERROR_ACK
3354 	 * 2. max of T_DISCON_IND and T_CONN_CON
3355 	 */
3356 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3357 	if (! ackmp) {
3358 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3359 		return;
3360 	}
3361 	/*
3362 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3363 	 * will be committed for T_DISCON_IND/T_CONN_CON later
3364 	 */
3365 
3366 
3367 	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3368 
3369 	/*
3370 	 * validate state
3371 	 */
3372 	if (tep->te_state != TS_WRES_CIND) {
3373 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3374 		    SL_TRACE|SL_ERROR,
3375 		    "tl_wput:T_CONN_RES:out of state, state=%d",
3376 		    tep->te_state));
3377 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3378 		freemsg(mp);
3379 		return;
3380 	}
3381 
3382 	/*
3383 	 * validate the message
3384 	 * Note: dereference fields in struct inside message only
3385 	 * after validating the message length.
3386 	 */
3387 	if (msz < sizeof (struct T_conn_res)) {
3388 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3389 		    "tl_conn_res:invalid message length"));
3390 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3391 		freemsg(mp);
3392 		return;
3393 	}
3394 	olen = cres->OPT_length;
3395 	ooff = cres->OPT_offset;
3396 	if (((olen > 0) && ((ooff + olen) > msz))) {
3397 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3398 		    "tl_conn_res:invalid message"));
3399 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3400 		freemsg(mp);
3401 		return;
3402 	}
3403 	if (olen) {
3404 		/*
3405 		 * no opts in connect res
3406 		 * supported in this provider
3407 		 */
3408 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3409 		    "tl_conn_res:options not supported in message"));
3410 		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3411 		freemsg(mp);
3412 		return;
3413 	}
3414 
	/*
	 * The request is well formed: commit the TE_CONN_RES transition.
	 * Every error return below first applies TE_ERROR_ACK to the
	 * listener state before sending the T_ERROR_ACK.
	 */
3415 	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3416 	ASSERT(tep->te_state == TS_WACK_CRES);
3417 
	/*
	 * Reject sequence numbers in the range [BADSEQNUM, TL_MINOR_START).
	 */
3418 	if (cres->SEQ_number < TL_MINOR_START &&
3419 	    cres->SEQ_number >= BADSEQNUM) {
3420 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3421 		    "tl_conn_res:remote endpoint sequence number bad"));
3422 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3423 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3424 		freemsg(mp);
3425 		return;
3426 	}
3427 
3428 	/*
3429 	 * find accepting endpoint. Will have extra reference if found.
3430 	 */
3431 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3432 	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3433 	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3434 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3435 		    "tl_conn_res:bad accepting endpoint"));
3436 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3437 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3438 		freemsg(mp);
3439 		return;
3440 	}
3441 
3442 	/*
3443 	 * Prevent acceptor from closing.
	 * If that fails only the lookup reference has to be dropped; from the
	 * next check on, error returns also call tl_closeok(acc_ep).
3444 	 */
3445 	if (! tl_noclose(acc_ep)) {
3446 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3447 		    "tl_conn_res:bad accepting endpoint"));
3448 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3449 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3450 		tl_refrele(acc_ep);
3451 		freemsg(mp);
3452 		return;
3453 	}
3454 
3455 	acc_ep->te_flag |= TL_ACCEPTOR;
3456 
3457 	/*
3458 	 * validate that accepting endpoint, if different from listening
3459 	 * has address bound => state is TS_IDLE
3460 	 * TROUBLE in XPG4 !!?
3461 	 */
3462 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3463 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3464 		    "tl_conn_res:accepting endpoint has no address bound,"
3465 		    "state=%d", acc_ep->te_state));
3466 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3467 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3468 		freemsg(mp);
3469 		tl_closeok(acc_ep);
3470 		tl_refrele(acc_ep);
3471 		return;
3472 	}
3473 
3474 	/*
3475 	 * validate if accepting endpt same as listening, then
3476 	 * no other incoming connection should be on the queue
3477 	 */
3478 
3479 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3480 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3481 		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3482 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3483 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3484 		freemsg(mp);
3485 		tl_closeok(acc_ep);
3486 		tl_refrele(acc_ep);
3487 		return;
3488 	}
3489 
3490 	/*
3491 	 * Look up the entry for this client, by sequence number, on the
3492 	 * listener's list of pending connections.  The entry is removed
3493 	 * from the list (tl_freetip) further below, once the connection
3494 	 * has been accepted.
3495 	 */
3496 	tip = tl_icon_find(tep, cres->SEQ_number);
3497 	if (tip == NULL) {
3498 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3499 		    "tl_conn_res:no client in listener list"));
3500 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3501 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3502 		freemsg(mp);
3503 		tl_closeok(acc_ep);
3504 		tl_refrele(acc_ep);
3505 		return;
3506 	}
3507 
3508 	/*
3509 	 * If ti_tep is NULL the client has already closed. In this case
3510 	 * the code below will avoid any action on the client side
3511 	 * but complete the server and acceptor state transitions.
3512 	 */
3513 	ASSERT(tip->ti_tep == NULL ||
3514 	    tip->ti_tep->te_seqno == cres->SEQ_number);
3515 	cl_ep = tip->ti_tep;
3516 
3517 	/*
3518 	 * If the client is present it is switched from listener's to acceptor's
3519 	 * serializer. We should block client closes while serializers are
3520 	 * being switched.
3521 	 *
3522 	 * It is possible that the client is present but is currently being
3523 	 * closed. There are two possible cases:
3524 	 *
3525 	 * 1) The client has already entered tl_close_finish_ser() and sent
3526 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3527 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3528 	 *
3529 	 * 2) The client started the close but has not entered
3530 	 *    tl_close_finish_ser() yet. In this case, the client is already
3531 	 *    proceeding asynchronously on the listener's serializer, so we're
3532 	 *    forced to change the acceptor to use the listener's serializer to
3533 	 *    ensure that any operations on the acceptor are serialized with
3534 	 *    respect to the close that's in-progress.
3535 	 */
3536 	if (cl_ep != NULL) {
3537 		if (tl_noclose(cl_ep)) {
3538 			client_noclose_set = B_TRUE;
3539 		} else {
3540 			/*
3541 			 * Client is closing. If it has sent the
3542 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3543 			 * we have to let the client continue until it is
3544 			 * sent.
3545 			 *
3546 			 * If we do continue using the client, acceptor will
3547 			 * switch to client's serializer which is used by client
3548 			 * for its close.
			 *
			 * The client is ignored (cl_ep = NULL) unless it is a
			 * socket using early connect whose te_state is not -1.
3549 			 */
3550 			tl_client_closing_when_accepting++;
3551 			switch_client_serializer = B_FALSE;
3552 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3553 			    cl_ep->te_state == -1)
3554 				cl_ep = NULL;
3555 		}
3556 	}
3557 
3558 	if (cl_ep != NULL) {
3559 		/*
3560 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3561 		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, the test below can only be true
		 * for socket clients.  For a non-socket client IS_SOCKET()
		 * is false, so err is never set here whatever the client's
		 * state is.  Confirm that this is intended.
3562 		 */
3563 		if (cl_ep->te_state != TS_WCON_CREQ &&
3564 		    (cl_ep->te_state != TS_DATA_XFER &&
3565 		    IS_SOCKET(cl_ep))) {
3566 			err = ECONNREFUSED;
3567 			/*
3568 			 * T_DISCON_IND sent later after committing memory
3569 			 * and acking validity of request
3570 			 */
3571 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3572 			    "tl_conn_res:peer in bad state"));
3573 		}
3574 
3575 		/*
3576 		 * preallocate now for T_DISCON_IND or T_CONN_CON
3577 		 * ack validity of request (T_OK_ACK) after memory committed
3578 		 */
3579 
3580 		if (err)
3581 			size = sizeof (struct T_discon_ind);
3582 		else {
3583 			/*
3584 			 * calculate length of T_CONN_CON message
			 * (header, acceptor address, then an optional
			 * credential option at the next T_ALIGN boundary)
3585 			 */
3586 			olen = 0;
3587 			if (cl_ep->te_flag & TL_SETCRED) {
3588 				olen = (t_scalar_t)sizeof (struct opthdr) +
3589 				    OPTLEN(sizeof (tl_credopt_t));
3590 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3591 				olen = (t_scalar_t)sizeof (struct opthdr) +
3592 				    OPTLEN(ucredminsize(acc_ep->te_credp));
3593 			}
3594 			size = T_ALIGN(sizeof (struct T_conn_con) +
3595 			    acc_ep->te_alen) + olen;
3596 		}
3597 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3598 			/*
3599 			 * roll back state changes
3600 			 */
3601 			tep->te_state = TS_WRES_CIND;
3602 			tl_memrecover(wq, mp, size);
3603 			freemsg(ackmp);
3604 			if (client_noclose_set)
3605 				tl_closeok(cl_ep);
3606 			tl_closeok(acc_ep);
3607 			tl_refrele(acc_ep);
3608 			return;
3609 		}
		/* From here on the message is referred to only as respmp. */
3610 		mp = NULL;
3611 	}
3612 
3613 	/*
3614 	 * Now ack validity of request
	 * The OK_ACK event applied to the listener depends on how many
	 * connect indications are pending and on whether the listener is
	 * accepting on itself.
3615 	 */
3616 	if (tep->te_nicon == 1) {
3617 		if (tep == acc_ep)
3618 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3619 		else
3620 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3621 	} else
3622 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3623 
3624 	/*
3625 	 * send T_DISCON_IND now if client state validation failed earlier
	 *
	 * In this path the T_DISCON_IND is sent up the acceptor's read queue
	 * and the listener's state is set to TS_IDLE.
	 * NOTE(review): tip is not removed from the listener's list on this
	 * path - confirm that this is intended.
3626 	 */
3627 	if (err) {
3628 		tl_ok_ack(wq, ackmp, prim);
3629 		/*
3630 		 * flush the queues - why always ?
3631 		 */
3632 		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3633 
3634 		dimp = tl_resizemp(respmp, size);
3635 		if (! dimp) {
3636 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3637 			    SL_TRACE|SL_ERROR,
3638 			    "tl_conn_res:con_ind:allocb failure"));
3639 			tl_merror(wq, respmp, ENOMEM);
3640 			tl_closeok(acc_ep);
3641 			if (client_noclose_set)
3642 				tl_closeok(cl_ep);
3643 			tl_refrele(acc_ep);
3644 			return;
3645 		}
3646 		if (dimp->b_cont) {
3647 			/* no user data in provider generated discon ind */
3648 			freemsg(dimp->b_cont);
3649 			dimp->b_cont = NULL;
3650 		}
3651 
3652 		DB_TYPE(dimp) = M_PROTO;
3653 		di = (struct T_discon_ind *)dimp->b_rptr;
3654 		di->PRIM_type  = T_DISCON_IND;
3655 		di->DISCON_reason = err;
3656 		di->SEQ_number = BADSEQNUM;
3657 
3658 		tep->te_state = TS_IDLE;
3659 		/*
3660 		 * send T_DISCON_IND message
3661 		 */
3662 		putnext(acc_ep->te_rq, dimp);
3663 		if (client_noclose_set)
3664 			tl_closeok(cl_ep);
3665 		tl_closeok(acc_ep);
3666 		tl_refrele(acc_ep);
3667 		return;
3668 	}
3669 
3670 	/*
3671 	 * now start connecting the accepting endpoint
3672 	 */
3673 	if (tep != acc_ep)
3674 		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3675 
3676 	if (cl_ep == NULL) {
3677 		/*
3678 		 * The client has already closed. Send up any queued messages
3679 		 * and change the state accordingly.
3680 		 */
3681 		tl_ok_ack(wq, ackmp, prim);
3682 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3683 
3684 		/*
3685 		 * remove endpoint from incoming connection
3686 		 * delete client from list of incoming connections
3687 		 */
3688 		tl_freetip(tep, tip);
		/*
		 * reallocb() above is only called when cl_ep != NULL, so mp
		 * still holds the original T_CONN_RES on this path.
		 */
3689 		freemsg(mp);
3690 		tl_closeok(acc_ep);
3691 		tl_refrele(acc_ep);
3692 		return;
3693 	} else if (tip->ti_mp != NULL) {
3694 		/*
3695 		 * The client could have queued a T_DISCON_IND which needs
3696 		 * to be sent up.
3697 		 * Note that t_discon_req can not operate the same as
3698 		 * t_data_req since it is not possible for it to putbq
3699 		 * the message and return -1 due to the use of qwriter.
3700 		 */
3701 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3702 	}
3703 
3704 	/*
3705 	 * prepare connect confirm T_CONN_CON message
3706 	 */
3707 
3708 	/*
3709 	 * allocate the message - original data blocks
3710 	 * retained in the returned mblk
	 *
	 * A T_CONN_CON is only built for clients that did not already get
	 * one early (non-sockets, or early connect disabled); otherwise the
	 * preallocated message is simply freed.
3711 	 */
3712 	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3713 		ccmp = tl_resizemp(respmp, size);
3714 		if (ccmp == NULL) {
3715 			tl_ok_ack(wq, ackmp, prim);
3716 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3717 			    SL_TRACE|SL_ERROR,
3718 			    "tl_conn_res:conn_con:allocb failure"));
3719 			tl_merror(wq, respmp, ENOMEM);
3720 			tl_closeok(acc_ep);
3721 			if (client_noclose_set)
3722 				tl_closeok(cl_ep);
3723 			tl_refrele(acc_ep);
3724 			return;
3725 		}
3726 
3727 		DB_TYPE(ccmp) = M_PROTO;
3728 		cc = (struct T_conn_con *)ccmp->b_rptr;
3729 		cc->PRIM_type  = T_CONN_CON;
3730 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3731 		cc->RES_length = acc_ep->te_alen;
3732 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3733 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3734 		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3735 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3736 			    cc->RES_length);
3737 			cc->OPT_length = olen;
3738 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3739 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3740 			    cl_ep->te_credp);
3741 		} else {
3742 			cc->OPT_offset = 0;
3743 			cc->OPT_length = 0;
3744 		}
3745 		/*
3746 		 * Forward the credential in the packet so it can be picked up
3747 		 * at the higher layers for more complete credential processing
3748 		 */
3749 		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3750 	} else {
3751 		freemsg(respmp);
3752 		respmp = NULL;
3753 	}
3754 
3755 	/*
3756 	 * make connection linking
3757 	 * accepting and client endpoints
3758 	 * No need to increment references:
3759 	 *	on client: it should already have one from tip->ti_tep linkage.
3760 	 *	on acceptor: it should already have one from the table lookup.
3761 	 *
3762 	 * At this point both client and acceptor can't close. Set client
3763 	 * serializer to acceptor's.
3764 	 */
3765 	ASSERT(cl_ep->te_refcnt >= 2);
3766 	ASSERT(acc_ep->te_refcnt >= 2);
3767 	ASSERT(cl_ep->te_conp == NULL);
3768 	ASSERT(acc_ep->te_conp == NULL);
3769 	cl_ep->te_conp = acc_ep;
3770 	acc_ep->te_conp = cl_ep;
3771 	ASSERT(cl_ep->te_ser == tep->te_ser);
3772 	if (switch_client_serializer) {
		/*
		 * The client is only moved when te_ser_count is zero (checked
		 * under te_ser_lock); otherwise fall back to moving the
		 * acceptor instead.
		 */
3773 		mutex_enter(&cl_ep->te_ser_lock);
3774 		if (cl_ep->te_ser_count > 0) {
3775 			switch_client_serializer = B_FALSE;
3776 			tl_serializer_noswitch++;
3777 		} else {
3778 			/*
3779 			 * Move client to the acceptor's serializer.
3780 			 */
3781 			tl_serializer_refhold(acc_ep->te_ser);
3782 			tl_serializer_refrele(cl_ep->te_ser);
3783 			cl_ep->te_ser = acc_ep->te_ser;
3784 		}
3785 		mutex_exit(&cl_ep->te_ser_lock);
3786 	}
3787 	if (!switch_client_serializer) {
3788 		/*
3789 		 * It is not possible to switch client to use acceptor's.
3790 		 * Move acceptor to client's serializer (which is the same as
3791 		 * listener's).
3792 		 */
3793 		tl_serializer_refhold(cl_ep->te_ser);
3794 		tl_serializer_refrele(acc_ep->te_ser);
3795 		acc_ep->te_ser = cl_ep->te_ser;
3796 	}
3797 
3798 	TL_REMOVE_PEER(cl_ep->te_oconp);
3799 	TL_REMOVE_PEER(acc_ep->te_oconp);
3800 
3801 	/*
3802 	 * remove endpoint from incoming connection
3803 	 * delete client from list of incoming connections
	 * (ti_tep is cleared first; the client now hangs off acc_ep->te_conp)
3804 	 */
3805 	tip->ti_tep = NULL;
3806 	tl_freetip(tep, tip);
3807 	tl_ok_ack(wq, ackmp, prim);
3808 
3809 	/*
3810 	 * data blocks already linked in reallocb()
3811 	 */
3812 
3813 	/*
3814 	 * link queues so that I_SENDFD will work
3815 	 */
3816 	if (! IS_SOCKET(tep)) {
3817 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3818 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3819 	}
3820 
3821 	/*
3822 	 * send T_CONN_CON up on client side unless it was already
3823 	 * done (for a socket). In cases any data or ordrel req has been
3824 	 * queued make sure that the service procedure runs.
	 *
	 * ccmp is only assigned in the branch above that builds the
	 * T_CONN_CON, which tests the opposite condition, so it is expected
	 * to be NULL in the socket case below.
3825 	 */
3826 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3827 		enableok(cl_ep->te_wq);
3828 		TL_QENABLE(cl_ep);
3829 		if (ccmp != NULL)
3830 			freemsg(ccmp);
3831 	} else {
3832 		/*
3833 		 * change client state on TE_CONN_CON event
3834 		 */
3835 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3836 		putnext(cl_ep->te_rq, ccmp);
3837 	}
3838 
3839 	/* Mark both endpoints as accepted */
3840 	cl_ep->te_flag |= TL_ACCEPTED;
3841 	acc_ep->te_flag |= TL_ACCEPTED;
3842 
3843 	/*
3844 	 * Allow client and acceptor to close.
3845 	 */
3846 	tl_closeok(acc_ep);
3847 	if (client_noclose_set)
3848 		tl_closeok(cl_ep);
3849 }
3850 
3851 
3852 
3853 
3854 static void
3855 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3856 {
3857 	queue_t			*wq;
3858 	struct T_discon_req	*dr;
3859 	ssize_t			msz;
3860 	tl_endpt_t		*peer_tep = tep->te_conp;
3861 	tl_endpt_t		*srv_tep = tep->te_oconp;
3862 	tl_icon_t		*tip;
3863 	size_t			size;
3864 	mblk_t			*ackmp, *dimp, *respmp;
3865 	struct T_discon_ind	*di;
3866 	t_scalar_t		save_state, new_state;
3867 
3868 	if (tep->te_closing) {
3869 		freemsg(mp);
3870 		return;
3871 	}
3872 
3873 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3874 		TL_UNCONNECT(tep->te_conp);
3875 		peer_tep = NULL;
3876 	}
3877 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3878 		TL_UNCONNECT(tep->te_oconp);
3879 		srv_tep = NULL;
3880 	}
3881 
3882 	wq = tep->te_wq;
3883 
3884 	/*
3885 	 * preallocate memory for:
3886 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3887 	 *	==> known max T_ERROR_ACK
3888 	 * 2. for  T_DISCON_IND
3889 	 */
3890 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3891 	if (! ackmp) {
3892 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3893 		return;
3894 	}
3895 	/*
3896 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3897 	 * will be committed for T_DISCON_IND  later
3898 	 */
3899 
3900 	dr = (struct T_discon_req *)mp->b_rptr;
3901 	msz = MBLKL(mp);
3902 
3903 	/*
3904 	 * validate the state
3905 	 */
3906 	save_state = new_state = tep->te_state;
3907 	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3908 	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3909 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3910 		    SL_TRACE|SL_ERROR,
3911 		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
3912 		    tep->te_state));
3913 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3914 		freemsg(mp);
3915 		return;
3916 	}
3917 	/*
3918 	 * Defer committing the state change until it is determined if
3919 	 * the message will be queued with the tl_icon or not.
3920 	 */
3921 	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3922 
3923 	/* validate the message */
3924 	if (msz < sizeof (struct T_discon_req)) {
3925 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3926 		    "tl_discon_req:invalid message"));
3927 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3928 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3929 		freemsg(mp);
3930 		return;
3931 	}
3932 
3933 	/*
3934 	 * if server, then validate that client exists
3935 	 * by connection sequence number etc.
3936 	 */
3937 	if (tep->te_nicon > 0) { /* server */
3938 
3939 		/*
3940 		 * search server list for disconnect client
3941 		 */
3942 		tip = tl_icon_find(tep, dr->SEQ_number);
3943 		if (tip == NULL) {
3944 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
3945 			    SL_TRACE|SL_ERROR,
3946 			    "tl_discon_req:no disconnect endpoint"));
3947 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3948 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3949 			freemsg(mp);
3950 			return;
3951 		}
3952 		/*
3953 		 * If ti_tep is NULL the client has already closed. In this case
3954 		 * the code below will avoid any action on the client side.
3955 		 */
3956 
3957 		IMPLY(tip->ti_tep != NULL,
3958 		    tip->ti_tep->te_seqno == dr->SEQ_number);
3959 		peer_tep = tip->ti_tep;
3960 	}
3961 
3962 	/*
3963 	 * preallocate now for T_DISCON_IND
3964 	 * ack validity of request (T_OK_ACK) after memory committed
3965 	 */
3966 	size = sizeof (struct T_discon_ind);
3967 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
3968 		tl_memrecover(wq, mp, size);
3969 		freemsg(ackmp);
3970 		return;
3971 	}
3972 
3973 	/*
3974 	 * prepare message to ack validity of request
3975 	 */
3976 	if (tep->te_nicon == 0)
3977 		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3978 	else
3979 		if (tep->te_nicon == 1)
3980 			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3981 		else
3982 			new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3983 
3984 	/*
3985 	 * Flushing queues according to TPI. Using the old state.
3986 	 */
3987 	if ((tep->te_nicon <= 1) &&
3988 	    ((save_state == TS_DATA_XFER) ||
3989 	    (save_state == TS_WIND_ORDREL) ||
3990 	    (save_state == TS_WREQ_ORDREL)))
3991 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3992 
3993 	/* send T_OK_ACK up  */
3994 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3995 
3996 	/*
3997 	 * now do disconnect business
3998 	 */
3999 	if (tep->te_nicon > 0) { /* listener */
4000 		if (peer_tep != NULL && !peer_tep->te_closing) {
4001 			/*
4002 			 * disconnect incoming connect request pending to tep
4003 			 */
4004 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4005 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
4006 				    SL_TRACE|SL_ERROR,
4007 				    "tl_discon_req: reallocb failed"));
4008 				tep->te_state = new_state;
4009 				tl_merror(wq, respmp, ENOMEM);
4010 				return;
4011 			}
4012 			di = (struct T_discon_ind *)dimp->b_rptr;
4013 			di->SEQ_number = BADSEQNUM;
4014 			save_state = peer_tep->te_state;
4015 			peer_tep->te_state = TS_IDLE;
4016 
4017 			TL_REMOVE_PEER(peer_tep->te_oconp);
4018 			enableok(peer_tep->te_wq);
4019 			TL_QENABLE(peer_tep);
4020 		} else {
4021 			freemsg(respmp);
4022 			dimp = NULL;
4023 		}
4024 
4025 		/*
4026 		 * remove endpoint from incoming connection list
4027 		 * - remove disconnect client from list on server
4028 		 */
4029 		tl_freetip(tep, tip);
4030 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4031 		/*
4032 		 * disconnect an outgoing request pending from tep
4033 		 */
4034 
4035 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4036 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4037 			    SL_TRACE|SL_ERROR,
4038 			    "tl_discon_req: reallocb failed"));
4039 			tep->te_state = new_state;
4040 			tl_merror(wq, respmp, ENOMEM);
4041 			return;
4042 		}
4043 		di = (struct T_discon_ind *)dimp->b_rptr;
4044 		DB_TYPE(dimp) = M_PROTO;
4045 		di->PRIM_type  = T_DISCON_IND;
4046 		di->DISCON_reason = ECONNRESET;
4047 		di->SEQ_number = tep->te_seqno;
4048 
4049 		/*
4050 		 * If this is a socket the T_DISCON_IND is queued with
4051 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4052 		 * from the list of pending connections.
4053 		 * Note that when te_oconp is set the peer better have
4054 		 * a t_connind_t for the client.
4055 		 */
4056 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4057 			/*
4058 			 * No need to check that
4059 			 * ti_tep == NULL since the T_DISCON_IND
4060 			 * takes precedence over other queued
4061 			 * messages.
4062 			 */
4063 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4064 			peer_tep = NULL;
4065 			dimp = NULL;
4066 			/*
4067 			 * Can't clear te_oconp since tl_co_unconnect needs
4068 			 * it as a hint not to free the tep.
4069 			 * Keep the state unchanged since tl_conn_res inspects
4070 			 * it.
4071 			 */
4072 			new_state = tep->te_state;
4073 		} else {
4074 			/* Found - delete it */
4075 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4076 			if (tip != NULL) {
4077 				ASSERT(tep == tip->ti_tep);
4078 				save_state = peer_tep->te_state;
4079 				if (peer_tep->te_nicon == 1)
4080 					peer_tep->te_state =
4081 					    NEXTSTATE(TE_DISCON_IND2,
4082 					    peer_tep->te_state);
4083 				else
4084 					peer_tep->te_state =
4085 					    NEXTSTATE(TE_DISCON_IND3,
4086 					    peer_tep->te_state);
4087 				tl_freetip(peer_tep, tip);
4088 			}
4089 			ASSERT(tep->te_oconp != NULL);
4090 			TL_UNCONNECT(tep->te_oconp);
4091 		}
4092 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4093 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4094 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4095 			    SL_TRACE|SL_ERROR,
4096 			    "tl_discon_req: reallocb failed"));
4097 			tep->te_state = new_state;
4098 			tl_merror(wq, respmp, ENOMEM);
4099 			return;
4100 		}
4101 		di = (struct T_discon_ind *)dimp->b_rptr;
4102 		di->SEQ_number = BADSEQNUM;
4103 
4104 		save_state = peer_tep->te_state;
4105 		peer_tep->te_state = TS_IDLE;
4106 	} else {
4107 		/* Not connected */
4108 		tep->te_state = new_state;
4109 		freemsg(respmp);
4110 		return;
4111 	}
4112 
4113 	/* Commit state changes */
4114 	tep->te_state = new_state;
4115 
4116 	if (peer_tep == NULL) {
4117 		ASSERT(dimp == NULL);
4118 		goto done;
4119 	}
4120 	/*
4121 	 * Flush queues on peer before sending up
4122 	 * T_DISCON_IND according to TPI
4123 	 */
4124 
4125 	if ((save_state == TS_DATA_XFER) ||
4126 	    (save_state == TS_WIND_ORDREL) ||
4127 	    (save_state == TS_WREQ_ORDREL))
4128 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4129 
4130 	DB_TYPE(dimp) = M_PROTO;
4131 	di->PRIM_type  = T_DISCON_IND;
4132 	di->DISCON_reason = ECONNRESET;
4133 
4134 	/*
4135 	 * data blocks already linked into dimp by reallocb()
4136 	 */
4137 	/*
4138 	 * send indication message to peer user module
4139 	 */
4140 	ASSERT(dimp != NULL);
4141 	putnext(peer_tep->te_rq, dimp);
4142 done:
4143 	if (tep->te_conp) {	/* disconnect pointers if connected */
4144 		ASSERT(! peer_tep->te_closing);
4145 
4146 		/*
4147 		 * Messages may be queued on peer's write queue
4148 		 * waiting to be processed by its write service
4149 		 * procedure. Before the pointer to the peer transport
4150 		 * structure is set to NULL, qenable the peer's write
4151 		 * queue so that the queued up messages are processed.
4152 		 */
4153 		if ((save_state == TS_DATA_XFER) ||
4154 		    (save_state == TS_WIND_ORDREL) ||
4155 		    (save_state == TS_WREQ_ORDREL))
4156 			TL_QENABLE(peer_tep);
4157 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4158 		TL_UNCONNECT(peer_tep->te_conp);
4159 		if (! IS_SOCKET(tep)) {
4160 			/*
4161 			 * unlink the streams
4162 			 */
4163 			tep->te_wq->q_next = NULL;
4164 			peer_tep->te_wq->q_next = NULL;
4165 		}
4166 		TL_UNCONNECT(tep->te_conp);
4167 	}
4168 }
4169 
4170 static void
4171 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4172 {
4173 	if (!tep->te_closing)
4174 		tl_addr_req(mp, tep);
4175 	else
4176 		freemsg(mp);
4177 
4178 	tl_serializer_exit(tep);
4179 	tl_refrele(tep);
4180 }
4181 
4182 static void
4183 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4184 {
4185 	queue_t			*wq;
4186 	size_t			ack_sz;
4187 	mblk_t			*ackmp;
4188 	struct T_addr_ack	*taa;
4189 
4190 	if (tep->te_closing) {
4191 		freemsg(mp);
4192 		return;
4193 	}
4194 
4195 	wq = tep->te_wq;
4196 
4197 	/*
4198 	 * Note: T_ADDR_REQ message has only PRIM_type field
4199 	 * so it is already validated earlier.
4200 	 */
4201 
4202 	if (IS_CLTS(tep) ||
4203 	    (tep->te_state > TS_WREQ_ORDREL) ||
4204 	    (tep->te_state < TS_DATA_XFER)) {
4205 		/*
4206 		 * Either connectionless or connection oriented but not
4207 		 * in connected data transfer state or half-closed states.
4208 		 */
4209 		ack_sz = sizeof (struct T_addr_ack);
4210 		if (tep->te_state >= TS_IDLE)
4211 			/* is bound */
4212 			ack_sz += tep->te_alen;
4213 		ackmp = reallocb(mp, ack_sz, 0);
4214 		if (ackmp == NULL) {
4215 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4216 			    SL_TRACE|SL_ERROR,
4217 			    "tl_addr_req: reallocb failed"));
4218 			tl_memrecover(wq, mp, ack_sz);
4219 			return;
4220 		}
4221 
4222 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4223 
4224 		bzero(taa, sizeof (struct T_addr_ack));
4225 
4226 		taa->PRIM_type = T_ADDR_ACK;
4227 		ackmp->b_datap->db_type = M_PCPROTO;
4228 		ackmp->b_wptr = (uchar_t *)&taa[1];
4229 
4230 		if (tep->te_state >= TS_IDLE) {
4231 			/* endpoint is bound */
4232 			taa->LOCADDR_length = tep->te_alen;
4233 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4234 
4235 			bcopy(tep->te_abuf, ackmp->b_wptr,
4236 			    tep->te_alen);
4237 			ackmp->b_wptr += tep->te_alen;
4238 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4239 		}
4240 
4241 		(void) qreply(wq, ackmp);
4242 	} else {
4243 		ASSERT(tep->te_state == TS_DATA_XFER ||
4244 		    tep->te_state == TS_WIND_ORDREL ||
4245 		    tep->te_state == TS_WREQ_ORDREL);
4246 		/* connection oriented in data transfer */
4247 		tl_connected_cots_addr_req(mp, tep);
4248 	}
4249 }
4250 
4251 
4252 static void
4253 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4254 {
4255 	tl_endpt_t		*peer_tep;
4256 	size_t			ack_sz;
4257 	mblk_t			*ackmp;
4258 	struct T_addr_ack	*taa;
4259 	uchar_t			*addr_startp;
4260 
4261 	if (tep->te_closing) {
4262 		freemsg(mp);
4263 		return;
4264 	}
4265 
4266 	ASSERT(tep->te_state >= TS_IDLE);
4267 
4268 	ack_sz = sizeof (struct T_addr_ack);
4269 	ack_sz += T_ALIGN(tep->te_alen);
4270 	peer_tep = tep->te_conp;
4271 	ack_sz += peer_tep->te_alen;
4272 
4273 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4274 	if (ackmp == NULL) {
4275 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4276 		    "tl_connected_cots_addr_req: reallocb failed"));
4277 		tl_memrecover(tep->te_wq, mp, ack_sz);
4278 		return;
4279 	}
4280 
4281 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4282 
4283 	/* endpoint is bound */
4284 	taa->LOCADDR_length = tep->te_alen;
4285 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4286 
4287 	addr_startp = (uchar_t *)&taa[1];
4288 
4289 	bcopy(tep->te_abuf, addr_startp,
4290 	    tep->te_alen);
4291 
4292 	taa->REMADDR_length = peer_tep->te_alen;
4293 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4294 	    taa->LOCADDR_length);
4295 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4296 	bcopy(peer_tep->te_abuf, addr_startp,
4297 	    peer_tep->te_alen);
4298 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4299 	    taa->REMADDR_offset + peer_tep->te_alen;
4300 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4301 
4302 	putnext(tep->te_rq, ackmp);
4303 }
4304 
4305 static void
4306 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4307 {
4308 	if (IS_CLTS(tep)) {
4309 		*ia = tl_clts_info_ack;
4310 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4311 	} else {
4312 		*ia = tl_cots_info_ack;
4313 		if (IS_COTSORD(tep))
4314 			ia->SERV_type = T_COTS_ORD;
4315 	}
4316 	ia->TIDU_size = tl_tidusz;
4317 	ia->CURRENT_state = tep->te_state;
4318 }
4319 
4320 /*
4321  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4322  * tl_wput.
4323  */
4324 static void
4325 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4326 {
4327 	mblk_t			*ackmp;
4328 	t_uscalar_t		cap_bits1;
4329 	struct T_capability_ack	*tcap;
4330 
4331 	if (tep->te_closing) {
4332 		freemsg(mp);
4333 		return;
4334 	}
4335 
4336 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4337 
4338 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4339 	    M_PCPROTO, T_CAPABILITY_ACK);
4340 	if (ackmp == NULL) {
4341 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4342 		    "tl_capability_req: reallocb failed"));
4343 		tl_memrecover(tep->te_wq, mp,
4344 		    sizeof (struct T_capability_ack));
4345 		return;
4346 	}
4347 
4348 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4349 	tcap->CAP_bits1 = 0;
4350 
4351 	if (cap_bits1 & TC1_INFO) {
4352 		tl_copy_info(&tcap->INFO_ack, tep);
4353 		tcap->CAP_bits1 |= TC1_INFO;
4354 	}
4355 
4356 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4357 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4358 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4359 	}
4360 
4361 	putnext(tep->te_rq, ackmp);
4362 }
4363 
4364 static void
4365 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4366 {
4367 	if (! tep->te_closing)
4368 		tl_info_req(mp, tep);
4369 	else
4370 		freemsg(mp);
4371 
4372 	tl_serializer_exit(tep);
4373 	tl_refrele(tep);
4374 }
4375 
4376 static void
4377 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4378 {
4379 	mblk_t *ackmp;
4380 
4381 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4382 	    M_PCPROTO, T_INFO_ACK);
4383 	if (ackmp == NULL) {
4384 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4385 		    "tl_info_req: reallocb failed"));
4386 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4387 		return;
4388 	}
4389 
4390 	/*
4391 	 * fill in T_INFO_ACK contents
4392 	 */
4393 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4394 
4395 	/*
4396 	 * send ack message
4397 	 */
4398 	putnext(tep->te_rq, ackmp);
4399 }
4400 
/*
 * Handle M_DATA, T_data_req and T_optdata_req.
 * If this is a socket pass through T_optdata_req options unmodified.
 *
 * Outline:
 *  - Connectionless endpoints never accept these messages (EPROTO via
 *    tl_merror(), or a silent free when closing).
 *  - A closing endpoint only forwards while in TS_DATA_XFER or
 *    TS_WREQ_ORDREL; otherwise the message is freed.
 *  - M_PROTO messages are length-checked; T_OPTDATA_REQ is accepted on
 *    sockets only.
 *  - With a connected peer (te_conp) the message is turned from REQ into
 *    IND in place and passed to the peer's read queue, unless the peer is
 *    flow controlled, in which case it is put back on our queue.
 *  - With only a pending outgoing connection (te_oconp) the message is
 *    either put back (not closing) or queued with the peer's T_CONN_IND
 *    (closing).
 *
 * mp is always consumed: forwarded, queued, put back or freed.
 */
static void
tl_data(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	/* snapshot: decides between reporting errors and silently freeing */
	boolean_t		closing = tep->te_closing;

	if (IS_CLTS(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:clts:unattached M_DATA"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * Length-check the TPI header. M_DATA carries no header and skips
	 * this step.
	 */
	if (DB_TYPE(mp) == M_PROTO) {
		if (prim->type == T_DATA_REQ &&
		    msz < sizeof (struct T_data_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
				SL_TRACE|SL_ERROR,
				"tl_data:T_DATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		} else if (prim->type == T_OPTDATA_REQ &&
		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
			/* too short, or T_OPTDATA_REQ on a non-socket */
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_OPTDATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
	}

	/*
	 * connection oriented provider
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_data:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* neither connected nor connecting: nowhere to send data */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		if (DB_TYPE(mp) == M_PROTO) {
			/* must at least hold the primitive type we rewrite */
			if (msz < sizeof (t_scalar_t)) {
				freemsg(mp);
				return;
			}
			/* reuse message block - just change REQ to IND */
			if (prim->type == T_DATA_REQ)
				prim->type = T_DATA_IND;
			else
				prim->type = T_OPTDATA_IND;
		}
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_data: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		/* invalid state for event TE_DATA_REQ */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:cots:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
	 * (State stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_data: peer gone"));
		return;
	}

	ASSERT(tep->te_serializer == peer_tep->te_serializer);
	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on the queue when we are closing are
	 * bounded so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate peer state
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:rx side:invalid state"));
		/* note: the error is raised on the peer's write queue */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	if (DB_TYPE(mp) == M_PROTO) {
		/* reuse message block - just change REQ to IND */
		if (prim->type == T_DATA_REQ)
			prim->type = T_DATA_IND;
		else
			prim->type = T_OPTDATA_IND;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}
4603 
4604 
4605 
/*
 * Handle T_EXDATA_REQ (expedited data).
 *
 * Follows the same shape as tl_data():
 *  - The message must be at least sizeof (struct T_exdata_req).
 *  - A closing endpoint only forwards while in TS_DATA_XFER or
 *    TS_WREQ_ORDREL; otherwise the message is freed.
 *  - With a connected peer (te_conp) the primitive is rewritten in place
 *    to T_EXDATA_IND and passed to the peer's read queue, unless the peer
 *    is flow controlled, in which case it is put back on our queue.
 *  - With only a pending outgoing connection (te_oconp) the message is
 *    either put back (not closing) or queued with the peer's T_CONN_IND
 *    (closing).
 *
 * mp is always consumed: forwarded, queued, put back or freed.
 */
static void
tl_exdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	/* snapshot: decides between reporting errors and silently freeing */
	boolean_t		closing = tep->te_closing;

	if (msz < sizeof (struct T_exdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_exdata:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* neither connected nor connecting: nowhere to send data */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata: closing socket ocon"));
		/* reuse message block - just change REQ to IND */
		prim->type = T_EXDATA_IND;
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		/* invalid state for a T_EXDATA_REQ */
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
		    tep->te_state));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
	 * (state stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_exdata: peer gone"));
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on the queue when we are closing are
	 * bounded so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:rx side:invalid state"));
		/* note: the error is raised on the peer's write queue */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * reuse message block
	 */
	prim->type = T_EXDATA_IND;

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}
4767 
4768 
4769 
/*
 * Handle T_ORDREL_REQ (orderly release).
 *
 *  - The message must be at least sizeof (struct T_ordrel_req).
 *  - Only TS_DATA_XFER and TS_WREQ_ORDREL are valid; any other state is an
 *    error (EPROTO via tl_merror(), or a silent free when closing).
 *  - With only a pending outgoing connection (te_oconp set, te_conp NULL)
 *    the message is either put back (not closing) or rewritten to
 *    T_ORDREL_IND and queued with the peer's T_CONN_IND (closing); the
 *    local state is not advanced on that path.
 *  - Otherwise the local state is advanced with TE_ORDREL_REQ, and if a
 *    live peer exists its state is advanced with TE_ORDREL_IND and the
 *    message, rewritten to T_ORDREL_IND, is passed to its read queue.
 *
 * mp is always consumed: forwarded, queued, put back or freed.
 */
static void
tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq =  tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	/* snapshot: decides between reporting errors and silently freeing */
	boolean_t		closing = tep->te_closing;

	if (msz < sizeof (struct T_ordrel_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_DATA_XFER:
	case TS_WREQ_ORDREL:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/*
		 * No peer at all: still take the local state transition
		 * below; the message is then freed as "peer gone".
		 */
		if (tep->te_oconp == NULL)
			break;

		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_ordlrel: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordlrel: closing socket ocon"));
		/* reuse message block - just change REQ to IND */
		prim->type = T_ORDREL_IND;
		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	default:
		/* invalid state for a T_ORDREL_REQ */
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
		    tep->te_state));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}
	/* unlike data, an orderly release does change the local state */
	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_ordrel: peer gone"));
		freemsg(mp);
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled except when we are closing.
	 * Note: Messages already on the queue when we are closing are
	 * bounded so we can ignore flow control.
	 */
	if (! canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:rx side:invalid state"));
		/* note: the error is raised on the peer's write queue */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);

	/*
	 * reuse message block
	 */
	prim->type = T_ORDREL_IND;
	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
	    "tl_ordrel: send ordrel_ind"));

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}
4891 
4892 
4893 /*
4894  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4895  */
4896 static void
4897 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4898 {
4899 	size_t			err_sz;
4900 	tl_endpt_t		*tep;
4901 	struct T_unitdata_req	*udreq;
4902 	mblk_t			*err_mp;
4903 	t_scalar_t		alen;
4904 	t_scalar_t		olen;
4905 	struct T_uderror_ind	*uderr;
4906 	uchar_t			*addr_startp;
4907 
4908 	err_sz = sizeof (struct T_uderror_ind);
4909 	tep = (tl_endpt_t *)wq->q_ptr;
4910 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4911 	alen = udreq->DEST_length;
4912 	olen = udreq->OPT_length;
4913 
4914 	if (alen > 0)
4915 		err_sz = T_ALIGN(err_sz + alen);
4916 	if (olen > 0)
4917 		err_sz += olen;
4918 
4919 	err_mp = allocb(err_sz, BPRI_MED);
4920 	if (! err_mp) {
4921 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4922 		    "tl_uderr:allocb failure"));
4923 		/*
4924 		 * Note: no rollback of state needed as it does
4925 		 * not change in connectionless transport
4926 		 */
4927 		tl_memrecover(wq, mp, err_sz);
4928 		return;
4929 	}
4930 
4931 	DB_TYPE(err_mp) = M_PROTO;
4932 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
4933 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4934 	uderr->PRIM_type = T_UDERROR_IND;
4935 	uderr->ERROR_type = err;
4936 	uderr->DEST_length = alen;
4937 	uderr->OPT_length = olen;
4938 	if (alen <= 0) {
4939 		uderr->DEST_offset = 0;
4940 	} else {
4941 		uderr->DEST_offset =
4942 		    (t_scalar_t)sizeof (struct T_uderror_ind);
4943 		addr_startp  = mp->b_rptr + udreq->DEST_offset;
4944 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4945 		    (size_t)alen);
4946 	}
4947 	if (olen <= 0) {
4948 		uderr->OPT_offset = 0;
4949 	} else {
4950 		uderr->OPT_offset =
4951 		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4952 		    uderr->DEST_length);
4953 		addr_startp  = mp->b_rptr + udreq->OPT_offset;
4954 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4955 		    (size_t)olen);
4956 	}
4957 	freemsg(mp);
4958 
4959 	/*
4960 	 * send indication message
4961 	 */
4962 	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4963 
4964 	qreply(wq, err_mp);
4965 }
4966 
4967 static void
4968 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4969 {
4970 	queue_t *wq = tep->te_wq;
4971 
4972 	if (!tep->te_closing && (wq->q_first != NULL)) {
4973 		TL_PUTQ(tep, mp);
4974 	} else if (tep->te_rq != NULL)
4975 		tl_unitdata(mp, tep);
4976 	else
4977 		freemsg(mp);
4978 
4979 	tl_serializer_exit(tep);
4980 	tl_refrele(tep);
4981 }
4982 
4983 /*
4984  * Handle T_unitdata_req.
4985  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4986  * If this is a socket pass through options unmodified.
4987  */
4988 static void
4989 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4990 {
4991 	queue_t			*wq = tep->te_wq;
4992 	soux_addr_t		ux_addr;
4993 	tl_addr_t		destaddr;
4994 	uchar_t			*addr_startp;
4995 	tl_endpt_t		*peer_tep;
4996 	struct T_unitdata_ind	*udind;
4997 	struct T_unitdata_req	*udreq;
4998 	ssize_t			msz, ui_sz;
4999 	t_scalar_t		alen, aoff, olen, ooff;
5000 	t_scalar_t		oldolen = 0;
5001 	cred_t			*cr = NULL;
5002 	pid_t			cpid;
5003 
5004 	udreq = (struct T_unitdata_req *)mp->b_rptr;
5005 	msz = MBLKL(mp);
5006 
5007 	/*
5008 	 * validate the state
5009 	 */
5010 	if (tep->te_state != TS_IDLE) {
5011 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
5012 		    SL_TRACE|SL_ERROR,
5013 		    "tl_wput:T_CONN_REQ:out of state"));
5014 		tl_merror(wq, mp, EPROTO);
5015 		return;
5016 	}
5017 	/*
5018 	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5019 	 * (state does not change on this event)
5020 	 */
5021 
5022 	/*
5023 	 * validate the message
5024 	 * Note: dereference fields in struct inside message only
5025 	 * after validating the message length.
5026 	 */
5027 	if (msz < sizeof (struct T_unitdata_req)) {
5028 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5029 		    "tl_unitdata:invalid message length"));
5030 		tl_merror(wq, mp, EINVAL);
5031 		return;
5032 	}
5033 	alen = udreq->DEST_length;
5034 	aoff = udreq->DEST_offset;
5035 	oldolen = olen = udreq->OPT_length;
5036 	ooff = udreq->OPT_offset;
5037 	if (olen == 0)
5038 		ooff = 0;
5039 
5040 	if (IS_SOCKET(tep)) {
5041 		if ((alen != TL_SOUX_ADDRLEN) ||
5042 		    (aoff < 0) ||
5043 		    (aoff + alen > msz) ||
5044 		    (olen < 0) || (ooff < 0) ||
5045 		    ((olen > 0) && ((ooff + olen) > msz))) {
5046 			(void) (STRLOG(TL_ID, tep->te_minor,
5047 			    1, SL_TRACE|SL_ERROR,
5048 			    "tl_unitdata_req: invalid socket addr "
5049 			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5050 			    (int)msz, alen, aoff, olen, ooff));
5051 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5052 			return;
5053 		}
5054 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5055 
5056 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5057 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5058 			(void) (STRLOG(TL_ID, tep->te_minor,
5059 			    1, SL_TRACE|SL_ERROR,
5060 			    "tl_conn_req: invalid socket magic"));
5061 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5062 			return;
5063 		}
5064 	} else {
5065 		if ((alen < 0) ||
5066 		    (aoff < 0) ||
5067 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5068 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5069 		    ((aoff + alen) < 0) ||
5070 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5071 		    (olen < 0) ||
5072 		    (ooff < 0) ||
5073 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5074 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5075 				    SL_TRACE|SL_ERROR,
5076 				    "tl_unitdata:invalid unit data message"));
5077 			tl_merror(wq, mp, EINVAL);
5078 			return;
5079 		}
5080 	}
5081 
5082 	/* Options not supported unless it's a socket */
5083 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5084 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5085 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5086 		tl_uderr(wq, mp, EPROTO);
5087 		return;
5088 	}
5089 #ifdef DEBUG
5090 	/*
5091 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5092 	 * if (! assertion)
5093 	 *	log warning;
5094 	 */
5095 	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5096 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5097 		    "tl_unitdata:addr overlaps TPI message"));
5098 	}
5099 #endif
5100 	/*
5101 	 * get destination endpoint
5102 	 */
5103 	destaddr.ta_alen = alen;
5104 	destaddr.ta_abuf = mp->b_rptr + aoff;
5105 	destaddr.ta_zoneid = tep->te_zoneid;
5106 
5107 	/*
5108 	 * Check whether the destination is the same that was used previously
5109 	 * and the destination endpoint is in the right state. If something is
5110 	 * wrong, find destination again and cache it.
5111 	 */
5112 	peer_tep = tep->te_lastep;
5113 
5114 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5115 	    (peer_tep->te_state != TS_IDLE) ||
5116 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5117 		/*
5118 		 * Not the same as cached destination , need to find the right
5119 		 * destination.
5120 		 */
5121 		peer_tep = (IS_SOCKET(tep) ?
5122 		    tl_sock_find_peer(tep, &ux_addr) :
5123 		    tl_find_peer(tep, &destaddr));
5124 
5125 		if (peer_tep == NULL) {
5126 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5127 			    SL_TRACE|SL_ERROR,
5128 			    "tl_unitdata:no one at destination address"));
5129 			tl_uderr(wq, mp, ECONNRESET);
5130 			return;
5131 		}
5132 
5133 		/*
5134 		 * Cache the new peer.
5135 		 */
5136 		if (tep->te_lastep != NULL)
5137 			tl_refrele(tep->te_lastep);
5138 
5139 		tep->te_lastep = peer_tep;
5140 	}
5141 
5142 	if (peer_tep->te_state != TS_IDLE) {
5143 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5144 		    "tl_unitdata:provider in invalid state"));
5145 		tl_uderr(wq, mp, EPROTO);
5146 		return;
5147 	}
5148 
5149 	ASSERT(peer_tep->te_rq != NULL);
5150 
5151 	/*
5152 	 * Put it back if flow controlled except when we are closing.
5153 	 * Note: Messages already on queue when we are closing is bounded
5154 	 * so we can ignore flow control.
5155 	 */
5156 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5157 		/* record what we are flow controlled on */
5158 		if (tep->te_flowq != NULL) {
5159 			list_remove(&tep->te_flowq->te_flowlist, tep);
5160 		}
5161 		list_insert_head(&peer_tep->te_flowlist, tep);
5162 		tep->te_flowq = peer_tep;
5163 		TL_PUTBQ(tep, mp);
5164 		return;
5165 	}
5166 	/*
5167 	 * prepare indication message
5168 	 */
5169 
5170 	/*
5171 	 * calculate length of message
5172 	 */
5173 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5174 		cr = msg_getcred(mp, &cpid);
5175 		ASSERT(cr != NULL);
5176 
5177 		if (peer_tep->te_flag & TL_SETCRED) {
5178 			ASSERT(olen == 0);
5179 			olen = (t_scalar_t)sizeof (struct opthdr) +
5180 			    OPTLEN(sizeof (tl_credopt_t));
5181 						/* 1 option only */
5182 		} else if (peer_tep->te_flag & TL_SETUCRED) {
5183 			ASSERT(olen == 0);
5184 			olen = (t_scalar_t)sizeof (struct opthdr) +
5185 			    OPTLEN(ucredminsize(cr));
5186 						/* 1 option only */
5187 		} else {
5188 			/* Possibly more than one option */
5189 			olen += (t_scalar_t)sizeof (struct T_opthdr) +
5190 			    OPTLEN(ucredminsize(cr));
5191 		}
5192 	}
5193 
5194 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5195 	    olen;
5196 	/*
5197 	 * If the unitdata_ind fits and we are not adding options
5198 	 * reuse the udreq mblk.
5199 	 */
5200 	if (msz >= ui_sz && alen >= tep->te_alen &&
5201 	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5202 		/*
5203 		 * Reuse the original mblk. Leave options in place.
5204 		 */
5205 		udind =  (struct T_unitdata_ind *)mp->b_rptr;
5206 		udind->PRIM_type = T_UNITDATA_IND;
5207 		udind->SRC_length = tep->te_alen;
5208 		addr_startp = mp->b_rptr + udind->SRC_offset;
5209 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5210 	} else {
5211 		/* Allocate a new T_unidata_ind message */
5212 		mblk_t *ui_mp;
5213 
5214 		ui_mp = allocb(ui_sz, BPRI_MED);
5215 		if (! ui_mp) {
5216 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5217 			    "tl_unitdata:allocb failure:message queued"));
5218 			tl_memrecover(wq, mp, ui_sz);
5219 			return;
5220 		}
5221 
5222 		/*
5223 		 * fill in T_UNITDATA_IND contents
5224 		 */
5225 		DB_TYPE(ui_mp) = M_PROTO;
5226 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5227 		udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5228 		udind->PRIM_type = T_UNITDATA_IND;
5229 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5230 		udind->SRC_length = tep->te_alen;
5231 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5232 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5233 		udind->OPT_offset =
5234 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5235 		udind->OPT_length = olen;
5236 		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5237 
5238 			if (oldolen != 0) {
5239 				bcopy((void *)((uintptr_t)udreq + ooff),
5240 				    (void *)((uintptr_t)udind +
5241 				    udind->OPT_offset),
5242 				    oldolen);
5243 			}
5244 			ASSERT(cr != NULL);
5245 
5246 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5247 			    oldolen, cr, cpid,
5248 			    peer_tep->te_flag, peer_tep->te_credp);
5249 		} else {
5250 			bcopy((void *)((uintptr_t)udreq + ooff),
5251 			    (void *)((uintptr_t)udind + udind->OPT_offset),
5252 			    olen);
5253 		}
5254 
5255 		/*
5256 		 * relink data blocks from mp to ui_mp
5257 		 */
5258 		ui_mp->b_cont = mp->b_cont;
5259 		freeb(mp);
5260 		mp = ui_mp;
5261 	}
5262 	/*
5263 	 * send indication message
5264 	 */
5265 	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5266 	putnext(peer_tep->te_rq, mp);
5267 }
5268 
5269 
5270 
5271 /*
5272  * Check if a given addr is in use.
5273  * Endpoint ptr returned or NULL if not found.
5274  * The name space is separate for each mode. This implies that
5275  * sockets get their own name space.
5276  */
5277 static tl_endpt_t *
5278 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5279 {
5280 	tl_endpt_t *peer_tep = NULL;
5281 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5282 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5283 
5284 	ASSERT(! IS_SOCKET(tep));
5285 
5286 	ASSERT(ap != NULL && ap->ta_alen > 0);
5287 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5288 	ASSERT(ap->ta_abuf != NULL);
5289 	EQUIV(rc == 0, peer_tep != NULL);
5290 	IMPLY(rc == 0,
5291 	    (tep->te_zoneid == peer_tep->te_zoneid) &&
5292 	    (tep->te_transport == peer_tep->te_transport));
5293 
5294 	if ((rc == 0) && (peer_tep->te_closing)) {
5295 		tl_refrele(peer_tep);
5296 		peer_tep = NULL;
5297 	}
5298 
5299 	return (peer_tep);
5300 }
5301 
5302 /*
5303  * Find peer for a socket based on unix domain address.
5304  * For implicit addresses our peer can be found by minor number in ai hash. For
5305  * explicit binds we look vnode address at addr_hash.
5306  */
5307 static tl_endpt_t *
5308 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5309 {
5310 	tl_endpt_t *peer_tep = NULL;
5311 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5312 	    tep->te_aihash : tep->te_addrhash;
5313 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5314 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5315 
5316 	ASSERT(IS_SOCKET(tep));
5317 	EQUIV(rc == 0, peer_tep != NULL);
5318 	IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5319 
5320 	if (peer_tep != NULL) {
5321 		/* Don't attempt to use closing peer. */
5322 		if (peer_tep->te_closing)
5323 			goto errout;
5324 
5325 		/*
5326 		 * Cross-zone unix sockets are permitted, but for Trusted
5327 		 * Extensions only, the "server" for these must be in the
5328 		 * global zone.
5329 		 */
5330 		if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5331 		    is_system_labeled() &&
5332 		    (peer_tep->te_zoneid != GLOBAL_ZONEID))
5333 			goto errout;
5334 	}
5335 
5336 	return (peer_tep);
5337 
5338 errout:
5339 	tl_refrele(peer_tep);
5340 	return (NULL);
5341 }
5342 
5343 /*
5344  * Generate a free addr and return it in struct pointed by ap
5345  * but allocating space for address buffer.
5346  * The generated address will be at least 4 bytes long and, if req->ta_alen
5347  * exceeds 4 bytes, be req->ta_alen bytes long.
5348  *
5349  * If address is found it will be inserted in the hash.
5350  *
5351  * If req->ta_alen is larger than the default alen (4 bytes) the last
5352  * alen-4 bytes will always be the same as in req.
5353  *
5354  * Return 0 for failure.
5355  * Return non-zero for success.
5356  */
5357 static boolean_t
5358 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5359 {
5360 	t_scalar_t	alen;
5361 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5362 
5363 	ASSERT(tep->te_hash_hndl != NULL);
5364 	ASSERT(! IS_SOCKET(tep));
5365 
5366 	if (tep->te_hash_hndl == NULL)
5367 		return (B_FALSE);
5368 
5369 	/*
5370 	 * check if default addr is in use
5371 	 * if it is - bump it and try again
5372 	 */
5373 	if (req == NULL) {
5374 		alen = sizeof (uint32_t);
5375 	} else {
5376 		alen = max(req->ta_alen, sizeof (uint32_t));
5377 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5378 	}
5379 
5380 	if (tep->te_alen < alen) {
5381 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5382 
5383 		/*
5384 		 * Not enough space in tep->ta_ap to hold the address,
5385 		 * allocate a bigger space.
5386 		 */
5387 		if (abuf == NULL)
5388 			return (B_FALSE);
5389 
5390 		if (tep->te_alen > 0)
5391 			kmem_free(tep->te_abuf, tep->te_alen);
5392 
5393 		tep->te_alen = alen;
5394 		tep->te_abuf = abuf;
5395 	}
5396 
5397 	/* Copy in the address in req */
5398 	if (req != NULL) {
5399 		ASSERT(alen >= req->ta_alen);
5400 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5401 	}
5402 
5403 	/*
5404 	 * First try minor number then try default addresses.
5405 	 */
5406 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5407 
5408 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5409 		if (mod_hash_insert_reserve(tep->te_addrhash,
5410 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5411 		    tep->te_hash_hndl) == 0) {
5412 			/*
5413 			 * found free address
5414 			 */
5415 			tep->te_flag |= TL_ADDRHASHED;
5416 			tep->te_hash_hndl = NULL;
5417 
5418 			return (B_TRUE); /* successful return */
5419 		}
5420 		/*
5421 		 * Use default address.
5422 		 */
5423 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5424 		atomic_add_32(&tep->te_defaddr, 1);
5425 	}
5426 
5427 	/*
5428 	 * Failed to find anything.
5429 	 */
5430 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5431 	    "tl_get_any_addr:looped 2^32 times"));
5432 	return (B_FALSE);
5433 }
5434 
5435 /*
5436  * reallocb + set r/w ptrs to reflect size.
5437  */
5438 static mblk_t *
5439 tl_resizemp(mblk_t *mp, ssize_t new_size)
5440 {
5441 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5442 		return (NULL);
5443 
5444 	mp->b_rptr = DB_BASE(mp);
5445 	mp->b_wptr = mp->b_rptr + new_size;
5446 	return (mp);
5447 }
5448 
5449 static void
5450 tl_cl_backenable(tl_endpt_t *tep)
5451 {
5452 	list_t *l = &tep->te_flowlist;
5453 	tl_endpt_t *elp;
5454 
5455 	ASSERT(IS_CLTS(tep));
5456 
5457 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5458 		ASSERT(tep->te_ser == elp->te_ser);
5459 		ASSERT(elp->te_flowq == tep);
5460 		if (! elp->te_closing)
5461 			TL_QENABLE(elp);
5462 		elp->te_flowq = NULL;
5463 		list_remove(l, elp);
5464 	}
5465 }
5466 
5467 /*
5468  * Unconnect endpoints.
5469  */
5470 static void
5471 tl_co_unconnect(tl_endpt_t *tep)
5472 {
5473 	tl_endpt_t	*peer_tep = tep->te_conp;
5474 	tl_endpt_t	*srv_tep = tep->te_oconp;
5475 	list_t		*l;
5476 	tl_icon_t  	*tip;
5477 	tl_endpt_t	*cl_tep;
5478 	mblk_t		*d_mp;
5479 
5480 	ASSERT(IS_COTS(tep));
5481 	/*
5482 	 * If our peer is closing, don't use it.
5483 	 */
5484 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5485 		TL_UNCONNECT(tep->te_conp);
5486 		peer_tep = NULL;
5487 	}
5488 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5489 		TL_UNCONNECT(tep->te_oconp);
5490 		srv_tep = NULL;
5491 	}
5492 
5493 	if (tep->te_nicon > 0) {
5494 		l = &tep->te_iconp;
5495 		/*
5496 		 * If incoming requests pending, change state
5497 		 * of clients on disconnect ind event and send
5498 		 * discon_ind pdu to modules above them
5499 		 * for server: all clients get disconnect
5500 		 */
5501 
5502 		while (tep->te_nicon > 0) {
5503 			tip    = list_head(l);
5504 			cl_tep = tip->ti_tep;
5505 
5506 			if (cl_tep == NULL) {
5507 				tl_freetip(tep, tip);
5508 				continue;
5509 			}
5510 
5511 			if (cl_tep->te_oconp != NULL) {
5512 				ASSERT(cl_tep != cl_tep->te_oconp);
5513 				TL_UNCONNECT(cl_tep->te_oconp);
5514 			}
5515 
5516 			if (cl_tep->te_closing) {
5517 				tl_freetip(tep, tip);
5518 				continue;
5519 			}
5520 
5521 			enableok(cl_tep->te_wq);
5522 			TL_QENABLE(cl_tep);
5523 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5524 			if (d_mp != NULL) {
5525 				cl_tep->te_state = TS_IDLE;
5526 				putnext(cl_tep->te_rq, d_mp);
5527 			} else {
5528 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5529 				    SL_TRACE|SL_ERROR,
5530 				    "tl_co_unconnect:icmng: "
5531 				    "allocb failure"));
5532 			}
5533 			tl_freetip(tep, tip);
5534 		}
5535 	} else if (srv_tep != NULL) {
5536 		/*
5537 		 * If outgoing request pending, change state
5538 		 * of server on discon ind event
5539 		 */
5540 
5541 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5542 		    IS_COTSORD(srv_tep) &&
5543 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5544 			/*
5545 			 * Queue ordrel_ind for server to be picked up
5546 			 * when the connection is accepted.
5547 			 */
5548 			d_mp = tl_ordrel_ind_alloc();
5549 		} else {
5550 			/*
5551 			 * send discon_ind to server
5552 			 */
5553 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5554 		}
5555 		if (d_mp == NULL) {
5556 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5557 			    SL_TRACE|SL_ERROR,
5558 			    "tl_co_unconnect:outgoing:allocb failure"));
5559 			TL_UNCONNECT(tep->te_oconp);
5560 			goto discon_peer;
5561 		}
5562 
5563 		/*
5564 		 * If this is a socket the T_DISCON_IND is queued with
5565 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5566 		 * from the list of pending connections.
5567 		 * Note that when te_oconp is set the peer better have
5568 		 * a t_connind_t for the client.
5569 		 */
5570 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5571 			/*
5572 			 * Queue the disconnection message.
5573 			 */
5574 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5575 		} else {
5576 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5577 			if (tip == NULL) {
5578 				freemsg(d_mp);
5579 			} else {
5580 				ASSERT(tep == tip->ti_tep);
5581 				ASSERT(tep->te_ser == srv_tep->te_ser);
5582 				/*
5583 				 * Delete tip from the server list.
5584 				 */
5585 				if (srv_tep->te_nicon == 1) {
5586 					srv_tep->te_state =
5587 					    NEXTSTATE(TE_DISCON_IND2,
5588 					    srv_tep->te_state);
5589 				} else {
5590 					srv_tep->te_state =
5591 					    NEXTSTATE(TE_DISCON_IND3,
5592 					    srv_tep->te_state);
5593 				}
5594 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5595 				    T_DISCON_IND);
5596 				putnext(srv_tep->te_rq, d_mp);
5597 				tl_freetip(srv_tep, tip);
5598 			}
5599 			TL_UNCONNECT(tep->te_oconp);
5600 			srv_tep = NULL;
5601 		}
5602 	} else if (peer_tep != NULL) {
5603 		/*
5604 		 * unconnect existing connection
5605 		 * If connected, change state of peer on
5606 		 * discon ind event and send discon ind pdu
5607 		 * to module above it
5608 		 */
5609 
5610 		ASSERT(tep->te_ser == peer_tep->te_ser);
5611 		if (IS_COTSORD(peer_tep) &&
5612 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5613 		    peer_tep->te_state == TS_DATA_XFER)) {
5614 			/*
5615 			 * send ordrel ind
5616 			 */
5617 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5618 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5619 			    peer_tep->te_state,
5620 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5621 			d_mp = tl_ordrel_ind_alloc();
5622 			if (! d_mp) {
5623 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5624 				    SL_TRACE|SL_ERROR,
5625 				    "tl_co_unconnect:connected:"
5626 				    "allocb failure"));
5627 				/*
5628 				 * Continue with cleaning up peer as
5629 				 * this side may go away with the close
5630 				 */
5631 				TL_QENABLE(peer_tep);
5632 				goto discon_peer;
5633 			}
5634 			peer_tep->te_state =
5635 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5636 
5637 			putnext(peer_tep->te_rq, d_mp);
5638 			/*
5639 			 * Handle flow control case.  This will generate
5640 			 * a t_discon_ind message with reason 0 if there
5641 			 * is data queued on the write side.
5642 			 */
5643 			TL_QENABLE(peer_tep);
5644 		} else if (IS_COTSORD(peer_tep) &&
5645 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5646 			/*
5647 			 * Sent an ordrel_ind. We send a discon with
5648 			 * with error 0 to inform that the peer is gone.
5649 			 */
5650 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5651 			    SL_TRACE|SL_ERROR,
5652 			    "tl_co_unconnect: discon in state %d",
5653 			    tep->te_state));
5654 			tl_discon_ind(peer_tep, 0);
5655 		} else {
5656 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5657 			    SL_TRACE|SL_ERROR,
5658 			    "tl_co_unconnect: state %d", tep->te_state));
5659 			tl_discon_ind(peer_tep, ECONNRESET);
5660 		}
5661 
5662 discon_peer:
5663 		/*
5664 		 * Disconnect cross-pointers only for close
5665 		 */
5666 		if (tep->te_closing) {
5667 			peer_tep = tep->te_conp;
5668 			TL_REMOVE_PEER(peer_tep->te_conp);
5669 			TL_REMOVE_PEER(tep->te_conp);
5670 		}
5671 	}
5672 }
5673 
5674 /*
5675  * Note: The following routine does not recover from allocb()
5676  * failures
5677  * The reason should be from the <sys/errno.h> space.
5678  */
5679 static void
5680 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5681 {
5682 	mblk_t *d_mp;
5683 
5684 	if (tep->te_closing)
5685 		return;
5686 
5687 	/*
5688 	 * flush the queues.
5689 	 */
5690 	flushq(tep->te_rq, FLUSHDATA);
5691 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5692 
5693 	/*
5694 	 * send discon ind
5695 	 */
5696 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5697 	if (! d_mp) {
5698 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5699 		    "tl_discon_ind:allocb failure"));
5700 		return;
5701 	}
5702 	tep->te_state = TS_IDLE;
5703 	putnext(tep->te_rq, d_mp);
5704 }
5705 
5706 /*
5707  * Note: The following routine does not recover from allocb()
5708  * failures
5709  * The reason should be from the <sys/errno.h> space.
5710  */
5711 static mblk_t *
5712 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5713 {
5714 	mblk_t *mp;
5715 	struct T_discon_ind *tdi;
5716 
5717 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5718 		DB_TYPE(mp) = M_PROTO;
5719 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5720 		tdi = (struct T_discon_ind *)mp->b_rptr;
5721 		tdi->PRIM_type = T_DISCON_IND;
5722 		tdi->DISCON_reason = reason;
5723 		tdi->SEQ_number = seqnum;
5724 	}
5725 	return (mp);
5726 }
5727 
5728 
5729 /*
5730  * Note: The following routine does not recover from allocb()
5731  * failures
5732  */
5733 static mblk_t *
5734 tl_ordrel_ind_alloc(void)
5735 {
5736 	mblk_t *mp;
5737 	struct T_ordrel_ind *toi;
5738 
5739 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5740 		DB_TYPE(mp) = M_PROTO;
5741 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5742 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5743 		toi->PRIM_type = T_ORDREL_IND;
5744 	}
5745 	return (mp);
5746 }
5747 
5748 
5749 /*
5750  * Lookup the seqno in the list of queued connections.
5751  */
5752 static tl_icon_t *
5753 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5754 {
5755 	list_t *l = &tep->te_iconp;
5756 	tl_icon_t *tip = list_head(l);
5757 
5758 	ASSERT(seqno != 0);
5759 
5760 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5761 		;
5762 
5763 	return (tip);
5764 }
5765 
5766 /*
5767  * Queue data for a given T_CONN_IND while verifying that redundant
5768  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5769  * Used when the originator of the connection closes.
5770  */
5771 static void
5772 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5773 {
5774 	tl_icon_t		*tip;
5775 	mblk_t			**mpp, *mp;
5776 	int			prim, nprim;
5777 
5778 	if (nmp->b_datap->db_type == M_PROTO)
5779 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5780 	else
5781 		nprim = -1;	/* M_DATA */
5782 
5783 	tip = tl_icon_find(tep, seqno);
5784 	if (tip == NULL) {
5785 		freemsg(nmp);
5786 		return;
5787 	}
5788 
5789 	ASSERT(tip->ti_seqno != 0);
5790 	mpp = &tip->ti_mp;
5791 	while (*mpp != NULL) {
5792 		mp = *mpp;
5793 
5794 		if (mp->b_datap->db_type == M_PROTO)
5795 			prim = ((union T_primitives *)mp->b_rptr)->type;
5796 		else
5797 			prim = -1;	/* M_DATA */
5798 
5799 		/*
5800 		 * Allow nothing after a T_DISCON_IND
5801 		 */
5802 		if (prim == T_DISCON_IND) {
5803 			freemsg(nmp);
5804 			return;
5805 		}
5806 		/*
5807 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5808 		 */
5809 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5810 			freemsg(nmp);
5811 			return;
5812 		}
5813 		mpp = &(mp->b_next);
5814 	}
5815 	*mpp = nmp;
5816 }
5817 
5818 /*
5819  * Verify if a certain TPI primitive exists on the connind queue.
5820  * Use prim -1 for M_DATA.
5821  * Return non-zero if found.
5822  */
5823 static boolean_t
5824 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5825 {
5826 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5827 	boolean_t found = B_FALSE;
5828 
5829 	if (tip != NULL) {
5830 		mblk_t *mp;
5831 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5832 			found = (DB_TYPE(mp) == M_PROTO &&
5833 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5834 		}
5835 	}
5836 	return (found);
5837 }
5838 
5839 /*
5840  * Send the b_next mblk chain that has accumulated before the connection
5841  * was accepted. Perform the necessary state transitions.
5842  */
5843 static void
5844 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5845 {
5846 	mblk_t			*mp;
5847 	union T_primitives	*primp;
5848 
5849 	if (tep->te_closing) {
5850 		tl_icon_freemsgs(mpp);
5851 		return;
5852 	}
5853 
5854 	ASSERT(tep->te_state == TS_DATA_XFER);
5855 	ASSERT(tep->te_rq->q_first == NULL);
5856 
5857 	while ((mp = *mpp) != NULL) {
5858 		*mpp = mp->b_next;
5859 		mp->b_next = NULL;
5860 
5861 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5862 		switch (DB_TYPE(mp)) {
5863 		default:
5864 			freemsg(mp);
5865 			break;
5866 		case M_DATA:
5867 			putnext(tep->te_rq, mp);
5868 			break;
5869 		case M_PROTO:
5870 			primp = (union T_primitives *)mp->b_rptr;
5871 			switch (primp->type) {
5872 			case T_UNITDATA_IND:
5873 			case T_DATA_IND:
5874 			case T_OPTDATA_IND:
5875 			case T_EXDATA_IND:
5876 				putnext(tep->te_rq, mp);
5877 				break;
5878 			case T_ORDREL_IND:
5879 				tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5880 				    tep->te_state);
5881 				putnext(tep->te_rq, mp);
5882 				break;
5883 			case T_DISCON_IND:
5884 				tep->te_state = TS_IDLE;
5885 				putnext(tep->te_rq, mp);
5886 				break;
5887 			default:
5888 #ifdef DEBUG
5889 				cmn_err(CE_PANIC,
5890 				    "tl_icon_sendmsgs: unknown primitive");
5891 #endif /* DEBUG */
5892 				freemsg(mp);
5893 				break;
5894 			}
5895 			break;
5896 		}
5897 	}
5898 }
5899 
5900 /*
5901  * Free the b_next mblk chain that has accumulated before the connection
5902  * was accepted.
5903  */
5904 static void
5905 tl_icon_freemsgs(mblk_t **mpp)
5906 {
5907 	mblk_t *mp;
5908 
5909 	while ((mp = *mpp) != NULL) {
5910 		*mpp = mp->b_next;
5911 		mp->b_next = NULL;
5912 		freemsg(mp);
5913 	}
5914 }
5915 
5916 /*
5917  * Send M_ERROR
5918  * Note: assumes caller ensured enough space in mp or enough
5919  *	memory available. Does not attempt recovery from allocb()
5920  *	failures
5921  */
5922 
5923 static void
5924 tl_merror(queue_t *wq, mblk_t *mp, int error)
5925 {
5926 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5927 
5928 	if (tep->te_closing) {
5929 		freemsg(mp);
5930 		return;
5931 	}
5932 
5933 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
5934 	    SL_TRACE|SL_ERROR,
5935 	    "tl_merror: tep=%p, err=%d", (void *)tep, error));
5936 
5937 	/*
5938 	 * flush all messages on queue. we are shutting
5939 	 * the stream down on fatal error
5940 	 */
5941 	flushq(wq, FLUSHALL);
5942 	if (IS_COTS(tep)) {
5943 		/* connection oriented - unconnect endpoints */
5944 		tl_co_unconnect(tep);
5945 	}
5946 	if (mp->b_cont) {
5947 		freemsg(mp->b_cont);
5948 		mp->b_cont = NULL;
5949 	}
5950 
5951 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5952 		freemsg(mp);
5953 		mp = allocb(1, BPRI_HI);
5954 		if (!mp) {
5955 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5956 			    SL_TRACE|SL_ERROR,
5957 			    "tl_merror:M_PROTO: out of memory"));
5958 			return;
5959 		}
5960 	}
5961 	if (mp) {
5962 		DB_TYPE(mp) = M_ERROR;
5963 		mp->b_rptr = DB_BASE(mp);
5964 		*mp->b_rptr = (char)error;
5965 		mp->b_wptr = mp->b_rptr + sizeof (char);
5966 		qreply(wq, mp);
5967 	} else {
5968 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
5969 	}
5970 }
5971 
5972 static void
5973 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5974 {
5975 	ASSERT(cr != NULL);
5976 
5977 	if (flag & TL_SETCRED) {
5978 		struct opthdr *opt = (struct opthdr *)buf;
5979 		tl_credopt_t *tlcred;
5980 
5981 		opt->level = TL_PROT_LEVEL;
5982 		opt->name = TL_OPT_PEER_CRED;
5983 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5984 
5985 		tlcred = (tl_credopt_t *)(opt + 1);
5986 		tlcred->tc_uid = crgetuid(cr);
5987 		tlcred->tc_gid = crgetgid(cr);
5988 		tlcred->tc_ruid = crgetruid(cr);
5989 		tlcred->tc_rgid = crgetrgid(cr);
5990 		tlcred->tc_suid = crgetsuid(cr);
5991 		tlcred->tc_sgid = crgetsgid(cr);
5992 		tlcred->tc_ngroups = crgetngroups(cr);
5993 	} else if (flag & TL_SETUCRED) {
5994 		struct opthdr *opt = (struct opthdr *)buf;
5995 
5996 		opt->level = TL_PROT_LEVEL;
5997 		opt->name = TL_OPT_PEER_UCRED;
5998 		opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
5999 
6000 		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6001 	} else {
6002 		struct T_opthdr *topt = (struct T_opthdr *)buf;
6003 		ASSERT(flag & TL_SOCKUCRED);
6004 
6005 		topt->level = SOL_SOCKET;
6006 		topt->name = SCM_UCRED;
6007 		topt->len = ucredminsize(cr) + sizeof (*topt);
6008 		topt->status = 0;
6009 		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6010 	}
6011 }
6012 
6013 /* ARGSUSED */
6014 static int
6015 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6016 {
6017 	/* no default value processed in protocol specific code currently */
6018 	return (-1);
6019 }
6020 
6021 /* ARGSUSED */
6022 static int
6023 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6024 {
6025 	int len;
6026 	tl_endpt_t *tep;
6027 	int *valp;
6028 
6029 	tep = (tl_endpt_t *)wq->q_ptr;
6030 
6031 	len = 0;
6032 
6033 	/*
6034 	 * Assumes: option level and name sanity check done elsewhere
6035 	 */
6036 
6037 	switch (level) {
6038 	case SOL_SOCKET:
6039 		if (! IS_SOCKET(tep))
6040 			break;
6041 		switch (name) {
6042 		case SO_RECVUCRED:
6043 			len = sizeof (int);
6044 			valp = (int *)ptr;
6045 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6046 			break;
6047 		default:
6048 			break;
6049 		}
6050 		break;
6051 	case TL_PROT_LEVEL:
6052 		switch (name) {
6053 		case TL_OPT_PEER_CRED:
6054 		case TL_OPT_PEER_UCRED:
6055 			/*
6056 			 * option not supposed to retrieved directly
6057 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6058 			 * when some internal flags set by other options
6059 			 * Direct retrieval always designed to fail(ignored)
6060 			 * for this option.
6061 			 */
6062 			break;
6063 		}
6064 	}
6065 	return (len);
6066 }
6067 
6068 /* ARGSUSED */
6069 static int
6070 tl_set_opt(
6071 	queue_t		*wq,
6072 	uint_t		mgmt_flags,
6073 	int		level,
6074 	int		name,
6075 	uint_t		inlen,
6076 	uchar_t		*invalp,
6077 	uint_t		*outlenp,
6078 	uchar_t		*outvalp,
6079 	void		*thisdg_attrs,
6080 	cred_t		*cr)
6081 {
6082 	int error;
6083 	tl_endpt_t *tep;
6084 
6085 	tep = (tl_endpt_t *)wq->q_ptr;
6086 
6087 	error = 0;		/* NOERROR */
6088 
6089 	/*
6090 	 * Assumes: option level and name sanity checks done elsewhere
6091 	 */
6092 
6093 	switch (level) {
6094 	case SOL_SOCKET:
6095 		if (! IS_SOCKET(tep)) {
6096 			error = EINVAL;
6097 			break;
6098 		}
6099 		/*
6100 		 * TBD: fill in other AF_UNIX socket options and then stop
6101 		 * returning error.
6102 		 */
6103 		switch (name) {
6104 		case SO_RECVUCRED:
6105 			/*
6106 			 * We only support this for datagram sockets;
6107 			 * getpeerucred handles the connection oriented
6108 			 * transports.
6109 			 */
6110 			if (! IS_CLTS(tep)) {
6111 				error = EINVAL;
6112 				break;
6113 			}
6114 			if (*(int *)invalp == 0)
6115 				tep->te_flag &= ~TL_SOCKUCRED;
6116 			else
6117 				tep->te_flag |= TL_SOCKUCRED;
6118 			break;
6119 		default:
6120 			error = EINVAL;
6121 			break;
6122 		}
6123 		break;
6124 	case TL_PROT_LEVEL:
6125 		switch (name) {
6126 		case TL_OPT_PEER_CRED:
6127 		case TL_OPT_PEER_UCRED:
6128 			/*
6129 			 * option not supposed to be set directly
6130 			 * Its value in initialized for each endpoint at
6131 			 * driver open time.
6132 			 * Direct setting always designed to fail for this
6133 			 * option.
6134 			 */
6135 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6136 			    SL_TRACE|SL_ERROR,
6137 			    "tl_set_opt: option is not supported"));
6138 			error = EPROTO;
6139 			break;
6140 		}
6141 	}
6142 	return (error);
6143 }
6144 
6145 
6146 static void
6147 tl_timer(void *arg)
6148 {
6149 	queue_t *wq = arg;
6150 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6151 
6152 	ASSERT(tep);
6153 
6154 	tep->te_timoutid = 0;
6155 
6156 	enableok(wq);
6157 	/*
6158 	 * Note: can call wsrv directly here and save context switch
6159 	 * Consider change when qtimeout (not timeout) is active
6160 	 */
6161 	qenable(wq);
6162 }
6163 
6164 static void
6165 tl_buffer(void *arg)
6166 {
6167 	queue_t *wq = arg;
6168 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6169 
6170 	ASSERT(tep);
6171 
6172 	tep->te_bufcid = 0;
6173 	tep->te_nowsrv = B_FALSE;
6174 
6175 	enableok(wq);
6176 	/*
6177 	 *  Note: can call wsrv directly here and save context switch
6178 	 * Consider change when qbufcall (not bufcall) is active
6179 	 */
6180 	qenable(wq);
6181 }
6182 
6183 static void
6184 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6185 {
6186 	tl_endpt_t *tep;
6187 
6188 	tep = (tl_endpt_t *)wq->q_ptr;
6189 
6190 	if (tep->te_closing) {
6191 		freemsg(mp);
6192 		return;
6193 	}
6194 	noenable(wq);
6195 
6196 	(void) insq(wq, wq->q_first, mp);
6197 
6198 	if (tep->te_bufcid || tep->te_timoutid) {
6199 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6200 		    "tl_memrecover:recover %p pending", (void *)wq));
6201 		return;
6202 	}
6203 
6204 	if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6205 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6206 		    drv_usectohz(TL_BUFWAIT));
6207 	}
6208 }
6209 
6210 static void
6211 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6212 {
6213 	ASSERT(tip->ti_seqno != 0);
6214 
6215 	if (tip->ti_mp != NULL) {
6216 		tl_icon_freemsgs(&tip->ti_mp);
6217 		tip->ti_mp = NULL;
6218 	}
6219 	if (tip->ti_tep != NULL) {
6220 		tl_refrele(tip->ti_tep);
6221 		tip->ti_tep = NULL;
6222 	}
6223 	list_remove(&tep->te_iconp, tip);
6224 	kmem_free(tip, sizeof (tl_icon_t));
6225 	tep->te_nicon--;
6226 }
6227 
6228 /*
6229  * Remove address from address hash.
6230  */
6231 static void
6232 tl_addr_unbind(tl_endpt_t *tep)
6233 {
6234 	tl_endpt_t *elp;
6235 
6236 	if (tep->te_flag & TL_ADDRHASHED) {
6237 		if (IS_SOCKET(tep)) {
6238 			(void) mod_hash_remove(tep->te_addrhash,
6239 			    (mod_hash_key_t)tep->te_vp,
6240 			    (mod_hash_val_t *)&elp);
6241 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6242 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6243 		} else {
6244 			(void) mod_hash_remove(tep->te_addrhash,
6245 			    (mod_hash_key_t)&tep->te_ap,
6246 			    (mod_hash_val_t *)&elp);
6247 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6248 			tep->te_alen = -1;
6249 			tep->te_abuf = NULL;
6250 		}
6251 		tep->te_flag &= ~TL_ADDRHASHED;
6252 	}
6253 }
6254