xref: /illumos-gate/usr/src/uts/common/io/tl.c (revision dfc115332c94a2f62058ac7f2bce7631fbd20b3d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
27  * Copyright (c) 2012 by Delphix. All rights reserved.
28  * Copyright 2015 Joyent, Inc.
29  */
30 
31 /*
32  * Multithreaded STREAMS Local Transport Provider.
33  *
34  * OVERVIEW
35  * ========
36  *
37  * This driver provides TLI as well as socket semantics.  It provides
38  * connectionless, connection oriented, and connection oriented with orderly
39  * release transports for TLI and sockets. Each transport type has separate name
40  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
41  * this removes any name space conflicts when binding to socket style transport
42  * addresses.
43  *
44  * NOTE: There is one exception: Socket ticots and ticotsord transports share
45  * the same namespace. In fact, sockets always use ticotsord type transport.
46  *
47  * The driver mode is specified during open() by the minor number used for
48  * open.
49  *
50  *  The sockets in addition have the following semantic differences:
51  *  No support for passing up credentials (TL_SET[U]CRED).
52  *
53  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
54  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
55  *	T_OPTDATA_IND.
56  *
57  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
58  *	a T_CONN_RES is received from the acceptor. This means that a socket
59  *	connect will complete before the peer has called accept.
60  *
61  *
62  * MULTITHREADING
63  * ==============
64  *
65  * The driver does not use STREAMS protection mechanisms. Instead it uses a
66  * generic "serializer" abstraction. Most of the operations are executed behind
67  * the serializer and are, essentially single-threaded. All functions executed
68  * behind the same serializer are strictly serialized. So if one thread calls
69  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
70  * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
71  * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1)
72  * or bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the
73  * same time.
74  *
75  * Connectionless transports use a single serializer per transport type (one for
76  * TLI and one for sockets). Connection-oriented transports use finer-grained
77  * serializers.
78  *
79  * All COTS-type endpoints start their life with private serializers. During
80  * connection request processing the endpoint serializer is switched to the
81  * listener's serializer and the rest of T_CONN_REQ processing is done on the
82  * listener serializer. During T_CONN_RES processing the eager serializer is
83  * switched from listener to acceptor serializer and after that point all
84  * processing for eager and acceptor happens on this serializer. To avoid races
85  * with endpoint closes while its serializer may be changing closes are blocked
86  * while serializers are manipulated.
87  *
88  * References accounting
89  * ---------------------
90  *
91  * Endpoints are reference counted and freed when the last reference is
92  * dropped. Functions within the serializer may access an endpoint state even
93  * after an endpoint closed. The te_closing being set on the endpoint indicates
94  * that the endpoint entered its close routine.
95  *
96  * One reference is held for each opened endpoint instance. The reference
97  * counter is incremented when the endpoint is linked to another endpoint and
98  * decremented when the link disappears. It is also incremented when the
99  * endpoint is found by the hash table lookup. This increment is atomic with the
100  * lookup itself and happens while the hash table read lock is held.
101  *
102  * Close synchronization
103  * ---------------------
104  *
105  * During close the endpoint is marked as closing using te_closing flag. It is
106  * usually enough to check for te_closing flag since all other state changes
107  * happen after this flag is set and the close entered serializer. Immediately
108  * after setting te_closing flag tl_close() enters serializer and waits until
109  * the callback finishes. This allows all functions called within serializer to
110  * simply check te_closing without any locks.
111  *
112  * Serializer management.
113  * ---------------------
114  *
115  * For COTS transports serializers are created when the endpoint is constructed
116  * and destroyed when the endpoint is destructed. CLTS transports use global
117  * serializers - one for sockets and one for TLI.
118  *
119  * COTS serializers have separate reference counts to deal with several
120  * endpoints sharing the same serializer. There is a subtle problem related to
121  * the serializer destruction. The serializer should never be destroyed by any
122  * function executed inside serializer. This means that close has to wait till
123  * all serializer activity for this endpoint is finished before it can drop the
124  * last reference on the endpoint (which may as well free the serializer).  This
125  * is only relevant for COTS transports which manage serializers
126  * dynamically. For CLTS transports close may complete without waiting for all
127  * serializer activity to finish since serializer is only destroyed at driver
128  * detach time.
129  *
130  * COTS endpoints keep track of the number of outstanding requests on the
131  * serializer for the endpoint. The code handling accept() avoids changing
132  * client serializer if it has any pending messages on the serializer and
133  * instead moves acceptor to listener's serializer.
134  *
135  *
136  * Use of hash tables
137  * ------------------
138  *
139  * The driver uses modhash hash table implementation. Each transport uses two
140  * hash tables - one for finding endpoints by acceptor ID and another one for
141  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
142  * pair of hash tables since sockets only use TICOTSORD.
143  *
144  * All hash tables lookups increment a reference count for returned endpoints,
145  * so we may safely check the endpoint state even when the endpoint is removed
146  * from the hash by another thread immediately after it is found.
147  *
148  *
149  * CLOSE processing
150  * ================
151  *
152  * The driver enters serializer twice on close(). The close sequence is the
153  * following:
154  *
155  * 1) Wait until closing is safe (te_closewait becomes zero)
156  *	This step is needed to prevent close during serializer switches. In most
157  *	cases (close happening after connection establishment) te_closewait is
158  *	zero.
159  * 2) Set te_closing.
160  * 3) Call tl_close_ser() within serializer and wait for it to complete.
161  *
162  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
163  *	It also needs to clear write-side q_next pointers - this should be done
164  *	before qprocsoff().
165  *
166  *    This synchronous serializer entry during close is needed to ensure that
167  *    the queue is valid everywhere inside the serializer.
168  *
169  *    Note that in many cases close will execute tl_close_ser() synchronously,
170  *    so it will not wait at all.
171  *
172  * 4) Calls qprocsoff().
173  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
174  *	complete (for COTS transports). For CLTS transport there is no wait.
175  *
176  *	tl_close_finish_ser() Finishes the close process and wakes up waiting
177  *	close if there is any.
178  *
179  *    Note that in most cases close will enter tl_close_finish_ser()
180  *    synchronously and will not wait at all.
181  *
182  *
183  * Flow Control
184  * ============
185  *
186  * The driver implements both read and write side service routines. No one calls
187  * putq() on the read queue. The read side service routine tl_rsrv() is called
188  * when the read side stream is back-enabled. It enters serializer synchronously
189  * (waits till serializer processing is complete). Within serializer it
190  * back-enables all endpoints blocked by the queue for connection-less
191  * transports and enables write side service processing for the peer for
192  * connection-oriented transports.
193  *
194  * Read and write side service routines use special mblk_sized space in the
195  * endpoint structure to enter perimeter.
196  *
197  * Write-side flow control
198  * -----------------------
199  *
200  * Write side flow control is a bit tricky. The driver needs to deal with two
201  * message queues - the explicit STREAMS message queue maintained by
202  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
203  * queues should be synchronized to preserve message ordering and should
204  * maintain a single order determined by the order in which messages enter
205  * tl_wput(). In order to maintain the ordering between these two queues the
206  * STREAMS queue is only manipulated within the serializer, so the ordering is
207  * provided by the serializer.
208  *
209  * Functions called from the tl_wsrv() sometimes may call putbq(). To
210  * immediately stop any further processing of the STREAMS message queues the
211  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
212  * side service processing stops when the flag is set.
213  *
214  * The tl_wsrv() function enters serializer synchronously and waits for it to
215  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
216  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
217  * set. Note that the maximum amount of messages processed by tl_wsrv_ser() is
218  * always bounded by the amount of messages on the STREAMS queue at the time
219  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
220  * queue from another serialized entry which can't happen in parallel. This
221  * guarantees that tl_wsrv_ser() is complete in bounded time (there is no risk
222  * of it draining forever while writer places new messages on the STREAMS
223  * queue).
224  *
225  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
226  *
227  *
228  * Unix Domain Sockets
229  * ===================
230  *
231  * The driver knows the structure of Unix Domain sockets addresses and treats
232  * them differently from generic TLI addresses. For sockets implicit binds are
233  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
234  * instead of using address length of zero. Explicit binds specify
235  * SOU_MAGIC_EXPLICIT as magic.
236  *
237  * For implicit binds we always use minor number as soua_vp part of the address
238  * and avoid any hash table lookups. This saves two hash tables lookups per
239  * anonymous bind.
240  *
241  * For explicit address we hash the vnode pointer instead of hashing the
242  * full-scale address+zone+length. Hashing by pointer is more efficient than
243  * hashing by the full address.
244  *
245  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
246  * tep structure, so it should be never freed.
247  *
248  * Also for sockets the driver always uses minor number as acceptor id.
249  *
250  * TPI VIOLATIONS
251  * --------------
252  *
253  * This driver violates TPI in several respects for Unix Domain Sockets:
254  *
255  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
256  *	is requested and the endpoint is already in use. There is no point in
257  *	generating an unused address since this address will be rejected by
258  *	sockfs anyway. For implicit binds it always generates a new address
259  *	(sets soua_vp to its minor number).
260  *
261  * 2) It always uses minor number as acceptor ID and never uses queue
262  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
263  *	message and they do not use the queue pointer.
264  *
265  * 3) For Listener sockets the usual sequence is bind() with zero backlog
266  *	followed by listen(). The listen() should be issued with non-zero
267  *	backlog, so sotpi_listen() issues unbind request followed by bind
268  *	request to the same address but with a non-zero qlen value. Both
269  *	tl_bind() and tl_unbind() require write lock on the hash table to
270  *	insert/remove the address. The driver does not remove the address from
271  *	the hash for endpoints that are bound to the explicit address and have
272  *	backlog of zero. During T_BIND_REQ processing if the address requested
273  *	is equal to the address the endpoint already has it updates the backlog
274  *	without reinserting the address in the hash table. This optimization
275  *	avoids two hash table updates for each listener created. It also
276  *	avoids the problem of a "stolen" address when another listener may use
277  *	the same address between the unbind and bind and suddenly listen() fails
278  *	because address is in use even though the bind() succeeded.
279  *
280  *
281  * CONNECTIONLESS TRANSPORTS
282  * =========================
283  *
284  * Connectionless transports all share the same serializer (one for TLI and one
285  * for Sockets). Functions executing behind serializer can check or modify state
286  * of any endpoint.
287  *
288  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
289  * te_lastep field. The next time X talks to some address A it checks whether A
290  * is the same as Y's address and if it is there is no need to lookup Y. If the
291  * address is different or the state of Y is not appropriate (e.g. closed or not
292  * idle) X does a lookup using tl_find_peer() and caches the new address.
293  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
294  * on the endpoint found.
295  *
296  * During close of endpoint Y it doesn't try to remove itself from other
297  * endpoints caches. They will detect that Y is gone and will search the peer
298  * endpoint again.
299  *
300  * Flow Control Handling.
301  * ----------------------
302  *
303  * Each connectionless endpoint keeps a list of endpoints which are
304  * flow-controlled by its queue. It also keeps a pointer to the queue which
305  * flow-controls itself.  Whenever flow control releases for endpoint X it
306  * enables all queues from the list. During close it also back-enables everyone
307  * in the list. If X is flow-controlled when it is closing it removes itself
308  * from the peer's list.
309  *
310  * DATA STRUCTURES
311  * ===============
312  *
313  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
314  * endpoint state. For connection-oriented transports it keeps a list
315  * of pending connections (tl_icon_t). For connectionless transports it keeps a
316  * list of endpoints flow controlled by this one.
317  *
318  * Each transport type is represented by a per-transport data structure
319  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
320  * endpoint address hash tables for each transport. It also contains pointer to
321  * transport serializer for connectionless transports.
322  *
323  * Each endpoint keeps a link to its transport structure, so the code can find
324  * all per-transport information quickly.
325  */
326 
327 #include	<sys/types.h>
328 #include	<sys/inttypes.h>
329 #include	<sys/stream.h>
330 #include	<sys/stropts.h>
331 #define	_SUN_TPI_VERSION 2
332 #include	<sys/tihdr.h>
333 #include	<sys/strlog.h>
334 #include	<sys/debug.h>
335 #include	<sys/cred.h>
336 #include	<sys/errno.h>
337 #include	<sys/kmem.h>
338 #include	<sys/id_space.h>
339 #include	<sys/modhash.h>
340 #include	<sys/mkdev.h>
341 #include	<sys/tl.h>
342 #include	<sys/stat.h>
343 #include	<sys/conf.h>
344 #include	<sys/modctl.h>
345 #include	<sys/strsun.h>
346 #include	<sys/socket.h>
347 #include	<sys/socketvar.h>
348 #include	<sys/sysmacros.h>
349 #include	<sys/xti_xtiopt.h>
350 #include	<sys/ddi.h>
351 #include	<sys/sunddi.h>
352 #include	<sys/zone.h>
353 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
354 #include	<inet/optcom.h>
355 #include	<sys/strsubr.h>
356 #include	<sys/ucred.h>
357 #include	<sys/suntpi.h>
358 #include	<sys/list.h>
359 #include	<sys/serializer.h>
360 
361 /*
362  * TBD List
363  * 14 Eliminate state changes through table
364  * 16. AF_UNIX socket options
365  * 17. connect() for ticlts
366  * 18. support for "netstat" to show AF_UNIX plus TLI local
367  *	transport connections
368  * 21. sanity check to flushing on sending M_ERROR
369  */
370 
371 /*
372  * CONSTANT DECLARATIONS
373  * --------------------
374  */
375 
376 /*
377  * Local declarations
378  */
379 #define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]
380 
381 #define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
382 #define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
383 #define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgz" is unlimited (0) */
384 /*
385  * Hash tables size.
386  */
387 #define	TL_HASH_SIZE 311
388 
389 /*
390  * Definitions for module_info
391  */
392 #define		TL_ID		(104)		/* module ID number */
393 #define		TL_NAME		"tl"		/* module name */
394 #define		TL_MINPSZ	(0)		/* min packet size */
395 #define		TL_MAXPSZ	INFPSZ 		/* max packet size ZZZ */
396 #define		TL_HIWAT	(16*1024)	/* hi water mark */
397 #define		TL_LOWAT	(256)		/* lo water mark */
398 /*
399  * Definition of minor numbers/modes for new transport provider modes.
400  * We view the socket use as a separate mode to get a separate name space.
401  */
402 #define		TL_TICOTS	0	/* connection oriented transport */
403 #define		TL_TICOTSORD 	1	/* COTS w/ orderly release */
404 #define		TL_TICLTS 	2	/* connectionless transport */
405 #define		TL_UNUSED	3
406 #define		TL_SOCKET	4	/* Socket */
407 #define		TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
408 #define		TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
409 #define		TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)
410 
411 #define		TL_MINOR_MASK	0x7
412 #define		TL_MINOR_START	(TL_TICLTS + 1)
413 
414 /*
415  * LOCAL MACROS
416  */
417 #define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
418 
419 /*
420  * EXTERNAL VARIABLE DECLARATIONS
421  * -----------------------------
422  */
423 /*
424  * state table defined in the OS space.c
425  */
426 extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
427 
428 /*
429  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
430  */
431 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
432 static int tl_close(queue_t *, int, cred_t *);
433 static void tl_wput(queue_t *, mblk_t *);
434 static void tl_wsrv(queue_t *);
435 static void tl_rsrv(queue_t *);
436 
437 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
438 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
439 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
440 
441 
442 /*
443  * GLOBAL DATA STRUCTURES AND VARIABLES
444  * -----------------------------------
445  */
446 
447 /*
448  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
449  * For now, we only manage the SO_RECVUCRED option but we also have
450  * harmless dummy options to make things work with some common code we access.
451  */
452 opdes_t	tl_opt_arr[] = {
453 	/* The SO_TYPE is needed for the hack below */
454 	{
455 		SO_TYPE,
456 		SOL_SOCKET,
457 		OA_R,
458 		OA_R,
459 		OP_NP,
460 		0,
461 		sizeof (t_scalar_t),
462 		0
463 	},
464 	{
465 		SO_RECVUCRED,
466 		SOL_SOCKET,
467 		OA_RW,
468 		OA_RW,
469 		OP_NP,
470 		0,
471 		sizeof (int),
472 		0
473 	}
474 };
475 
476 /*
477  * Table of all supported levels
478  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
479  * any supported options so we need this info separately.
480  *
481  * This is needed only for topmost tpi providers.
482  */
483 optlevel_t	tl_valid_levels_arr[] = {
484 	XTI_GENERIC,
485 	SOL_SOCKET,
486 	TL_PROT_LEVEL
487 };
488 
489 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
490 /*
491  * Current upper bound on the amount of space needed to return all options.
492  * Additional options with data size of sizeof(long) are handled automatically.
493  * Others need to be handled by hand.
494  */
/*
 * Sum of: 4 bytes per tl_opt_arr entry, one struct opthdr per entry,
 * a fixed 64 bytes, and one struct T_optmgmt_ack.
 */
495 #define	TL_MAX_OPT_BUF_LEN						\
496 		((A_CNT(tl_opt_arr) << 2) +				\
497 		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
498 		64 + sizeof (struct T_optmgmt_ack))
499 
500 #define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)
501 
502 /*
503  *	transport addr structure
504  */
505 typedef struct tl_addr {
506 	zoneid_t	ta_zoneid;		/* Zone scope of address */
507 	t_scalar_t	ta_alen;		/* length of abuf */
508 	void		*ta_abuf;		/* the addr itself */
509 } tl_addr_t;
510 
511 /*
512  * Refcounted version of serializer.
513  */
514 typedef struct tl_serializer {
515 	uint_t		ts_refcnt;	/* refs; endpoints may share one */
516 	serializer_t	*ts_serializer;	/* the serializer being counted */
517 } tl_serializer_t;
518 
519 /*
520  * Each transport type has a separate state.
521  * Per-transport state.
522  */
523 typedef struct tl_transport_state {
524 	char		*tr_name;	/* transport name, e.g. "ticots" */
525 	minor_t		tr_minor;	/* TL_* mode value of the transport */
526 	uint32_t	tr_defaddr;	/* starts as TL_DFADDR; use not shown */
527 	mod_hash_t	*tr_ai_hash;	/* endpoints by acceptor ID */
528 	mod_hash_t	*tr_addr_hash;	/* endpoints by address */
529 	tl_serializer_t	*tr_serializer;	/* for connectionless transports */
530 } tl_transport_state_t;
531 
532 #define	TL_DFADDR 0x1000
533 
/*
 * Per-transport state, one row per transport mode.  Rows are listed in
 * TL_* mode-value order (0 .. 6), so each row's tr_minor equals its index.
 * Every row spells out all six members: name, mode, default address, then
 * the two hash tables and the serializer, which all start out NULL.
 */
534 static tl_transport_state_t tl_transports[] = {
535 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
536 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
537 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
538 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
539 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
540 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
541 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
542 };
543 
544 #define	TL_MAXTRANSPORT A_CNT(tl_transports)
545 
546 struct tl_endpt;
547 typedef struct tl_endpt tl_endpt_t;
548 
549 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
550 
551 /*
552  * Data structure used to represent pending connects.
553  * Records enough information so that the connecting peer can close
554  * before the connection gets accepted.
555  */
556 typedef struct tl_icon {
557 	list_node_t	ti_node;	/* list linkage (presumably te_iconp) */
558 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
559 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
560 	t_scalar_t	ti_seqno;	/* Sequence number */
561 } tl_icon_t;
562 
563 typedef struct so_ux_addr soux_addr_t;
564 #define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)
565 
566 /*
567  * Maximum number of unaccepted connection indications allowed per listener.
568  */
569 #define	TL_MAXQLEN	4096
570 int tl_maxqlen = TL_MAXQLEN;
571 
572 /*
573  *	transport endpoint structure
574  */
575 struct tl_endpt {
576 	queue_t		*te_rq;		/* stream read queue */
577 	queue_t		*te_wq;		/* stream write queue */
578 	uint32_t	te_refcnt;	/* references; freed on last release */
579 	int32_t 	te_state;	/* TPI state of endpoint */
580 	minor_t		te_minor;	/* minor number */
581 #define	te_seqno	te_minor	/* seqno is an alias of the minor */
582 	uint_t		te_flag;	/* flag field */
583 	boolean_t	te_nowsrv;	/* set by TL_PUTBQ to stop wsrv */
584 	tl_serializer_t	*te_ser;	/* Serializer to use */
585 #define	te_serializer	te_ser->ts_serializer
586 
587 	soux_addr_t	te_uxaddr;	/* Socket address */
588 #define	te_magic	te_uxaddr.soua_magic
589 #define	te_vp		te_uxaddr.soua_vp
590 	tl_addr_t	te_ap;		/* addr bound to this endpt */
591 #define	te_zoneid te_ap.ta_zoneid
592 #define	te_alen	te_ap.ta_alen
593 #define	te_abuf	te_ap.ta_abuf
594 
595 	tl_transport_state_t *te_transport;	/* per-transport state */
596 #define	te_addrhash	te_transport->tr_addr_hash
597 #define	te_aihash	te_transport->tr_ai_hash
598 #define	te_defaddr	te_transport->tr_defaddr
599 	cred_t		*te_credp;	/* endpoint user credentials */
600 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
601 
602 	/*
603 	 * State specific for connection-oriented and connectionless transports.
604 	 * The two structs share storage, so only the set matching the
605 	 * endpoint's transport type is meaningful.
606 	 */
605 	union {
606 		/* Connection-oriented state. */
607 		struct {
608 			t_uscalar_t _te_nicon;	/* count of conn requests */
609 			t_uscalar_t _te_qlen;	/* max conn requests */
610 			tl_endpt_t  *_te_oconp;	/* conn request pending */
611 			tl_endpt_t  *_te_conp;	/* connected endpt */
612 #ifndef _ILP32
613 			void	    *_te_pad;
614 #endif
615 			list_t	_te_iconp;	/* list of conn ind. pending */
616 		} _te_cots_state;
617 		/* Connection-less state. */
618 		struct {
619 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
620 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
621 			list_node_t _te_flows;	/* lists of connections */
622 			list_t  _te_flowlist;	/* Who flowcontrols on me */
623 		} _te_clts_state;
624 	} _te_transport_state;
625 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
626 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
627 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
628 #define	te_conp		_te_transport_state._te_cots_state._te_conp
629 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
630 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
631 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
632 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
633 #define	te_flows	_te_transport_state._te_clts_state._te_flows
634 
635 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
636 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
637 	pid_t		te_cpid;	/* cached pid of endpoint */
638 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
639 	/*
640 	 * Pieces of the endpoint state needed for closing.
641 	 */
642 	kmutex_t	te_closelock;	/* presumably guards close state */
643 	kcondvar_t	te_closecv;	/* presumably signals close waiters */
644 	uint8_t		te_closing;	/* The endpoint started closing */
645 	uint8_t		te_closewait;	/* Wait in close until zero */
646 	mblk_t		te_closemp;	/* for entering serializer on close */
647 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
648 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
649 	kmutex_t	te_srv_lock;	/* presumably guards *_active below */
650 	kcondvar_t	te_srv_cv;	/* paired with te_srv_lock - confirm */
651 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
652 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
653 	/*
654 	 * Pieces of the endpoint state needed for serializer transitions.
655 	 */
656 	kmutex_t	te_ser_lock;	/* Protects the count below */
657 	uint_t		te_ser_count;	/* Number of messages on serializer */
658 };
659 
660 /*
661  * Flag values. Lower 4 bits specify the transport used.
662  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
663  * they allow to identify the endpoint more easily.
664  */
665 #define	TL_LISTENER	0x00010	/* the listener endpoint */
666 #define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
667 #define	TL_EAGER	0x00040	/* connecting endpoint */
668 #define	TL_ACCEPTED	0x00080	/* accepted connection */
669 #define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
670 #define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
671 #define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
672 #define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
673 #define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
674 /*
675  * Boolean checks for the endpoint type.
676  */
677 #define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
678 #define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
679 #define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
680 #define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
681 
682 /*
683  * Certain operations are always used together. These macros reduce the chance
684  * of missing a part of a combination.
685  */
/*
 * TL_UNCONNECT calls tl_refrele() on x and then sets x to NULL;
 * TL_REMOVE_PEER does the same only when x is non-NULL.
 * NOTE(review): all macros in this group expand to a bare brace block
 * (not do/while (0)) and name their argument more than once, so pass a
 * side-effect-free lvalue and do not use them as the unbraced body of an
 * if that has an else.
 */
686 #define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
687 #define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
688 
/*
 * TL_PUTBQ puts mp back on the endpoint's write queue and sets te_nowsrv.
 * It asserts that TL_CLOSE_SER is not set in te_flag.
 */
689 #define	TL_PUTBQ(x, mp) {		\
690 	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
691 	(x)->te_nowsrv = B_TRUE;	\
692 	(void) putbq((x)->te_wq, mp);	\
693 }
694 
/*
 * TL_QENABLE and TL_PUTQ both clear te_nowsrv first, then respectively
 * qenable() the write queue or putq() mp onto it.
 */
695 #define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
696 #define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
697 
698 /*
699  * STREAMS driver glue data structures.
700  */
701 static	struct	module_info	tl_minfo = {
702 	TL_ID,			/* mi_idnum */
703 	TL_NAME,		/* mi_idname */
704 	TL_MINPSZ,		/* mi_minpsz */
705 	TL_MAXPSZ,		/* mi_maxpsz */
706 	TL_HIWAT,		/* mi_hiwat */
707 	TL_LOWAT		/* mi_lowat */
708 };
709 
710 static	struct	qinit	tl_rinit = {
711 	NULL,			/* qi_putp */
712 	(int (*)())tl_rsrv,	/* qi_srvp */
713 	tl_open,		/* qi_qopen */
714 	tl_close,		/* qi_qclose */
715 	NULL,			/* qi_qadmin */
716 	&tl_minfo,		/* qi_minfo */
717 	NULL			/* qi_mstat */
718 };
719 
720 static	struct	qinit	tl_winit = {
721 	(int (*)())tl_wput,	/* qi_putp */
722 	(int (*)())tl_wsrv,	/* qi_srvp */
723 	NULL,			/* qi_qopen */
724 	NULL,			/* qi_qclose */
725 	NULL,			/* qi_qadmin */
726 	&tl_minfo,		/* qi_minfo */
727 	NULL			/* qi_mstat */
728 };
729 
730 static	struct streamtab	tlinfo = {
731 	&tl_rinit,		/* st_rdinit */
732 	&tl_winit,		/* st_wrinit */
733 	NULL,			/* st_muxrinit */
734 	NULL			/* st_muxwrinit */
735 };
736 
737 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
738     nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
739 
740 static struct modldrv modldrv = {
741 	&mod_driverops,		/* Type of module -- pseudo driver here */
742 	"TPI Local Transport (tl)",
743 	&tl_devops,		/* driver ops */
744 };
745 
746 /*
747  * Module linkage information for the kernel.
748  */
749 static struct modlinkage modlinkage = {
750 	MODREV_1,
751 	&modldrv,
752 	NULL
753 };
754 
755 /*
756  * Templates for response to info request
757  * Check sanity of unlimited connect data etc.
758  */
759 
760 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
761 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
762 
763 static struct T_info_ack tl_cots_info_ack =
764 	{
765 		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
766 		T_INFINITE,	/* TSDU size */
767 		T_INFINITE,	/* ETSDU size */
768 		T_INFINITE,	/* CDATA_size */
769 		T_INFINITE,	/* DDATA_size */
770 		T_INFINITE,	/* ADDR_size  */
771 		T_INFINITE,	/* OPT_size */
772 		0,		/* TIDU_size - fill at run time */
773 		T_COTS,		/* SERV_type */
774 		-1,		/* CURRENT_state */
775 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
776 	};
777 
778 static struct T_info_ack tl_clts_info_ack =
779 	{
780 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
781 		0,		/* TSDU_size - fill at run time */
782 		-2,		/* ETSDU_size -2 => not supported */
783 		-2,		/* CDATA_size -2 => not supported */
784 		-2,		/* DDATA_size  -2 => not supported */
785 		-1,		/* ADDR_size -1 => infinite */
786 		-1,		/* OPT_size */
787 		0,		/* TIDU_size - fill at run time */
788 		T_CLTS,		/* SERV_type */
789 		-1,		/* CURRENT_state */
790 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
791 	};
792 
/*
 * Private copy of the devinfo pointer.  Set by tl_attach() and returned by
 * tl_info() for DDI_INFO_DEVT2DEVINFO requests.
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.  Created in tl_attach(); tl_endpt_t structures are
 * allocated from it in tl_open() and returned to it in tl_free().
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.  Each open allocates a unique minor from it and
 * tl_free() gives the minor back.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.  Computed in tl_attach() from "strmsgsz", falling
 * back to TL_TIDUSZ when "strmsgsz" is 0.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
/*
 * NOTE(review): not referenced in this part of the file; from its position
 * it looks like another debug/test variable -- confirm at its point of use.
 */
static int tl_client_closing_when_accepting;

/*
 * NOTE(review): not referenced in this part of the file; presumably a
 * tunable or counter related to the serializer -- confirm at its point of
 * use.
 */
static int tl_serializer_noswitch;
825 
/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 *
 * Forward declarations for the file-local functions.  Routines whose names
 * end in "_ser" take (mblk_t *, tl_endpt_t *), matching the argument order
 * used when they are posted through tl_serializer_enter().
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
	t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t  *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);
904 
/*
 * Initialize option database object for TL.
 *
 * Bundles the driver's default/get/set option callbacks together with the
 * option table and the table of valid option levels.
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};
918 
919 /*
920  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
921  * ---------------------------------------
922  */
923 
924 /*
925  * Loadable module routines
926  */
927 int
928 _init(void)
929 {
930 	return (mod_install(&modlinkage));
931 }
932 
933 int
934 _fini(void)
935 {
936 	return (mod_remove(&modlinkage));
937 }
938 
939 int
940 _info(struct modinfo *modinfop)
941 {
942 	return (mod_info(&modlinkage, modinfop));
943 }
944 
945 /*
946  * Driver Entry Points and Other routines
947  */
948 static int
949 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
950 {
951 	int i;
952 	char name[32];
953 
954 	/*
955 	 * Resume from a checkpoint state.
956 	 */
957 	if (cmd == DDI_RESUME)
958 		return (DDI_SUCCESS);
959 
960 	if (cmd != DDI_ATTACH)
961 		return (DDI_FAILURE);
962 
963 	/*
964 	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
965 	 * streams message sizes can be unlimited. We use a defined constant
966 	 * instead.
967 	 */
968 	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
969 
970 	/*
971 	 * Create subdevices for each transport.
972 	 */
973 	for (i = 0; i < TL_UNUSED; i++) {
974 		if (ddi_create_minor_node(devi,
975 		    tl_transports[i].tr_name,
976 		    S_IFCHR, tl_transports[i].tr_minor,
977 		    DDI_PSEUDO, NULL) == DDI_FAILURE) {
978 			ddi_remove_minor_node(devi, NULL);
979 			return (DDI_FAILURE);
980 		}
981 	}
982 
983 	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
984 	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
985 
986 	if (tl_cache == NULL) {
987 		ddi_remove_minor_node(devi, NULL);
988 		return (DDI_FAILURE);
989 	}
990 
991 	tl_minors = id_space_create("tl_minor_space",
992 	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
993 
994 	/*
995 	 * Create ID space for minor numbers
996 	 */
997 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
998 		tl_transport_state_t *t = &tl_transports[i];
999 
1000 		if (i == TL_UNUSED)
1001 			continue;
1002 
1003 		/* Socket COTSORD shares namespace with COTS */
1004 		if (i == TL_SOCK_COTSORD) {
1005 			t->tr_ai_hash =
1006 			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
1007 			ASSERT(t->tr_ai_hash != NULL);
1008 			t->tr_addr_hash =
1009 			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
1010 			ASSERT(t->tr_addr_hash != NULL);
1011 			continue;
1012 		}
1013 
1014 		/*
1015 		 * Create hash tables.
1016 		 */
1017 		(void) snprintf(name, sizeof (name), "%s_ai_hash",
1018 		    t->tr_name);
1019 #ifdef _ILP32
1020 		if (i & TL_SOCKET)
1021 			t->tr_ai_hash =
1022 			    mod_hash_create_idhash(name, tl_hash_size - 1,
1023 			    mod_hash_null_valdtor);
1024 		else
1025 			t->tr_ai_hash =
1026 			    mod_hash_create_ptrhash(name, tl_hash_size,
1027 			    mod_hash_null_valdtor, sizeof (queue_t));
1028 #else
1029 		t->tr_ai_hash =
1030 		    mod_hash_create_idhash(name, tl_hash_size - 1,
1031 		    mod_hash_null_valdtor);
1032 #endif /* _ILP32 */
1033 
1034 		if (i & TL_SOCKET) {
1035 			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1036 			    t->tr_name);
1037 			t->tr_addr_hash = mod_hash_create_ptrhash(name,
1038 			    tl_hash_size, mod_hash_null_valdtor,
1039 			    sizeof (uintptr_t));
1040 		} else {
1041 			(void) snprintf(name, sizeof (name), "%s_addr_hash",
1042 			    t->tr_name);
1043 			t->tr_addr_hash = mod_hash_create_extended(name,
1044 			    tl_hash_size, mod_hash_null_keydtor,
1045 			    mod_hash_null_valdtor,
1046 			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1047 		}
1048 
1049 		/* Create serializer for connectionless transports. */
1050 		if (i & TL_TICLTS)
1051 			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1052 	}
1053 
1054 	tl_dip = devi;
1055 
1056 	return (DDI_SUCCESS);
1057 }
1058 
1059 static int
1060 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1061 {
1062 	int i;
1063 
1064 	if (cmd == DDI_SUSPEND)
1065 		return (DDI_SUCCESS);
1066 
1067 	if (cmd != DDI_DETACH)
1068 		return (DDI_FAILURE);
1069 
1070 	/*
1071 	 * Destroy arenas and hash tables.
1072 	 */
1073 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1074 		tl_transport_state_t *t = &tl_transports[i];
1075 
1076 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1077 			continue;
1078 
1079 		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1080 		if (t->tr_serializer != NULL) {
1081 			tl_serializer_refrele(t->tr_serializer);
1082 			t->tr_serializer = NULL;
1083 		}
1084 
1085 #ifdef _ILP32
1086 		if (i & TL_SOCKET)
1087 			mod_hash_destroy_idhash(t->tr_ai_hash);
1088 		else
1089 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1090 #else
1091 		mod_hash_destroy_idhash(t->tr_ai_hash);
1092 #endif /* _ILP32 */
1093 		t->tr_ai_hash = NULL;
1094 		if (i & TL_SOCKET)
1095 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1096 		else
1097 			mod_hash_destroy_hash(t->tr_addr_hash);
1098 		t->tr_addr_hash = NULL;
1099 	}
1100 
1101 	kmem_cache_destroy(tl_cache);
1102 	tl_cache = NULL;
1103 	id_space_destroy(tl_minors);
1104 	tl_minors = NULL;
1105 	ddi_remove_minor_node(devi, NULL);
1106 	return (DDI_SUCCESS);
1107 }
1108 
1109 /* ARGSUSED */
1110 static int
1111 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1112 {
1113 
1114 	int retcode = DDI_FAILURE;
1115 
1116 	switch (infocmd) {
1117 
1118 	case DDI_INFO_DEVT2DEVINFO:
1119 		if (tl_dip != NULL) {
1120 			*result = (void *)tl_dip;
1121 			retcode = DDI_SUCCESS;
1122 		}
1123 		break;
1124 
1125 	case DDI_INFO_DEVT2INSTANCE:
1126 		*result = (void *)0;
1127 		retcode = DDI_SUCCESS;
1128 		break;
1129 
1130 	default:
1131 		break;
1132 	}
1133 	return (retcode);
1134 }
1135 
1136 /*
1137  * Endpoint reference management.
1138  */
/*
 * Take a reference on the endpoint by atomically incrementing te_refcnt.
 * Each hold is expected to be balanced by a tl_refrele().
 */
static void
tl_refhold(tl_endpt_t *tep)
{
	atomic_inc_32(&tep->te_refcnt);
}
1144 
1145 static void
1146 tl_refrele(tl_endpt_t *tep)
1147 {
1148 	ASSERT(tep->te_refcnt != 0);
1149 
1150 	if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
1151 		tl_free(tep);
1152 }
1153 
1154 /*ARGSUSED*/
1155 static int
1156 tl_constructor(void *buf, void *cdrarg, int kmflags)
1157 {
1158 	tl_endpt_t *tep = buf;
1159 
1160 	bzero(tep, sizeof (tl_endpt_t));
1161 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1162 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1163 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1164 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1165 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1166 
1167 	return (0);
1168 }
1169 
1170 /*ARGSUSED*/
1171 static void
1172 tl_destructor(void *buf, void *cdrarg)
1173 {
1174 	tl_endpt_t *tep = buf;
1175 
1176 	mutex_destroy(&tep->te_closelock);
1177 	cv_destroy(&tep->te_closecv);
1178 	mutex_destroy(&tep->te_srv_lock);
1179 	cv_destroy(&tep->te_srv_cv);
1180 	mutex_destroy(&tep->te_ser_lock);
1181 }
1182 
1183 static void
1184 tl_free(tl_endpt_t *tep)
1185 {
1186 	ASSERT(tep->te_refcnt == 0);
1187 	ASSERT(tep->te_transport != NULL);
1188 	ASSERT(tep->te_rq == NULL);
1189 	ASSERT(tep->te_wq == NULL);
1190 	ASSERT(tep->te_ser != NULL);
1191 	ASSERT(tep->te_ser_count == 0);
1192 	ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1193 
1194 	if (IS_SOCKET(tep)) {
1195 		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1196 		ASSERT(tep->te_abuf == &tep->te_uxaddr);
1197 		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1198 		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1199 	} else if (tep->te_abuf != NULL) {
1200 		kmem_free(tep->te_abuf, tep->te_alen);
1201 		tep->te_alen = -1; /* uninitialized */
1202 		tep->te_abuf = NULL;
1203 	} else {
1204 		ASSERT(tep->te_alen == -1);
1205 	}
1206 
1207 	id_free(tl_minors, tep->te_minor);
1208 	ASSERT(tep->te_credp == NULL);
1209 
1210 	if (tep->te_hash_hndl != NULL)
1211 		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1212 
1213 	if (IS_COTS(tep)) {
1214 		TL_REMOVE_PEER(tep->te_conp);
1215 		TL_REMOVE_PEER(tep->te_oconp);
1216 		tl_serializer_refrele(tep->te_ser);
1217 		tep->te_ser = NULL;
1218 		ASSERT(tep->te_nicon == 0);
1219 		ASSERT(list_head(&tep->te_iconp) == NULL);
1220 	} else {
1221 		ASSERT(tep->te_lastep == NULL);
1222 		ASSERT(list_head(&tep->te_flowlist) == NULL);
1223 		ASSERT(tep->te_flowq == NULL);
1224 	}
1225 
1226 	ASSERT(tep->te_bufcid == 0);
1227 	ASSERT(tep->te_timoutid == 0);
1228 	bzero(&tep->te_ap, sizeof (tep->te_ap));
1229 	tep->te_acceptor_id = 0;
1230 
1231 	ASSERT(tep->te_closewait == 0);
1232 	ASSERT(!tep->te_rsrv_active);
1233 	ASSERT(!tep->te_wsrv_active);
1234 	tep->te_closing = 0;
1235 	tep->te_nowsrv = B_FALSE;
1236 	tep->te_flag = 0;
1237 
1238 	kmem_cache_free(tl_cache, tep);
1239 }
1240 
1241 /*
1242  * Allocate/free reference-counted wrappers for serializers.
1243  */
1244 static tl_serializer_t *
1245 tl_serializer_alloc(int flags)
1246 {
1247 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1248 	serializer_t *ser;
1249 
1250 	if (s == NULL)
1251 		return (NULL);
1252 
1253 	ser = serializer_create(flags);
1254 
1255 	if (ser == NULL) {
1256 		kmem_free(s, sizeof (tl_serializer_t));
1257 		return (NULL);
1258 	}
1259 
1260 	s->ts_refcnt = 1;
1261 	s->ts_serializer = ser;
1262 	return (s);
1263 }
1264 
/*
 * Take a reference on a serializer wrapper by atomically incrementing
 * ts_refcnt.
 */
static void
tl_serializer_refhold(tl_serializer_t *s)
{
	atomic_inc_32(&s->ts_refcnt);
}
1270 
1271 static void
1272 tl_serializer_refrele(tl_serializer_t *s)
1273 {
1274 	if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
1275 		serializer_destroy(s->ts_serializer);
1276 		kmem_free(s, sizeof (tl_serializer_t));
1277 	}
1278 }
1279 
1280 /*
1281  * Post a request on the endpoint serializer. For COTS transports keep track of
1282  * the number of pending requests.
1283  */
1284 static void
1285 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1286 {
1287 	if (IS_COTS(tep)) {
1288 		mutex_enter(&tep->te_ser_lock);
1289 		tep->te_ser_count++;
1290 		mutex_exit(&tep->te_ser_lock);
1291 	}
1292 	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1293 }
1294 
1295 /*
1296  * Complete processing the request on the serializer. Decrement the counter for
1297  * pending requests for COTS transports.
1298  */
1299 static void
1300 tl_serializer_exit(tl_endpt_t *tep)
1301 {
1302 	if (IS_COTS(tep)) {
1303 		mutex_enter(&tep->te_ser_lock);
1304 		ASSERT(tep->te_ser_count != 0);
1305 		tep->te_ser_count--;
1306 		mutex_exit(&tep->te_ser_lock);
1307 	}
1308 }
1309 
1310 /*
1311  * Hash management functions.
1312  */
1313 
1314 /*
1315  * Return TRUE if two addresses are equal, false otherwise.
1316  */
1317 static boolean_t
1318 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1319 {
1320 	return ((ap1->ta_alen > 0) &&
1321 	    (ap1->ta_alen == ap2->ta_alen) &&
1322 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1323 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1324 }
1325 
/*
 * This function is called whenever an endpoint is found in the hash table.
 * It takes a reference on the endpoint stored as the hash value; the key is
 * not used.
 */
/* ARGSUSED0 */
static void
tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
{
	tl_refhold((tl_endpt_t *)val);
}
1335 
1336 /*
1337  * Address hash function.
1338  */
1339 /* ARGSUSED */
1340 static uint_t
1341 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1342 {
1343 	tl_addr_t *ap = (tl_addr_t *)key;
1344 	size_t	len = ap->ta_alen;
1345 	uchar_t *p = ap->ta_abuf;
1346 	uint_t i, g;
1347 
1348 	ASSERT((len > 0) && (p != NULL));
1349 
1350 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1351 		i = (i << 4) + (*p);
1352 		if ((g = (i & 0xf0000000U)) != 0) {
1353 			i ^= (g >> 24);
1354 			i ^= g;
1355 		}
1356 	}
1357 	return (i);
1358 }
1359 
1360 /*
1361  * This function is used by hash lookups. It compares two generic addresses.
1362  */
1363 static int
1364 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1365 {
1366 #ifdef 	DEBUG
1367 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1368 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1369 
1370 	ASSERT(key1 != NULL);
1371 	ASSERT(key2 != NULL);
1372 
1373 	ASSERT(ap1->ta_abuf != NULL);
1374 	ASSERT(ap2->ta_abuf != NULL);
1375 	ASSERT(ap1->ta_alen > 0);
1376 	ASSERT(ap2->ta_alen > 0);
1377 #endif
1378 
1379 	return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1380 }
1381 
1382 /*
1383  * Prevent endpoint from closing if possible.
1384  * Return B_TRUE on success, B_FALSE on failure.
1385  */
1386 static boolean_t
1387 tl_noclose(tl_endpt_t *tep)
1388 {
1389 	boolean_t rc = B_FALSE;
1390 
1391 	mutex_enter(&tep->te_closelock);
1392 	if (! tep->te_closing) {
1393 		ASSERT(tep->te_closewait == 0);
1394 		tep->te_closewait++;
1395 		rc = B_TRUE;
1396 	}
1397 	mutex_exit(&tep->te_closelock);
1398 	return (rc);
1399 }
1400 
/*
 * Allow endpoint to close if needed.
 *
 * Drops te_closewait (which must be exactly 1) under te_closelock and
 * signals te_closecv so that a thread waiting in tl_close() can proceed.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	/* Unlocked sanity check; re-checked precisely under the lock below. */
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	/* Wake up the waiter while still holding the lock. */
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}
1414 
1415 /*
1416  * STREAMS open entry point.
1417  */
1418 /* ARGSUSED */
1419 static int
1420 tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
1421 {
1422 	tl_endpt_t *tep;
1423 	minor_t	    minor = getminor(*devp);
1424 
1425 	/*
1426 	 * Driver is called directly. Both CLONEOPEN and MODOPEN
1427 	 * are illegal
1428 	 */
1429 	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1430 		return (ENXIO);
1431 
1432 	if (rq->q_ptr != NULL)
1433 		return (0);
1434 
1435 	/* Minor number should specify the mode used for the driver. */
1436 	if ((minor >= TL_UNUSED))
1437 		return (ENXIO);
1438 
1439 	if (oflag & SO_SOCKSTR) {
1440 		minor |= TL_SOCKET;
1441 	}
1442 
1443 	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1444 	tep->te_refcnt = 1;
1445 	tep->te_cpid = curproc->p_pid;
1446 	rq->q_ptr = WR(rq)->q_ptr = tep;
1447 	tep->te_state = TS_UNBND;
1448 	tep->te_credp = credp;
1449 	crhold(credp);
1450 	tep->te_zoneid = getzoneid();
1451 
1452 	tep->te_flag = minor & TL_MINOR_MASK;
1453 	tep->te_transport = &tl_transports[minor];
1454 
1455 	/* Allocate a unique minor number for this instance. */
1456 	tep->te_minor = (minor_t)id_alloc(tl_minors);
1457 
1458 	/* Reserve hash handle for bind(). */
1459 	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1460 
1461 	/* Transport-specific initialization */
1462 	if (IS_COTS(tep)) {
1463 		/* Use private serializer */
1464 		tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1465 
1466 		/* Create list for pending connections */
1467 		list_create(&tep->te_iconp, sizeof (tl_icon_t),
1468 		    offsetof(tl_icon_t, ti_node));
1469 		tep->te_qlen = 0;
1470 		tep->te_nicon = 0;
1471 		tep->te_oconp = NULL;
1472 		tep->te_conp = NULL;
1473 	} else {
1474 		/* Use shared serializer */
1475 		tep->te_ser = tep->te_transport->tr_serializer;
1476 		bzero(&tep->te_flows, sizeof (list_node_t));
1477 		/* Create list for flow control */
1478 		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1479 		    offsetof(tl_endpt_t, te_flows));
1480 		tep->te_flowq = NULL;
1481 		tep->te_lastep = NULL;
1482 
1483 	}
1484 
1485 	/* Initialize endpoint address */
1486 	if (IS_SOCKET(tep)) {
1487 		/* Socket-specific address handling. */
1488 		tep->te_alen = TL_SOUX_ADDRLEN;
1489 		tep->te_abuf = &tep->te_uxaddr;
1490 		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1491 		tep->te_magic = SOU_MAGIC_IMPLICIT;
1492 	} else {
1493 		tep->te_alen = -1;
1494 		tep->te_abuf = NULL;
1495 	}
1496 
1497 	/* clone the driver */
1498 	*devp = makedevice(getmajor(*devp), tep->te_minor);
1499 
1500 	tep->te_rq = rq;
1501 	tep->te_wq = WR(rq);
1502 
1503 #ifdef	_ILP32
1504 	if (IS_SOCKET(tep))
1505 		tep->te_acceptor_id = tep->te_minor;
1506 	else
1507 		tep->te_acceptor_id = (t_uscalar_t)rq;
1508 #else
1509 	tep->te_acceptor_id = tep->te_minor;
1510 #endif	/* _ILP32 */
1511 
1512 
1513 	qprocson(rq);
1514 
1515 	/*
1516 	 * Insert acceptor ID in the hash. The AI hash always sleeps on
1517 	 * insertion so insertion can't fail.
1518 	 */
1519 	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1520 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1521 	    (mod_hash_val_t)tep);
1522 
1523 	return (0);
1524 }
1525 
/*
 * STREAMS close entry point.
 *
 * Close runs in several steps:
 *  1. Remove the endpoint from the acceptor ID hash.
 *  2. Wait until nobody holds the endpoint open via tl_noclose(), then mark
 *     it as closing.
 *  3. Run tl_close_ser() behind the serializer and wait for it.
 *  4. Turn off queue processing and cancel a pending bufcall/timeout.
 *  5. Run tl_close_finish_ser() behind the serializer; COTS endpoints wait
 *     for it, CLTS endpoints let it complete asynchronously while holding
 *     an extra reference.
 *  6. Release the credentials and drop the reference taken at open time.
 *
 * Always returns 0.
 */
/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag,	cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		/* Non-DEBUG kernels only log the inconsistency. */
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	/* Cancel any outstanding bufcall and timeout for this queue. */
	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close processing
	 * with serializer protection. This processing may happen asynchronously
	 * with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	/* Release the credentials held since tl_open(). */
	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}
1631 
/*
 * First phase of close processing done behind the serializer.
 *
 * Marks the endpoint with TL_CLOSE_SER, drains the write queue where the
 * transport requires it, removes the address from the hash table, detaches
 * the queues and wakes up tl_close().
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	/* The read queue must not be used through tep from here on. */
	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}
1677 
/*
 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
 * the reference for CLTS.
 *
 * Called from serializer. Should drop reference count for CLTS only.
 *
 * COTS endpoints are disconnected via tl_co_unconnect(); CLTS endpoints
 * drop their cached peer, back-enable flow-controlled senders and leave the
 * flow list they are queued on.
 */
/* ARGSUSED0 */
static void
tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
	IMPLY(IS_COTS(tep), tep->te_closewait == 1);

	tep->te_state = -1;	/* Uninitialized */
	if (IS_COTS(tep)) {
		tl_co_unconnect(tep);
	} else {
		/* Connectionless specific cleanup */
		TL_REMOVE_PEER(tep->te_lastep);
		/*
		 * Backenable anybody that is flow controlled waiting for
		 * this endpoint.
		 */
		tl_cl_backenable(tep);
		if (tep->te_flowq != NULL) {
			list_remove(&(tep->te_flowq->te_flowlist), tep);
			tep->te_flowq = NULL;
		}
	}

	/*
	 * Leave the serializer before the final step: once tl_closeok() or
	 * tl_refrele() has run, tep may be freed and must not be used.
	 */
	tl_serializer_exit(tep);
	if (IS_COTS(tep))
		tl_closeok(tep);
	else
		tl_refrele(tep);
}
1715 
1716 /*
1717  * STREAMS write-side put procedure.
1718  * Enter serializer for most of the processing.
1719  *
1720  * The T_CONN_REQ is processed outside of serializer.
1721  */
1722 static void
1723 tl_wput(queue_t *wq, mblk_t *mp)
1724 {
1725 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
1726 	ssize_t			msz = MBLKL(mp);
1727 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
1728 	tlproc_t		*tl_proc = NULL;
1729 
1730 	switch (DB_TYPE(mp)) {
1731 	case M_DATA:
1732 		/* Only valid for connection-oriented transports */
1733 		if (IS_CLTS(tep)) {
1734 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1735 			    SL_TRACE|SL_ERROR,
1736 			    "tl_wput:M_DATA invalid for ticlts driver"));
1737 			tl_merror(wq, mp, EPROTO);
1738 			return;
1739 		}
1740 		tl_proc = tl_wput_data_ser;
1741 		break;
1742 
1743 	case M_IOCTL:
1744 		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1745 		case TL_IOC_CREDOPT:
1746 			/* FALLTHROUGH */
1747 		case TL_IOC_UCREDOPT:
1748 			/*
1749 			 * Serialize endpoint state change.
1750 			 */
1751 			tl_proc = tl_do_ioctl_ser;
1752 			break;
1753 
1754 		default:
1755 			miocnak(wq, mp, 0, EINVAL);
1756 			return;
1757 		}
1758 		break;
1759 
1760 	case M_FLUSH:
1761 		/*
1762 		 * do canonical M_FLUSH processing
1763 		 */
1764 		if (*mp->b_rptr & FLUSHW) {
1765 			flushq(wq, FLUSHALL);
1766 			*mp->b_rptr &= ~FLUSHW;
1767 		}
1768 		if (*mp->b_rptr & FLUSHR) {
1769 			flushq(RD(wq), FLUSHALL);
1770 			qreply(wq, mp);
1771 		} else {
1772 			freemsg(mp);
1773 		}
1774 		return;
1775 
1776 	case M_PROTO:
1777 		if (msz < sizeof (prim->type)) {
1778 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1779 			    SL_TRACE|SL_ERROR,
1780 			    "tl_wput:M_PROTO data too short"));
1781 			tl_merror(wq, mp, EPROTO);
1782 			return;
1783 		}
1784 		switch (prim->type) {
1785 		case T_OPTMGMT_REQ:
1786 		case T_SVR4_OPTMGMT_REQ:
1787 			/*
1788 			 * Process TPI option management requests immediately
1789 			 * in put procedure regardless of in-order processing
1790 			 * of already queued messages.
1791 			 * (Note: This driver supports AF_UNIX socket
1792 			 * implementation.  Unless we implement this processing,
1793 			 * setsockopt() on socket endpoint will block on flow
1794 			 * controlled endpoints which it should not. That is
1795 			 * required for successful execution of VSU socket tests
1796 			 * and is consistent with BSD socket behavior).
1797 			 */
1798 			tl_optmgmt(wq, mp);
1799 			return;
1800 		case O_T_BIND_REQ:
1801 		case T_BIND_REQ:
1802 			tl_proc = tl_bind_ser;
1803 			break;
1804 		case T_CONN_REQ:
1805 			if (IS_CLTS(tep)) {
1806 				tl_merror(wq, mp, EPROTO);
1807 				return;
1808 			}
1809 			tl_conn_req(wq, mp);
1810 			return;
1811 		case T_DATA_REQ:
1812 		case T_OPTDATA_REQ:
1813 		case T_EXDATA_REQ:
1814 		case T_ORDREL_REQ:
1815 			tl_proc = tl_putq_ser;
1816 			break;
1817 		case T_UNITDATA_REQ:
1818 			if (IS_COTS(tep) ||
1819 			    (msz < sizeof (struct T_unitdata_req))) {
1820 				tl_merror(wq, mp, EPROTO);
1821 				return;
1822 			}
1823 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1824 				tl_proc = tl_unitdata_ser;
1825 			} else {
1826 				tl_proc = tl_putq_ser;
1827 			}
1828 			break;
1829 		default:
1830 			/*
1831 			 * process in service procedure if message already
1832 			 * queued (maintain in-order processing)
1833 			 */
1834 			if (wq->q_first != NULL) {
1835 				tl_proc = tl_putq_ser;
1836 			} else {
1837 				tl_proc = tl_wput_ser;
1838 			}
1839 			break;
1840 		}
1841 		break;
1842 
1843 	case M_PCPROTO:
1844 		/*
1845 		 * Check that the message has enough data to figure out TPI
1846 		 * primitive.
1847 		 */
1848 		if (msz < sizeof (prim->type)) {
1849 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1850 			    SL_TRACE|SL_ERROR,
1851 			    "tl_wput:M_PCROTO data too short"));
1852 			tl_merror(wq, mp, EPROTO);
1853 			return;
1854 		}
1855 		switch (prim->type) {
1856 		case T_CAPABILITY_REQ:
1857 			tl_capability_req(mp, tep);
1858 			return;
1859 		case T_INFO_REQ:
1860 			tl_proc = tl_info_req_ser;
1861 			break;
1862 		case T_ADDR_REQ:
1863 			tl_proc = tl_addr_req_ser;
1864 			break;
1865 
1866 		default:
1867 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1868 			    SL_TRACE|SL_ERROR,
1869 			    "tl_wput:unknown TPI msg primitive"));
1870 			tl_merror(wq, mp, EPROTO);
1871 			return;
1872 		}
1873 		break;
1874 	default:
1875 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1876 		    "tl_wput:default:unexpected Streams message"));
1877 		freemsg(mp);
1878 		return;
1879 	}
1880 
1881 	/*
1882 	 * Continue processing via serializer.
1883 	 */
1884 	ASSERT(tl_proc != NULL);
1885 	tl_refhold(tep);
1886 	tl_serializer_enter(tep, tl_proc, mp);
1887 }
1888 
1889 /*
1890  * Place message on the queue while preserving order.
1891  */
1892 static void
1893 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1894 {
1895 	if (tep->te_closing) {
1896 		tl_wput_ser(mp, tep);
1897 	} else {
1898 		TL_PUTQ(tep, mp);
1899 		tl_serializer_exit(tep);
1900 		tl_refrele(tep);
1901 	}
1902 
1903 }
1904 
1905 static void
1906 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1907 {
1908 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1909 
1910 	switch (DB_TYPE(mp)) {
1911 	case M_DATA:
1912 		tl_data(mp, tep);
1913 		break;
1914 	case M_PROTO:
1915 		tl_do_proto(mp, tep);
1916 		break;
1917 	default:
1918 		freemsg(mp);
1919 		break;
1920 	}
1921 }
1922 
1923 /*
1924  * Write side put procedure called from serializer.
1925  */
1926 static void
1927 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1928 {
1929 	tl_wput_common_ser(mp, tep);
1930 	tl_serializer_exit(tep);
1931 	tl_refrele(tep);
1932 }
1933 
1934 /*
1935  * M_DATA processing. Called from serializer.
1936  */
1937 static void
1938 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1939 {
1940 	tl_endpt_t	*peer_tep = tep->te_conp;
1941 	queue_t		*peer_rq;
1942 
1943 	ASSERT(DB_TYPE(mp) == M_DATA);
1944 	ASSERT(IS_COTS(tep));
1945 
1946 	IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1947 
1948 	/*
1949 	 * fastpath for data. Ignore flow control if tep is closing.
1950 	 */
1951 	if ((peer_tep != NULL) &&
1952 	    !peer_tep->te_closing &&
1953 	    ((tep->te_state == TS_DATA_XFER) ||
1954 	    (tep->te_state == TS_WREQ_ORDREL)) &&
1955 	    (tep->te_wq != NULL) &&
1956 	    (tep->te_wq->q_first == NULL) &&
1957 	    ((peer_tep->te_state == TS_DATA_XFER) ||
1958 	    (peer_tep->te_state == TS_WREQ_ORDREL))	&&
1959 	    ((peer_rq = peer_tep->te_rq) != NULL) &&
1960 	    (canputnext(peer_rq) || tep->te_closing)) {
1961 		putnext(peer_rq, mp);
1962 	} else if (tep->te_closing) {
1963 		/*
1964 		 * It is possible that by the time we got here tep started to
1965 		 * close. If the write queue is not empty, and the state is
1966 		 * TS_DATA_XFER the data should be delivered in order, so we
1967 		 * call putq() instead of freeing the data.
1968 		 */
1969 		if ((tep->te_wq != NULL) &&
1970 		    ((tep->te_state == TS_DATA_XFER) ||
1971 		    (tep->te_state == TS_WREQ_ORDREL))) {
1972 			TL_PUTQ(tep, mp);
1973 		} else {
1974 			freemsg(mp);
1975 		}
1976 	} else {
1977 		TL_PUTQ(tep, mp);
1978 	}
1979 
1980 	tl_serializer_exit(tep);
1981 	tl_refrele(tep);
1982 }
1983 
1984 /*
1985  * Write side service routine.
1986  *
1987  * All actual processing happens within serializer which is entered
1988  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1989  * messages that need processing may have arrived, so tl_wsrv repeats until
1990  * queue is empty or te_nowsrv is set.
1991  */
1992 static void
1993 tl_wsrv(queue_t *wq)
1994 {
1995 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1996 
1997 	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1998 		mutex_enter(&tep->te_srv_lock);
1999 		ASSERT(tep->te_wsrv_active == B_FALSE);
2000 		tep->te_wsrv_active = B_TRUE;
2001 		mutex_exit(&tep->te_srv_lock);
2002 
2003 		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2004 
2005 		/*
2006 		 * Wait for serializer job to complete.
2007 		 */
2008 		mutex_enter(&tep->te_srv_lock);
2009 		while (tep->te_wsrv_active) {
2010 			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2011 		}
2012 		cv_signal(&tep->te_srv_cv);
2013 		mutex_exit(&tep->te_srv_lock);
2014 	}
2015 }
2016 
2017 /*
2018  * Serialized write side processing of the STREAMS queue.
2019  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2020  * is NULL.
2021  */
2022 static void
2023 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2024 {
2025 	mblk_t *mp;
2026 	queue_t *wq = tep->te_wq;
2027 
2028 	ASSERT(wq != NULL);
2029 	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2030 		tl_wput_common_ser(mp, tep);
2031 	}
2032 
2033 	/*
2034 	 * Wakeup service routine unless called from close.
2035 	 * If ser_mp is specified, the caller is tl_wsrv().
2036 	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2037 	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2038 	 * be no matching tl_serializer_exit() in this case.
2039 	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2040 	 * waiting on te_srv_cv.
2041 	 */
2042 	if (ser_mp != NULL) {
2043 		/*
2044 		 * We are called from tl_wsrv.
2045 		 */
2046 		mutex_enter(&tep->te_srv_lock);
2047 		ASSERT(tep->te_wsrv_active);
2048 		tep->te_wsrv_active = B_FALSE;
2049 		cv_signal(&tep->te_srv_cv);
2050 		mutex_exit(&tep->te_srv_lock);
2051 		tl_serializer_exit(tep);
2052 	}
2053 }
2054 
2055 /*
2056  * Called when the stream is backenabled. Enter serializer and qenable everyone
2057  * flow controlled by tep.
2058  *
2059  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2060  * is possible that two instances of tl_rsrv will be running reusing the same
2061  * rsrv mblk.
2062  */
2063 static void
2064 tl_rsrv(queue_t *rq)
2065 {
2066 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2067 
2068 	ASSERT(rq->q_first == NULL);
2069 	ASSERT(tep->te_rsrv_active == 0);
2070 
2071 	tep->te_rsrv_active = B_TRUE;
2072 	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2073 	/*
2074 	 * Wait for serializer job to complete.
2075 	 */
2076 	mutex_enter(&tep->te_srv_lock);
2077 	while (tep->te_rsrv_active) {
2078 		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2079 	}
2080 	cv_signal(&tep->te_srv_cv);
2081 	mutex_exit(&tep->te_srv_lock);
2082 }
2083 
2084 /* ARGSUSED */
2085 static void
2086 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2087 {
2088 	tl_endpt_t *peer_tep;
2089 
2090 	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2091 		tl_cl_backenable(tep);
2092 	} else if (
2093 	    IS_COTS(tep) &&
2094 	    ((peer_tep = tep->te_conp) != NULL) &&
2095 	    !peer_tep->te_closing &&
2096 	    ((tep->te_state == TS_DATA_XFER) ||
2097 	    (tep->te_state == TS_WIND_ORDREL)||
2098 	    (tep->te_state == TS_WREQ_ORDREL))) {
2099 		TL_QENABLE(peer_tep);
2100 	}
2101 
2102 	/*
2103 	 * Wakeup read side service routine.
2104 	 */
2105 	mutex_enter(&tep->te_srv_lock);
2106 	ASSERT(tep->te_rsrv_active);
2107 	tep->te_rsrv_active = B_FALSE;
2108 	cv_signal(&tep->te_srv_cv);
2109 	mutex_exit(&tep->te_srv_lock);
2110 	tl_serializer_exit(tep);
2111 }
2112 
2113 /*
2114  * process M_PROTO messages. Always called from serializer.
2115  */
2116 static void
2117 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2118 {
2119 	ssize_t			msz = MBLKL(mp);
2120 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2121 
2122 	/* Message size was validated by tl_wput(). */
2123 	ASSERT(msz >= sizeof (prim->type));
2124 
2125 	switch (prim->type) {
2126 	case T_UNBIND_REQ:
2127 		tl_unbind(mp, tep);
2128 		break;
2129 
2130 	case T_ADDR_REQ:
2131 		tl_addr_req(mp, tep);
2132 		break;
2133 
2134 	case O_T_CONN_RES:
2135 	case T_CONN_RES:
2136 		if (IS_CLTS(tep)) {
2137 			tl_merror(tep->te_wq, mp, EPROTO);
2138 			break;
2139 		}
2140 		tl_conn_res(mp, tep);
2141 		break;
2142 
2143 	case T_DISCON_REQ:
2144 		if (IS_CLTS(tep)) {
2145 			tl_merror(tep->te_wq, mp, EPROTO);
2146 			break;
2147 		}
2148 		tl_discon_req(mp, tep);
2149 		break;
2150 
2151 	case T_DATA_REQ:
2152 		if (IS_CLTS(tep)) {
2153 			tl_merror(tep->te_wq, mp, EPROTO);
2154 			break;
2155 		}
2156 		tl_data(mp, tep);
2157 		break;
2158 
2159 	case T_OPTDATA_REQ:
2160 		if (IS_CLTS(tep)) {
2161 			tl_merror(tep->te_wq, mp, EPROTO);
2162 			break;
2163 		}
2164 		tl_data(mp, tep);
2165 		break;
2166 
2167 	case T_EXDATA_REQ:
2168 		if (IS_CLTS(tep)) {
2169 			tl_merror(tep->te_wq, mp, EPROTO);
2170 			break;
2171 		}
2172 		tl_exdata(mp, tep);
2173 		break;
2174 
2175 	case T_ORDREL_REQ:
2176 		if (! IS_COTSORD(tep)) {
2177 			tl_merror(tep->te_wq, mp, EPROTO);
2178 			break;
2179 		}
2180 		tl_ordrel(mp, tep);
2181 		break;
2182 
2183 	case T_UNITDATA_REQ:
2184 		if (IS_COTS(tep)) {
2185 			tl_merror(tep->te_wq, mp, EPROTO);
2186 			break;
2187 		}
2188 		tl_unitdata(mp, tep);
2189 		break;
2190 
2191 	default:
2192 		tl_merror(tep->te_wq, mp, EPROTO);
2193 		break;
2194 	}
2195 }
2196 
2197 /*
2198  * Process ioctl from serializer.
2199  * This is a wrapper around tl_do_ioctl().
2200  */
2201 static void
2202 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2203 {
2204 	if (! tep->te_closing)
2205 		tl_do_ioctl(mp, tep);
2206 	else
2207 		freemsg(mp);
2208 
2209 	tl_serializer_exit(tep);
2210 	tl_refrele(tep);
2211 }
2212 
2213 static void
2214 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2215 {
2216 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2217 	int cmd = iocbp->ioc_cmd;
2218 	queue_t *wq = tep->te_wq;
2219 	int error;
2220 	int thisopt, otheropt;
2221 
2222 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2223 
2224 	switch (cmd) {
2225 	case TL_IOC_CREDOPT:
2226 		if (cmd == TL_IOC_CREDOPT) {
2227 			thisopt = TL_SETCRED;
2228 			otheropt = TL_SETUCRED;
2229 		} else {
2230 			/* FALLTHROUGH */
2231 	case TL_IOC_UCREDOPT:
2232 			thisopt = TL_SETUCRED;
2233 			otheropt = TL_SETCRED;
2234 		}
2235 		/*
2236 		 * The credentials passing does not apply to sockets.
2237 		 * Only one of the cred options can be set at a given time.
2238 		 */
2239 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2240 			miocnak(wq, mp, 0, EINVAL);
2241 			return;
2242 		}
2243 
2244 		/*
2245 		 * Turn on generation of credential options for
2246 		 * T_conn_req, T_conn_con, T_unidata_ind.
2247 		 */
2248 		error = miocpullup(mp, sizeof (uint32_t));
2249 		if (error != 0) {
2250 			miocnak(wq, mp, 0, error);
2251 			return;
2252 		}
2253 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2254 			miocnak(wq, mp, 0, EINVAL);
2255 			return;
2256 		}
2257 
2258 		if (*(uint32_t *)mp->b_cont->b_rptr)
2259 			tep->te_flag |= thisopt;
2260 		else
2261 			tep->te_flag &= ~thisopt;
2262 
2263 		miocack(wq, mp, 0, 0);
2264 		break;
2265 
2266 	default:
2267 		/* Should not be here */
2268 		miocnak(wq, mp, 0, EINVAL);
2269 		break;
2270 	}
2271 }
2272 
2273 
2274 /*
2275  * send T_ERROR_ACK
2276  * Note: assumes enough memory or caller passed big enough mp
2277  *	- no recovery from allocb failures
2278  */
2279 
2280 static void
2281 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2282     t_scalar_t unix_err, t_scalar_t type)
2283 {
2284 	struct T_error_ack *err_ack;
2285 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2286 	    M_PCPROTO, T_ERROR_ACK);
2287 
2288 	if (ackmp == NULL) {
2289 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2290 		    "tl_error_ack:out of mblk memory"));
2291 		tl_merror(wq, NULL, ENOSR);
2292 		return;
2293 	}
2294 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2295 	err_ack->ERROR_prim = type;
2296 	err_ack->TLI_error = tli_err;
2297 	err_ack->UNIX_error = unix_err;
2298 
2299 	/*
2300 	 * send error ack message
2301 	 */
2302 	qreply(wq, ackmp);
2303 }
2304 
2305 
2306 
2307 /*
2308  * send T_OK_ACK
2309  * Note: assumes enough memory or caller passed big enough mp
2310  *	- no recovery from allocb failures
2311  */
2312 static void
2313 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2314 {
2315 	struct T_ok_ack *ok_ack;
2316 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2317 	    M_PCPROTO, T_OK_ACK);
2318 
2319 	if (ackmp == NULL) {
2320 		tl_merror(wq, NULL, ENOMEM);
2321 		return;
2322 	}
2323 
2324 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2325 	ok_ack->CORRECT_prim = type;
2326 
2327 	(void) qreply(wq, ackmp);
2328 }
2329 
2330 /*
2331  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2332  * This is a wrapper around tl_bind().
2333  */
2334 static void
2335 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2336 {
2337 	if (! tep->te_closing)
2338 		tl_bind(mp, tep);
2339 	else
2340 		freemsg(mp);
2341 
2342 	tl_serializer_exit(tep);
2343 	tl_refrele(tep);
2344 }
2345 
2346 /*
2347  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2348  * Assumes that the endpoint is in the unbound.
2349  */
2350 static void
2351 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2352 {
2353 	queue_t			*wq = tep->te_wq;
2354 	struct T_bind_ack	*b_ack;
2355 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2356 	mblk_t			*ackmp, *bamp;
2357 	soux_addr_t		ux_addr;
2358 	t_uscalar_t		qlen = 0;
2359 	t_scalar_t		alen, aoff;
2360 	tl_addr_t		addr_req;
2361 	void			*addr_startp;
2362 	ssize_t			msz = MBLKL(mp), basize;
2363 	t_scalar_t		tli_err = 0, unix_err = 0;
2364 	t_scalar_t		save_prim_type = bind->PRIM_type;
2365 	t_scalar_t		save_state = tep->te_state;
2366 
2367 	if (tep->te_state != TS_UNBND) {
2368 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2369 		    SL_TRACE|SL_ERROR,
2370 		    "tl_wput:bind_request:out of state, state=%d",
2371 		    tep->te_state));
2372 		tli_err = TOUTSTATE;
2373 		goto error;
2374 	}
2375 
2376 	if (msz < sizeof (struct T_bind_req)) {
2377 		tli_err = TSYSERR; unix_err = EINVAL;
2378 		goto error;
2379 	}
2380 
2381 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2382 
2383 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2384 	    (bind->PRIM_type == T_BIND_REQ));
2385 
2386 	alen = bind->ADDR_length;
2387 	aoff = bind->ADDR_offset;
2388 
2389 	/* negotiate max conn req pending */
2390 	if (IS_COTS(tep)) {
2391 		qlen = bind->CONIND_number;
2392 		if (qlen > tl_maxqlen)
2393 			qlen = tl_maxqlen;
2394 	}
2395 
2396 	/*
2397 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2398 	 * and bound again.
2399 	 */
2400 	if ((tep->te_hash_hndl == NULL) &&
2401 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2402 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2403 	    &tep->te_hash_hndl) != 0) {
2404 		tli_err = TSYSERR; unix_err = ENOSR;
2405 		goto error;
2406 	}
2407 
2408 	/*
2409 	 * Verify address correctness.
2410 	 */
2411 	if (IS_SOCKET(tep)) {
2412 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2413 
2414 		if ((alen != TL_SOUX_ADDRLEN) ||
2415 		    (aoff < 0) ||
2416 		    (aoff + alen > msz)) {
2417 			(void) (STRLOG(TL_ID, tep->te_minor,
2418 			    1, SL_TRACE|SL_ERROR,
2419 			    "tl_bind: invalid socket addr"));
2420 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2421 			tli_err = TSYSERR; unix_err = EINVAL;
2422 			goto error;
2423 		}
2424 		/* Copy address from message to local buffer. */
2425 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2426 		/*
2427 		 * Check that we got correct address from sockets
2428 		 */
2429 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2430 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2431 			(void) (STRLOG(TL_ID, tep->te_minor,
2432 			    1, SL_TRACE|SL_ERROR,
2433 			    "tl_bind: invalid socket magic"));
2434 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2435 			tli_err = TSYSERR; unix_err = EINVAL;
2436 			goto error;
2437 		}
2438 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2439 		    (ux_addr.soua_vp != NULL)) {
2440 			(void) (STRLOG(TL_ID, tep->te_minor,
2441 			    1, SL_TRACE|SL_ERROR,
2442 			    "tl_bind: implicit addr non-empty"));
2443 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2444 			tli_err = TSYSERR; unix_err = EINVAL;
2445 			goto error;
2446 		}
2447 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2448 		    (ux_addr.soua_vp == NULL)) {
2449 			(void) (STRLOG(TL_ID, tep->te_minor,
2450 			    1, SL_TRACE|SL_ERROR,
2451 			    "tl_bind: explicit addr empty"));
2452 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2453 			tli_err = TSYSERR; unix_err = EINVAL;
2454 			goto error;
2455 		}
2456 	} else {
2457 		if ((alen > 0) && ((aoff < 0) ||
2458 		    ((ssize_t)(aoff + alen) > msz) ||
2459 		    ((aoff + alen) < 0))) {
2460 			(void) (STRLOG(TL_ID, tep->te_minor,
2461 			    1, SL_TRACE|SL_ERROR,
2462 			    "tl_bind: invalid message"));
2463 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2464 			tli_err = TSYSERR; unix_err = EINVAL;
2465 			goto error;
2466 		}
2467 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2468 			(void) (STRLOG(TL_ID, tep->te_minor,
2469 			    1, SL_TRACE|SL_ERROR,
2470 			    "tl_bind: bad addr in  message"));
2471 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2472 			tli_err = TBADADDR;
2473 			goto error;
2474 		}
2475 #ifdef DEBUG
2476 		/*
2477 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2478 		 * if (! assertion)
2479 		 *	log warning;
2480 		 */
2481 		if (! ((alen == 0 && aoff == 0) ||
2482 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2483 			(void) (STRLOG(TL_ID, tep->te_minor,
2484 				    3, SL_TRACE|SL_ERROR,
2485 				    "tl_bind: addr overlaps TPI message"));
2486 		}
2487 #endif
2488 	}
2489 
2490 	/*
2491 	 * Bind the address provided or allocate one if requested.
2492 	 * Allow rebinds with a new qlen value.
2493 	 */
2494 	if (IS_SOCKET(tep)) {
2495 		/*
2496 		 * For anonymous requests the te_ap is already set up properly
2497 		 * so use minor number as an address.
2498 		 * For explicit requests need to check whether the address is
2499 		 * already in use.
2500 		 */
2501 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2502 			int rc;
2503 
2504 			if (tep->te_flag & TL_ADDRHASHED) {
2505 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2506 				if (tep->te_vp == ux_addr.soua_vp)
2507 					goto skip_addr_bind;
2508 				else /* Rebind to a new address. */
2509 					tl_addr_unbind(tep);
2510 			}
2511 			/*
2512 			 * Insert address in the hash if it is not already
2513 			 * there.  Since we use preallocated handle, the insert
2514 			 * can fail only if the key is already present.
2515 			 */
2516 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2517 			    (mod_hash_key_t)ux_addr.soua_vp,
2518 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2519 
2520 			if (rc != 0) {
2521 				ASSERT(rc == MH_ERR_DUPLICATE);
2522 				/*
2523 				 * Violate O_T_BIND_REQ semantics and fail with
2524 				 * TADDRBUSY - sockets will not use any address
2525 				 * other than supplied one for explicit binds.
2526 				 */
2527 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2528 				    SL_TRACE|SL_ERROR,
2529 				    "tl_bind:requested addr %p is busy",
2530 				    ux_addr.soua_vp));
2531 				tli_err = TADDRBUSY; unix_err = 0;
2532 				goto error;
2533 			}
2534 			tep->te_uxaddr = ux_addr;
2535 			tep->te_flag |= TL_ADDRHASHED;
2536 			tep->te_hash_hndl = NULL;
2537 		}
2538 	} else if (alen == 0) {
2539 		/*
2540 		 * assign any free address
2541 		 */
2542 		if (! tl_get_any_addr(tep, NULL)) {
2543 			(void) (STRLOG(TL_ID, tep->te_minor,
2544 			    1, SL_TRACE|SL_ERROR,
2545 			    "tl_bind:failed to get buffer for any "
2546 			    "address"));
2547 			tli_err = TSYSERR; unix_err = ENOSR;
2548 			goto error;
2549 		}
2550 	} else {
2551 		addr_req.ta_alen = alen;
2552 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2553 		addr_req.ta_zoneid = tep->te_zoneid;
2554 
2555 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2556 		if (tep->te_abuf == NULL) {
2557 			tli_err = TSYSERR; unix_err = ENOSR;
2558 			goto error;
2559 		}
2560 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2561 		tep->te_alen = alen;
2562 
2563 		if (mod_hash_insert_reserve(tep->te_addrhash,
2564 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2565 		    tep->te_hash_hndl) != 0) {
2566 			if (save_prim_type == T_BIND_REQ) {
2567 				/*
2568 				 * The bind semantics for this primitive
2569 				 * require a failure if the exact address
2570 				 * requested is busy
2571 				 */
2572 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2573 				    SL_TRACE|SL_ERROR,
2574 				    "tl_bind:requested addr is busy"));
2575 				tli_err = TADDRBUSY; unix_err = 0;
2576 				goto error;
2577 			}
2578 
2579 			/*
2580 			 * O_T_BIND_REQ semantics say if address if requested
2581 			 * address is busy, bind to any available free address
2582 			 */
2583 			if (! tl_get_any_addr(tep, &addr_req)) {
2584 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2585 				    SL_TRACE|SL_ERROR,
2586 				    "tl_bind:unable to get any addr buf"));
2587 				tli_err = TSYSERR; unix_err = ENOMEM;
2588 				goto error;
2589 			}
2590 		} else {
2591 			tep->te_flag |= TL_ADDRHASHED;
2592 			tep->te_hash_hndl = NULL;
2593 		}
2594 	}
2595 
2596 	ASSERT(tep->te_alen >= 0);
2597 
2598 skip_addr_bind:
2599 	/*
2600 	 * prepare T_BIND_ACK TPI message
2601 	 */
2602 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2603 	bamp = reallocb(mp, basize, 0);
2604 	if (bamp == NULL) {
2605 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2606 		    "tl_wput:tl_bind: allocb failed"));
2607 		/*
2608 		 * roll back state changes
2609 		 */
2610 		tl_addr_unbind(tep);
2611 		tep->te_state = TS_UNBND;
2612 		tl_memrecover(wq, mp, basize);
2613 		return;
2614 	}
2615 
2616 	DB_TYPE(bamp) = M_PCPROTO;
2617 	bamp->b_wptr = bamp->b_rptr + basize;
2618 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2619 	b_ack->PRIM_type = T_BIND_ACK;
2620 	b_ack->CONIND_number = qlen;
2621 	b_ack->ADDR_length = tep->te_alen;
2622 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2623 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2624 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2625 
2626 	if (IS_COTS(tep)) {
2627 		tep->te_qlen = qlen;
2628 		if (qlen > 0)
2629 			tep->te_flag |= TL_LISTENER;
2630 	}
2631 
2632 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2633 	/*
2634 	 * send T_BIND_ACK message
2635 	 */
2636 	(void) qreply(wq, bamp);
2637 	return;
2638 
2639 error:
2640 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2641 	if (ackmp == NULL) {
2642 		/*
2643 		 * roll back state changes
2644 		 */
2645 		tep->te_state = save_state;
2646 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2647 		return;
2648 	}
2649 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2650 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2651 }
2652 
2653 /*
2654  * Process T_UNBIND_REQ.
2655  * Called from serializer.
2656  */
2657 static void
2658 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2659 {
2660 	queue_t *wq;
2661 	mblk_t *ackmp;
2662 
2663 	if (tep->te_closing) {
2664 		freemsg(mp);
2665 		return;
2666 	}
2667 
2668 	wq = tep->te_wq;
2669 
2670 	/*
2671 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2672 	 * ==> allocate for T_ERROR_ACK (known max)
2673 	 */
2674 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2675 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2676 		return;
2677 	}
2678 	/*
2679 	 * memory resources committed
2680 	 * Note: no message validation. T_UNBIND_REQ message is
2681 	 * same size as PRIM_type field so already verified earlier.
2682 	 */
2683 
2684 	/*
2685 	 * validate state
2686 	 */
2687 	if (tep->te_state != TS_IDLE) {
2688 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2689 		    SL_TRACE|SL_ERROR,
2690 		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2691 		    tep->te_state));
2692 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2693 		return;
2694 	}
2695 	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2696 
2697 	/*
2698 	 * TPI says on T_UNBIND_REQ:
2699 	 *    send up a M_FLUSH to flush both
2700 	 *    read and write queues
2701 	 */
2702 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2703 
2704 	if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2705 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2706 
2707 		/*
2708 		 * Sockets use bind with qlen==0 followed by bind() to
2709 		 * the same address with qlen > 0 for listeners.
2710 		 * We allow rebind with a new qlen value.
2711 		 */
2712 		tl_addr_unbind(tep);
2713 	}
2714 
2715 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2716 	/*
2717 	 * send  T_OK_ACK
2718 	 */
2719 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2720 }
2721 
2722 
2723 /*
2724  * Option management code from drv/ip is used here
2725  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2726  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2727  *	However, that is what we want as that option is 'unorthodox'
2728  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2729  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2730  * Note2: use of optcom_req means this routine is an exception to
2731  *	 recovery from allocb() failures.
2732  */
2733 
2734 static void
2735 tl_optmgmt(queue_t *wq, mblk_t *mp)
2736 {
2737 	tl_endpt_t *tep;
2738 	mblk_t *ackmp;
2739 	union T_primitives *prim;
2740 	cred_t *cr;
2741 
2742 	tep = (tl_endpt_t *)wq->q_ptr;
2743 	prim = (union T_primitives *)mp->b_rptr;
2744 
2745 	/*
2746 	 * All Solaris components should pass a db_credp
2747 	 * for this TPI message, hence we ASSERT.
2748 	 * But in case there is some other M_PROTO that looks
2749 	 * like a TPI message sent by some other kernel
2750 	 * component, we check and return an error.
2751 	 */
2752 	cr = msg_getcred(mp, NULL);
2753 	ASSERT(cr != NULL);
2754 	if (cr == NULL) {
2755 		tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2756 		return;
2757 	}
2758 
2759 	/*  all states OK for AF_UNIX options ? */
2760 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2761 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2762 		/*
2763 		 * Broken TLI semantics that options can only be managed
2764 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2765 		 * tests this TLI (mis)feature using this device driver.
2766 		 */
2767 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2768 		    SL_TRACE|SL_ERROR,
2769 		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2770 		    tep->te_state));
2771 		/*
2772 		 * preallocate memory for T_ERROR_ACK
2773 		 */
2774 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2775 		if (! ackmp) {
2776 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2777 			return;
2778 		}
2779 
2780 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2781 		freemsg(mp);
2782 		return;
2783 	}
2784 
2785 	/*
2786 	 * call common option management routine from drv/ip
2787 	 */
2788 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2789 		svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2790 	} else {
2791 		ASSERT(prim->type == T_OPTMGMT_REQ);
2792 		tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2793 	}
2794 }
2795 
2796 /*
2797  * Handle T_conn_req - the driver part of accept().
2798  * If TL_SET[U]CRED generate the credentials options.
2799  * If this is a socket pass through options unmodified.
2800  * For sockets generate the T_CONN_CON here instead of
2801  * waiting for the T_CONN_RES.
2802  */
2803 static void
2804 tl_conn_req(queue_t *wq, mblk_t *mp)
2805 {
2806 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2807 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2808 	ssize_t			msz = MBLKL(mp);
2809 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2810 	tl_endpt_t		*peer_tep = NULL;
2811 	mblk_t			*ackmp;
2812 	mblk_t			*dimp;
2813 	struct T_discon_ind	*di;
2814 	soux_addr_t		ux_addr;
2815 	tl_addr_t		dst;
2816 
2817 	ASSERT(IS_COTS(tep));
2818 
2819 	if (tep->te_closing) {
2820 		freemsg(mp);
2821 		return;
2822 	}
2823 
2824 	/*
2825 	 * preallocate memory for:
2826 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2827 	 *	==> known max T_ERROR_ACK
2828 	 * 2. max of T_DISCON_IND and T_CONN_IND
2829 	 */
2830 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2831 	if (! ackmp) {
2832 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2833 		return;
2834 	}
2835 	/*
2836 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2837 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2838 	 */
2839 
2840 	if (tep->te_state != TS_IDLE) {
2841 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2842 		    SL_TRACE|SL_ERROR,
2843 		    "tl_wput:T_CONN_REQ:out of state, state=%d",
2844 		    tep->te_state));
2845 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2846 		freemsg(mp);
2847 		return;
2848 	}
2849 
2850 	/*
2851 	 * validate the message
2852 	 * Note: dereference fields in struct inside message only
2853 	 * after validating the message length.
2854 	 */
2855 	if (msz < sizeof (struct T_conn_req)) {
2856 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2857 		    "tl_conn_req:invalid message length"));
2858 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2859 		freemsg(mp);
2860 		return;
2861 	}
2862 	alen = creq->DEST_length;
2863 	aoff = creq->DEST_offset;
2864 	olen = creq->OPT_length;
2865 	ooff = creq->OPT_offset;
2866 	if (olen == 0)
2867 		ooff = 0;
2868 
2869 	if (IS_SOCKET(tep)) {
2870 		if ((alen != TL_SOUX_ADDRLEN) ||
2871 		    (aoff < 0) ||
2872 		    (aoff + alen > msz) ||
2873 		    (alen > msz - sizeof (struct T_conn_req))) {
2874 			(void) (STRLOG(TL_ID, tep->te_minor,
2875 				    1, SL_TRACE|SL_ERROR,
2876 				    "tl_conn_req: invalid socket addr"));
2877 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2878 			freemsg(mp);
2879 			return;
2880 		}
2881 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2882 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2883 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2884 			(void) (STRLOG(TL_ID, tep->te_minor,
2885 			    1, SL_TRACE|SL_ERROR,
2886 			    "tl_conn_req: invalid socket magic"));
2887 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2888 			freemsg(mp);
2889 			return;
2890 		}
2891 	} else {
2892 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2893 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2894 		    ooff + olen < 0)) ||
2895 		    olen < 0 || ooff < 0) {
2896 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2897 			    SL_TRACE|SL_ERROR,
2898 			    "tl_conn_req:invalid message"));
2899 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2900 			freemsg(mp);
2901 			return;
2902 		}
2903 
2904 		if (alen <= 0 || aoff < 0 ||
2905 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2906 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2907 				    SL_TRACE|SL_ERROR,
2908 				    "tl_conn_req:bad addr in message, "
2909 				    "alen=%d, msz=%ld",
2910 				    alen, msz));
2911 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2912 			freemsg(mp);
2913 			return;
2914 		}
2915 #ifdef DEBUG
2916 		/*
2917 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2918 		 * if (! assertion)
2919 		 *	log warning;
2920 		 */
2921 		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2922 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
2923 			    SL_TRACE|SL_ERROR,
2924 			    "tl_conn_req: addr overlaps TPI message"));
2925 		}
2926 #endif
2927 		if (olen) {
2928 			/*
2929 			 * no opts in connect req
2930 			 * supported in this provider except for sockets.
2931 			 */
2932 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2933 			    SL_TRACE|SL_ERROR,
2934 			    "tl_conn_req:options not supported "
2935 			    "in message"));
2936 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2937 			freemsg(mp);
2938 			return;
2939 		}
2940 	}
2941 
2942 	/*
2943 	 * Prevent tep from closing on us.
2944 	 */
2945 	if (! tl_noclose(tep)) {
2946 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2947 		    "tl_conn_req:endpoint is closing"));
2948 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2949 		freemsg(mp);
2950 		return;
2951 	}
2952 
2953 	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2954 	/*
2955 	 * get endpoint to connect to
2956 	 * check that peer with DEST addr is bound to addr
2957 	 * and has CONIND_number > 0
2958 	 */
2959 	dst.ta_alen = alen;
2960 	dst.ta_abuf = mp->b_rptr + aoff;
2961 	dst.ta_zoneid = tep->te_zoneid;
2962 
2963 	/*
2964 	 * Verify if remote addr is in use
2965 	 */
2966 	peer_tep = (IS_SOCKET(tep) ?
2967 	    tl_sock_find_peer(tep, &ux_addr) :
2968 	    tl_find_peer(tep, &dst));
2969 
2970 	if (peer_tep == NULL) {
2971 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2972 		    "tl_conn_req:no one at connect address"));
2973 		err = ECONNREFUSED;
2974 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2975 		/*
2976 		 * validate that number of incoming connection is
2977 		 * not to capacity on destination endpoint
2978 		 */
2979 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2980 		    "tl_conn_req: qlen overflow connection refused"));
2981 			err = ECONNREFUSED;
2982 	}
2983 
2984 	/*
2985 	 * Send T_DISCON_IND in case of error
2986 	 */
2987 	if (err != 0) {
2988 		if (peer_tep != NULL)
2989 			tl_refrele(peer_tep);
2990 		/* We are still expected to send T_OK_ACK */
2991 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2992 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2993 		tl_closeok(tep);
2994 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2995 		    M_PROTO, T_DISCON_IND);
2996 		if (dimp == NULL) {
2997 			tl_merror(wq, NULL, ENOSR);
2998 			return;
2999 		}
3000 		di = (struct T_discon_ind *)dimp->b_rptr;
3001 		di->DISCON_reason = err;
3002 		di->SEQ_number = BADSEQNUM;
3003 
3004 		tep->te_state = TS_IDLE;
3005 		/*
3006 		 * send T_DISCON_IND message
3007 		 */
3008 		putnext(tep->te_rq, dimp);
3009 		return;
3010 	}
3011 
3012 	ASSERT(IS_COTS(peer_tep));
3013 
3014 	/*
3015 	 * Found the listener. At this point processing will continue on
3016 	 * listener serializer. Close of the endpoint should be blocked while we
3017 	 * switch serializers.
3018 	 */
3019 	tl_serializer_refhold(peer_tep->te_ser);
3020 	tl_serializer_refrele(tep->te_ser);
3021 	tep->te_ser = peer_tep->te_ser;
3022 	ASSERT(tep->te_oconp == NULL);
3023 	tep->te_oconp = peer_tep;
3024 
3025 	/*
3026 	 * It is safe to close now. Close may continue on listener serializer.
3027 	 */
3028 	tl_closeok(tep);
3029 
3030 	/*
3031 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3032 	 * data, so we link mp to ackmp.
3033 	 */
3034 	ackmp->b_cont = mp;
3035 	mp = ackmp;
3036 
3037 	tl_refhold(tep);
3038 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3039 }
3040 
3041 /*
3042  * Finish T_CONN_REQ processing on listener serializer.
 *
 * Arguments:
 *	mp  - message chain handed over by the caller: the preallocated ack
 *	      mblk first, with the original T_CONN_REQ linked on b_cont.
 *	tep - the connecting endpoint. The endpoint being connected to is
 *	      taken from tep->te_oconp.
 *
 * Exit-path contract, as visible in this function:
 *	- Every path calls tl_serializer_exit(tep).
 *	- Every failure path also calls tl_refrele(tep) and
 *	  TL_UNCONNECT(tep->te_oconp).
 *	- The success path does not call tl_refrele(tep); see the comment
 *	  just before the final ASSERTs.
 *
 * Failure handling:
 *	- tep is closing: the message is freed, nothing is sent upstream.
 *	- peer is closing or is not in TS_IDLE/TS_WRES_CIND: tl_error_ack()
 *	  with TSYSERR/ECONNREFUSED.
 *	- an allocation fails before the T_OK_ACK is sent: te_state is set
 *	  back to TS_IDLE and the message is passed to tl_memrecover().
 *	- tl_resizemp() fails after the T_OK_ACK was sent: tl_merror() with
 *	  ENOMEM.
3043  */
3044 static void
3045 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3046 {
3047 	queue_t		*wq;
3048 	tl_endpt_t	*peer_tep = tep->te_oconp;
3049 	mblk_t		*confmp, *cimp, *indmp;
3050 	void		*opts = NULL;
3051 	mblk_t		*ackmp = mp;
3052 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3053 	struct T_conn_ind	*ci;
3054 	tl_icon_t	*tip;
3055 	void		*addr_startp;
3056 	t_scalar_t	olen = creq->OPT_length;
3057 	t_scalar_t	ooff = creq->OPT_offset;
3058 	size_t 		ci_msz;
3059 	size_t		size;
3060 	cred_t		*cr = NULL;
3061 	pid_t		cpid;
3062 
	/*
	 * If the connecting endpoint is closing, no ack of any kind is sent:
	 * the whole chain (ack mblk plus request) is freed.
	 */
3063 	if (tep->te_closing) {
3064 		TL_UNCONNECT(tep->te_oconp);
3065 		tl_serializer_exit(tep);
3066 		tl_refrele(tep);
3067 		freemsg(mp);
3068 		return;
3069 	}
3070 
3071 	wq = tep->te_wq;
3072 	tep->te_flag |= TL_EAGER;
3073 
3074 	/*
3075 	 * Extract preallocated ackmp from mp.
	 * From here on mp is the T_CONN_REQ and ackmp is a separate,
	 * unlinked mblk.
3076 	 */
3077 	mp = mp->b_cont;
3078 	ackmp->b_cont = NULL;
3079 
	/*
	 * "ooff != 0" is the test used below to decide whether options from
	 * the request have to be saved and copied, so force it to 0 when the
	 * request carries no options.
	 */
3080 	if (olen == 0)
3081 		ooff = 0;
3082 
3083 	if (peer_tep->te_closing ||
3084 	    !((peer_tep->te_state == TS_IDLE) ||
3085 	    (peer_tep->te_state == TS_WRES_CIND))) {
3086 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3087 		    "tl_conn_req:peer in bad state (%d)",
3088 		    peer_tep->te_state));
3089 		TL_UNCONNECT(tep->te_oconp);
		/*
		 * tl_error_ack() is given the request mblk here; the
		 * preallocated ackmp is freed unused.
		 */
3090 		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3091 		freemsg(ackmp);
3092 		tl_serializer_exit(tep);
3093 		tl_refrele(tep);
3094 		return;
3095 	}
3096 
3097 	/*
3098 	 * preallocate now for T_DISCON_IND or T_CONN_IND
3099 	 */
3100 	/*
3101 	 * calculate length of T_CONN_IND message
	 *
	 * When peer_tep has TL_SETCRED or TL_SETUCRED set, olen is replaced
	 * by the size of a single credential option and ooff is cleared, so
	 * options carried in the request are neither saved nor copied below.
3102 	 */
3103 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3104 		cr = msg_getcred(mp, &cpid);
3105 		ASSERT(cr != NULL);
3106 		if (peer_tep->te_flag & TL_SETCRED) {
3107 			ooff = 0;
3108 			olen = (t_scalar_t) sizeof (struct opthdr) +
3109 			    OPTLEN(sizeof (tl_credopt_t));
3110 			/* 1 option only */
3111 		} else {
3112 			ooff = 0;
3113 			olen = (t_scalar_t)sizeof (struct opthdr) +
3114 			    OPTLEN(ucredminsize(cr));
3115 			/* 1 option only */
3116 		}
3117 	}
	/*
	 * T_CONN_IND layout: header, source address, then (aligned) options.
	 * size is large enough for either that or a T_DISCON_IND.
	 */
3118 	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3119 	ci_msz = T_ALIGN(ci_msz) + olen;
3120 	size = max(ci_msz, sizeof (struct T_discon_ind));
3121 
3122 	/*
3123 	 * Save options from mp - we'll need them for T_CONN_IND.
3124 	 */
3125 	if (ooff != 0) {
3126 		opts = kmem_alloc(olen, KM_NOSLEEP);
3127 		if (opts == NULL) {
3128 			/*
3129 			 * roll back state changes
3130 			 */
3131 			tep->te_state = TS_IDLE;
3132 			tl_memrecover(wq, mp, size);
3133 			freemsg(ackmp);
3134 			TL_UNCONNECT(tep->te_oconp);
3135 			tl_serializer_exit(tep);
3136 			tl_refrele(tep);
3137 			return;
3138 		}
3139 		/* Copy options to a temp buffer */
3140 		bcopy(mp->b_rptr + ooff, opts, olen);
3141 	}
3142 
3143 	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3144 		/*
3145 		 * Generate a T_CONN_CON that has the identical address
3146 		 * (and options) as the T_CONN_REQ.
3147 		 * NOTE: assumes that the T_conn_req and T_conn_con structures
3148 		 * are isomorphic.
3149 		 */
3150 		confmp = copyb(mp);
3151 		if (! confmp) {
3152 			/*
3153 			 * roll back state changes
3154 			 */
3155 			tep->te_state = TS_IDLE;
3156 			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3157 			freemsg(ackmp);
3158 			if (opts != NULL)
3159 				kmem_free(opts, olen);
3160 			TL_UNCONNECT(tep->te_oconp);
3161 			tl_serializer_exit(tep);
3162 			tl_refrele(tep);
3163 			return;
3164 		}
3165 		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3166 		    T_CONN_CON;
3167 	} else {
3168 		confmp = NULL;
3169 	}
	/*
	 * indmp is the mblk that will become the T_CONN_IND; on failure the
	 * original mp is still what gets handed to tl_memrecover().
	 */
3170 	if ((indmp = reallocb(mp, size, 0)) == NULL) {
3171 		/*
3172 		 * roll back state changes
3173 		 */
3174 		tep->te_state = TS_IDLE;
3175 		tl_memrecover(wq, mp, size);
3176 		freemsg(ackmp);
3177 		if (opts != NULL)
3178 			kmem_free(opts, olen);
3179 		freemsg(confmp);
3180 		TL_UNCONNECT(tep->te_oconp);
3181 		tl_serializer_exit(tep);
3182 		tl_refrele(tep);
3183 		return;
3184 	}
3185 
3186 	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3187 	if (tip == NULL) {
3188 		/*
3189 		 * roll back state changes
		 * (indmp, not mp, is passed to tl_memrecover() here because
		 * the reallocb() above has already succeeded)
3190 		 */
3191 		tep->te_state = TS_IDLE;
3192 		tl_memrecover(wq, indmp, sizeof (*tip));
3193 		freemsg(ackmp);
3194 		if (opts != NULL)
3195 			kmem_free(opts, olen);
3196 		freemsg(confmp);
3197 		TL_UNCONNECT(tep->te_oconp);
3198 		tl_serializer_exit(tep);
3199 		tl_refrele(tep);
3200 		return;
3201 	}
	/* Already zeroed by kmem_zalloc(); the explicit store is redundant. */
3202 	tip->ti_mp = NULL;
3203 
3204 	/*
3205 	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3206 	 * and tl_icon_t cell.
3207 	 */
3208 
3209 	/*
3210 	 * ack validity of request and send the peer credential in the ACK.
3211 	 */
3212 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3213 
	/*
	 * peer_tep has already been dereferenced above, so the NULL test in
	 * this condition is redundant. The credential is attached to the
	 * early T_CONN_CON (confmp), which only exists for sockets with
	 * early connect enabled.
	 */
3214 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3215 	    confmp != NULL) {
3216 		mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3217 	}
3218 
3219 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3220 
3221 	/*
3222 	 * prepare message to send T_CONN_IND
3223 	 */
3224 	/*
3225 	 * allocate the message - original data blocks retained
3226 	 * in the returned mblk
3227 	 */
3228 	cimp = tl_resizemp(indmp, size);
3229 	if (! cimp) {
3230 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3231 		    "tl_conn_req:con_ind:allocb failure"));
		/*
		 * The T_OK_ACK has already gone upstream, so this failure is
		 * reported with tl_merror() rather than an error ack, and
		 * te_state is not rolled back.
		 */
3232 		tl_merror(wq, indmp, ENOMEM);
3233 		TL_UNCONNECT(tep->te_oconp);
3234 		tl_serializer_exit(tep);
3235 		tl_refrele(tep);
3236 		if (opts != NULL)
3237 			kmem_free(opts, olen);
3238 		freemsg(confmp);
3239 		ASSERT(tip->ti_mp == NULL);
3240 		kmem_free(tip, sizeof (*tip));
3241 		return;
3242 	}
3243 
3244 	DB_TYPE(cimp) = M_PROTO;
3245 	ci = (struct T_conn_ind *)cimp->b_rptr;
3246 	ci->PRIM_type  = T_CONN_IND;
3247 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3248 	ci->SRC_length = tep->te_alen;
3249 	ci->SEQ_number = tep->te_seqno;
3250 
3251 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3252 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	/*
	 * Options: a generated credential option takes precedence over
	 * options saved from the request; otherwise none.
	 */
3253 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3254 
3255 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3256 		    ci->SRC_length);
3257 		ci->OPT_length = olen; /* because only 1 option */
3258 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3259 		    cr, cpid,
3260 		    peer_tep->te_flag, peer_tep->te_credp);
3261 	} else if (ooff != 0) {
3262 		/* Copy option from T_CONN_REQ */
3263 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3264 		    ci->SRC_length);
3265 		ci->OPT_length = olen;
3266 		ASSERT(opts != NULL);
3267 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3268 	} else {
3269 		ci->OPT_offset = 0;
3270 		ci->OPT_length = 0;
3271 	}
3272 	if (opts != NULL)
3273 		kmem_free(opts, olen);
3274 
3275 	/*
3276 	 * register connection request with server peer
3277 	 * append to list of incoming connections
3278 	 * increment references for both peer_tep and tep: peer_tep is placed on
3279 	 * te_oconp and tep is placed on listeners queue.
3280 	 */
3281 	tip->ti_tep = tep;
3282 	tip->ti_seqno = tep->te_seqno;
3283 	list_insert_tail(&peer_tep->te_iconp, tip);
3284 	peer_tep->te_nicon++;
3285 
3286 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3287 	/*
3288 	 * send the T_CONN_IND message
3289 	 */
3290 	putnext(peer_tep->te_rq, cimp);
3291 
3292 	/*
3293 	 * Send a T_CONN_CON message for sockets.
3294 	 * Disable the queues until we have reached the correct state!
3295 	 */
3296 	if (confmp != NULL) {
3297 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3298 		noenable(wq);
3299 		putnext(tep->te_rq, confmp);
3300 	}
3301 	/*
3302 	 * Now we need to increment tep reference because tep is referenced by
3303 	 * server list of pending connections. We also need to decrement
3304 	 * reference before exiting serializer. Two operations void each other
3305 	 * so we don't modify reference at all.
3306 	 */
3307 	ASSERT(tep->te_refcnt >= 2);
3308 	ASSERT(peer_tep->te_refcnt >= 2);
3309 	tl_serializer_exit(tep);
3310 }
3311 
3312 
3313 
3314 /*
3315  * Handle T_conn_res on listener stream. Called on listener serializer.
3316  * tl_conn_req has already generated the T_CONN_CON.
3317  * tl_conn_res is called on listener serializer.
3318  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3319  * Switch eager serializer to acceptor's.
3320  *
3321  * If TL_SET[U]CRED generate the credentials options.
3322  * For sockets tl_conn_req has already generated the T_CONN_CON.
 *
 * Arguments:
 *	mp  - the T_CONN_RES (or O_T_CONN_RES) message.
 *	tep - the listening endpoint the message arrived on.
 *
 * Outline of the code below:
 *	1. Preallocate the ack mblk; validate listener state, message length
 *	   and option fields (any options are rejected with TBADOPT).
 *	2. Look up the acceptor by ACCEPTOR_id and block its close with
 *	   tl_noclose().
 *	3. Find the tl_icon_t for SEQ_number on the listener; its ti_tep is
 *	   the client, or NULL if the client has already closed.
 *	4. If a client is present, size and reallocb() the reply
 *	   (T_CONN_CON, or T_DISCON_IND when client state validation failed).
 *	5. Advance the listener state, link client and acceptor through
 *	   te_conp, put both on one serializer, free the tl_icon_t, send
 *	   T_OK_ACK and (for non-early-connect clients) T_CONN_CON.
 *
 * Once the acceptor lookup and tl_noclose(acc_ep) have succeeded, every
 * return path calls tl_closeok(acc_ep). Every return path except the final
 * success path also calls tl_refrele(acc_ep); on success the lookup
 * reference is kept for the te_conp linkage.
3323  */
3324 static void
3325 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3326 {
3327 	queue_t			*wq;
3328 	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
3329 	ssize_t			msz = MBLKL(mp);
3330 	t_scalar_t		olen, ooff, err = 0;
	/*
	 * NOTE(review): PRIM_type is read here, before the msz check further
	 * down; presumably the caller has already validated enough of the
	 * message to dispatch on the primitive type - confirm.
	 */
3331 	t_scalar_t		prim = cres->PRIM_type;
3332 	uchar_t			*addr_startp;
3333 	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
3334 	tl_icon_t		*tip;
3335 	size_t			size;
3336 	mblk_t			*ackmp, *respmp;
3337 	mblk_t			*dimp, *ccmp = NULL;
3338 	struct T_discon_ind	*di;
3339 	struct T_conn_con	*cc;
	/* B_TRUE once tl_noclose(cl_ep) succeeded and must be undone. */
3340 	boolean_t		client_noclose_set = B_FALSE;
	/* B_FALSE means the acceptor moves to the client's serializer. */
3341 	boolean_t		switch_client_serializer = B_TRUE;
3342 
3343 	ASSERT(IS_COTS(tep));
3344 
3345 	if (tep->te_closing) {
3346 		freemsg(mp);
3347 		return;
3348 	}
3349 
3350 	wq = tep->te_wq;
3351 
3352 	/*
3353 	 * preallocate memory for:
3354 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3355 	 *	==> known max T_ERROR_ACK
3356 	 * 2. max of T_DISCON_IND and T_CONN_CON
3357 	 */
3358 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3359 	if (! ackmp) {
3360 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3361 		return;
3362 	}
3363 	/*
3364 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3365 	 * will be committed for T_DISCON_IND/T_CONN_CON later
3366 	 */
3367 
3368 
3369 	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3370 
3371 	/*
3372 	 * validate state
3373 	 */
3374 	if (tep->te_state != TS_WRES_CIND) {
3375 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3376 		    SL_TRACE|SL_ERROR,
3377 		    "tl_wput:T_CONN_RES:out of state, state=%d",
3378 		    tep->te_state));
3379 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3380 		freemsg(mp);
3381 		return;
3382 	}
3383 
3384 	/*
3385 	 * validate the message
3386 	 * Note: dereference fields in struct inside message only
3387 	 * after validating the message length.
3388 	 */
3389 	if (msz < sizeof (struct T_conn_res)) {
3390 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3391 		    "tl_conn_res:invalid message length"));
3392 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3393 		freemsg(mp);
3394 		return;
3395 	}
3396 	olen = cres->OPT_length;
3397 	ooff = cres->OPT_offset;
3398 	if (((olen > 0) && ((ooff + olen) > msz))) {
3399 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3400 		    "tl_conn_res:invalid message"));
3401 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3402 		freemsg(mp);
3403 		return;
3404 	}
3405 	if (olen) {
3406 		/*
3407 		 * no opts in connect res
3408 		 * supported in this provider
3409 		 */
3410 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3411 		    "tl_conn_res:options not supported in message"));
3412 		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3413 		freemsg(mp);
3414 		return;
3415 	}
3416 
	/*
	 * From here on the listener is in TS_WACK_CRES, so every error ack
	 * below first moves te_state through TE_ERROR_ACK.
	 */
3417 	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3418 	ASSERT(tep->te_state == TS_WACK_CRES);
3419 
	/* Reject sequence numbers in the range [BADSEQNUM, TL_MINOR_START). */
3420 	if (cres->SEQ_number < TL_MINOR_START &&
3421 	    cres->SEQ_number >= BADSEQNUM) {
3422 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3423 		    "tl_conn_res:remote endpoint sequence number bad"));
3424 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3425 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3426 		freemsg(mp);
3427 		return;
3428 	}
3429 
3430 	/*
3431 	 * find accepting endpoint. Will have extra reference if found.
3432 	 */
3433 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3434 	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3435 	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3436 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3437 		    "tl_conn_res:bad accepting endpoint"));
3438 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3439 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3440 		freemsg(mp);
3441 		return;
3442 	}
3443 
3444 	/*
3445 	 * Prevent acceptor from closing.
	 * On failure only the lookup reference has to be dropped; every
	 * later exit must also call tl_closeok(acc_ep).
3446 	 */
3447 	if (! tl_noclose(acc_ep)) {
3448 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3449 		    "tl_conn_res:bad accepting endpoint"));
3450 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3451 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3452 		tl_refrele(acc_ep);
3453 		freemsg(mp);
3454 		return;
3455 	}
3456 
	/*
	 * Note that the flag is set before the remaining validation and is
	 * not cleared on the error paths below.
	 */
3457 	acc_ep->te_flag |= TL_ACCEPTOR;
3458 
3459 	/*
3460 	 * validate that accepting endpoint, if different from listening
3461 	 * has address bound => state is TS_IDLE
3462 	 * TROUBLE in XPG4 !!?
3463 	 */
3464 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3465 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3466 		    "tl_conn_res:accepting endpoint has no address bound,"
3467 		    "state=%d", acc_ep->te_state));
3468 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3469 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3470 		freemsg(mp);
3471 		tl_closeok(acc_ep);
3472 		tl_refrele(acc_ep);
3473 		return;
3474 	}
3475 
3476 	/*
3477 	 * validate if accepting endpt same as listening, then
3478 	 * no other incoming connection should be on the queue
3479 	 */
3480 
3481 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3482 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3483 		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3484 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3485 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3486 		freemsg(mp);
3487 		tl_closeok(acc_ep);
3488 		tl_refrele(acc_ep);
3489 		return;
3490 	}
3491 
3492 	/*
3493 	 * Mark for deletion, the entry corresponding to client
3494 	 * on list of pending connections made by the listener
3495 	 *  search list to see if client is one of the
3496 	 * recorded as a listener.
3497 	 */
3498 	tip = tl_icon_find(tep, cres->SEQ_number);
3499 	if (tip == NULL) {
3500 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3501 		    "tl_conn_res:no client in listener list"));
3502 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3503 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3504 		freemsg(mp);
3505 		tl_closeok(acc_ep);
3506 		tl_refrele(acc_ep);
3507 		return;
3508 	}
3509 
3510 	/*
3511 	 * If ti_tep is NULL the client has already closed. In this case
3512 	 * the code below will avoid any action on the client side
3513 	 * but complete the server and acceptor state transitions.
3514 	 */
3515 	ASSERT(tip->ti_tep == NULL ||
3516 	    tip->ti_tep->te_seqno == cres->SEQ_number);
3517 	cl_ep = tip->ti_tep;
3518 
3519 	/*
3520 	 * If the client is present it is switched from listener's to acceptor's
3521 	 * serializer. We should block client closes while serializers are
3522 	 * being switched.
3523 	 *
3524 	 * It is possible that the client is present but is currently being
3525 	 * closed. There are two possible cases:
3526 	 *
3527 	 * 1) The client has already entered tl_close_finish_ser() and sent
3528 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3529 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3530 	 *
3531 	 * 2) The client started the close but has not entered
3532 	 *    tl_close_finish_ser() yet. In this case, the client is already
3533 	 *    proceeding asynchronously on the listener's serializer, so we're
3534 	 *    forced to change the acceptor to use the listener's serializer to
3535 	 *    ensure that any operations on the acceptor are serialized with
3536 	 *    respect to the close that's in-progress.
3537 	 */
3538 	if (cl_ep != NULL) {
3539 		if (tl_noclose(cl_ep)) {
3540 			client_noclose_set = B_TRUE;
3541 		} else {
3542 			/*
3543 			 * Client is closing. If it has sent the
3544 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3545 			 * we have to let the client continue until it is
3546 			 * sent.
3547 			 *
3548 			 * If we do continue using the client, acceptor will
3549 			 * switch to client's serializer which is used by client
3550 			 * for its close.
			 *
			 * The client is kept only when it is a socket, early
			 * connect is enabled and its te_state is not -1.
			 * NOTE(review): te_state == -1 presumably marks a
			 * client that has finished case 1 above - confirm
			 * against the close path.
3551 			 */
3552 			tl_client_closing_when_accepting++;
3553 			switch_client_serializer = B_FALSE;
3554 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3555 			    cl_ep->te_state == -1)
3556 				cl_ep = NULL;
3557 		}
3558 	}
3559 
3560 	if (cl_ep != NULL) {
3561 		/*
3562 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3563 		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, err is set only when the client
		 * is a socket whose state is neither TS_WCON_CREQ nor
		 * TS_DATA_XFER; a non-socket client is never rejected by
		 * this test. The sentence above suggests the inner test may
		 * have been meant as (state != TS_DATA_XFER || !IS_SOCKET)
		 * - confirm before changing.
3564 		 */
3565 		if (cl_ep->te_state != TS_WCON_CREQ &&
3566 		    (cl_ep->te_state != TS_DATA_XFER &&
3567 		    IS_SOCKET(cl_ep))) {
3568 			err = ECONNREFUSED;
3569 			/*
3570 			 * T_DISCON_IND sent later after committing memory
3571 			 * and acking validity of request
3572 			 */
3573 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3574 			    "tl_conn_res:peer in bad state"));
3575 		}
3576 
3577 		/*
3578 		 * preallocate now for T_DISCON_IND or T_CONN_CON
3579 		 * ack validity of request (T_OK_ACK) after memory committed
3580 		 */
3581 
3582 		if (err)
3583 			size = sizeof (struct T_discon_ind);
3584 		else {
3585 			/*
3586 			 * calculate length of T_CONN_CON message
			 * (header, acceptor address, then an aligned single
			 * credential option when the client asked for one)
3587 			 */
3588 			olen = 0;
3589 			if (cl_ep->te_flag & TL_SETCRED) {
3590 				olen = (t_scalar_t)sizeof (struct opthdr) +
3591 				    OPTLEN(sizeof (tl_credopt_t));
3592 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3593 				olen = (t_scalar_t)sizeof (struct opthdr) +
3594 				    OPTLEN(ucredminsize(acc_ep->te_credp));
3595 			}
3596 			size = T_ALIGN(sizeof (struct T_conn_con) +
3597 			    acc_ep->te_alen) + olen;
3598 		}
3599 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3600 			/*
3601 			 * roll back state changes
3602 			 */
3603 			tep->te_state = TS_WRES_CIND;
3604 			tl_memrecover(wq, mp, size);
3605 			freemsg(ackmp);
3606 			if (client_noclose_set)
3607 				tl_closeok(cl_ep);
3608 			tl_closeok(acc_ep);
3609 			tl_refrele(acc_ep);
3610 			return;
3611 		}
		/*
		 * respmp is used in place of mp from here on; mp is cleared
		 * so it is not touched again on this path.
		 */
3612 		mp = NULL;
3613 	}
3614 
3615 	/*
3616 	 * Now ack validity of request
	 * (the OK_ACK event chosen depends on how many connect indications
	 * are pending and on whether the listener is its own acceptor)
3617 	 */
3618 	if (tep->te_nicon == 1) {
3619 		if (tep == acc_ep)
3620 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3621 		else
3622 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3623 	} else
3624 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3625 
3626 	/*
3627 	 * send T_DISCON_IND now if client state validation failed earlier
	 * (err is only ever set inside the cl_ep != NULL block above, so
	 * respmp and size are valid here)
3628 	 */
3629 	if (err) {
3630 		tl_ok_ack(wq, ackmp, prim);
3631 		/*
3632 		 * flush the queues - why always ?
3633 		 */
3634 		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3635 
3636 		dimp = tl_resizemp(respmp, size);
3637 		if (! dimp) {
3638 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3639 			    SL_TRACE|SL_ERROR,
3640 			    "tl_conn_res:con_ind:allocb failure"));
3641 			tl_merror(wq, respmp, ENOMEM);
3642 			tl_closeok(acc_ep);
3643 			if (client_noclose_set)
3644 				tl_closeok(cl_ep);
3645 			tl_refrele(acc_ep);
3646 			return;
3647 		}
3648 		if (dimp->b_cont) {
3649 			/* no user data in provider generated discon ind */
3650 			freemsg(dimp->b_cont);
3651 			dimp->b_cont = NULL;
3652 		}
3653 
3654 		DB_TYPE(dimp) = M_PROTO;
3655 		di = (struct T_discon_ind *)dimp->b_rptr;
3656 		di->PRIM_type  = T_DISCON_IND;
3657 		di->DISCON_reason = err;
3658 		di->SEQ_number = BADSEQNUM;
3659 
3660 		tep->te_state = TS_IDLE;
3661 		/*
3662 		 * send T_DISCON_IND message
		 * (it goes up the acceptor's read queue; tip is left on the
		 * listener's list on this path)
3663 		 */
3664 		putnext(acc_ep->te_rq, dimp);
3665 		if (client_noclose_set)
3666 			tl_closeok(cl_ep);
3667 		tl_closeok(acc_ep);
3668 		tl_refrele(acc_ep);
3669 		return;
3670 	}
3671 
3672 	/*
3673 	 * now start connecting the accepting endpoint
3674 	 */
3675 	if (tep != acc_ep)
3676 		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3677 
3678 	if (cl_ep == NULL) {
3679 		/*
3680 		 * The client has already closed. Send up any queued messages
3681 		 * and change the state accordingly.
		 *
		 * mp is still the original request here, because the
		 * reallocb() block above is skipped when cl_ep is NULL.
3682 		 */
3683 		tl_ok_ack(wq, ackmp, prim);
3684 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3685 
3686 		/*
3687 		 * remove endpoint from incoming connection
3688 		 * delete client from list of incoming connections
3689 		 */
3690 		tl_freetip(tep, tip);
3691 		freemsg(mp);
3692 		tl_closeok(acc_ep);
3693 		tl_refrele(acc_ep);
3694 		return;
3695 	} else if (tip->ti_mp != NULL) {
3696 		/*
3697 		 * The client could have queued a T_DISCON_IND which needs
3698 		 * to be sent up.
3699 		 * Note that t_discon_req can not operate the same as
3700 		 * t_data_req since it is not possible for it to putbq
3701 		 * the message and return -1 due to the use of qwriter.
3702 		 */
3703 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3704 	}
3705 
3706 	/*
3707 	 * prepare connect confirm T_CONN_CON message
3708 	 */
3709 
3710 	/*
3711 	 * allocate the message - original data blocks
3712 	 * retained in the returned mblk
	 *
	 * A T_CONN_CON is built only for clients that did not already get an
	 * early one (non-sockets, or early connect disabled); otherwise the
	 * preallocated respmp is simply freed.
3713 	 */
3714 	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3715 		ccmp = tl_resizemp(respmp, size);
3716 		if (ccmp == NULL) {
3717 			tl_ok_ack(wq, ackmp, prim);
3718 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3719 			    SL_TRACE|SL_ERROR,
3720 			    "tl_conn_res:conn_con:allocb failure"));
3721 			tl_merror(wq, respmp, ENOMEM);
3722 			tl_closeok(acc_ep);
3723 			if (client_noclose_set)
3724 				tl_closeok(cl_ep);
3725 			tl_refrele(acc_ep);
3726 			return;
3727 		}
3728 
3729 		DB_TYPE(ccmp) = M_PROTO;
3730 		cc = (struct T_conn_con *)ccmp->b_rptr;
3731 		cc->PRIM_type  = T_CONN_CON;
3732 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3733 		cc->RES_length = acc_ep->te_alen;
3734 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3735 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3736 		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3737 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3738 			    cc->RES_length);
3739 			cc->OPT_length = olen;
3740 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3741 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3742 			    cl_ep->te_credp);
3743 		} else {
3744 			cc->OPT_offset = 0;
3745 			cc->OPT_length = 0;
3746 		}
3747 		/*
3748 		 * Forward the credential in the packet so it can be picked up
3749 		 * at the higher layers for more complete credential processing
3750 		 */
3751 		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3752 	} else {
3753 		freemsg(respmp);
3754 		respmp = NULL;
3755 	}
3756 
3757 	/*
3758 	 * make connection linking
3759 	 * accepting and client endpoints
3760 	 * No need to increment references:
3761 	 *	on client: it should already have one from tip->ti_tep linkage.
3762 	 *	on acceptor: it should already have one from the table lookup.
3763 	 *
3764 	 * At this point both client and acceptor can't close. Set client
3765 	 * serializer to acceptor's.
3766 	 */
3767 	ASSERT(cl_ep->te_refcnt >= 2);
3768 	ASSERT(acc_ep->te_refcnt >= 2);
3769 	ASSERT(cl_ep->te_conp == NULL);
3770 	ASSERT(acc_ep->te_conp == NULL);
3771 	cl_ep->te_conp = acc_ep;
3772 	acc_ep->te_conp = cl_ep;
3773 	ASSERT(cl_ep->te_ser == tep->te_ser);
	/*
	 * Preferred direction: move the client onto the acceptor's
	 * serializer. That is abandoned (and counted in
	 * tl_serializer_noswitch) when te_ser_count shows the client has
	 * work outstanding on its current serializer.
	 */
3774 	if (switch_client_serializer) {
3775 		mutex_enter(&cl_ep->te_ser_lock);
3776 		if (cl_ep->te_ser_count > 0) {
3777 			switch_client_serializer = B_FALSE;
3778 			tl_serializer_noswitch++;
3779 		} else {
3780 			/*
3781 			 * Move client to the acceptor's serializer.
3782 			 */
3783 			tl_serializer_refhold(acc_ep->te_ser);
3784 			tl_serializer_refrele(cl_ep->te_ser);
3785 			cl_ep->te_ser = acc_ep->te_ser;
3786 		}
3787 		mutex_exit(&cl_ep->te_ser_lock);
3788 	}
3789 	if (!switch_client_serializer) {
3790 		/*
3791 		 * It is not possible to switch client to use acceptor's.
3792 		 * Move acceptor to client's serializer (which is the same as
3793 		 * listener's).
3794 		 */
3795 		tl_serializer_refhold(cl_ep->te_ser);
3796 		tl_serializer_refrele(acc_ep->te_ser);
3797 		acc_ep->te_ser = cl_ep->te_ser;
3798 	}
3799 
3800 	TL_REMOVE_PEER(cl_ep->te_oconp);
3801 	TL_REMOVE_PEER(acc_ep->te_oconp);
3802 
3803 	/*
3804 	 * remove endpoint from incoming connection
3805 	 * delete client from list of incoming connections
	 *
	 * ti_tep is cleared before tl_freetip() is called; the client's
	 * reference is kept for the te_conp linkage (see the "No need to
	 * increment references" comment above).
3806 	 */
3807 	tip->ti_tep = NULL;
3808 	tl_freetip(tep, tip);
3809 	tl_ok_ack(wq, ackmp, prim);
3810 
3811 	/*
3812 	 * data blocks already linked in reallocb()
3813 	 */
3814 
3815 	/*
3816 	 * link queues so that I_SENDFD will work
3817 	 */
3818 	if (! IS_SOCKET(tep)) {
3819 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3820 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3821 	}
3822 
3823 	/*
3824 	 * send T_CONN_CON up on client side unless it was already
3825 	 * done (for a socket). In cases any data or ordrel req has been
3826 	 * queued make sure that the service procedure runs.
	 *
	 * ccmp is only built above under the opposite condition, so in the
	 * socket/early-connect branch it is expected to still be NULL and
	 * the freemsg() there is defensive.
3827 	 */
3828 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3829 		enableok(cl_ep->te_wq);
3830 		TL_QENABLE(cl_ep);
3831 		if (ccmp != NULL)
3832 			freemsg(ccmp);
3833 	} else {
3834 		/*
3835 		 * change client state on TE_CONN_CON event
3836 		 */
3837 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3838 		putnext(cl_ep->te_rq, ccmp);
3839 	}
3840 
3841 	/* Mark both endpoints as accepted */
3842 	cl_ep->te_flag |= TL_ACCEPTED;
3843 	acc_ep->te_flag |= TL_ACCEPTED;
3844 
3845 	/*
3846 	 * Allow client and acceptor to close.
3847 	 */
3848 	tl_closeok(acc_ep);
3849 	if (client_noclose_set)
3850 		tl_closeok(cl_ep);
3851 }
3852 
3853 
3854 
3855 
3856 static void
3857 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3858 {
3859 	queue_t			*wq;
3860 	struct T_discon_req	*dr;
3861 	ssize_t			msz;
3862 	tl_endpt_t		*peer_tep = tep->te_conp;
3863 	tl_endpt_t		*srv_tep = tep->te_oconp;
3864 	tl_icon_t		*tip;
3865 	size_t			size;
3866 	mblk_t			*ackmp, *dimp, *respmp;
3867 	struct T_discon_ind	*di;
3868 	t_scalar_t		save_state, new_state;
3869 
3870 	if (tep->te_closing) {
3871 		freemsg(mp);
3872 		return;
3873 	}
3874 
3875 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3876 		TL_UNCONNECT(tep->te_conp);
3877 		peer_tep = NULL;
3878 	}
3879 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3880 		TL_UNCONNECT(tep->te_oconp);
3881 		srv_tep = NULL;
3882 	}
3883 
3884 	wq = tep->te_wq;
3885 
3886 	/*
3887 	 * preallocate memory for:
3888 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3889 	 *	==> known max T_ERROR_ACK
3890 	 * 2. for  T_DISCON_IND
3891 	 */
3892 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3893 	if (! ackmp) {
3894 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3895 		return;
3896 	}
3897 	/*
3898 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3899 	 * will be committed for T_DISCON_IND  later
3900 	 */
3901 
3902 	dr = (struct T_discon_req *)mp->b_rptr;
3903 	msz = MBLKL(mp);
3904 
3905 	/*
3906 	 * validate the state
3907 	 */
3908 	save_state = new_state = tep->te_state;
3909 	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3910 	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3911 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3912 		    SL_TRACE|SL_ERROR,
3913 		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
3914 		    tep->te_state));
3915 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3916 		freemsg(mp);
3917 		return;
3918 	}
3919 	/*
3920 	 * Defer committing the state change until it is determined if
3921 	 * the message will be queued with the tl_icon or not.
3922 	 */
3923 	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3924 
3925 	/* validate the message */
3926 	if (msz < sizeof (struct T_discon_req)) {
3927 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3928 		    "tl_discon_req:invalid message"));
3929 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3930 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3931 		freemsg(mp);
3932 		return;
3933 	}
3934 
3935 	/*
3936 	 * if server, then validate that client exists
3937 	 * by connection sequence number etc.
3938 	 */
3939 	if (tep->te_nicon > 0) { /* server */
3940 
3941 		/*
3942 		 * search server list for disconnect client
3943 		 */
3944 		tip = tl_icon_find(tep, dr->SEQ_number);
3945 		if (tip == NULL) {
3946 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
3947 			    SL_TRACE|SL_ERROR,
3948 			    "tl_discon_req:no disconnect endpoint"));
3949 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3950 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3951 			freemsg(mp);
3952 			return;
3953 		}
3954 		/*
3955 		 * If ti_tep is NULL the client has already closed. In this case
3956 		 * the code below will avoid any action on the client side.
3957 		 */
3958 
3959 		IMPLY(tip->ti_tep != NULL,
3960 		    tip->ti_tep->te_seqno == dr->SEQ_number);
3961 		peer_tep = tip->ti_tep;
3962 	}
3963 
3964 	/*
3965 	 * preallocate now for T_DISCON_IND
3966 	 * ack validity of request (T_OK_ACK) after memory committed
3967 	 */
3968 	size = sizeof (struct T_discon_ind);
3969 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
3970 		tl_memrecover(wq, mp, size);
3971 		freemsg(ackmp);
3972 		return;
3973 	}
3974 
3975 	/*
3976 	 * prepare message to ack validity of request
3977 	 */
3978 	if (tep->te_nicon == 0)
3979 		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3980 	else
3981 		if (tep->te_nicon == 1)
3982 			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3983 		else
3984 			new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3985 
3986 	/*
3987 	 * Flushing queues according to TPI. Using the old state.
3988 	 */
3989 	if ((tep->te_nicon <= 1) &&
3990 	    ((save_state == TS_DATA_XFER) ||
3991 	    (save_state == TS_WIND_ORDREL) ||
3992 	    (save_state == TS_WREQ_ORDREL)))
3993 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3994 
3995 	/* send T_OK_ACK up  */
3996 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3997 
3998 	/*
3999 	 * now do disconnect business
4000 	 */
4001 	if (tep->te_nicon > 0) { /* listener */
4002 		if (peer_tep != NULL && !peer_tep->te_closing) {
4003 			/*
4004 			 * disconnect incoming connect request pending to tep
4005 			 */
4006 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4007 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
4008 				    SL_TRACE|SL_ERROR,
4009 				    "tl_discon_req: reallocb failed"));
4010 				tep->te_state = new_state;
4011 				tl_merror(wq, respmp, ENOMEM);
4012 				return;
4013 			}
4014 			di = (struct T_discon_ind *)dimp->b_rptr;
4015 			di->SEQ_number = BADSEQNUM;
4016 			save_state = peer_tep->te_state;
4017 			peer_tep->te_state = TS_IDLE;
4018 
4019 			TL_REMOVE_PEER(peer_tep->te_oconp);
4020 			enableok(peer_tep->te_wq);
4021 			TL_QENABLE(peer_tep);
4022 		} else {
4023 			freemsg(respmp);
4024 			dimp = NULL;
4025 		}
4026 
4027 		/*
4028 		 * remove endpoint from incoming connection list
4029 		 * - remove disconnect client from list on server
4030 		 */
4031 		tl_freetip(tep, tip);
4032 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4033 		/*
4034 		 * disconnect an outgoing request pending from tep
4035 		 */
4036 
4037 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4038 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4039 			    SL_TRACE|SL_ERROR,
4040 			    "tl_discon_req: reallocb failed"));
4041 			tep->te_state = new_state;
4042 			tl_merror(wq, respmp, ENOMEM);
4043 			return;
4044 		}
4045 		di = (struct T_discon_ind *)dimp->b_rptr;
4046 		DB_TYPE(dimp) = M_PROTO;
4047 		di->PRIM_type  = T_DISCON_IND;
4048 		di->DISCON_reason = ECONNRESET;
4049 		di->SEQ_number = tep->te_seqno;
4050 
4051 		/*
4052 		 * If this is a socket the T_DISCON_IND is queued with
4053 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4054 		 * from the list of pending connections.
4055 		 * Note that when te_oconp is set the peer better have
4056 		 * a t_connind_t for the client.
4057 		 */
4058 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4059 			/*
4060 			 * No need to check that
4061 			 * ti_tep == NULL since the T_DISCON_IND
4062 			 * takes precedence over other queued
4063 			 * messages.
4064 			 */
4065 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4066 			peer_tep = NULL;
4067 			dimp = NULL;
4068 			/*
4069 			 * Can't clear te_oconp since tl_co_unconnect needs
4070 			 * it as a hint not to free the tep.
4071 			 * Keep the state unchanged since tl_conn_res inspects
4072 			 * it.
4073 			 */
4074 			new_state = tep->te_state;
4075 		} else {
4076 			/* Found - delete it */
4077 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4078 			if (tip != NULL) {
4079 				ASSERT(tep == tip->ti_tep);
4080 				save_state = peer_tep->te_state;
4081 				if (peer_tep->te_nicon == 1)
4082 					peer_tep->te_state =
4083 					    NEXTSTATE(TE_DISCON_IND2,
4084 					    peer_tep->te_state);
4085 				else
4086 					peer_tep->te_state =
4087 					    NEXTSTATE(TE_DISCON_IND3,
4088 					    peer_tep->te_state);
4089 				tl_freetip(peer_tep, tip);
4090 			}
4091 			ASSERT(tep->te_oconp != NULL);
4092 			TL_UNCONNECT(tep->te_oconp);
4093 		}
4094 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4095 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4096 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4097 			    SL_TRACE|SL_ERROR,
4098 			    "tl_discon_req: reallocb failed"));
4099 			tep->te_state = new_state;
4100 			tl_merror(wq, respmp, ENOMEM);
4101 			return;
4102 		}
4103 		di = (struct T_discon_ind *)dimp->b_rptr;
4104 		di->SEQ_number = BADSEQNUM;
4105 
4106 		save_state = peer_tep->te_state;
4107 		peer_tep->te_state = TS_IDLE;
4108 	} else {
4109 		/* Not connected */
4110 		tep->te_state = new_state;
4111 		freemsg(respmp);
4112 		return;
4113 	}
4114 
4115 	/* Commit state changes */
4116 	tep->te_state = new_state;
4117 
4118 	if (peer_tep == NULL) {
4119 		ASSERT(dimp == NULL);
4120 		goto done;
4121 	}
4122 	/*
4123 	 * Flush queues on peer before sending up
4124 	 * T_DISCON_IND according to TPI
4125 	 */
4126 
4127 	if ((save_state == TS_DATA_XFER) ||
4128 	    (save_state == TS_WIND_ORDREL) ||
4129 	    (save_state == TS_WREQ_ORDREL))
4130 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4131 
4132 	DB_TYPE(dimp) = M_PROTO;
4133 	di->PRIM_type  = T_DISCON_IND;
4134 	di->DISCON_reason = ECONNRESET;
4135 
4136 	/*
4137 	 * data blocks already linked into dimp by reallocb()
4138 	 */
4139 	/*
4140 	 * send indication message to peer user module
4141 	 */
4142 	ASSERT(dimp != NULL);
4143 	putnext(peer_tep->te_rq, dimp);
4144 done:
4145 	if (tep->te_conp) {	/* disconnect pointers if connected */
4146 		ASSERT(! peer_tep->te_closing);
4147 
4148 		/*
4149 		 * Messages may be queued on peer's write queue
4150 		 * waiting to be processed by its write service
4151 		 * procedure. Before the pointer to the peer transport
4152 		 * structure is set to NULL, qenable the peer's write
4153 		 * queue so that the queued up messages are processed.
4154 		 */
4155 		if ((save_state == TS_DATA_XFER) ||
4156 		    (save_state == TS_WIND_ORDREL) ||
4157 		    (save_state == TS_WREQ_ORDREL))
4158 			TL_QENABLE(peer_tep);
4159 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4160 		TL_UNCONNECT(peer_tep->te_conp);
4161 		if (! IS_SOCKET(tep)) {
4162 			/*
4163 			 * unlink the streams
4164 			 */
4165 			tep->te_wq->q_next = NULL;
4166 			peer_tep->te_wq->q_next = NULL;
4167 		}
4168 		TL_UNCONNECT(tep->te_conp);
4169 	}
4170 }
4171 
4172 static void
4173 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4174 {
4175 	if (!tep->te_closing)
4176 		tl_addr_req(mp, tep);
4177 	else
4178 		freemsg(mp);
4179 
4180 	tl_serializer_exit(tep);
4181 	tl_refrele(tep);
4182 }
4183 
4184 static void
4185 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4186 {
4187 	queue_t			*wq;
4188 	size_t			ack_sz;
4189 	mblk_t			*ackmp;
4190 	struct T_addr_ack	*taa;
4191 
4192 	if (tep->te_closing) {
4193 		freemsg(mp);
4194 		return;
4195 	}
4196 
4197 	wq = tep->te_wq;
4198 
4199 	/*
4200 	 * Note: T_ADDR_REQ message has only PRIM_type field
4201 	 * so it is already validated earlier.
4202 	 */
4203 
4204 	if (IS_CLTS(tep) ||
4205 	    (tep->te_state > TS_WREQ_ORDREL) ||
4206 	    (tep->te_state < TS_DATA_XFER)) {
4207 		/*
4208 		 * Either connectionless or connection oriented but not
4209 		 * in connected data transfer state or half-closed states.
4210 		 */
4211 		ack_sz = sizeof (struct T_addr_ack);
4212 		if (tep->te_state >= TS_IDLE)
4213 			/* is bound */
4214 			ack_sz += tep->te_alen;
4215 		ackmp = reallocb(mp, ack_sz, 0);
4216 		if (ackmp == NULL) {
4217 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4218 			    SL_TRACE|SL_ERROR,
4219 			    "tl_addr_req: reallocb failed"));
4220 			tl_memrecover(wq, mp, ack_sz);
4221 			return;
4222 		}
4223 
4224 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4225 
4226 		bzero(taa, sizeof (struct T_addr_ack));
4227 
4228 		taa->PRIM_type = T_ADDR_ACK;
4229 		ackmp->b_datap->db_type = M_PCPROTO;
4230 		ackmp->b_wptr = (uchar_t *)&taa[1];
4231 
4232 		if (tep->te_state >= TS_IDLE) {
4233 			/* endpoint is bound */
4234 			taa->LOCADDR_length = tep->te_alen;
4235 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4236 
4237 			bcopy(tep->te_abuf, ackmp->b_wptr,
4238 			    tep->te_alen);
4239 			ackmp->b_wptr += tep->te_alen;
4240 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4241 		}
4242 
4243 		(void) qreply(wq, ackmp);
4244 	} else {
4245 		ASSERT(tep->te_state == TS_DATA_XFER ||
4246 		    tep->te_state == TS_WIND_ORDREL ||
4247 		    tep->te_state == TS_WREQ_ORDREL);
4248 		/* connection oriented in data transfer */
4249 		tl_connected_cots_addr_req(mp, tep);
4250 	}
4251 }
4252 
4253 
4254 static void
4255 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4256 {
4257 	tl_endpt_t		*peer_tep = tep->te_conp;
4258 	size_t			ack_sz;
4259 	mblk_t			*ackmp;
4260 	struct T_addr_ack	*taa;
4261 	uchar_t			*addr_startp;
4262 
4263 	if (tep->te_closing) {
4264 		freemsg(mp);
4265 		return;
4266 	}
4267 
4268 	if (peer_tep == NULL || peer_tep->te_closing) {
4269 		tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4270 		return;
4271 	}
4272 
4273 	ASSERT(tep->te_state >= TS_IDLE);
4274 
4275 	ack_sz = sizeof (struct T_addr_ack);
4276 	ack_sz += T_ALIGN(tep->te_alen);
4277 	ack_sz += peer_tep->te_alen;
4278 
4279 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4280 	if (ackmp == NULL) {
4281 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4282 		    "tl_connected_cots_addr_req: reallocb failed"));
4283 		tl_memrecover(tep->te_wq, mp, ack_sz);
4284 		return;
4285 	}
4286 
4287 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4288 
4289 	/* endpoint is bound */
4290 	taa->LOCADDR_length = tep->te_alen;
4291 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4292 
4293 	addr_startp = (uchar_t *)&taa[1];
4294 
4295 	bcopy(tep->te_abuf, addr_startp,
4296 	    tep->te_alen);
4297 
4298 	taa->REMADDR_length = peer_tep->te_alen;
4299 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4300 	    taa->LOCADDR_length);
4301 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4302 	bcopy(peer_tep->te_abuf, addr_startp,
4303 	    peer_tep->te_alen);
4304 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4305 	    taa->REMADDR_offset + peer_tep->te_alen;
4306 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4307 
4308 	putnext(tep->te_rq, ackmp);
4309 }
4310 
4311 static void
4312 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4313 {
4314 	if (IS_CLTS(tep)) {
4315 		*ia = tl_clts_info_ack;
4316 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4317 	} else {
4318 		*ia = tl_cots_info_ack;
4319 		if (IS_COTSORD(tep))
4320 			ia->SERV_type = T_COTS_ORD;
4321 	}
4322 	ia->TIDU_size = tl_tidusz;
4323 	ia->CURRENT_state = tep->te_state;
4324 }
4325 
4326 /*
4327  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4328  * tl_wput.
4329  */
4330 static void
4331 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4332 {
4333 	mblk_t			*ackmp;
4334 	t_uscalar_t		cap_bits1;
4335 	struct T_capability_ack	*tcap;
4336 
4337 	if (tep->te_closing) {
4338 		freemsg(mp);
4339 		return;
4340 	}
4341 
4342 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4343 
4344 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4345 	    M_PCPROTO, T_CAPABILITY_ACK);
4346 	if (ackmp == NULL) {
4347 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4348 		    "tl_capability_req: reallocb failed"));
4349 		tl_memrecover(tep->te_wq, mp,
4350 		    sizeof (struct T_capability_ack));
4351 		return;
4352 	}
4353 
4354 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4355 	tcap->CAP_bits1 = 0;
4356 
4357 	if (cap_bits1 & TC1_INFO) {
4358 		tl_copy_info(&tcap->INFO_ack, tep);
4359 		tcap->CAP_bits1 |= TC1_INFO;
4360 	}
4361 
4362 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4363 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4364 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4365 	}
4366 
4367 	putnext(tep->te_rq, ackmp);
4368 }
4369 
4370 static void
4371 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4372 {
4373 	if (! tep->te_closing)
4374 		tl_info_req(mp, tep);
4375 	else
4376 		freemsg(mp);
4377 
4378 	tl_serializer_exit(tep);
4379 	tl_refrele(tep);
4380 }
4381 
4382 static void
4383 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4384 {
4385 	mblk_t *ackmp;
4386 
4387 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4388 	    M_PCPROTO, T_INFO_ACK);
4389 	if (ackmp == NULL) {
4390 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4391 		    "tl_info_req: reallocb failed"));
4392 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4393 		return;
4394 	}
4395 
4396 	/*
4397 	 * fill in T_INFO_ACK contents
4398 	 */
4399 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4400 
4401 	/*
4402 	 * send ack message
4403 	 */
4404 	putnext(tep->te_rq, ackmp);
4405 }
4406 
4407 /*
4408  * Handle M_DATA, T_data_req and T_optdata_req.
4409  * If this is a socket pass through T_optdata_req options unmodified.
4410  */
4411 static void
4412 tl_data(mblk_t *mp, tl_endpt_t *tep)
4413 {
4414 	queue_t			*wq = tep->te_wq;
4415 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4416 	ssize_t			msz = MBLKL(mp);
4417 	tl_endpt_t		*peer_tep;
4418 	queue_t			*peer_rq;
4419 	boolean_t		closing = tep->te_closing;
4420 
4421 	if (IS_CLTS(tep)) {
4422 		(void) (STRLOG(TL_ID, tep->te_minor, 2,
4423 		    SL_TRACE|SL_ERROR,
4424 		    "tl_wput:clts:unattached M_DATA"));
4425 		if (!closing) {
4426 			tl_merror(wq, mp, EPROTO);
4427 		} else {
4428 			freemsg(mp);
4429 		}
4430 		return;
4431 	}
4432 
4433 	/*
4434 	 * If the endpoint is closing it should still forward any data to the
4435 	 * peer (if it has one). If it is not allowed to forward it can just
4436 	 * free the message.
4437 	 */
4438 	if (closing &&
4439 	    (tep->te_state != TS_DATA_XFER) &&
4440 	    (tep->te_state != TS_WREQ_ORDREL)) {
4441 		freemsg(mp);
4442 		return;
4443 	}
4444 
4445 	if (DB_TYPE(mp) == M_PROTO) {
4446 		if (prim->type == T_DATA_REQ &&
4447 		    msz < sizeof (struct T_data_req)) {
4448 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4449 				SL_TRACE|SL_ERROR,
4450 				"tl_data:T_DATA_REQ:invalid message"));
4451 			if (!closing) {
4452 				tl_merror(wq, mp, EPROTO);
4453 			} else {
4454 				freemsg(mp);
4455 			}
4456 			return;
4457 		} else if (prim->type == T_OPTDATA_REQ &&
4458 		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4459 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4460 			    SL_TRACE|SL_ERROR,
4461 			    "tl_data:T_OPTDATA_REQ:invalid message"));
4462 			if (!closing) {
4463 				tl_merror(wq, mp, EPROTO);
4464 			} else {
4465 				freemsg(mp);
4466 			}
4467 			return;
4468 		}
4469 	}
4470 
4471 	/*
4472 	 * connection oriented provider
4473 	 */
4474 	switch (tep->te_state) {
4475 	case TS_IDLE:
4476 		/*
4477 		 * Other end not here - do nothing.
4478 		 */
4479 		freemsg(mp);
4480 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4481 		    "tl_data:cots with endpoint idle"));
4482 		return;
4483 
4484 	case TS_DATA_XFER:
4485 		/* valid states */
4486 		if (tep->te_conp != NULL)
4487 			break;
4488 
4489 		if (tep->te_oconp == NULL) {
4490 			if (!closing) {
4491 				tl_merror(wq, mp, EPROTO);
4492 			} else {
4493 				freemsg(mp);
4494 			}
4495 			return;
4496 		}
4497 		/*
4498 		 * For a socket the T_CONN_CON is sent early thus
4499 		 * the peer might not yet have accepted the connection.
4500 		 * If we are closing queue the packet with the T_CONN_IND.
4501 		 * Otherwise defer processing the packet until the peer
4502 		 * accepts the connection.
4503 		 * Note that the queue is noenabled when we go into this
4504 		 * state.
4505 		 */
4506 		if (!closing) {
4507 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4508 			    SL_TRACE|SL_ERROR,
4509 			    "tl_data: ocon"));
4510 			TL_PUTBQ(tep, mp);
4511 			return;
4512 		}
4513 		if (DB_TYPE(mp) == M_PROTO) {
4514 			if (msz < sizeof (t_scalar_t)) {
4515 				freemsg(mp);
4516 				return;
4517 			}
4518 			/* reuse message block - just change REQ to IND */
4519 			if (prim->type == T_DATA_REQ)
4520 				prim->type = T_DATA_IND;
4521 			else
4522 				prim->type = T_OPTDATA_IND;
4523 		}
4524 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4525 		return;
4526 
4527 	case TS_WREQ_ORDREL:
4528 		if (tep->te_conp == NULL) {
4529 			/*
4530 			 * Other end closed - generate discon_ind
4531 			 * with reason 0 to cause an EPIPE but no
4532 			 * read side error on AF_UNIX sockets.
4533 			 */
4534 			freemsg(mp);
4535 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4536 			    SL_TRACE|SL_ERROR,
4537 			    "tl_data: WREQ_ORDREL and no peer"));
4538 			tl_discon_ind(tep, 0);
4539 			return;
4540 		}
4541 		break;
4542 
4543 	default:
4544 		/* invalid state for event TE_DATA_REQ */
4545 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4546 		    "tl_data:cots:out of state"));
4547 		tl_merror(wq, mp, EPROTO);
4548 		return;
4549 	}
4550 	/*
4551 	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4552 	 * (State stays same on this event)
4553 	 */
4554 
4555 	/*
4556 	 * get connected endpoint
4557 	 */
4558 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4559 		freemsg(mp);
4560 		/* Peer closed */
4561 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4562 		    "tl_data: peer gone"));
4563 		return;
4564 	}
4565 
4566 	ASSERT(tep->te_serializer == peer_tep->te_serializer);
4567 	peer_rq = peer_tep->te_rq;
4568 
4569 	/*
4570 	 * Put it back if flow controlled
4571 	 * Note: Messages already on queue when we are closing is bounded
4572 	 * so we can ignore flow control.
4573 	 */
4574 	if (!canputnext(peer_rq) && !closing) {
4575 		TL_PUTBQ(tep, mp);
4576 		return;
4577 	}
4578 
4579 	/*
4580 	 * validate peer state
4581 	 */
4582 	switch (peer_tep->te_state) {
4583 	case TS_DATA_XFER:
4584 	case TS_WIND_ORDREL:
4585 		/* valid states */
4586 		break;
4587 	default:
4588 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4589 		    "tl_data:rx side:invalid state"));
4590 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4591 		return;
4592 	}
4593 	if (DB_TYPE(mp) == M_PROTO) {
4594 		/* reuse message block - just change REQ to IND */
4595 		if (prim->type == T_DATA_REQ)
4596 			prim->type = T_DATA_IND;
4597 		else
4598 			prim->type = T_OPTDATA_IND;
4599 	}
4600 	/*
4601 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4602 	 * (peer state stays same on this event)
4603 	 */
4604 	/*
4605 	 * send data to connected peer
4606 	 */
4607 	putnext(peer_rq, mp);
4608 }
4609 
4610 
4611 
4612 static void
4613 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4614 {
4615 	queue_t			*wq = tep->te_wq;
4616 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4617 	ssize_t			msz = MBLKL(mp);
4618 	tl_endpt_t		*peer_tep;
4619 	queue_t			*peer_rq;
4620 	boolean_t		closing = tep->te_closing;
4621 
4622 	if (msz < sizeof (struct T_exdata_req)) {
4623 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4624 		    "tl_exdata:invalid message"));
4625 		if (!closing) {
4626 			tl_merror(wq, mp, EPROTO);
4627 		} else {
4628 			freemsg(mp);
4629 		}
4630 		return;
4631 	}
4632 
4633 	/*
4634 	 * If the endpoint is closing it should still forward any data to the
4635 	 * peer (if it has one). If it is not allowed to forward it can just
4636 	 * free the message.
4637 	 */
4638 	if (closing &&
4639 	    (tep->te_state != TS_DATA_XFER) &&
4640 	    (tep->te_state != TS_WREQ_ORDREL)) {
4641 		freemsg(mp);
4642 		return;
4643 	}
4644 
4645 	/*
4646 	 * validate state
4647 	 */
4648 	switch (tep->te_state) {
4649 	case TS_IDLE:
4650 		/*
4651 		 * Other end not here - do nothing.
4652 		 */
4653 		freemsg(mp);
4654 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4655 		    "tl_exdata:cots with endpoint idle"));
4656 		return;
4657 
4658 	case TS_DATA_XFER:
4659 		/* valid states */
4660 		if (tep->te_conp != NULL)
4661 			break;
4662 
4663 		if (tep->te_oconp == NULL) {
4664 			if (!closing) {
4665 				tl_merror(wq, mp, EPROTO);
4666 			} else {
4667 				freemsg(mp);
4668 			}
4669 			return;
4670 		}
4671 		/*
4672 		 * For a socket the T_CONN_CON is sent early thus
4673 		 * the peer might not yet have accepted the connection.
4674 		 * If we are closing queue the packet with the T_CONN_IND.
4675 		 * Otherwise defer processing the packet until the peer
4676 		 * accepts the connection.
4677 		 * Note that the queue is noenabled when we go into this
4678 		 * state.
4679 		 */
4680 		if (!closing) {
4681 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4682 			    SL_TRACE|SL_ERROR,
4683 			    "tl_exdata: ocon"));
4684 			TL_PUTBQ(tep, mp);
4685 			return;
4686 		}
4687 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4688 		    "tl_exdata: closing socket ocon"));
4689 		prim->type = T_EXDATA_IND;
4690 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4691 		return;
4692 
4693 	case TS_WREQ_ORDREL:
4694 		if (tep->te_conp == NULL) {
4695 			/*
4696 			 * Other end closed - generate discon_ind
4697 			 * with reason 0 to cause an EPIPE but no
4698 			 * read side error on AF_UNIX sockets.
4699 			 */
4700 			freemsg(mp);
4701 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4702 			    SL_TRACE|SL_ERROR,
4703 			    "tl_exdata: WREQ_ORDREL and no peer"));
4704 			tl_discon_ind(tep, 0);
4705 			return;
4706 		}
4707 		break;
4708 
4709 	default:
4710 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4711 		    SL_TRACE|SL_ERROR,
4712 		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4713 		    tep->te_state));
4714 		tl_merror(wq, mp, EPROTO);
4715 		return;
4716 	}
4717 	/*
4718 	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4719 	 * (state stays same on this event)
4720 	 */
4721 
4722 	/*
4723 	 * get connected endpoint
4724 	 */
4725 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4726 		freemsg(mp);
4727 		/* Peer closed */
4728 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4729 		    "tl_exdata: peer gone"));
4730 		return;
4731 	}
4732 
4733 	peer_rq = peer_tep->te_rq;
4734 
4735 	/*
4736 	 * Put it back if flow controlled
4737 	 * Note: Messages already on queue when we are closing is bounded
4738 	 * so we can ignore flow control.
4739 	 */
4740 	if (!canputnext(peer_rq) && !closing) {
4741 		TL_PUTBQ(tep, mp);
4742 		return;
4743 	}
4744 
4745 	/*
4746 	 * validate state on peer
4747 	 */
4748 	switch (peer_tep->te_state) {
4749 	case TS_DATA_XFER:
4750 	case TS_WIND_ORDREL:
4751 		/* valid states */
4752 		break;
4753 	default:
4754 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4755 		    "tl_exdata:rx side:invalid state"));
4756 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4757 		return;
4758 	}
4759 	/*
4760 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4761 	 * (peer state stays same on this event)
4762 	 */
4763 	/*
4764 	 * reuse message block
4765 	 */
4766 	prim->type = T_EXDATA_IND;
4767 
4768 	/*
4769 	 * send data to connected peer
4770 	 */
4771 	putnext(peer_rq, mp);
4772 }
4773 
4774 
4775 
4776 static void
4777 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4778 {
4779 	queue_t			*wq =  tep->te_wq;
4780 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4781 	ssize_t			msz = MBLKL(mp);
4782 	tl_endpt_t		*peer_tep;
4783 	queue_t			*peer_rq;
4784 	boolean_t		closing = tep->te_closing;
4785 
4786 	if (msz < sizeof (struct T_ordrel_req)) {
4787 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4788 		    "tl_ordrel:invalid message"));
4789 		if (!closing) {
4790 			tl_merror(wq, mp, EPROTO);
4791 		} else {
4792 			freemsg(mp);
4793 		}
4794 		return;
4795 	}
4796 
4797 	/*
4798 	 * validate state
4799 	 */
4800 	switch (tep->te_state) {
4801 	case TS_DATA_XFER:
4802 	case TS_WREQ_ORDREL:
4803 		/* valid states */
4804 		if (tep->te_conp != NULL)
4805 			break;
4806 
4807 		if (tep->te_oconp == NULL)
4808 			break;
4809 
4810 		/*
4811 		 * For a socket the T_CONN_CON is sent early thus
4812 		 * the peer might not yet have accepted the connection.
4813 		 * If we are closing queue the packet with the T_CONN_IND.
4814 		 * Otherwise defer processing the packet until the peer
4815 		 * accepts the connection.
4816 		 * Note that the queue is noenabled when we go into this
4817 		 * state.
4818 		 */
4819 		if (!closing) {
4820 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4821 			    SL_TRACE|SL_ERROR,
4822 			    "tl_ordlrel: ocon"));
4823 			TL_PUTBQ(tep, mp);
4824 			return;
4825 		}
4826 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4827 		    "tl_ordlrel: closing socket ocon"));
4828 		prim->type = T_ORDREL_IND;
4829 		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4830 		return;
4831 
4832 	default:
4833 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4834 		    SL_TRACE|SL_ERROR,
4835 		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4836 		    tep->te_state));
4837 		if (!closing) {
4838 			tl_merror(wq, mp, EPROTO);
4839 		} else {
4840 			freemsg(mp);
4841 		}
4842 		return;
4843 	}
4844 	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4845 
4846 	/*
4847 	 * get connected endpoint
4848 	 */
4849 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4850 		/* Peer closed */
4851 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4852 		    "tl_ordrel: peer gone"));
4853 		freemsg(mp);
4854 		return;
4855 	}
4856 
4857 	peer_rq = peer_tep->te_rq;
4858 
4859 	/*
4860 	 * Put it back if flow controlled except when we are closing.
4861 	 * Note: Messages already on queue when we are closing is bounded
4862 	 * so we can ignore flow control.
4863 	 */
4864 	if (! canputnext(peer_rq) && !closing) {
4865 		TL_PUTBQ(tep, mp);
4866 		return;
4867 	}
4868 
4869 	/*
4870 	 * validate state on peer
4871 	 */
4872 	switch (peer_tep->te_state) {
4873 	case TS_DATA_XFER:
4874 	case TS_WIND_ORDREL:
4875 		/* valid states */
4876 		break;
4877 	default:
4878 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4879 		    "tl_ordrel:rx side:invalid state"));
4880 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4881 		return;
4882 	}
4883 	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4884 
4885 	/*
4886 	 * reuse message block
4887 	 */
4888 	prim->type = T_ORDREL_IND;
4889 	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4890 	    "tl_ordrel: send ordrel_ind"));
4891 
4892 	/*
4893 	 * send data to connected peer
4894 	 */
4895 	putnext(peer_rq, mp);
4896 }
4897 
4898 
4899 /*
4900  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4901  */
4902 static void
4903 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4904 {
4905 	size_t			err_sz;
4906 	tl_endpt_t		*tep;
4907 	struct T_unitdata_req	*udreq;
4908 	mblk_t			*err_mp;
4909 	t_scalar_t		alen;
4910 	t_scalar_t		olen;
4911 	struct T_uderror_ind	*uderr;
4912 	uchar_t			*addr_startp;
4913 
4914 	err_sz = sizeof (struct T_uderror_ind);
4915 	tep = (tl_endpt_t *)wq->q_ptr;
4916 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4917 	alen = udreq->DEST_length;
4918 	olen = udreq->OPT_length;
4919 
4920 	if (alen > 0)
4921 		err_sz = T_ALIGN(err_sz + alen);
4922 	if (olen > 0)
4923 		err_sz += olen;
4924 
4925 	err_mp = allocb(err_sz, BPRI_MED);
4926 	if (! err_mp) {
4927 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4928 		    "tl_uderr:allocb failure"));
4929 		/*
4930 		 * Note: no rollback of state needed as it does
4931 		 * not change in connectionless transport
4932 		 */
4933 		tl_memrecover(wq, mp, err_sz);
4934 		return;
4935 	}
4936 
4937 	DB_TYPE(err_mp) = M_PROTO;
4938 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
4939 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4940 	uderr->PRIM_type = T_UDERROR_IND;
4941 	uderr->ERROR_type = err;
4942 	uderr->DEST_length = alen;
4943 	uderr->OPT_length = olen;
4944 	if (alen <= 0) {
4945 		uderr->DEST_offset = 0;
4946 	} else {
4947 		uderr->DEST_offset =
4948 		    (t_scalar_t)sizeof (struct T_uderror_ind);
4949 		addr_startp  = mp->b_rptr + udreq->DEST_offset;
4950 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4951 		    (size_t)alen);
4952 	}
4953 	if (olen <= 0) {
4954 		uderr->OPT_offset = 0;
4955 	} else {
4956 		uderr->OPT_offset =
4957 		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4958 		    uderr->DEST_length);
4959 		addr_startp  = mp->b_rptr + udreq->OPT_offset;
4960 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4961 		    (size_t)olen);
4962 	}
4963 	freemsg(mp);
4964 
4965 	/*
4966 	 * send indication message
4967 	 */
4968 	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4969 
4970 	qreply(wq, err_mp);
4971 }
4972 
4973 static void
4974 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4975 {
4976 	queue_t *wq = tep->te_wq;
4977 
4978 	if (!tep->te_closing && (wq->q_first != NULL)) {
4979 		TL_PUTQ(tep, mp);
4980 	} else if (tep->te_rq != NULL)
4981 		tl_unitdata(mp, tep);
4982 	else
4983 		freemsg(mp);
4984 
4985 	tl_serializer_exit(tep);
4986 	tl_refrele(tep);
4987 }
4988 
4989 /*
4990  * Handle T_unitdata_req.
4991  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4992  * If this is a socket pass through options unmodified.
4993  */
4994 static void
4995 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4996 {
4997 	queue_t			*wq = tep->te_wq;
4998 	soux_addr_t		ux_addr;
4999 	tl_addr_t		destaddr;
5000 	uchar_t			*addr_startp;
5001 	tl_endpt_t		*peer_tep;
5002 	struct T_unitdata_ind	*udind;
5003 	struct T_unitdata_req	*udreq;
5004 	ssize_t			msz, ui_sz, reuse_mb_sz;
5005 	t_scalar_t		alen, aoff, olen, ooff;
5006 	t_scalar_t		oldolen = 0;
5007 	cred_t			*cr = NULL;
5008 	pid_t			cpid;
5009 
5010 	udreq = (struct T_unitdata_req *)mp->b_rptr;
5011 	msz = MBLKL(mp);
5012 
5013 	/*
5014 	 * validate the state
5015 	 */
5016 	if (tep->te_state != TS_IDLE) {
5017 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
5018 		    SL_TRACE|SL_ERROR,
5019 		    "tl_wput:T_CONN_REQ:out of state"));
5020 		tl_merror(wq, mp, EPROTO);
5021 		return;
5022 	}
5023 	/*
5024 	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5025 	 * (state does not change on this event)
5026 	 */
5027 
5028 	/*
5029 	 * validate the message
5030 	 * Note: dereference fields in struct inside message only
5031 	 * after validating the message length.
5032 	 */
5033 	if (msz < sizeof (struct T_unitdata_req)) {
5034 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5035 		    "tl_unitdata:invalid message length"));
5036 		tl_merror(wq, mp, EINVAL);
5037 		return;
5038 	}
5039 	alen = udreq->DEST_length;
5040 	aoff = udreq->DEST_offset;
5041 	oldolen = olen = udreq->OPT_length;
5042 	ooff = udreq->OPT_offset;
5043 	if (olen == 0)
5044 		ooff = 0;
5045 
5046 	if (IS_SOCKET(tep)) {
5047 		if ((alen != TL_SOUX_ADDRLEN) ||
5048 		    (aoff < 0) ||
5049 		    (aoff + alen > msz) ||
5050 		    (olen < 0) || (ooff < 0) ||
5051 		    ((olen > 0) && ((ooff + olen) > msz))) {
5052 			(void) (STRLOG(TL_ID, tep->te_minor,
5053 			    1, SL_TRACE|SL_ERROR,
5054 			    "tl_unitdata_req: invalid socket addr "
5055 			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5056 			    (int)msz, alen, aoff, olen, ooff));
5057 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5058 			return;
5059 		}
5060 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5061 
5062 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5063 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5064 			(void) (STRLOG(TL_ID, tep->te_minor,
5065 			    1, SL_TRACE|SL_ERROR,
5066 			    "tl_conn_req: invalid socket magic"));
5067 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5068 			return;
5069 		}
5070 	} else {
5071 		if ((alen < 0) ||
5072 		    (aoff < 0) ||
5073 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5074 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5075 		    ((aoff + alen) < 0) ||
5076 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5077 		    (olen < 0) ||
5078 		    (ooff < 0) ||
5079 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5080 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5081 				    SL_TRACE|SL_ERROR,
5082 				    "tl_unitdata:invalid unit data message"));
5083 			tl_merror(wq, mp, EINVAL);
5084 			return;
5085 		}
5086 	}
5087 
5088 	/* Options not supported unless it's a socket */
5089 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5090 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5091 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5092 		tl_uderr(wq, mp, EPROTO);
5093 		return;
5094 	}
5095 #ifdef DEBUG
5096 	/*
5097 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5098 	 * if (! assertion)
5099 	 *	log warning;
5100 	 */
5101 	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5102 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5103 		    "tl_unitdata:addr overlaps TPI message"));
5104 	}
5105 #endif
5106 	/*
5107 	 * get destination endpoint
5108 	 */
5109 	destaddr.ta_alen = alen;
5110 	destaddr.ta_abuf = mp->b_rptr + aoff;
5111 	destaddr.ta_zoneid = tep->te_zoneid;
5112 
5113 	/*
5114 	 * Check whether the destination is the same that was used previously
5115 	 * and the destination endpoint is in the right state. If something is
5116 	 * wrong, find destination again and cache it.
5117 	 */
5118 	peer_tep = tep->te_lastep;
5119 
5120 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5121 	    (peer_tep->te_state != TS_IDLE) ||
5122 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5123 		/*
5124 		 * Not the same as cached destination , need to find the right
5125 		 * destination.
5126 		 */
5127 		peer_tep = (IS_SOCKET(tep) ?
5128 		    tl_sock_find_peer(tep, &ux_addr) :
5129 		    tl_find_peer(tep, &destaddr));
5130 
5131 		if (peer_tep == NULL) {
5132 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5133 			    SL_TRACE|SL_ERROR,
5134 			    "tl_unitdata:no one at destination address"));
5135 			tl_uderr(wq, mp, ECONNRESET);
5136 			return;
5137 		}
5138 
5139 		/*
5140 		 * Cache the new peer.
5141 		 */
5142 		if (tep->te_lastep != NULL)
5143 			tl_refrele(tep->te_lastep);
5144 
5145 		tep->te_lastep = peer_tep;
5146 	}
5147 
5148 	if (peer_tep->te_state != TS_IDLE) {
5149 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5150 		    "tl_unitdata:provider in invalid state"));
5151 		tl_uderr(wq, mp, EPROTO);
5152 		return;
5153 	}
5154 
5155 	ASSERT(peer_tep->te_rq != NULL);
5156 
5157 	/*
5158 	 * Put it back if flow controlled except when we are closing.
5159 	 * Note: Messages already on queue when we are closing is bounded
5160 	 * so we can ignore flow control.
5161 	 */
5162 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5163 		/* record what we are flow controlled on */
5164 		if (tep->te_flowq != NULL) {
5165 			list_remove(&tep->te_flowq->te_flowlist, tep);
5166 		}
5167 		list_insert_head(&peer_tep->te_flowlist, tep);
5168 		tep->te_flowq = peer_tep;
5169 		TL_PUTBQ(tep, mp);
5170 		return;
5171 	}
5172 	/*
5173 	 * prepare indication message
5174 	 */
5175 
5176 	/*
5177 	 * calculate length of message
5178 	 */
5179 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5180 		cr = msg_getcred(mp, &cpid);
5181 		ASSERT(cr != NULL);
5182 
5183 		if (peer_tep->te_flag & TL_SETCRED) {
5184 			ASSERT(olen == 0);
5185 			olen = (t_scalar_t)sizeof (struct opthdr) +
5186 			    OPTLEN(sizeof (tl_credopt_t));
5187 						/* 1 option only */
5188 		} else if (peer_tep->te_flag & TL_SETUCRED) {
5189 			ASSERT(olen == 0);
5190 			olen = (t_scalar_t)sizeof (struct opthdr) +
5191 			    OPTLEN(ucredminsize(cr));
5192 						/* 1 option only */
5193 		} else {
5194 			/* Possibly more than one option */
5195 			olen += (t_scalar_t)sizeof (struct T_opthdr) +
5196 			    OPTLEN(ucredminsize(cr));
5197 		}
5198 	}
5199 
5200 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5201 	reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5202 
5203 	/*
5204 	 * If the unitdata_ind fits and we are not adding options
5205 	 * reuse the udreq mblk.
5206 	 *
5207 	 * Otherwise, it is possible we need to append an option if one of the
5208 	 * te_flag bits is set. This requires extra space in the data block for
5209 	 * the additional option but the traditional technique used below to
5210 	 * allocate a new block and copy into it will not work when there is a
5211 	 * message block with a free pointer (since we don't know anything
5212 	 * about the layout of the data, pointers referencing or within the
5213 	 * data, etc.). To handle this possibility the upper layers may have
5214 	 * preallocated some space to use for appending an option. We check the
5215 	 * overall mblock size against the size we need ('reuse_mb_sz' with the
5216 	 * original address length [alen] to ensure we won't overrun the
5217 	 * current mblk data size) to see if there is free space and thus
5218 	 * avoid allocating a new message block.
5219 	 */
5220 	if (msz >= ui_sz && alen >= tep->te_alen &&
5221 	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5222 		/*
5223 		 * Reuse the original mblk. Leave options in place.
5224 		 */
5225 		udind =  (struct T_unitdata_ind *)mp->b_rptr;
5226 		udind->PRIM_type = T_UNITDATA_IND;
5227 		udind->SRC_length = tep->te_alen;
5228 		addr_startp = mp->b_rptr + udind->SRC_offset;
5229 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5230 
5231 	} else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5232 	    mp->b_datap->db_frtnp != NULL) {
5233 		/*
5234 		 * We have a message block with a free pointer, but extra space
5235 		 * has been pre-allocated for us in case we need to append an
5236 		 * option. Reuse the original mblk, leaving existing options in
5237 		 * place.
5238 		 */
5239 		udind =  (struct T_unitdata_ind *)mp->b_rptr;
5240 		udind->PRIM_type = T_UNITDATA_IND;
5241 		udind->SRC_length = tep->te_alen;
5242 		addr_startp = mp->b_rptr + udind->SRC_offset;
5243 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5244 
5245 		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5246 			ASSERT(cr != NULL);
5247 			/*
5248 			 * We're appending one new option here after the
5249 			 * original ones.
5250 			 */
5251 			tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5252 			    cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5253 		}
5254 
5255 	} else if (mp->b_datap->db_frtnp != NULL) {
5256 		/*
5257 		 * The next block creates a new mp and tries to copy the data
5258 		 * block into it, but that cannot handle a message with a free
5259 		 * pointer (for more details see the comment in kstrputmsg()
5260 		 * where dupmsg() is called). Since we can never properly
5261 		 * duplicate the mp while also extending the data, just error
5262 		 * out now.
5263 		 */
5264 		tl_uderr(wq, mp, EPROTO);
5265 		return;
5266 	} else {
5267 		/* Allocate a new T_unitdata_ind message */
5268 		mblk_t *ui_mp;
5269 
5270 		ui_mp = allocb(ui_sz, BPRI_MED);
5271 		if (! ui_mp) {
5272 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5273 			    "tl_unitdata:allocb failure:message queued"));
5274 			tl_memrecover(wq, mp, ui_sz);
5275 			return;
5276 		}
5277 
5278 		/*
5279 		 * fill in T_UNITDATA_IND contents
5280 		 */
5281 		DB_TYPE(ui_mp) = M_PROTO;
5282 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5283 		udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5284 		udind->PRIM_type = T_UNITDATA_IND;
5285 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5286 		udind->SRC_length = tep->te_alen;
5287 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5288 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5289 		udind->OPT_offset =
5290 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5291 		udind->OPT_length = olen;
5292 		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5293 
5294 			if (oldolen != 0) {
5295 				bcopy((void *)((uintptr_t)udreq + ooff),
5296 				    (void *)((uintptr_t)udind +
5297 				    udind->OPT_offset),
5298 				    oldolen);
5299 			}
5300 			ASSERT(cr != NULL);
5301 
5302 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5303 			    oldolen, cr, cpid,
5304 			    peer_tep->te_flag, peer_tep->te_credp);
5305 		} else {
5306 			bcopy((void *)((uintptr_t)udreq + ooff),
5307 			    (void *)((uintptr_t)udind + udind->OPT_offset),
5308 			    olen);
5309 		}
5310 
5311 		/*
5312 		 * relink data blocks from mp to ui_mp
5313 		 */
5314 		ui_mp->b_cont = mp->b_cont;
5315 		freeb(mp);
5316 		mp = ui_mp;
5317 	}
5318 	/*
5319 	 * send indication message
5320 	 */
5321 	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5322 	putnext(peer_tep->te_rq, mp);
5323 }
5324 
5325 
5326 
5327 /*
5328  * Check if a given addr is in use.
5329  * Endpoint ptr returned or NULL if not found.
5330  * The name space is separate for each mode. This implies that
5331  * sockets get their own name space.
5332  */
5333 static tl_endpt_t *
5334 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5335 {
5336 	tl_endpt_t *peer_tep = NULL;
5337 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5338 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5339 
5340 	ASSERT(! IS_SOCKET(tep));
5341 
5342 	ASSERT(ap != NULL && ap->ta_alen > 0);
5343 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5344 	ASSERT(ap->ta_abuf != NULL);
5345 	EQUIV(rc == 0, peer_tep != NULL);
5346 	IMPLY(rc == 0,
5347 	    (tep->te_zoneid == peer_tep->te_zoneid) &&
5348 	    (tep->te_transport == peer_tep->te_transport));
5349 
5350 	if ((rc == 0) && (peer_tep->te_closing)) {
5351 		tl_refrele(peer_tep);
5352 		peer_tep = NULL;
5353 	}
5354 
5355 	return (peer_tep);
5356 }
5357 
5358 /*
5359  * Find peer for a socket based on unix domain address.
5360  * For implicit addresses our peer can be found by minor number in ai hash. For
5361  * explicit binds we look vnode address at addr_hash.
5362  */
5363 static tl_endpt_t *
5364 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5365 {
5366 	tl_endpt_t *peer_tep = NULL;
5367 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5368 	    tep->te_aihash : tep->te_addrhash;
5369 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5370 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5371 
5372 	ASSERT(IS_SOCKET(tep));
5373 	EQUIV(rc == 0, peer_tep != NULL);
5374 	IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5375 
5376 	if (peer_tep != NULL) {
5377 		/* Don't attempt to use closing peer. */
5378 		if (peer_tep->te_closing)
5379 			goto errout;
5380 
5381 		/*
5382 		 * Cross-zone unix sockets are permitted, but for Trusted
5383 		 * Extensions only, the "server" for these must be in the
5384 		 * global zone.
5385 		 */
5386 		if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5387 		    is_system_labeled() &&
5388 		    (peer_tep->te_zoneid != GLOBAL_ZONEID))
5389 			goto errout;
5390 	}
5391 
5392 	return (peer_tep);
5393 
5394 errout:
5395 	tl_refrele(peer_tep);
5396 	return (NULL);
5397 }
5398 
5399 /*
5400  * Generate a free addr and return it in struct pointed by ap
5401  * but allocating space for address buffer.
5402  * The generated address will be at least 4 bytes long and, if req->ta_alen
5403  * exceeds 4 bytes, be req->ta_alen bytes long.
5404  *
5405  * If address is found it will be inserted in the hash.
5406  *
5407  * If req->ta_alen is larger than the default alen (4 bytes) the last
5408  * alen-4 bytes will always be the same as in req.
5409  *
5410  * Return 0 for failure.
5411  * Return non-zero for success.
5412  */
5413 static boolean_t
5414 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5415 {
5416 	t_scalar_t	alen;
5417 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5418 
5419 	ASSERT(tep->te_hash_hndl != NULL);
5420 	ASSERT(! IS_SOCKET(tep));
5421 
5422 	if (tep->te_hash_hndl == NULL)
5423 		return (B_FALSE);
5424 
5425 	/*
5426 	 * check if default addr is in use
5427 	 * if it is - bump it and try again
5428 	 */
5429 	if (req == NULL) {
5430 		alen = sizeof (uint32_t);
5431 	} else {
5432 		alen = max(req->ta_alen, sizeof (uint32_t));
5433 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5434 	}
5435 
5436 	if (tep->te_alen < alen) {
5437 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5438 
5439 		/*
5440 		 * Not enough space in tep->ta_ap to hold the address,
5441 		 * allocate a bigger space.
5442 		 */
5443 		if (abuf == NULL)
5444 			return (B_FALSE);
5445 
5446 		if (tep->te_alen > 0)
5447 			kmem_free(tep->te_abuf, tep->te_alen);
5448 
5449 		tep->te_alen = alen;
5450 		tep->te_abuf = abuf;
5451 	}
5452 
5453 	/* Copy in the address in req */
5454 	if (req != NULL) {
5455 		ASSERT(alen >= req->ta_alen);
5456 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5457 	}
5458 
5459 	/*
5460 	 * First try minor number then try default addresses.
5461 	 */
5462 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5463 
5464 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5465 		if (mod_hash_insert_reserve(tep->te_addrhash,
5466 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5467 		    tep->te_hash_hndl) == 0) {
5468 			/*
5469 			 * found free address
5470 			 */
5471 			tep->te_flag |= TL_ADDRHASHED;
5472 			tep->te_hash_hndl = NULL;
5473 
5474 			return (B_TRUE); /* successful return */
5475 		}
5476 		/*
5477 		 * Use default address.
5478 		 */
5479 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5480 		atomic_inc_32(&tep->te_defaddr);
5481 	}
5482 
5483 	/*
5484 	 * Failed to find anything.
5485 	 */
5486 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5487 	    "tl_get_any_addr:looped 2^32 times"));
5488 	return (B_FALSE);
5489 }
5490 
5491 /*
5492  * reallocb + set r/w ptrs to reflect size.
5493  */
5494 static mblk_t *
5495 tl_resizemp(mblk_t *mp, ssize_t new_size)
5496 {
5497 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5498 		return (NULL);
5499 
5500 	mp->b_rptr = DB_BASE(mp);
5501 	mp->b_wptr = mp->b_rptr + new_size;
5502 	return (mp);
5503 }
5504 
5505 static void
5506 tl_cl_backenable(tl_endpt_t *tep)
5507 {
5508 	list_t *l = &tep->te_flowlist;
5509 	tl_endpt_t *elp;
5510 
5511 	ASSERT(IS_CLTS(tep));
5512 
5513 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5514 		ASSERT(tep->te_ser == elp->te_ser);
5515 		ASSERT(elp->te_flowq == tep);
5516 		if (! elp->te_closing)
5517 			TL_QENABLE(elp);
5518 		elp->te_flowq = NULL;
5519 		list_remove(l, elp);
5520 	}
5521 }
5522 
5523 /*
5524  * Unconnect endpoints.
5525  */
5526 static void
5527 tl_co_unconnect(tl_endpt_t *tep)
5528 {
5529 	tl_endpt_t	*peer_tep = tep->te_conp;
5530 	tl_endpt_t	*srv_tep = tep->te_oconp;
5531 	list_t		*l;
5532 	tl_icon_t  	*tip;
5533 	tl_endpt_t	*cl_tep;
5534 	mblk_t		*d_mp;
5535 
5536 	ASSERT(IS_COTS(tep));
5537 	/*
5538 	 * If our peer is closing, don't use it.
5539 	 */
5540 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5541 		TL_UNCONNECT(tep->te_conp);
5542 		peer_tep = NULL;
5543 	}
5544 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5545 		TL_UNCONNECT(tep->te_oconp);
5546 		srv_tep = NULL;
5547 	}
5548 
5549 	if (tep->te_nicon > 0) {
5550 		l = &tep->te_iconp;
5551 		/*
5552 		 * If incoming requests pending, change state
5553 		 * of clients on disconnect ind event and send
5554 		 * discon_ind pdu to modules above them
5555 		 * for server: all clients get disconnect
5556 		 */
5557 
5558 		while (tep->te_nicon > 0) {
5559 			tip    = list_head(l);
5560 			cl_tep = tip->ti_tep;
5561 
5562 			if (cl_tep == NULL) {
5563 				tl_freetip(tep, tip);
5564 				continue;
5565 			}
5566 
5567 			if (cl_tep->te_oconp != NULL) {
5568 				ASSERT(cl_tep != cl_tep->te_oconp);
5569 				TL_UNCONNECT(cl_tep->te_oconp);
5570 			}
5571 
5572 			if (cl_tep->te_closing) {
5573 				tl_freetip(tep, tip);
5574 				continue;
5575 			}
5576 
5577 			enableok(cl_tep->te_wq);
5578 			TL_QENABLE(cl_tep);
5579 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5580 			if (d_mp != NULL) {
5581 				cl_tep->te_state = TS_IDLE;
5582 				putnext(cl_tep->te_rq, d_mp);
5583 			} else {
5584 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5585 				    SL_TRACE|SL_ERROR,
5586 				    "tl_co_unconnect:icmng: "
5587 				    "allocb failure"));
5588 			}
5589 			tl_freetip(tep, tip);
5590 		}
5591 	} else if (srv_tep != NULL) {
5592 		/*
5593 		 * If outgoing request pending, change state
5594 		 * of server on discon ind event
5595 		 */
5596 
5597 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5598 		    IS_COTSORD(srv_tep) &&
5599 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5600 			/*
5601 			 * Queue ordrel_ind for server to be picked up
5602 			 * when the connection is accepted.
5603 			 */
5604 			d_mp = tl_ordrel_ind_alloc();
5605 		} else {
5606 			/*
5607 			 * send discon_ind to server
5608 			 */
5609 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5610 		}
5611 		if (d_mp == NULL) {
5612 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5613 			    SL_TRACE|SL_ERROR,
5614 			    "tl_co_unconnect:outgoing:allocb failure"));
5615 			TL_UNCONNECT(tep->te_oconp);
5616 			goto discon_peer;
5617 		}
5618 
5619 		/*
5620 		 * If this is a socket the T_DISCON_IND is queued with
5621 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5622 		 * from the list of pending connections.
5623 		 * Note that when te_oconp is set the peer better have
5624 		 * a t_connind_t for the client.
5625 		 */
5626 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5627 			/*
5628 			 * Queue the disconnection message.
5629 			 */
5630 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5631 		} else {
5632 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5633 			if (tip == NULL) {
5634 				freemsg(d_mp);
5635 			} else {
5636 				ASSERT(tep == tip->ti_tep);
5637 				ASSERT(tep->te_ser == srv_tep->te_ser);
5638 				/*
5639 				 * Delete tip from the server list.
5640 				 */
5641 				if (srv_tep->te_nicon == 1) {
5642 					srv_tep->te_state =
5643 					    NEXTSTATE(TE_DISCON_IND2,
5644 					    srv_tep->te_state);
5645 				} else {
5646 					srv_tep->te_state =
5647 					    NEXTSTATE(TE_DISCON_IND3,
5648 					    srv_tep->te_state);
5649 				}
5650 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5651 				    T_DISCON_IND);
5652 				putnext(srv_tep->te_rq, d_mp);
5653 				tl_freetip(srv_tep, tip);
5654 			}
5655 			TL_UNCONNECT(tep->te_oconp);
5656 			srv_tep = NULL;
5657 		}
5658 	} else if (peer_tep != NULL) {
5659 		/*
5660 		 * unconnect existing connection
5661 		 * If connected, change state of peer on
5662 		 * discon ind event and send discon ind pdu
5663 		 * to module above it
5664 		 */
5665 
5666 		ASSERT(tep->te_ser == peer_tep->te_ser);
5667 		if (IS_COTSORD(peer_tep) &&
5668 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5669 		    peer_tep->te_state == TS_DATA_XFER)) {
5670 			/*
5671 			 * send ordrel ind
5672 			 */
5673 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5674 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5675 			    peer_tep->te_state,
5676 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5677 			d_mp = tl_ordrel_ind_alloc();
5678 			if (! d_mp) {
5679 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5680 				    SL_TRACE|SL_ERROR,
5681 				    "tl_co_unconnect:connected:"
5682 				    "allocb failure"));
5683 				/*
5684 				 * Continue with cleaning up peer as
5685 				 * this side may go away with the close
5686 				 */
5687 				TL_QENABLE(peer_tep);
5688 				goto discon_peer;
5689 			}
5690 			peer_tep->te_state =
5691 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5692 
5693 			putnext(peer_tep->te_rq, d_mp);
5694 			/*
5695 			 * Handle flow control case.  This will generate
5696 			 * a t_discon_ind message with reason 0 if there
5697 			 * is data queued on the write side.
5698 			 */
5699 			TL_QENABLE(peer_tep);
5700 		} else if (IS_COTSORD(peer_tep) &&
5701 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5702 			/*
5703 			 * Sent an ordrel_ind. We send a discon with
5704 			 * with error 0 to inform that the peer is gone.
5705 			 */
5706 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5707 			    SL_TRACE|SL_ERROR,
5708 			    "tl_co_unconnect: discon in state %d",
5709 			    tep->te_state));
5710 			tl_discon_ind(peer_tep, 0);
5711 		} else {
5712 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5713 			    SL_TRACE|SL_ERROR,
5714 			    "tl_co_unconnect: state %d", tep->te_state));
5715 			tl_discon_ind(peer_tep, ECONNRESET);
5716 		}
5717 
5718 discon_peer:
5719 		/*
5720 		 * Disconnect cross-pointers only for close
5721 		 */
5722 		if (tep->te_closing) {
5723 			peer_tep = tep->te_conp;
5724 			TL_REMOVE_PEER(peer_tep->te_conp);
5725 			TL_REMOVE_PEER(tep->te_conp);
5726 		}
5727 	}
5728 }
5729 
5730 /*
5731  * Note: The following routine does not recover from allocb()
5732  * failures
5733  * The reason should be from the <sys/errno.h> space.
5734  */
5735 static void
5736 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5737 {
5738 	mblk_t *d_mp;
5739 
5740 	if (tep->te_closing)
5741 		return;
5742 
5743 	/*
5744 	 * flush the queues.
5745 	 */
5746 	flushq(tep->te_rq, FLUSHDATA);
5747 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5748 
5749 	/*
5750 	 * send discon ind
5751 	 */
5752 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5753 	if (! d_mp) {
5754 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5755 		    "tl_discon_ind:allocb failure"));
5756 		return;
5757 	}
5758 	tep->te_state = TS_IDLE;
5759 	putnext(tep->te_rq, d_mp);
5760 }
5761 
5762 /*
5763  * Note: The following routine does not recover from allocb()
5764  * failures
5765  * The reason should be from the <sys/errno.h> space.
5766  */
5767 static mblk_t *
5768 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5769 {
5770 	mblk_t *mp;
5771 	struct T_discon_ind *tdi;
5772 
5773 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5774 		DB_TYPE(mp) = M_PROTO;
5775 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5776 		tdi = (struct T_discon_ind *)mp->b_rptr;
5777 		tdi->PRIM_type = T_DISCON_IND;
5778 		tdi->DISCON_reason = reason;
5779 		tdi->SEQ_number = seqnum;
5780 	}
5781 	return (mp);
5782 }
5783 
5784 
5785 /*
5786  * Note: The following routine does not recover from allocb()
5787  * failures
5788  */
5789 static mblk_t *
5790 tl_ordrel_ind_alloc(void)
5791 {
5792 	mblk_t *mp;
5793 	struct T_ordrel_ind *toi;
5794 
5795 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5796 		DB_TYPE(mp) = M_PROTO;
5797 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5798 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5799 		toi->PRIM_type = T_ORDREL_IND;
5800 	}
5801 	return (mp);
5802 }
5803 
5804 
5805 /*
5806  * Lookup the seqno in the list of queued connections.
5807  */
5808 static tl_icon_t *
5809 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5810 {
5811 	list_t *l = &tep->te_iconp;
5812 	tl_icon_t *tip = list_head(l);
5813 
5814 	ASSERT(seqno != 0);
5815 
5816 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5817 		;
5818 
5819 	return (tip);
5820 }
5821 
5822 /*
5823  * Queue data for a given T_CONN_IND while verifying that redundant
5824  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5825  * Used when the originator of the connection closes.
5826  */
5827 static void
5828 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5829 {
5830 	tl_icon_t		*tip;
5831 	mblk_t			**mpp, *mp;
5832 	int			prim, nprim;
5833 
5834 	if (nmp->b_datap->db_type == M_PROTO)
5835 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5836 	else
5837 		nprim = -1;	/* M_DATA */
5838 
5839 	tip = tl_icon_find(tep, seqno);
5840 	if (tip == NULL) {
5841 		freemsg(nmp);
5842 		return;
5843 	}
5844 
5845 	ASSERT(tip->ti_seqno != 0);
5846 	mpp = &tip->ti_mp;
5847 	while (*mpp != NULL) {
5848 		mp = *mpp;
5849 
5850 		if (mp->b_datap->db_type == M_PROTO)
5851 			prim = ((union T_primitives *)mp->b_rptr)->type;
5852 		else
5853 			prim = -1;	/* M_DATA */
5854 
5855 		/*
5856 		 * Allow nothing after a T_DISCON_IND
5857 		 */
5858 		if (prim == T_DISCON_IND) {
5859 			freemsg(nmp);
5860 			return;
5861 		}
5862 		/*
5863 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5864 		 */
5865 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5866 			freemsg(nmp);
5867 			return;
5868 		}
5869 		mpp = &(mp->b_next);
5870 	}
5871 	*mpp = nmp;
5872 }
5873 
5874 /*
5875  * Verify if a certain TPI primitive exists on the connind queue.
5876  * Use prim -1 for M_DATA.
5877  * Return non-zero if found.
5878  */
5879 static boolean_t
5880 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5881 {
5882 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5883 	boolean_t found = B_FALSE;
5884 
5885 	if (tip != NULL) {
5886 		mblk_t *mp;
5887 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5888 			found = (DB_TYPE(mp) == M_PROTO &&
5889 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5890 		}
5891 	}
5892 	return (found);
5893 }
5894 
5895 /*
5896  * Send the b_next mblk chain that has accumulated before the connection
5897  * was accepted. Perform the necessary state transitions.
5898  */
5899 static void
5900 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5901 {
5902 	mblk_t			*mp;
5903 	union T_primitives	*primp;
5904 
5905 	if (tep->te_closing) {
5906 		tl_icon_freemsgs(mpp);
5907 		return;
5908 	}
5909 
5910 	ASSERT(tep->te_state == TS_DATA_XFER);
5911 	ASSERT(tep->te_rq->q_first == NULL);
5912 
5913 	while ((mp = *mpp) != NULL) {
5914 		*mpp = mp->b_next;
5915 		mp->b_next = NULL;
5916 
5917 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5918 		switch (DB_TYPE(mp)) {
5919 		default:
5920 			freemsg(mp);
5921 			break;
5922 		case M_DATA:
5923 			putnext(tep->te_rq, mp);
5924 			break;
5925 		case M_PROTO:
5926 			primp = (union T_primitives *)mp->b_rptr;
5927 			switch (primp->type) {
5928 			case T_UNITDATA_IND:
5929 			case T_DATA_IND:
5930 			case T_OPTDATA_IND:
5931 			case T_EXDATA_IND:
5932 				putnext(tep->te_rq, mp);
5933 				break;
5934 			case T_ORDREL_IND:
5935 				tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5936 				    tep->te_state);
5937 				putnext(tep->te_rq, mp);
5938 				break;
5939 			case T_DISCON_IND:
5940 				tep->te_state = TS_IDLE;
5941 				putnext(tep->te_rq, mp);
5942 				break;
5943 			default:
5944 #ifdef DEBUG
5945 				cmn_err(CE_PANIC,
5946 				    "tl_icon_sendmsgs: unknown primitive");
5947 #endif /* DEBUG */
5948 				freemsg(mp);
5949 				break;
5950 			}
5951 			break;
5952 		}
5953 	}
5954 }
5955 
5956 /*
5957  * Free the b_next mblk chain that has accumulated before the connection
5958  * was accepted.
5959  */
5960 static void
5961 tl_icon_freemsgs(mblk_t **mpp)
5962 {
5963 	mblk_t *mp;
5964 
5965 	while ((mp = *mpp) != NULL) {
5966 		*mpp = mp->b_next;
5967 		mp->b_next = NULL;
5968 		freemsg(mp);
5969 	}
5970 }
5971 
5972 /*
5973  * Send M_ERROR
5974  * Note: assumes caller ensured enough space in mp or enough
5975  *	memory available. Does not attempt recovery from allocb()
5976  *	failures
5977  */
5978 
5979 static void
5980 tl_merror(queue_t *wq, mblk_t *mp, int error)
5981 {
5982 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5983 
5984 	if (tep->te_closing) {
5985 		freemsg(mp);
5986 		return;
5987 	}
5988 
5989 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
5990 	    SL_TRACE|SL_ERROR,
5991 	    "tl_merror: tep=%p, err=%d", (void *)tep, error));
5992 
5993 	/*
5994 	 * flush all messages on queue. we are shutting
5995 	 * the stream down on fatal error
5996 	 */
5997 	flushq(wq, FLUSHALL);
5998 	if (IS_COTS(tep)) {
5999 		/* connection oriented - unconnect endpoints */
6000 		tl_co_unconnect(tep);
6001 	}
6002 	if (mp->b_cont) {
6003 		freemsg(mp->b_cont);
6004 		mp->b_cont = NULL;
6005 	}
6006 
6007 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
6008 		freemsg(mp);
6009 		mp = allocb(1, BPRI_HI);
6010 		if (!mp) {
6011 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6012 			    SL_TRACE|SL_ERROR,
6013 			    "tl_merror:M_PROTO: out of memory"));
6014 			return;
6015 		}
6016 	}
6017 	if (mp) {
6018 		DB_TYPE(mp) = M_ERROR;
6019 		mp->b_rptr = DB_BASE(mp);
6020 		*mp->b_rptr = (char)error;
6021 		mp->b_wptr = mp->b_rptr + sizeof (char);
6022 		qreply(wq, mp);
6023 	} else {
6024 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
6025 	}
6026 }
6027 
6028 static void
6029 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
6030 {
6031 	ASSERT(cr != NULL);
6032 
6033 	if (flag & TL_SETCRED) {
6034 		struct opthdr *opt = (struct opthdr *)buf;
6035 		tl_credopt_t *tlcred;
6036 
6037 		opt->level = TL_PROT_LEVEL;
6038 		opt->name = TL_OPT_PEER_CRED;
6039 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
6040 
6041 		tlcred = (tl_credopt_t *)(opt + 1);
6042 		tlcred->tc_uid = crgetuid(cr);
6043 		tlcred->tc_gid = crgetgid(cr);
6044 		tlcred->tc_ruid = crgetruid(cr);
6045 		tlcred->tc_rgid = crgetrgid(cr);
6046 		tlcred->tc_suid = crgetsuid(cr);
6047 		tlcred->tc_sgid = crgetsgid(cr);
6048 		tlcred->tc_ngroups = crgetngroups(cr);
6049 	} else if (flag & TL_SETUCRED) {
6050 		struct opthdr *opt = (struct opthdr *)buf;
6051 
6052 		opt->level = TL_PROT_LEVEL;
6053 		opt->name = TL_OPT_PEER_UCRED;
6054 		opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6055 
6056 		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6057 	} else {
6058 		struct T_opthdr *topt = (struct T_opthdr *)buf;
6059 		ASSERT(flag & TL_SOCKUCRED);
6060 
6061 		topt->level = SOL_SOCKET;
6062 		topt->name = SCM_UCRED;
6063 		topt->len = ucredminsize(cr) + sizeof (*topt);
6064 		topt->status = 0;
6065 		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6066 	}
6067 }
6068 
6069 /* ARGSUSED */
6070 static int
6071 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6072 {
6073 	/* no default value processed in protocol specific code currently */
6074 	return (-1);
6075 }
6076 
6077 /* ARGSUSED */
6078 static int
6079 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6080 {
6081 	int len;
6082 	tl_endpt_t *tep;
6083 	int *valp;
6084 
6085 	tep = (tl_endpt_t *)wq->q_ptr;
6086 
6087 	len = 0;
6088 
6089 	/*
6090 	 * Assumes: option level and name sanity check done elsewhere
6091 	 */
6092 
6093 	switch (level) {
6094 	case SOL_SOCKET:
6095 		if (! IS_SOCKET(tep))
6096 			break;
6097 		switch (name) {
6098 		case SO_RECVUCRED:
6099 			len = sizeof (int);
6100 			valp = (int *)ptr;
6101 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6102 			break;
6103 		default:
6104 			break;
6105 		}
6106 		break;
6107 	case TL_PROT_LEVEL:
6108 		switch (name) {
6109 		case TL_OPT_PEER_CRED:
6110 		case TL_OPT_PEER_UCRED:
6111 			/*
6112 			 * option not supposed to retrieved directly
6113 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6114 			 * when some internal flags set by other options
6115 			 * Direct retrieval always designed to fail(ignored)
6116 			 * for this option.
6117 			 */
6118 			break;
6119 		}
6120 	}
6121 	return (len);
6122 }
6123 
6124 /* ARGSUSED */
6125 static int
6126 tl_set_opt(
6127 	queue_t		*wq,
6128 	uint_t		mgmt_flags,
6129 	int		level,
6130 	int		name,
6131 	uint_t		inlen,
6132 	uchar_t		*invalp,
6133 	uint_t		*outlenp,
6134 	uchar_t		*outvalp,
6135 	void		*thisdg_attrs,
6136 	cred_t		*cr)
6137 {
6138 	int error;
6139 	tl_endpt_t *tep;
6140 
6141 	tep = (tl_endpt_t *)wq->q_ptr;
6142 
6143 	error = 0;		/* NOERROR */
6144 
6145 	/*
6146 	 * Assumes: option level and name sanity checks done elsewhere
6147 	 */
6148 
6149 	switch (level) {
6150 	case SOL_SOCKET:
6151 		if (! IS_SOCKET(tep)) {
6152 			error = EINVAL;
6153 			break;
6154 		}
6155 		/*
6156 		 * TBD: fill in other AF_UNIX socket options and then stop
6157 		 * returning error.
6158 		 */
6159 		switch (name) {
6160 		case SO_RECVUCRED:
6161 			/*
6162 			 * We only support this for datagram sockets;
6163 			 * getpeerucred handles the connection oriented
6164 			 * transports.
6165 			 */
6166 			if (! IS_CLTS(tep)) {
6167 				error = EINVAL;
6168 				break;
6169 			}
6170 			if (*(int *)invalp == 0)
6171 				tep->te_flag &= ~TL_SOCKUCRED;
6172 			else
6173 				tep->te_flag |= TL_SOCKUCRED;
6174 			break;
6175 		default:
6176 			error = EINVAL;
6177 			break;
6178 		}
6179 		break;
6180 	case TL_PROT_LEVEL:
6181 		switch (name) {
6182 		case TL_OPT_PEER_CRED:
6183 		case TL_OPT_PEER_UCRED:
6184 			/*
6185 			 * option not supposed to be set directly
6186 			 * Its value in initialized for each endpoint at
6187 			 * driver open time.
6188 			 * Direct setting always designed to fail for this
6189 			 * option.
6190 			 */
6191 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6192 			    SL_TRACE|SL_ERROR,
6193 			    "tl_set_opt: option is not supported"));
6194 			error = EPROTO;
6195 			break;
6196 		}
6197 	}
6198 	return (error);
6199 }
6200 
6201 
6202 static void
6203 tl_timer(void *arg)
6204 {
6205 	queue_t *wq = arg;
6206 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6207 
6208 	ASSERT(tep);
6209 
6210 	tep->te_timoutid = 0;
6211 
6212 	enableok(wq);
6213 	/*
6214 	 * Note: can call wsrv directly here and save context switch
6215 	 * Consider change when qtimeout (not timeout) is active
6216 	 */
6217 	qenable(wq);
6218 }
6219 
6220 static void
6221 tl_buffer(void *arg)
6222 {
6223 	queue_t *wq = arg;
6224 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6225 
6226 	ASSERT(tep);
6227 
6228 	tep->te_bufcid = 0;
6229 	tep->te_nowsrv = B_FALSE;
6230 
6231 	enableok(wq);
6232 	/*
6233 	 *  Note: can call wsrv directly here and save context switch
6234 	 * Consider change when qbufcall (not bufcall) is active
6235 	 */
6236 	qenable(wq);
6237 }
6238 
6239 static void
6240 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6241 {
6242 	tl_endpt_t *tep;
6243 
6244 	tep = (tl_endpt_t *)wq->q_ptr;
6245 
6246 	if (tep->te_closing) {
6247 		freemsg(mp);
6248 		return;
6249 	}
6250 	noenable(wq);
6251 
6252 	(void) insq(wq, wq->q_first, mp);
6253 
6254 	if (tep->te_bufcid || tep->te_timoutid) {
6255 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6256 		    "tl_memrecover:recover %p pending", (void *)wq));
6257 		return;
6258 	}
6259 
6260 	if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6261 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6262 		    drv_usectohz(TL_BUFWAIT));
6263 	}
6264 }
6265 
6266 static void
6267 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6268 {
6269 	ASSERT(tip->ti_seqno != 0);
6270 
6271 	if (tip->ti_mp != NULL) {
6272 		tl_icon_freemsgs(&tip->ti_mp);
6273 		tip->ti_mp = NULL;
6274 	}
6275 	if (tip->ti_tep != NULL) {
6276 		tl_refrele(tip->ti_tep);
6277 		tip->ti_tep = NULL;
6278 	}
6279 	list_remove(&tep->te_iconp, tip);
6280 	kmem_free(tip, sizeof (tl_icon_t));
6281 	tep->te_nicon--;
6282 }
6283 
6284 /*
6285  * Remove address from address hash.
6286  */
6287 static void
6288 tl_addr_unbind(tl_endpt_t *tep)
6289 {
6290 	tl_endpt_t *elp;
6291 
6292 	if (tep->te_flag & TL_ADDRHASHED) {
6293 		if (IS_SOCKET(tep)) {
6294 			(void) mod_hash_remove(tep->te_addrhash,
6295 			    (mod_hash_key_t)tep->te_vp,
6296 			    (mod_hash_val_t *)&elp);
6297 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6298 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6299 		} else {
6300 			(void) mod_hash_remove(tep->te_addrhash,
6301 			    (mod_hash_key_t)&tep->te_ap,
6302 			    (mod_hash_val_t *)&elp);
6303 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6304 			tep->te_alen = -1;
6305 			tep->te_abuf = NULL;
6306 		}
6307 		tep->te_flag &= ~TL_ADDRHASHED;
6308 	}
6309 }
6310