xref: /illumos-gate/usr/src/uts/common/io/tl.c (revision 66582b606a8194f7f3ba5b3a3a6dca5b0d346361)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
27  * Copyright (c) 2012 by Delphix. All rights reserved.
28  * Copyright (c) 2018, Joyent, Inc.
29  */
30 
31 /*
32  * Multithreaded STREAMS Local Transport Provider.
33  *
34  * OVERVIEW
35  * ========
36  *
37  * This driver provides TLI as well as socket semantics.  It provides
38  * connectionless, connection oriented, and connection oriented with orderly
39  * release transports for TLI and sockets. Each transport type has separate name
40  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
41  * this removes any name space conflicts when binding to socket style transport
42  * addresses.
43  *
44  * NOTE: There is one exception: Socket ticots and ticotsord transports share
45  * the same namespace. In fact, sockets always use ticotsord type transport.
46  *
47  * The driver mode is specified during open() by the minor number used for
48  * open.
49  *
50  *  The sockets in addition have the following semantic differences:
51  *  No support for passing up credentials (TL_SET[U]CRED).
52  *
53  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
54  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
55  *	T_OPTDATA_IND.
56  *
57  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
58  *	a T_CONN_RES is received from the acceptor. This means that a socket
59  *	connect will complete before the peer has called accept.
60  *
61  *
62  * MULTITHREADING
63  * ==============
64  *
65  * The driver does not use STREAMS protection mechanisms. Instead it uses a
66  * generic "serializer" abstraction. Most of the operations are executed behind
67  * the serializer and are, essentially single-threaded. All functions executed
68  * behind the same serializer are strictly serialized. So if one thread calls
69  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
70  * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
71  * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2)
72  * or bar(mp2, arg2); foo(mp1, arg1); But foo() and bar() will never run at the
73  * same time.
74  *
75  * Connectionless transports use a single serializer per transport type (one for
76  * TLI and one for sockets). Connection-oriented transports use finer-grained
77  * serializers.
78  *
79  * All COTS-type endpoints start their life with private serializers. During
80  * connection request processing the endpoint serializer is switched to the
81  * listener's serializer and the rest of T_CONN_REQ processing is done on the
82  * listener serializer. During T_CONN_RES processing the eager serializer is
83  * switched from listener to acceptor serializer and after that point all
84  * processing for eager and acceptor happens on this serializer. To avoid races
85  * with endpoint closes while its serializer may be changing closes are blocked
86  * while serializers are manipulated.
87  *
88  * References accounting
89  * ---------------------
90  *
91  * Endpoints are reference counted and freed when the last reference is
92  * dropped. Functions within the serializer may access an endpoint state even
93  * after an endpoint closed. The te_closing being set on the endpoint indicates
94  * that the endpoint entered its close routine.
95  *
96  * One reference is held for each opened endpoint instance. The reference
97  * counter is incremented when the endpoint is linked to another endpoint and
98  * decremented when the link disappears. It is also incremented when the
99  * endpoint is found by the hash table lookup. This increment is atomic with the
100  * lookup itself and happens while the hash table read lock is held.
101  *
102  * Close synchronization
103  * ---------------------
104  *
105  * During close the endpoint is marked as closing using te_closing flag. It is
106  * usually enough to check for te_closing flag since all other state changes
107  * happen after this flag is set and the close entered serializer. Immediately
108  * after setting te_closing flag tl_close() enters serializer and waits until
109  * the callback finishes. This allows all functions called within serializer to
110  * simply check te_closing without any locks.
111  *
112  * Serializer management.
113  * ---------------------
114  *
115  * For COTS transports serializers are created when the endpoint is constructed
116  * and destroyed when the endpoint is destructed. CLTS transports use global
117  * serializers - one for sockets and one for TLI.
118  *
119  * COTS serializers have separate reference counts to deal with several
120  * endpoints sharing the same serializer. There is a subtle problem related to
121  * the serializer destruction. The serializer should never be destroyed by any
122  * function executed inside serializer. This means that close has to wait till
123  * all serializer activity for this endpoint is finished before it can drop the
124  * last reference on the endpoint (which may as well free the serializer).  This
125  * is only relevant for COTS transports which manage serializers
126  * dynamically. For CLTS transports close may complete without waiting for all
127  * serializer activity to finish since serializer is only destroyed at driver
128  * detach time.
129  *
130  * COTS endpoints keep track of the number of outstanding requests on the
131  * serializer for the endpoint. The code handling accept() avoids changing
132  * client serializer if it has any pending messages on the serializer and
133  * instead moves acceptor to listener's serializer.
134  *
135  *
136  * Use of hash tables
137  * ------------------
138  *
139  * The driver uses modhash hash table implementation. Each transport uses two
140  * hash tables - one for finding endpoints by acceptor ID and another one for
141  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
142  * pair of hash tables since sockets only use TICOTSORD.
143  *
144  * All hash tables lookups increment a reference count for returned endpoints,
145  * so we may safely check the endpoint state even when the endpoint is removed
146  * from the hash by another thread immediately after it is found.
147  *
148  *
149  * CLOSE processing
150  * ================
151  *
152  * The driver enters serializer twice on close(). The close sequence is the
153  * following:
154  *
155  * 1) Wait until closing is safe (te_closewait becomes zero)
156  *	This step is needed to prevent close during serializer switches. In most
157  *	cases (close happening after connection establishment) te_closewait is
158  *	zero.
159  * 1) Set te_closing.
160  * 2) Call tl_close_ser() within serializer and wait for it to complete.
161  *
162  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
163  *	It also needs to clear write-side q_next pointers - this should be done
164  *	before qprocsoff().
165  *
166  *    This synchronous serializer entry during close is needed to ensure that
167  *    the queue is valid everywhere inside the serializer.
168  *
169  *    Note that in many cases close will execute tl_close_ser() synchronously,
170  *    so it will not wait at all.
171  *
172  * 3) Calls qprocsoff().
173  * 4) Calls tl_close_finish_ser() within the serializer and waits for it to
174  *	complete (for COTS transports). For CLTS transport there is no wait.
175  *
176  *	tl_close_finish_ser() Finishes the close process and wakes up waiting
177  *	close if there is any.
178  *
179  *    Note that in most cases close will enter tl_close_finish_ser()
180  *    synchronously and will not wait at all.
181  *
182  *
183  * Flow Control
184  * ============
185  *
186  * The driver implements both read and write side service routines. No one calls
187  * putq() on the read queue. The read side service routine tl_rsrv() is called
188  * when the read side stream is back-enabled. It enters serializer synchronously
189  * (waits till serializer processing is complete). Within serializer it
190  * back-enables all endpoints blocked by the queue for connection-less
191  * transports and enables write side service processing for the peer for
192  * connection-oriented transports.
193  *
194  * Read and write side service routines use special mblk_sized space in the
195  * endpoint structure to enter perimeter.
196  *
197  * Write-side flow control
198  * -----------------------
199  *
200  * Write side flow control is a bit tricky. The driver needs to deal with two
201  * message queues - the explicit STREAMS message queue maintained by
202  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
203  * queues should be synchronized to preserve message ordering and should
204  * maintain a single order determined by the order in which messages enter
205  * tl_wput(). In order to maintain the ordering between these two queues the
206  * STREAMS queue is only manipulated within the serializer, so the ordering is
207  * provided by the serializer.
208  *
209  * Functions called from the tl_wsrv() sometimes may call putbq(). To
210  * immediately stop any further processing of the STREAMS message queues the
211  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
212  * side service processing stops when the flag is set.
213  *
214  * The tl_wsrv() function enters serializer synchronously and waits for it to
215  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
216  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
217  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
218  * always bounded by the amount of messages on the STREAMS queue at the time
219  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
220  * queue from another serialized entry which can't happen in parallel. This
221  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
222  * of it draining forever while writer places new messages on the STREAMS
223  * queue).
224  *
225  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
226  *
227  *
228  * Unix Domain Sockets
229  * ===================
230  *
231  * The driver knows the structure of Unix Domain sockets addresses and treats
232  * them differently from generic TLI addresses. For sockets implicit binds are
233  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
234  * instead of using address length of zero. Explicit binds specify
235  * SOU_MAGIC_EXPLICIT as magic.
236  *
237  * For implicit binds we always use minor number as soua_vp part of the address
238  * and avoid any hash table lookups. This saves two hash tables lookups per
239  * anonymous bind.
240  *
241  * For explicit address we hash the vnode pointer instead of hashing the
242  * full-scale address+zone+length. Hashing by pointer is more efficient than
243  * hashing by the full address.
244  *
245  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
246  * tep structure, so it should never be freed.
247  *
248  * Also for sockets the driver always uses minor number as acceptor id.
249  *
250  * TPI VIOLATIONS
251  * --------------
252  *
253  * This driver violates TPI in several respects for Unix Domain Sockets:
254  *
255  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
256  *	is requested and the endpoint is already in use. There is no point in
257  *	generating an unused address since this address will be rejected by
258  *	sockfs anyway. For implicit binds it always generates a new address
259  *	(sets soua_vp to its minor number).
260  *
261  * 2) It always uses minor number as acceptor ID and never uses queue
262  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
263  *	message and they do not use the queue pointer.
264  *
265  * 3) For Listener sockets the usual sequence is to issue bind() with zero
266  *	backlog followed by listen(). The listen() should be issued with
267  *	non-zero backlog, so sotpi_listen() issues unbind request followed by
268  *	bind request to the same address but with a non-zero qlen value. Both
269  *	tl_bind() and tl_unbind() require write lock on the hash table to
270  *	insert/remove the address. The driver does not remove the address from
271  *	the hash for endpoints that are bound to the explicit address and have
272  *	backlog of zero. During T_BIND_REQ processing if the address requested
273  *	is equal to the address the endpoint already has it updates the backlog
274  *	without reinserting the address in the hash table. This optimization
275  *	avoids two hash table updates for each listener created. It also
276  *	avoids the problem of a "stolen" address when another listener may use
277  *	the same address between the unbind and bind and suddenly listen() fails
278  *	because address is in use even though the bind() succeeded.
279  *
280  *
281  * CONNECTIONLESS TRANSPORTS
282  * =========================
283  *
284  * Connectionless transports all share the same serializer (one for TLI and one
285  * for Sockets). Functions executing behind serializer can check or modify state
286  * of any endpoint.
287  *
288  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
289  * te_lastep field. The next time X talks to some address A it checks whether A
290  * is the same as Y's address and if it is there is no need to lookup Y. If the
291  * address is different or the state of Y is not appropriate (e.g. closed or not
292  * idle) X does a lookup using tl_find_peer() and caches the new address.
293  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
294  * on the endpoint found.
295  *
296  * During close of endpoint Y it doesn't try to remove itself from other
297  * endpoints caches. They will detect that Y is gone and will search the peer
298  * endpoint again.
299  *
300  * Flow Control Handling.
301  * ----------------------
302  *
303  * Each connectionless endpoint keeps a list of endpoints which are
304  * flow-controlled by its queue. It also keeps a pointer to the queue which
305  * flow-controls itself.  Whenever flow control releases for endpoint X it
306  * enables all queues from the list. During close it also back-enables everyone
307  * in the list. If X is flow-controlled when it is closing it removes itself
308  * from the peer's list.
309  *
310  * DATA STRUCTURES
311  * ===============
312  *
313  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
314  * endpoint state. For connection-oriented transports it keeps a list
315  * of pending connections (tl_icon_t). For connectionless transports it keeps a
316  * list of endpoints flow controlled by this one.
317  *
318  * Each transport type is represented by a per-transport data structure
319  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
320  * endpoint address hash tables for each transport. It also contains pointer to
321  * transport serializer for connectionless transports.
322  *
323  * Each endpoint keeps a link to its transport structure, so the code can find
324  * all per-transport information quickly.
325  */
326 
327 #include	<sys/types.h>
328 #include	<sys/inttypes.h>
329 #include	<sys/stream.h>
330 #include	<sys/stropts.h>
331 #define	_SUN_TPI_VERSION 2
332 #include	<sys/tihdr.h>
333 #include	<sys/strlog.h>
334 #include	<sys/debug.h>
335 #include	<sys/cred.h>
336 #include	<sys/errno.h>
337 #include	<sys/kmem.h>
338 #include	<sys/id_space.h>
339 #include	<sys/modhash.h>
340 #include	<sys/mkdev.h>
341 #include	<sys/tl.h>
342 #include	<sys/stat.h>
343 #include	<sys/conf.h>
344 #include	<sys/modctl.h>
345 #include	<sys/strsun.h>
346 #include	<sys/socket.h>
347 #include	<sys/socketvar.h>
348 #include	<sys/sysmacros.h>
349 #include	<sys/xti_xtiopt.h>
350 #include	<sys/ddi.h>
351 #include	<sys/sunddi.h>
352 #include	<sys/zone.h>
353 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
354 #include	<inet/optcom.h>
355 #include	<sys/strsubr.h>
356 #include	<sys/ucred.h>
357 #include	<sys/suntpi.h>
358 #include	<sys/list.h>
359 #include	<sys/serializer.h>
360 
361 /*
362  * TBD List
363  * 14 Eliminate state changes through table
364  * 16. AF_UNIX socket options
365  * 17. connect() for ticlts
366  * 18. support for "netstat" to show AF_UNIX plus TLI local
367  *	transport connections
368  * 21. sanity check to flushing on sending M_ERROR
369  */
370 
371 /*
372  * CONSTANT DECLARATIONS
373  * --------------------
374  */
375 
376 /*
377  * Local declarations
378  */
/*
 * NEXTSTATE() looks up an entry of the ti_statetbl[][] table, indexed first by
 * event and then by state. The arguments are not parenthesized in the
 * expansion, so pass simple expressions only.
 */
379 #define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]
380 
381 #define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
382 #define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
383 #define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgsz" is unlimited (0) */
384 /*
385  * Hash tables size (initial value of tl_hash_size).
386  */
387 #define	TL_HASH_SIZE 311
388 
389 /*
390  * Definitions for module_info (consumed by the tl_minfo initializer).
391  */
392 #define		TL_ID		(104)		/* module ID number */
393 #define		TL_NAME		"tl"		/* module name */
394 #define		TL_MINPSZ	(0)		/* min packet size */
395 #define		TL_MAXPSZ	INFPSZ		/* max packet size ZZZ */
396 #define		TL_HIWAT	(16*1024)	/* hi water mark */
397 #define		TL_LOWAT	(256)		/* lo water mark */
398 /*
399  * Definition of minor numbers/modes for new transport provider modes.
400  * We view the socket use as a separate mode to get a separate name space.
 *
 * TL_SOCKET is a flag bit that is OR-ed with one of the three base modes
 * (TL_TICOTS, TL_TICOTSORD, TL_TICLTS) to form the TL_SOCK_* variants.
 * TL_MINOR_MASK (0x7) covers every mode value defined here.
 *
 * NOTE(review): TL_MINOR_START is presumably the first minor number handed
 * out to opened endpoints - confirm where tl_minors is created.
401  */
402 #define		TL_TICOTS	0	/* connection oriented transport */
403 #define		TL_TICOTSORD	1	/* COTS w/ orderly release */
404 #define		TL_TICLTS	2	/* connectionless transport */
405 #define		TL_UNUSED	3
406 #define		TL_SOCKET	4	/* Socket */
407 #define		TL_SOCK_COTS	(TL_SOCKET | TL_TICOTS)
408 #define		TL_SOCK_COTSORD	(TL_SOCKET | TL_TICOTSORD)
409 #define		TL_SOCK_CLTS	(TL_SOCKET | TL_TICLTS)
410 
411 #define		TL_MINOR_MASK	0x7
412 #define		TL_MINOR_START	(TL_TICLTS + 1)
413 
414 /*
415  * LOCAL MACROS
 *
 * T_ALIGN(p) rounds p up with P2ROUNDUP() to a multiple of
 * sizeof (t_scalar_t).
416  */
417 #define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
418 
419 /*
420  * EXTERNAL VARIABLE DECLARATIONS
421  * -----------------------------
422  */
423 /*
424  * state table defined in the OS space.c
 * Indexed as ti_statetbl[event][state]; accessed through NEXTSTATE().
425  */
426 extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
427 
428 /*
429  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
 *
 * tl_open(), tl_close(), tl_wput(), tl_wsrv() and tl_rsrv() are installed in
 * the tl_rinit/tl_winit qinit structures; tl_attach(), tl_detach() and
 * tl_info() are handed to DDI_DEFINE_STREAM_OPS().
430  */
431 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
432 static int tl_close(queue_t *, int, cred_t *);
433 static int tl_wput(queue_t *, mblk_t *);
434 static int tl_wsrv(queue_t *);
435 static int tl_rsrv(queue_t *);
436 
437 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
438 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
439 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
440 
441 
442 /*
443  * GLOBAL DATA STRUCTURES AND VARIABLES
444  * -----------------------------------
445  */
446 
447 /*
448  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
449  * For now, we only manage the SO_RECVUCRED option but we also have
450  * harmless dummy options to make things work with some common code we access.
 *
 * NOTE(review): the initializers are positional; the field order comes from
 * the opdes_t declaration (not visible in this file) - confirm it there
 * before adding or reordering values.
451  */
452 opdes_t	tl_opt_arr[] = {
453 	/* The SO_TYPE is needed for the hack below */
454 	{
455 		SO_TYPE,
456 		SOL_SOCKET,
457 		OA_R,
458 		OA_R,
459 		OP_NP,
460 		0,
461 		sizeof (t_scalar_t),
462 		0
463 	},
464 	{
465 		SO_RECVUCRED,
466 		SOL_SOCKET,
467 		OA_RW,
468 		OA_RW,
469 		OP_NP,
470 		0,
471 		sizeof (int),
472 		0
473 	}
474 };
475 
476 /*
477  * Table of all supported levels
478  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
479  * any supported options so we need this info separately.
480  *
481  * This is needed only for topmost tpi providers.
 *
 * TL_VALID_LEVELS_CNT is the number of entries in the table.
482  */
483 optlevel_t	tl_valid_levels_arr[] = {
484 	XTI_GENERIC,
485 	SOL_SOCKET,
486 	TL_PROT_LEVEL
487 };
488 
489 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
490 /*
491  * Current upper bound on the amount of space needed to return all options.
492  * Additional options with data size of sizeof(long) are handled automatically.
493  * Others need to be accounted for by hand.
 *
 * The bound is, per tl_opt_arr entry, 4 bytes (count << 2) plus one
 * struct opthdr, plus a constant 64 bytes and one struct T_optmgmt_ack.
494  */
495 #define	TL_MAX_OPT_BUF_LEN						\
496 		((A_CNT(tl_opt_arr) << 2) +				\
497 		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
498 		64 + sizeof (struct T_optmgmt_ack))
499 
/* Number of entries in tl_opt_arr. */
500 #define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)
501 
502 /*
503  *	transport addr structure
 *
 *	An address is the (zone, length, bytes) triple; struct tl_endpt embeds
 *	one as te_ap.
504  */
505 typedef struct tl_addr {
506 	zoneid_t	ta_zoneid;		/* Zone scope of address */
507 	t_scalar_t	ta_alen;		/* length of abuf */
508 	void		*ta_abuf;		/* the addr itself */
509 } tl_addr_t;
510 
511 /*
512  * Refcounted version of serializer.
 * The count lets several endpoints share one serializer.
513  */
514 typedef struct tl_serializer {
515 	uint_t		ts_refcnt;	/* number of references held */
516 	serializer_t	*ts_serializer;	/* the underlying serializer */
517 } tl_serializer_t;
518 
519 /*
520  * Each transport type has a separate state.
521  * Per-transport state.
 *
 * NOTE(review): tr_defaddr starts at TL_DFADDR in tl_transports[]; presumably
 * it is the next default address to hand out - confirm in the bind code.
522  */
523 typedef struct tl_transport_state {
524 	char		*tr_name;	/* transport name */
525 	minor_t		tr_minor;	/* mode, one of the TL_* minor values */
526 	uint32_t	tr_defaddr;
527 	mod_hash_t	*tr_ai_hash;	/* endpoints by acceptor ID */
528 	mod_hash_t	*tr_addr_hash;	/* endpoints by address */
529 	tl_serializer_t	*tr_serializer;	/* for connectionless transports */
530 } tl_transport_state_t;
531 
/* Initial tr_defaddr value used for every transport below. */
532 #define	TL_DFADDR 0x1000
533 
/*
 * Per-transport state table, one entry per mode. Entries are listed in
 * tr_minor order (TL_TICOTS == 0 through TL_SOCK_CLTS == 6), with an
 * "undefined" placeholder in the TL_UNUSED slot. The two hash table pointers
 * and the serializer pointer all start out NULL.
 */
534 static tl_transport_state_t tl_transports[] = {
535 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
536 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
537 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
538 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
539 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
540 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
541 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
542 };
543 
/* Number of entries in tl_transports[]. */
544 #define	TL_MAXTRANSPORT A_CNT(tl_transports)
545 
/* Forward declaration; the structure itself is defined further down. */
546 struct tl_endpt;
547 typedef struct tl_endpt tl_endpt_t;
548 
/*
 * Function type taking a message and an endpoint; this is the signature
 * shared by handlers such as tl_do_proto(mblk_t *, tl_endpt_t *).
 */
549 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
550 
551 /*
552  * Data structure used to represent pending connects.
553  * Records enough information so that the connecting peer can close
554  * before the connection gets accepted.
555  */
556 typedef struct tl_icon {
557 	list_node_t	ti_node;	/* linkage for a list of tl_icon_t */
558 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
559 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
560 	t_scalar_t	ti_seqno;	/* Sequence number */
561 } tl_icon_t;
562 
/* Short name for the Unix Domain socket address structure and its size. */
563 typedef struct so_ux_addr soux_addr_t;
564 #define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)
565 
566 /*
567  * Maximum number of unaccepted connection indications allowed per listener.
 * tl_maxqlen is a non-static variable initialized to TL_MAXQLEN.
 * NOTE(review): presumably left global so that it can be tuned - confirm.
568  */
569 #define	TL_MAXQLEN	4096
570 int tl_maxqlen = TL_MAXQLEN;
571 
572 /*
573  *	transport endpoint structure
 *
 *	The te_* names #defined inside the structure are shorthand for nested
 *	members (te_uxaddr, te_ap, te_transport, te_ser and the COTS/CLTS
 *	union), so they can be used as if they were direct fields.
574  */
575 struct tl_endpt {
576 	queue_t		*te_rq;		/* stream read queue */
577 	queue_t		*te_wq;		/* stream write queue */
578 	uint32_t	te_refcnt;	/* reference count */
579 	int32_t		te_state;	/* TPI state of endpoint */
580 	minor_t		te_minor;	/* minor number */
	/* te_seqno is an alias for te_minor, not a separate field. */
581 #define	te_seqno	te_minor
582 	uint_t		te_flag;	/* mode bits plus TL_* flag values */
583 	boolean_t	te_nowsrv;	/* stop write side service processing */
584 	tl_serializer_t	*te_ser;	/* Serializer to use */
585 #define	te_serializer	te_ser->ts_serializer
586 
587 	soux_addr_t	te_uxaddr;	/* Socket address */
588 #define	te_magic	te_uxaddr.soua_magic
589 #define	te_vp		te_uxaddr.soua_vp
590 	tl_addr_t	te_ap;		/* addr bound to this endpt */
591 #define	te_zoneid te_ap.ta_zoneid
592 #define	te_alen	te_ap.ta_alen
593 #define	te_abuf	te_ap.ta_abuf
594 
595 	tl_transport_state_t *te_transport;	/* per-transport state */
596 #define	te_addrhash	te_transport->tr_addr_hash
597 #define	te_aihash	te_transport->tr_ai_hash
598 #define	te_defaddr	te_transport->tr_defaddr
599 	cred_t		*te_credp;	/* endpoint user credentials */
600 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
601 
602 	/*
603 	 * State specific for connection-oriented and connectionless transports.
	 * The two structures share storage, so only the one matching the
	 * endpoint's transport type is meaningful.
604 	 */
605 	union {
606 		/* Connection-oriented state. */
607 		struct {
608 			t_uscalar_t _te_nicon;	/* count of conn requests */
609 			t_uscalar_t _te_qlen;	/* max conn requests */
610 			tl_endpt_t  *_te_oconp;	/* conn request pending */
611 			tl_endpt_t  *_te_conp;	/* connected endpt */
612 #ifndef _ILP32
613 			void	    *_te_pad;
614 #endif
615 			list_t	_te_iconp;	/* list of conn ind. pending */
616 		} _te_cots_state;
617 		/* Connection-less state. */
618 		struct {
619 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
620 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
621 			list_node_t _te_flows;	/* lists of connections */
622 			list_t  _te_flowlist;	/* Who flowcontrols on me */
623 		} _te_clts_state;
624 	} _te_transport_state;
625 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
626 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
627 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
628 #define	te_conp		_te_transport_state._te_cots_state._te_conp
629 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
630 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
631 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
632 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
633 #define	te_flows	_te_transport_state._te_clts_state._te_flows
634 
635 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
636 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
637 	pid_t		te_cpid;	/* cached pid of endpoint */
638 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
639 	/*
640 	 * Pieces of the endpoint state needed for closing.
	 * The three mblk_t members are embedded (not pointers) so that close
	 * and the service routines have a message with which to enter the
	 * serializer.
641 	 */
642 	kmutex_t	te_closelock;
643 	kcondvar_t	te_closecv;
644 	uint8_t		te_closing;	/* The endpoint started closing */
645 	uint8_t		te_closewait;	/* Wait in close until zero */
646 	mblk_t		te_closemp;	/* for entering serializer on close */
647 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
648 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
649 	kmutex_t	te_srv_lock;
650 	kcondvar_t	te_srv_cv;
651 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
652 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
653 	/*
654 	 * Pieces of the endpoint state needed for serializer transitions.
655 	 */
656 	kmutex_t	te_ser_lock;	/* Protects the count below */
657 	uint_t		te_ser_count;	/* Number of messages on serializer */
658 };
659 
660 /*
661  * Flag values. Lower 4 bits specify the transport used.
662  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
663  * they make it easier to identify the endpoint.
664  */
665 #define	TL_LISTENER	0x00010	/* the listener endpoint */
666 #define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
667 #define	TL_EAGER	0x00040	/* connecting endpoint */
668 #define	TL_ACCEPTED	0x00080	/* accepted connection */
669 #define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
670 #define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
671 #define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
672 #define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
673 #define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
674 /*
675  * Boolean checks for the endpoint type.
 * Each one tests a mode bit (TL_TICLTS, TL_TICOTSORD or TL_SOCKET) in te_flag.
 * IS_COTS() is the exact negation of IS_CLTS(), so it is also true for
 * endpoints with the TL_TICOTSORD bit set.
676  */
677 #define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
678 #define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
679 #define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
680 #define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
681 
682 /*
683  * Certain operations are always used together. These macros reduce the chance
684  * of missing a part of a combination.
 *
 * All of them expand to a bare { } block (not do { } while (0)) and evaluate
 * their endpoint argument more than once, so pass a side-effect-free
 * expression and take care when one is the unbraced body of an if/else.
 *
 * TL_UNCONNECT(x)	tl_refrele(x), then set x to NULL (x must be an lvalue).
 * TL_REMOVE_PEER(x)	TL_UNCONNECT(x), but only when x is not NULL.
 * TL_PUTBQ(x, mp)	set te_nowsrv and putbq() mp on the write queue;
 *			asserts that TL_CLOSE_SER is not set in te_flag.
 * TL_QENABLE(x)	clear te_nowsrv and qenable() the write queue.
 * TL_PUTQ(x, mp)	clear te_nowsrv and putq() mp on the write queue.
685  */
686 #define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
687 #define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
688 
689 #define	TL_PUTBQ(x, mp) {		\
690 	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
691 	(x)->te_nowsrv = B_TRUE;	\
692 	(void) putbq((x)->te_wq, mp);	\
693 }
694 
695 #define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
696 #define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
697 
698 /*
699  * STREAMS driver glue data structures.
 *
 * tl_minfo is shared by the read and write qinit structures below.
700  */
701 static	struct	module_info	tl_minfo = {
702 	TL_ID,			/* mi_idnum */
703 	TL_NAME,		/* mi_idname */
704 	TL_MINPSZ,		/* mi_minpsz */
705 	TL_MAXPSZ,		/* mi_maxpsz */
706 	TL_HIWAT,		/* mi_hiwat */
707 	TL_LOWAT		/* mi_lowat */
708 };
709 
/*
 * Read-side queue initialization: no put procedure, tl_rsrv() as the service
 * routine, and the open/close entry points.
 */
710 static	struct	qinit	tl_rinit = {
711 	NULL,			/* qi_putp */
712 	tl_rsrv,		/* qi_srvp */
713 	tl_open,		/* qi_qopen */
714 	tl_close,		/* qi_qclose */
715 	NULL,			/* qi_qadmin */
716 	&tl_minfo,		/* qi_minfo */
717 	NULL			/* qi_mstat */
718 };
719 
/*
 * Write-side queue initialization: tl_wput() as the put procedure and
 * tl_wsrv() as the service routine; open/close are set on the read side only.
 */
720 static	struct	qinit	tl_winit = {
721 	tl_wput,		/* qi_putp */
722 	tl_wsrv,		/* qi_srvp */
723 	NULL,			/* qi_qopen */
724 	NULL,			/* qi_qclose */
725 	NULL,			/* qi_qadmin */
726 	&tl_minfo,		/* qi_minfo */
727 	NULL			/* qi_mstat */
728 };
729 
/*
 * streamtab tying the read and write qinit structures together; the
 * multiplexor entries are unused (NULL).
 */
730 static	struct streamtab	tlinfo = {
731 	&tl_rinit,		/* st_rdinit */
732 	&tl_winit,		/* st_wrinit */
733 	NULL,			/* st_muxrinit */
734 	NULL			/* st_muxwrinit */
735 };
736 
/*
 * Defines tl_devops from the attach/detach/info entry points and the tlinfo
 * streamtab. D_MP is passed as the driver flag and quiesce is declared as
 * not supported.
 */
737 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
738     nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
739 
/* Driver linkage structure: module type, description string and tl_devops. */
740 static struct modldrv modldrv = {
741 	&mod_driverops,		/* Type of module -- pseudo driver here */
742 	"TPI Local Transport (tl)",
743 	&tl_devops,		/* driver ops */
744 };
745 
746 /*
747  * Module linkage information for the kernel.
 * Lists the single modldrv above, followed by a NULL terminator.
748  */
749 static struct modlinkage modlinkage = {
750 	MODREV_1,
751 	&modldrv,
752 	NULL
753 };
754 
755 /*
756  * Templates for response to info request
757  * Check sanity of unlimited connect data etc.
 *
 * Fields marked "fill at run time" are left as 0 in the templates.
 * Both provider flag macros currently have the same value.
758  */
759 
760 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1 | SENDZERO)
761 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1 | SENDZERO)
762 
/* Template for connection-oriented (T_COTS) endpoints. */
763 static struct T_info_ack tl_cots_info_ack =
764 	{
765 		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
766 		T_INFINITE,	/* TSDU size */
767 		T_INFINITE,	/* ETSDU size */
768 		T_INFINITE,	/* CDATA_size */
769 		T_INFINITE,	/* DDATA_size */
770 		T_INFINITE,	/* ADDR_size  */
771 		T_INFINITE,	/* OPT_size */
772 		0,		/* TIDU_size - fill at run time */
773 		T_COTS,		/* SERV_type */
774 		-1,		/* CURRENT_state */
775 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
776 	};
777 
/*
 * Template for connectionless (T_CLTS) endpoints. Unlike the COTS template
 * it spells the special sizes as literals: -1 means infinite and -2 means
 * not supported, as the per-field comments say.
 */
778 static struct T_info_ack tl_clts_info_ack =
779 	{
780 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
781 		0,		/* TSDU_size - fill at run time */
782 		-2,		/* ETSDU_size -2 => not supported */
783 		-2,		/* CDATA_size -2 => not supported */
784 		-2,		/* DDATA_size  -2 => not supported */
785 		-1,		/* ADDR_size -1 => infinite */
786 		-1,		/* OPT_size */
787 		0,		/* TIDU_size - fill at run time */
788 		T_CLTS,		/* SERV_type */
789 		-1,		/* CURRENT_state */
790 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
791 	};
792 
793 /*
794  * private copy of devinfo pointer used in tl_info
795  */
796 static dev_info_t *tl_dip;
797 
798 /*
799  * Endpoints cache.
 * NOTE(review): presumably the kmem cache that tl_endpt_t structures are
 * allocated from - confirm where it is created.
800  */
801 static kmem_cache_t *tl_cache;
802 /*
803  * Minor number space.
804  */
805 static id_space_t *tl_minors;
806 
807 /*
808  * Default Data Unit size.
 * NOTE(review): presumably the run-time value for the TIDU_size fields of
 * the T_info_ack templates - confirm where it is assigned.
809  */
810 static t_scalar_t tl_tidusz;
811 
812 /*
813  * Size of hash tables.
 * Starts out as TL_HASH_SIZE.
814  */
815 static size_t tl_hash_size = TL_HASH_SIZE;
816 
817 /*
818  * Debug and test variable ONLY. Turn off T_CONN_IND queueing
819  * for sockets.
 *
 * NOTE(review): tl_client_closing_when_accepting and tl_serializer_noswitch
 * carry no description of their own; their use is not visible here - confirm
 * whether they are counters or switches before relying on them.
820  */
821 static int tl_disable_early_connect = 0;
822 static int tl_client_closing_when_accepting;
823 
824 static int tl_serializer_noswitch;
825 
826 /*
827  * LOCAL FUNCTION PROTOTYPES
828  * -------------------------
829  */
830 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
831 static void tl_do_proto(mblk_t *, tl_endpt_t *);
832 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
833 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
834 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
835     t_scalar_t);
836 static void tl_bind(mblk_t *, tl_endpt_t *);
837 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
838 static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
839 static void tl_unbind(mblk_t *, tl_endpt_t *);
840 static void tl_optmgmt(queue_t *, mblk_t *);
841 static void tl_conn_req(queue_t *, mblk_t *);
842 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
843 static void tl_conn_res(mblk_t *, tl_endpt_t *);
844 static void tl_discon_req(mblk_t *, tl_endpt_t *);
845 static void tl_capability_req(mblk_t *, tl_endpt_t *);
846 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
847 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
848 static void tl_info_req(mblk_t *, tl_endpt_t *);
849 static void tl_addr_req(mblk_t *, tl_endpt_t *);
850 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
851 static void tl_data(mblk_t  *, tl_endpt_t *);
852 static void tl_exdata(mblk_t *, tl_endpt_t *);
853 static void tl_ordrel(mblk_t *, tl_endpt_t *);
854 static void tl_unitdata(mblk_t *, tl_endpt_t *);
855 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
856 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
857 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
858 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
859 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
860 static void tl_cl_backenable(tl_endpt_t *);
861 static void tl_co_unconnect(tl_endpt_t *);
862 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
863 static void tl_discon_ind(tl_endpt_t *, uint32_t);
864 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
865 static mblk_t *tl_ordrel_ind_alloc(void);
866 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
867 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
868 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
869 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
870 static void tl_icon_freemsgs(mblk_t **);
871 static void tl_merror(queue_t *, mblk_t *, int);
872 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
873 static int tl_default_opt(queue_t *, int, int, uchar_t *);
874 static int tl_get_opt(queue_t *, int, int, uchar_t *);
875 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
876     uchar_t *, void *, cred_t *);
877 static void tl_memrecover(queue_t *, mblk_t *, size_t);
878 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
879 static void tl_free(tl_endpt_t *);
880 static int  tl_constructor(void *, void *, int);
881 static void tl_destructor(void *, void *);
882 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
883 static tl_serializer_t *tl_serializer_alloc(int);
884 static void tl_serializer_refhold(tl_serializer_t *);
885 static void tl_serializer_refrele(tl_serializer_t *);
886 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
887 static void tl_serializer_exit(tl_endpt_t *);
888 static boolean_t tl_noclose(tl_endpt_t *);
889 static void tl_closeok(tl_endpt_t *);
890 static void tl_refhold(tl_endpt_t *);
891 static void tl_refrele(tl_endpt_t *);
892 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
893 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
894 static void tl_close_ser(mblk_t *, tl_endpt_t *);
895 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
896 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
897 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
898 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
899 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
900 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
901 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
902 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
903 static void tl_addr_unbind(tl_endpt_t *);
904 
/*
 * Initialize option database object for TL.
 *
 * Bundles the option callbacks (default/get/set) with the option table and
 * the table of valid option levels, together with their entry counts.
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};
918 
919 /*
920  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
921  * ---------------------------------------
922  */
923 
924 /*
925  * Loadable module routines
926  */
927 int
928 _init(void)
929 {
930 	return (mod_install(&modlinkage));
931 }
932 
933 int
934 _fini(void)
935 {
936 	return (mod_remove(&modlinkage));
937 }
938 
939 int
940 _info(struct modinfo *modinfop)
941 {
942 	return (mod_info(&modlinkage, modinfop));
943 }
944 
945 /*
946  * Driver Entry Points and Other routines
947  */
/*
 * Attach entry point.
 *
 * DDI_RESUME succeeds without doing any work and every command other than
 * DDI_ATTACH fails. For DDI_ATTACH this routine picks the TIDU size, creates
 * one minor node per transport, and then sets up the endpoint cache, the
 * minor number space and the per-transport hash tables and serializers.
 * The devinfo pointer is remembered in tl_dip only once all of that is done.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	char name[32];	/* scratch buffer for hash table names */

	/*
	 * Resume from a checkpoint state.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
	 * streams message sizes can be unlimited. We use a defined constant
	 * instead.
	 */
	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

	/*
	 * Create subdevices for each transport. If any of them can't be
	 * created, remove all the nodes created so far and fail the attach.
	 */
	for (i = 0; i < TL_UNUSED; i++) {
		if (ddi_create_minor_node(devi,
		    tl_transports[i].tr_name,
		    S_IFCHR, tl_transports[i].tr_minor,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	/*
	 * Create the cache of endpoint structures.
	 */
	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);

	if (tl_cache == NULL) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	/*
	 * Create ID space for minor numbers
	 */
	tl_minors = id_space_create("tl_minor_space",
	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);

	/*
	 * Set up the hash tables and, for connectionless transports, the
	 * shared serializer of every transport.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if (i == TL_UNUSED)
			continue;

		/*
		 * Socket COTSORD shares namespace with COTS. This relies on
		 * the TL_SOCK_COTS tables having been created by an earlier
		 * iteration, which is what the assertions check.
		 */
		if (i == TL_SOCK_COTSORD) {
			t->tr_ai_hash =
			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
			ASSERT(t->tr_ai_hash != NULL);
			t->tr_addr_hash =
			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
			ASSERT(t->tr_addr_hash != NULL);
			continue;
		}

		/*
		 * Create hash tables.
		 *
		 * The acceptor ID hash comes first. On ILP32 a non-socket
		 * endpoint uses its read queue pointer as the acceptor ID (see
		 * tl_open()), so a pointer hash is used there; in all other
		 * cases the acceptor ID is the minor number and an ID hash is
		 * used.
		 */
		(void) snprintf(name, sizeof (name), "%s_ai_hash",
		    t->tr_name);
#ifdef _ILP32
		if (i & TL_SOCKET)
			t->tr_ai_hash =
			    mod_hash_create_idhash(name, tl_hash_size - 1,
			    mod_hash_null_valdtor);
		else
			t->tr_ai_hash =
			    mod_hash_create_ptrhash(name, tl_hash_size,
			    mod_hash_null_valdtor, sizeof (queue_t));
#else
		t->tr_ai_hash =
		    mod_hash_create_idhash(name, tl_hash_size - 1,
		    mod_hash_null_valdtor);
#endif /* _ILP32 */

		/*
		 * The address hash: sockets get a pointer hash, TLI transports
		 * hash and compare whole addresses with tl_hash_by_addr() and
		 * tl_hash_cmp_addr().
		 */
		if (i & TL_SOCKET) {
			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_ptrhash(name,
			    tl_hash_size, mod_hash_null_valdtor,
			    sizeof (uintptr_t));
		} else {
			(void) snprintf(name, sizeof (name), "%s_addr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_extended(name,
			    tl_hash_size, mod_hash_null_keydtor,
			    mod_hash_null_valdtor,
			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
		}

		/* Create serializer for connectionless transports. */
		if (i & TL_TICLTS)
			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
	}

	/* Everything is in place; let tl_info() hand out the devinfo. */
	tl_dip = devi;

	return (DDI_SUCCESS);
}
1058 
1059 static int
1060 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1061 {
1062 	int i;
1063 
1064 	if (cmd == DDI_SUSPEND)
1065 		return (DDI_SUCCESS);
1066 
1067 	if (cmd != DDI_DETACH)
1068 		return (DDI_FAILURE);
1069 
1070 	/*
1071 	 * Destroy arenas and hash tables.
1072 	 */
1073 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1074 		tl_transport_state_t *t = &tl_transports[i];
1075 
1076 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1077 			continue;
1078 
1079 		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1080 		if (t->tr_serializer != NULL) {
1081 			tl_serializer_refrele(t->tr_serializer);
1082 			t->tr_serializer = NULL;
1083 		}
1084 
1085 #ifdef _ILP32
1086 		if (i & TL_SOCKET)
1087 			mod_hash_destroy_idhash(t->tr_ai_hash);
1088 		else
1089 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1090 #else
1091 		mod_hash_destroy_idhash(t->tr_ai_hash);
1092 #endif /* _ILP32 */
1093 		t->tr_ai_hash = NULL;
1094 		if (i & TL_SOCKET)
1095 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1096 		else
1097 			mod_hash_destroy_hash(t->tr_addr_hash);
1098 		t->tr_addr_hash = NULL;
1099 	}
1100 
1101 	kmem_cache_destroy(tl_cache);
1102 	tl_cache = NULL;
1103 	id_space_destroy(tl_minors);
1104 	tl_minors = NULL;
1105 	ddi_remove_minor_node(devi, NULL);
1106 	return (DDI_SUCCESS);
1107 }
1108 
1109 /* ARGSUSED */
1110 static int
1111 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1112 {
1113 
1114 	int retcode = DDI_FAILURE;
1115 
1116 	switch (infocmd) {
1117 
1118 	case DDI_INFO_DEVT2DEVINFO:
1119 		if (tl_dip != NULL) {
1120 			*result = (void *)tl_dip;
1121 			retcode = DDI_SUCCESS;
1122 		}
1123 		break;
1124 
1125 	case DDI_INFO_DEVT2INSTANCE:
1126 		*result = NULL;
1127 		retcode = DDI_SUCCESS;
1128 		break;
1129 
1130 	default:
1131 		break;
1132 	}
1133 	return (retcode);
1134 }
1135 
/*
 * Endpoint reference management.
 */

/*
 * Take an additional reference on the endpoint by atomically incrementing
 * te_refcnt. Each hold has to be paired with a tl_refrele().
 */
static void
tl_refhold(tl_endpt_t *tep)
{
	atomic_inc_32(&tep->te_refcnt);
}
1144 
1145 static void
1146 tl_refrele(tl_endpt_t *tep)
1147 {
1148 	ASSERT(tep->te_refcnt != 0);
1149 
1150 	if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
1151 		tl_free(tep);
1152 }
1153 
1154 /*ARGSUSED*/
1155 static int
1156 tl_constructor(void *buf, void *cdrarg, int kmflags)
1157 {
1158 	tl_endpt_t *tep = buf;
1159 
1160 	bzero(tep, sizeof (tl_endpt_t));
1161 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1162 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1163 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1164 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1165 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1166 
1167 	return (0);
1168 }
1169 
1170 /*ARGSUSED*/
1171 static void
1172 tl_destructor(void *buf, void *cdrarg)
1173 {
1174 	tl_endpt_t *tep = buf;
1175 
1176 	mutex_destroy(&tep->te_closelock);
1177 	cv_destroy(&tep->te_closecv);
1178 	mutex_destroy(&tep->te_srv_lock);
1179 	cv_destroy(&tep->te_srv_cv);
1180 	mutex_destroy(&tep->te_ser_lock);
1181 }
1182 
/*
 * Final teardown of an endpoint. Called from tl_refrele() once the reference
 * count has dropped to zero.
 *
 * By now the endpoint must be fully quiesced (no queues, no pending serializer
 * requests, not in the address hash); the assertions document that contract.
 * The routine releases what the endpoint still owns - address buffer, minor
 * number, unused hash reservation, peer pointers and private serializer -
 * resets the remaining state and returns the object to tl_cache.
 */
static void
tl_free(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt == 0);
	ASSERT(tep->te_transport != NULL);
	ASSERT(tep->te_rq == NULL);
	ASSERT(tep->te_wq == NULL);
	ASSERT(tep->te_ser != NULL);
	ASSERT(tep->te_ser_count == 0);
	ASSERT(!(tep->te_flag & TL_ADDRHASHED));

	if (IS_SOCKET(tep)) {
		/*
		 * Socket addresses live inside the endpoint itself (see
		 * tl_open()), so there is nothing to free.
		 */
		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
		ASSERT(tep->te_abuf == &tep->te_uxaddr);
		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
	} else if (tep->te_abuf != NULL) {
		/* TLI endpoint with an allocated address buffer. */
		kmem_free(tep->te_abuf, tep->te_alen);
		tep->te_alen = -1; /* uninitialized */
		tep->te_abuf = NULL;
	} else {
		ASSERT(tep->te_alen == -1);
	}

	/* Give the minor number allocated in tl_open() back. */
	id_free(tl_minors, tep->te_minor);
	ASSERT(tep->te_credp == NULL);

	/* Cancel the hash reservation made in tl_open() if it is still held. */
	if (tep->te_hash_hndl != NULL)
		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

	if (IS_COTS(tep)) {
		TL_REMOVE_PEER(tep->te_conp);
		TL_REMOVE_PEER(tep->te_oconp);
		/* Drop the private serializer allocated in tl_open(). */
		tl_serializer_refrele(tep->te_ser);
		tep->te_ser = NULL;
		ASSERT(tep->te_nicon == 0);
		ASSERT(list_head(&tep->te_iconp) == NULL);
	} else {
		/*
		 * Connectionless endpoints use the serializer shared by the
		 * whole transport, so it is not released here.
		 */
		ASSERT(tep->te_lastep == NULL);
		ASSERT(list_head(&tep->te_flowlist) == NULL);
		ASSERT(tep->te_flowq == NULL);
	}

	ASSERT(tep->te_bufcid == 0);
	ASSERT(tep->te_timoutid == 0);
	bzero(&tep->te_ap, sizeof (tep->te_ap));
	tep->te_acceptor_id = 0;

	/*
	 * Reset the close-related state and flags before the object goes back
	 * to the cache (presumably because cached objects get reused without
	 * the constructor being run again).
	 */
	ASSERT(tep->te_closewait == 0);
	ASSERT(!tep->te_rsrv_active);
	ASSERT(!tep->te_wsrv_active);
	tep->te_closing = 0;
	tep->te_nowsrv = B_FALSE;
	tep->te_flag = 0;

	kmem_cache_free(tl_cache, tep);
}
1240 
1241 /*
1242  * Allocate/free reference-counted wrappers for serializers.
1243  */
1244 static tl_serializer_t *
1245 tl_serializer_alloc(int flags)
1246 {
1247 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1248 	serializer_t *ser;
1249 
1250 	if (s == NULL)
1251 		return (NULL);
1252 
1253 	ser = serializer_create(flags);
1254 
1255 	if (ser == NULL) {
1256 		kmem_free(s, sizeof (tl_serializer_t));
1257 		return (NULL);
1258 	}
1259 
1260 	s->ts_refcnt = 1;
1261 	s->ts_serializer = ser;
1262 	return (s);
1263 }
1264 
/*
 * Take an additional reference on a serializer wrapper by atomically
 * incrementing ts_refcnt. Pairs with tl_serializer_refrele().
 */
static void
tl_serializer_refhold(tl_serializer_t *s)
{
	atomic_inc_32(&s->ts_refcnt);
}
1270 
1271 static void
1272 tl_serializer_refrele(tl_serializer_t *s)
1273 {
1274 	if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
1275 		serializer_destroy(s->ts_serializer);
1276 		kmem_free(s, sizeof (tl_serializer_t));
1277 	}
1278 }
1279 
/*
 * Post a request on the endpoint serializer. For COTS transports keep track of
 * the number of pending requests.
 *
 * tlproc is the routine the serializer will call with mp and tep as its
 * arguments. The routine is expected to call tl_serializer_exit() when it is
 * done so that the pending count taken here is dropped again.
 */
static void
tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
{
	if (IS_COTS(tep)) {
		/* te_ser_count is protected by te_ser_lock. */
		mutex_enter(&tep->te_ser_lock);
		tep->te_ser_count++;
		mutex_exit(&tep->te_ser_lock);
	}
	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
}
1294 
1295 /*
1296  * Complete processing the request on the serializer. Decrement the counter for
1297  * pending requests for COTS transports.
1298  */
1299 static void
1300 tl_serializer_exit(tl_endpt_t *tep)
1301 {
1302 	if (IS_COTS(tep)) {
1303 		mutex_enter(&tep->te_ser_lock);
1304 		ASSERT(tep->te_ser_count != 0);
1305 		tep->te_ser_count--;
1306 		mutex_exit(&tep->te_ser_lock);
1307 	}
1308 }
1309 
1310 /*
1311  * Hash management functions.
1312  */
1313 
1314 /*
1315  * Return TRUE if two addresses are equal, false otherwise.
1316  */
1317 static boolean_t
1318 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1319 {
1320 	return ((ap1->ta_alen > 0) &&
1321 	    (ap1->ta_alen == ap2->ta_alen) &&
1322 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1323 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1324 }
1325 
/*
 * This function is called whenever an endpoint is found in the hash table.
 *
 * It takes a reference on the endpoint stored as the hash value; the key is
 * not used. Whoever performed the lookup has to drop that reference with
 * tl_refrele().
 */
/* ARGSUSED0 */
static void
tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
{
	tl_refhold((tl_endpt_t *)val);
}
1335 
1336 /*
1337  * Address hash function.
1338  */
1339 /* ARGSUSED */
1340 static uint_t
1341 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1342 {
1343 	tl_addr_t *ap = (tl_addr_t *)key;
1344 	size_t	len = ap->ta_alen;
1345 	uchar_t *p = ap->ta_abuf;
1346 	uint_t i, g;
1347 
1348 	ASSERT((len > 0) && (p != NULL));
1349 
1350 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1351 		i = (i << 4) + (*p);
1352 		if ((g = (i & 0xf0000000U)) != 0) {
1353 			i ^= (g >> 24);
1354 			i ^= g;
1355 		}
1356 	}
1357 	return (i);
1358 }
1359 
1360 /*
1361  * This function is used by hash lookups. It compares two generic addresses.
1362  */
1363 static int
1364 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1365 {
1366 #ifdef	DEBUG
1367 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1368 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1369 
1370 	ASSERT(key1 != NULL);
1371 	ASSERT(key2 != NULL);
1372 
1373 	ASSERT(ap1->ta_abuf != NULL);
1374 	ASSERT(ap2->ta_abuf != NULL);
1375 	ASSERT(ap1->ta_alen > 0);
1376 	ASSERT(ap2->ta_alen > 0);
1377 #endif
1378 
1379 	return (!tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1380 }
1381 
1382 /*
1383  * Prevent endpoint from closing if possible.
1384  * Return B_TRUE on success, B_FALSE on failure.
1385  */
1386 static boolean_t
1387 tl_noclose(tl_endpt_t *tep)
1388 {
1389 	boolean_t rc = B_FALSE;
1390 
1391 	mutex_enter(&tep->te_closelock);
1392 	if (!tep->te_closing) {
1393 		ASSERT(tep->te_closewait == 0);
1394 		tep->te_closewait++;
1395 		rc = B_TRUE;
1396 	}
1397 	mutex_exit(&tep->te_closelock);
1398 	return (rc);
1399 }
1400 
/*
 * Allow endpoint to close if needed.
 *
 * Drops the te_closewait hold and signals te_closecv, the condition variable
 * tl_close() waits on for te_closewait to become zero.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	/* Cheap check without the lock; repeated precisely under the lock. */
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}
1414 
/*
 * STREAMS open entry point.
 *
 * The minor number being opened selects the transport; SO_SOCKSTR in oflag
 * selects the socket flavour of that transport. Every successful first open
 * allocates an endpoint, gives it a unique minor number (the device is cloned
 * through *devp) and enters it in the acceptor ID hash of its transport.
 *
 * Returns 0 on success (including a re-open of an already initialised queue)
 * and ENXIO for clone/module opens or an invalid minor number.
 */
/* ARGSUSED */
static int
tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	/* The queue already has an endpoint: nothing to do on re-open. */
	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	/* From here on "minor" is the index into tl_transports[]. */
	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;	/* reference dropped at the end of tl_close() */
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;

	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/*
		 * Socket-specific address handling. The address is kept
		 * inside the endpoint and initially derived from the minor
		 * number.
		 */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		/* No address yet; -1 marks the length as uninitialized. */
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

	/*
	 * Pick the acceptor ID: the read queue pointer for ILP32 non-socket
	 * endpoints, the minor number in every other case. tl_attach() creates
	 * the matching flavour of acceptor ID hash.
	 */
#ifdef	_ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif	/* _ILP32 */


	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}
1525 
/*
 * STREAMS close entry point.
 *
 * Close runs in two serialized phases. tl_close_ser() is posted first and
 * tl_close() waits for it before calling qprocsoff(). tl_close_finish_ser()
 * is posted afterwards; tl_close() waits for it only on connection-oriented
 * endpoints, while connectionless endpoints finish asynchronously under an
 * extra reference. Finally the reference taken in tl_open() is dropped.
 *
 * Always returns 0.
 */
/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag,	cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		/* Non-DEBUG kernels only log the inconsistency. */
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	/* Cancel any outstanding bufcall and timeout. */
	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close processing
	 * with serializer protection. This processing may happen asynchronously
	 * with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	/* Release the credentials held in tl_open(). */
	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	/* Drop the reference the endpoint was created with in tl_open(). */
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}
1631 
/*
 * First phase of close processing done behind the serializer.
 *
 * Drains the write queue where the transport semantics require it, removes
 * the address from the hash, detaches the queues and finally wakes up
 * tl_close(), which is waiting on te_closewait.
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	/* Record that the first close phase has run. */
	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	/* The read queue must not be used from here on. */
	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}
1677 
/*
 * Second phase of tl_close(), called from the serializer.
 *
 * For COTS endpoints the connection state is torn down and tl_close(), which
 * waits on te_closewait, is woken up. For CLTS endpoints the flow control
 * state is cleaned up and the extra reference taken by tl_close() is dropped;
 * tl_close() does not wait in that case, so this may be the last reference.
 */
/* ARGSUSED0 */
static void
tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
	IMPLY(IS_COTS(tep), tep->te_closewait == 1);

	tep->te_state = -1;	/* Uninitialized */
	if (IS_COTS(tep)) {
		tl_co_unconnect(tep);
	} else {
		/* Connectionless specific cleanup */
		TL_REMOVE_PEER(tep->te_lastep);
		/*
		 * Backenable anybody that is flow controlled waiting for
		 * this endpoint.
		 */
		tl_cl_backenable(tep);
		/*
		 * Take this endpoint off the flow control list of the
		 * endpoint it is itself recorded against, if any.
		 */
		if (tep->te_flowq != NULL) {
			list_remove(&(tep->te_flowq->te_flowlist), tep);
			tep->te_flowq = NULL;
		}
	}

	tl_serializer_exit(tep);
	if (IS_COTS(tep))
		tl_closeok(tep);	/* wake up tl_close() */
	else
		tl_refrele(tep);	/* pairs with tl_refhold() in tl_close() */
}
1715 
1716 /*
1717  * STREAMS write-side put procedure.
1718  * Enter serializer for most of the processing.
1719  *
1720  * The T_CONN_REQ is processed outside of serializer.
1721  */
1722 static int
1723 tl_wput(queue_t *wq, mblk_t *mp)
1724 {
1725 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
1726 	ssize_t			msz = MBLKL(mp);
1727 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
1728 	tlproc_t		*tl_proc = NULL;
1729 
1730 	switch (DB_TYPE(mp)) {
1731 	case M_DATA:
1732 		/* Only valid for connection-oriented transports */
1733 		if (IS_CLTS(tep)) {
1734 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1735 			    SL_TRACE | SL_ERROR,
1736 			    "tl_wput:M_DATA invalid for ticlts driver"));
1737 			tl_merror(wq, mp, EPROTO);
1738 			return (0);
1739 		}
1740 		tl_proc = tl_wput_data_ser;
1741 		break;
1742 
1743 	case M_IOCTL:
1744 		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1745 		case TL_IOC_CREDOPT:
1746 			/* FALLTHROUGH */
1747 		case TL_IOC_UCREDOPT:
1748 			/*
1749 			 * Serialize endpoint state change.
1750 			 */
1751 			tl_proc = tl_do_ioctl_ser;
1752 			break;
1753 
1754 		default:
1755 			miocnak(wq, mp, 0, EINVAL);
1756 			return (0);
1757 		}
1758 		break;
1759 
1760 	case M_FLUSH:
1761 		/*
1762 		 * do canonical M_FLUSH processing
1763 		 */
1764 		if (*mp->b_rptr & FLUSHW) {
1765 			flushq(wq, FLUSHALL);
1766 			*mp->b_rptr &= ~FLUSHW;
1767 		}
1768 		if (*mp->b_rptr & FLUSHR) {
1769 			flushq(RD(wq), FLUSHALL);
1770 			qreply(wq, mp);
1771 		} else {
1772 			freemsg(mp);
1773 		}
1774 		return (0);
1775 
1776 	case M_PROTO:
1777 		if (msz < sizeof (prim->type)) {
1778 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1779 			    SL_TRACE | SL_ERROR,
1780 			    "tl_wput:M_PROTO data too short"));
1781 			tl_merror(wq, mp, EPROTO);
1782 			return (0);
1783 		}
1784 		switch (prim->type) {
1785 		case T_OPTMGMT_REQ:
1786 		case T_SVR4_OPTMGMT_REQ:
1787 			/*
1788 			 * Process TPI option management requests immediately
1789 			 * in put procedure regardless of in-order processing
1790 			 * of already queued messages.
1791 			 * (Note: This driver supports AF_UNIX socket
1792 			 * implementation.  Unless we implement this processing,
1793 			 * setsockopt() on socket endpoint will block on flow
1794 			 * controlled endpoints which it should not. That is
1795 			 * required for successful execution of VSU socket tests
1796 			 * and is consistent with BSD socket behavior).
1797 			 */
1798 			tl_optmgmt(wq, mp);
1799 			return (0);
1800 		case O_T_BIND_REQ:
1801 		case T_BIND_REQ:
1802 			tl_proc = tl_bind_ser;
1803 			break;
1804 		case T_CONN_REQ:
1805 			if (IS_CLTS(tep)) {
1806 				tl_merror(wq, mp, EPROTO);
1807 				return (0);
1808 			}
1809 			tl_conn_req(wq, mp);
1810 			return (0);
1811 		case T_DATA_REQ:
1812 		case T_OPTDATA_REQ:
1813 		case T_EXDATA_REQ:
1814 		case T_ORDREL_REQ:
1815 			tl_proc = tl_putq_ser;
1816 			break;
1817 		case T_UNITDATA_REQ:
1818 			if (IS_COTS(tep) ||
1819 			    (msz < sizeof (struct T_unitdata_req))) {
1820 				tl_merror(wq, mp, EPROTO);
1821 				return (0);
1822 			}
1823 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1824 				tl_proc = tl_unitdata_ser;
1825 			} else {
1826 				tl_proc = tl_putq_ser;
1827 			}
1828 			break;
1829 		default:
1830 			/*
1831 			 * process in service procedure if message already
1832 			 * queued (maintain in-order processing)
1833 			 */
1834 			if (wq->q_first != NULL) {
1835 				tl_proc = tl_putq_ser;
1836 			} else {
1837 				tl_proc = tl_wput_ser;
1838 			}
1839 			break;
1840 		}
1841 		break;
1842 
1843 	case M_PCPROTO:
1844 		/*
1845 		 * Check that the message has enough data to figure out TPI
1846 		 * primitive.
1847 		 */
1848 		if (msz < sizeof (prim->type)) {
1849 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1850 			    SL_TRACE | SL_ERROR,
1851 			    "tl_wput:M_PCROTO data too short"));
1852 			tl_merror(wq, mp, EPROTO);
1853 			return (0);
1854 		}
1855 		switch (prim->type) {
1856 		case T_CAPABILITY_REQ:
1857 			tl_capability_req(mp, tep);
1858 			return (0);
1859 		case T_INFO_REQ:
1860 			tl_proc = tl_info_req_ser;
1861 			break;
1862 		case T_ADDR_REQ:
1863 			tl_proc = tl_addr_req_ser;
1864 			break;
1865 
1866 		default:
1867 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1868 			    SL_TRACE | SL_ERROR,
1869 			    "tl_wput:unknown TPI msg primitive"));
1870 			tl_merror(wq, mp, EPROTO);
1871 			return (0);
1872 		}
1873 		break;
1874 	default:
1875 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
1876 		    "tl_wput:default:unexpected Streams message"));
1877 		freemsg(mp);
1878 		return (0);
1879 	}
1880 
1881 	/*
1882 	 * Continue processing via serializer.
1883 	 */
1884 	ASSERT(tl_proc != NULL);
1885 	tl_refhold(tep);
1886 	tl_serializer_enter(tep, tl_proc, mp);
1887 	return (0);
1888 }
1889 
1890 /*
1891  * Place message on the queue while preserving order.
1892  */
1893 static void
1894 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1895 {
1896 	if (tep->te_closing) {
1897 		tl_wput_ser(mp, tep);
1898 	} else {
1899 		TL_PUTQ(tep, mp);
1900 		tl_serializer_exit(tep);
1901 		tl_refrele(tep);
1902 	}
1903 
1904 }
1905 
1906 static void
1907 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1908 {
1909 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1910 
1911 	switch (DB_TYPE(mp)) {
1912 	case M_DATA:
1913 		tl_data(mp, tep);
1914 		break;
1915 	case M_PROTO:
1916 		tl_do_proto(mp, tep);
1917 		break;
1918 	default:
1919 		freemsg(mp);
1920 		break;
1921 	}
1922 }
1923 
1924 /*
1925  * Write side put procedure called from serializer.
1926  */
1927 static void
1928 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1929 {
1930 	tl_wput_common_ser(mp, tep);
1931 	tl_serializer_exit(tep);
1932 	tl_refrele(tep);
1933 }
1934 
1935 /*
1936  * M_DATA processing. Called from serializer.
1937  */
1938 static void
1939 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1940 {
1941 	tl_endpt_t	*peer_tep = tep->te_conp;
1942 	queue_t		*peer_rq;
1943 
1944 	ASSERT(DB_TYPE(mp) == M_DATA);
1945 	ASSERT(IS_COTS(tep));
1946 
1947 	IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1948 
1949 	/*
1950 	 * fastpath for data. Ignore flow control if tep is closing.
1951 	 */
1952 	if ((peer_tep != NULL) &&
1953 	    !peer_tep->te_closing &&
1954 	    ((tep->te_state == TS_DATA_XFER) ||
1955 	    (tep->te_state == TS_WREQ_ORDREL)) &&
1956 	    (tep->te_wq != NULL) &&
1957 	    (tep->te_wq->q_first == NULL) &&
1958 	    ((peer_tep->te_state == TS_DATA_XFER) ||
1959 	    (peer_tep->te_state == TS_WREQ_ORDREL))	&&
1960 	    ((peer_rq = peer_tep->te_rq) != NULL) &&
1961 	    (canputnext(peer_rq) || tep->te_closing)) {
1962 		putnext(peer_rq, mp);
1963 	} else if (tep->te_closing) {
1964 		/*
1965 		 * It is possible that by the time we got here tep started to
1966 		 * close. If the write queue is not empty, and the state is
1967 		 * TS_DATA_XFER the data should be delivered in order, so we
1968 		 * call putq() instead of freeing the data.
1969 		 */
1970 		if ((tep->te_wq != NULL) &&
1971 		    ((tep->te_state == TS_DATA_XFER) ||
1972 		    (tep->te_state == TS_WREQ_ORDREL))) {
1973 			TL_PUTQ(tep, mp);
1974 		} else {
1975 			freemsg(mp);
1976 		}
1977 	} else {
1978 		TL_PUTQ(tep, mp);
1979 	}
1980 
1981 	tl_serializer_exit(tep);
1982 	tl_refrele(tep);
1983 }
1984 
1985 /*
1986  * Write side service routine.
1987  *
1988  * All actual processing happens within serializer which is entered
1989  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1990  * messages that need processing may have arrived, so tl_wsrv repeats until
1991  * queue is empty or te_nowsrv is set.
1992  */
1993 static int
1994 tl_wsrv(queue_t *wq)
1995 {
1996 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1997 
1998 	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1999 		mutex_enter(&tep->te_srv_lock);
2000 		ASSERT(tep->te_wsrv_active == B_FALSE);
2001 		tep->te_wsrv_active = B_TRUE;
2002 		mutex_exit(&tep->te_srv_lock);
2003 
2004 		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2005 
2006 		/*
2007 		 * Wait for serializer job to complete.
2008 		 */
2009 		mutex_enter(&tep->te_srv_lock);
2010 		while (tep->te_wsrv_active) {
2011 			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2012 		}
2013 		cv_signal(&tep->te_srv_cv);
2014 		mutex_exit(&tep->te_srv_lock);
2015 	}
2016 	return (0);
2017 }
2018 
2019 /*
2020  * Serialized write side processing of the STREAMS queue.
2021  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2022  * is NULL.
2023  */
2024 static void
2025 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2026 {
2027 	mblk_t *mp;
2028 	queue_t *wq = tep->te_wq;
2029 
2030 	ASSERT(wq != NULL);
2031 	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2032 		tl_wput_common_ser(mp, tep);
2033 	}
2034 
2035 	/*
2036 	 * Wakeup service routine unless called from close.
2037 	 * If ser_mp is specified, the caller is tl_wsrv().
2038 	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2039 	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2040 	 * be no matching tl_serializer_exit() in this case.
2041 	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2042 	 * waiting on te_srv_cv.
2043 	 */
2044 	if (ser_mp != NULL) {
2045 		/*
2046 		 * We are called from tl_wsrv.
2047 		 */
2048 		mutex_enter(&tep->te_srv_lock);
2049 		ASSERT(tep->te_wsrv_active);
2050 		tep->te_wsrv_active = B_FALSE;
2051 		cv_signal(&tep->te_srv_cv);
2052 		mutex_exit(&tep->te_srv_lock);
2053 		tl_serializer_exit(tep);
2054 	}
2055 }
2056 
2057 /*
2058  * Called when the stream is backenabled. Enter serializer and qenable everyone
2059  * flow controlled by tep.
2060  *
2061  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2062  * is possible that two instances of tl_rsrv will be running reusing the same
2063  * rsrv mblk.
2064  */
2065 static int
2066 tl_rsrv(queue_t *rq)
2067 {
2068 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2069 
2070 	ASSERT(rq->q_first == NULL);
2071 	ASSERT(tep->te_rsrv_active == 0);
2072 
2073 	tep->te_rsrv_active = B_TRUE;
2074 	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2075 	/*
2076 	 * Wait for serializer job to complete.
2077 	 */
2078 	mutex_enter(&tep->te_srv_lock);
2079 	while (tep->te_rsrv_active) {
2080 		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2081 	}
2082 	cv_signal(&tep->te_srv_cv);
2083 	mutex_exit(&tep->te_srv_lock);
2084 	return (0);
2085 }
2086 
2087 /* ARGSUSED */
2088 static void
2089 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2090 {
2091 	tl_endpt_t *peer_tep;
2092 
2093 	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2094 		tl_cl_backenable(tep);
2095 	} else if (
2096 	    IS_COTS(tep) &&
2097 	    ((peer_tep = tep->te_conp) != NULL) &&
2098 	    !peer_tep->te_closing &&
2099 	    ((tep->te_state == TS_DATA_XFER) ||
2100 	    (tep->te_state == TS_WIND_ORDREL)||
2101 	    (tep->te_state == TS_WREQ_ORDREL))) {
2102 		TL_QENABLE(peer_tep);
2103 	}
2104 
2105 	/*
2106 	 * Wakeup read side service routine.
2107 	 */
2108 	mutex_enter(&tep->te_srv_lock);
2109 	ASSERT(tep->te_rsrv_active);
2110 	tep->te_rsrv_active = B_FALSE;
2111 	cv_signal(&tep->te_srv_cv);
2112 	mutex_exit(&tep->te_srv_lock);
2113 	tl_serializer_exit(tep);
2114 }
2115 
2116 /*
2117  * process M_PROTO messages. Always called from serializer.
2118  */
2119 static void
2120 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2121 {
2122 	ssize_t			msz = MBLKL(mp);
2123 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2124 
2125 	/* Message size was validated by tl_wput(). */
2126 	ASSERT(msz >= sizeof (prim->type));
2127 
2128 	switch (prim->type) {
2129 	case T_UNBIND_REQ:
2130 		tl_unbind(mp, tep);
2131 		break;
2132 
2133 	case T_ADDR_REQ:
2134 		tl_addr_req(mp, tep);
2135 		break;
2136 
2137 	case O_T_CONN_RES:
2138 	case T_CONN_RES:
2139 		if (IS_CLTS(tep)) {
2140 			tl_merror(tep->te_wq, mp, EPROTO);
2141 			break;
2142 		}
2143 		tl_conn_res(mp, tep);
2144 		break;
2145 
2146 	case T_DISCON_REQ:
2147 		if (IS_CLTS(tep)) {
2148 			tl_merror(tep->te_wq, mp, EPROTO);
2149 			break;
2150 		}
2151 		tl_discon_req(mp, tep);
2152 		break;
2153 
2154 	case T_DATA_REQ:
2155 		if (IS_CLTS(tep)) {
2156 			tl_merror(tep->te_wq, mp, EPROTO);
2157 			break;
2158 		}
2159 		tl_data(mp, tep);
2160 		break;
2161 
2162 	case T_OPTDATA_REQ:
2163 		if (IS_CLTS(tep)) {
2164 			tl_merror(tep->te_wq, mp, EPROTO);
2165 			break;
2166 		}
2167 		tl_data(mp, tep);
2168 		break;
2169 
2170 	case T_EXDATA_REQ:
2171 		if (IS_CLTS(tep)) {
2172 			tl_merror(tep->te_wq, mp, EPROTO);
2173 			break;
2174 		}
2175 		tl_exdata(mp, tep);
2176 		break;
2177 
2178 	case T_ORDREL_REQ:
2179 		if (!IS_COTSORD(tep)) {
2180 			tl_merror(tep->te_wq, mp, EPROTO);
2181 			break;
2182 		}
2183 		tl_ordrel(mp, tep);
2184 		break;
2185 
2186 	case T_UNITDATA_REQ:
2187 		if (IS_COTS(tep)) {
2188 			tl_merror(tep->te_wq, mp, EPROTO);
2189 			break;
2190 		}
2191 		tl_unitdata(mp, tep);
2192 		break;
2193 
2194 	default:
2195 		tl_merror(tep->te_wq, mp, EPROTO);
2196 		break;
2197 	}
2198 }
2199 
2200 /*
2201  * Process ioctl from serializer.
2202  * This is a wrapper around tl_do_ioctl().
2203  */
2204 static void
2205 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2206 {
2207 	if (!tep->te_closing)
2208 		tl_do_ioctl(mp, tep);
2209 	else
2210 		freemsg(mp);
2211 
2212 	tl_serializer_exit(tep);
2213 	tl_refrele(tep);
2214 }
2215 
2216 static void
2217 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2218 {
2219 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2220 	int cmd = iocbp->ioc_cmd;
2221 	queue_t *wq = tep->te_wq;
2222 	int error;
2223 	int thisopt, otheropt;
2224 
2225 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2226 
2227 	switch (cmd) {
2228 	case TL_IOC_CREDOPT:
2229 		if (cmd == TL_IOC_CREDOPT) {
2230 			thisopt = TL_SETCRED;
2231 			otheropt = TL_SETUCRED;
2232 		} else {
2233 			/* FALLTHROUGH */
2234 	case TL_IOC_UCREDOPT:
2235 			thisopt = TL_SETUCRED;
2236 			otheropt = TL_SETCRED;
2237 		}
2238 		/*
2239 		 * The credentials passing does not apply to sockets.
2240 		 * Only one of the cred options can be set at a given time.
2241 		 */
2242 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2243 			miocnak(wq, mp, 0, EINVAL);
2244 			return;
2245 		}
2246 
2247 		/*
2248 		 * Turn on generation of credential options for
2249 		 * T_conn_req, T_conn_con, T_unidata_ind.
2250 		 */
2251 		error = miocpullup(mp, sizeof (uint32_t));
2252 		if (error != 0) {
2253 			miocnak(wq, mp, 0, error);
2254 			return;
2255 		}
2256 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2257 			miocnak(wq, mp, 0, EINVAL);
2258 			return;
2259 		}
2260 
2261 		if (*(uint32_t *)mp->b_cont->b_rptr)
2262 			tep->te_flag |= thisopt;
2263 		else
2264 			tep->te_flag &= ~thisopt;
2265 
2266 		miocack(wq, mp, 0, 0);
2267 		break;
2268 
2269 	default:
2270 		/* Should not be here */
2271 		miocnak(wq, mp, 0, EINVAL);
2272 		break;
2273 	}
2274 }
2275 
2276 
2277 /*
2278  * send T_ERROR_ACK
2279  * Note: assumes enough memory or caller passed big enough mp
2280  *	- no recovery from allocb failures
2281  */
2282 
2283 static void
2284 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2285     t_scalar_t unix_err, t_scalar_t type)
2286 {
2287 	struct T_error_ack *err_ack;
2288 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2289 	    M_PCPROTO, T_ERROR_ACK);
2290 
2291 	if (ackmp == NULL) {
2292 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE | SL_ERROR,
2293 		    "tl_error_ack:out of mblk memory"));
2294 		tl_merror(wq, NULL, ENOSR);
2295 		return;
2296 	}
2297 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2298 	err_ack->ERROR_prim = type;
2299 	err_ack->TLI_error = tli_err;
2300 	err_ack->UNIX_error = unix_err;
2301 
2302 	/*
2303 	 * send error ack message
2304 	 */
2305 	qreply(wq, ackmp);
2306 }
2307 
2308 
2309 
2310 /*
2311  * send T_OK_ACK
2312  * Note: assumes enough memory or caller passed big enough mp
2313  *	- no recovery from allocb failures
2314  */
2315 static void
2316 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2317 {
2318 	struct T_ok_ack *ok_ack;
2319 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2320 	    M_PCPROTO, T_OK_ACK);
2321 
2322 	if (ackmp == NULL) {
2323 		tl_merror(wq, NULL, ENOMEM);
2324 		return;
2325 	}
2326 
2327 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2328 	ok_ack->CORRECT_prim = type;
2329 
2330 	(void) qreply(wq, ackmp);
2331 }
2332 
2333 /*
2334  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2335  * This is a wrapper around tl_bind().
2336  */
2337 static void
2338 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2339 {
2340 	if (!tep->te_closing)
2341 		tl_bind(mp, tep);
2342 	else
2343 		freemsg(mp);
2344 
2345 	tl_serializer_exit(tep);
2346 	tl_refrele(tep);
2347 }
2348 
2349 /*
2350  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2351  * Assumes that the endpoint is in the unbound.
2352  */
2353 static void
2354 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2355 {
2356 	queue_t			*wq = tep->te_wq;
2357 	struct T_bind_ack	*b_ack;
2358 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2359 	mblk_t			*ackmp, *bamp;
2360 	soux_addr_t		ux_addr;
2361 	t_uscalar_t		qlen = 0;
2362 	t_scalar_t		alen, aoff;
2363 	tl_addr_t		addr_req;
2364 	void			*addr_startp;
2365 	ssize_t			msz = MBLKL(mp), basize;
2366 	t_scalar_t		tli_err = 0, unix_err = 0;
2367 	t_scalar_t		save_prim_type = bind->PRIM_type;
2368 	t_scalar_t		save_state = tep->te_state;
2369 
2370 	if (tep->te_state != TS_UNBND) {
2371 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2372 		    SL_TRACE | SL_ERROR,
2373 		    "tl_wput:bind_request:out of state, state=%d",
2374 		    tep->te_state));
2375 		tli_err = TOUTSTATE;
2376 		goto error;
2377 	}
2378 
2379 	if (msz < sizeof (struct T_bind_req)) {
2380 		tli_err = TSYSERR;
2381 		unix_err = EINVAL;
2382 		goto error;
2383 	}
2384 
2385 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2386 
2387 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2388 	    (bind->PRIM_type == T_BIND_REQ));
2389 
2390 	alen = bind->ADDR_length;
2391 	aoff = bind->ADDR_offset;
2392 
2393 	/* negotiate max conn req pending */
2394 	if (IS_COTS(tep)) {
2395 		qlen = bind->CONIND_number;
2396 		if (qlen > tl_maxqlen)
2397 			qlen = tl_maxqlen;
2398 	}
2399 
2400 	/*
2401 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2402 	 * and bound again.
2403 	 */
2404 	if ((tep->te_hash_hndl == NULL) &&
2405 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2406 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2407 	    &tep->te_hash_hndl) != 0) {
2408 		tli_err = TSYSERR;
2409 		unix_err = ENOSR;
2410 		goto error;
2411 	}
2412 
2413 	/*
2414 	 * Verify address correctness.
2415 	 */
2416 	if (IS_SOCKET(tep)) {
2417 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2418 
2419 		if ((alen != TL_SOUX_ADDRLEN) ||
2420 		    (aoff < 0) ||
2421 		    (aoff + alen > msz)) {
2422 			(void) (STRLOG(TL_ID, tep->te_minor,
2423 			    1, SL_TRACE | SL_ERROR,
2424 			    "tl_bind: invalid socket addr"));
2425 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2426 			tli_err = TSYSERR;
2427 			unix_err = EINVAL;
2428 			goto error;
2429 		}
2430 		/* Copy address from message to local buffer. */
2431 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2432 		/*
2433 		 * Check that we got correct address from sockets
2434 		 */
2435 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2436 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2437 			(void) (STRLOG(TL_ID, tep->te_minor,
2438 			    1, SL_TRACE | SL_ERROR,
2439 			    "tl_bind: invalid socket magic"));
2440 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2441 			tli_err = TSYSERR;
2442 			unix_err = EINVAL;
2443 			goto error;
2444 		}
2445 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2446 		    (ux_addr.soua_vp != NULL)) {
2447 			(void) (STRLOG(TL_ID, tep->te_minor,
2448 			    1, SL_TRACE | SL_ERROR,
2449 			    "tl_bind: implicit addr non-empty"));
2450 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2451 			tli_err = TSYSERR;
2452 			unix_err = EINVAL;
2453 			goto error;
2454 		}
2455 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2456 		    (ux_addr.soua_vp == NULL)) {
2457 			(void) (STRLOG(TL_ID, tep->te_minor,
2458 			    1, SL_TRACE | SL_ERROR,
2459 			    "tl_bind: explicit addr empty"));
2460 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2461 			tli_err = TSYSERR;
2462 			unix_err = EINVAL;
2463 			goto error;
2464 		}
2465 	} else {
2466 		if ((alen > 0) && ((aoff < 0) ||
2467 		    ((ssize_t)(aoff + alen) > msz) ||
2468 		    ((aoff + alen) < 0))) {
2469 			(void) (STRLOG(TL_ID, tep->te_minor,
2470 			    1, SL_TRACE | SL_ERROR,
2471 			    "tl_bind: invalid message"));
2472 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2473 			tli_err = TSYSERR;
2474 			unix_err = EINVAL;
2475 			goto error;
2476 		}
2477 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2478 			(void) (STRLOG(TL_ID, tep->te_minor,
2479 			    1, SL_TRACE | SL_ERROR,
2480 			    "tl_bind: bad addr in  message"));
2481 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2482 			tli_err = TBADADDR;
2483 			goto error;
2484 		}
2485 #ifdef DEBUG
2486 		/*
2487 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2488 		 * if (!assertion)
2489 		 *	log warning;
2490 		 */
2491 		if (!((alen == 0 && aoff == 0) ||
2492 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2493 			(void) (STRLOG(TL_ID, tep->te_minor,
2494 				    3, SL_TRACE | SL_ERROR,
2495 				    "tl_bind: addr overlaps TPI message"));
2496 		}
2497 #endif
2498 	}
2499 
2500 	/*
2501 	 * Bind the address provided or allocate one if requested.
2502 	 * Allow rebinds with a new qlen value.
2503 	 */
2504 	if (IS_SOCKET(tep)) {
2505 		/*
2506 		 * For anonymous requests the te_ap is already set up properly
2507 		 * so use minor number as an address.
2508 		 * For explicit requests need to check whether the address is
2509 		 * already in use.
2510 		 */
2511 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2512 			int rc;
2513 
2514 			if (tep->te_flag & TL_ADDRHASHED) {
2515 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2516 				if (tep->te_vp == ux_addr.soua_vp)
2517 					goto skip_addr_bind;
2518 				else /* Rebind to a new address. */
2519 					tl_addr_unbind(tep);
2520 			}
2521 			/*
2522 			 * Insert address in the hash if it is not already
2523 			 * there.  Since we use preallocated handle, the insert
2524 			 * can fail only if the key is already present.
2525 			 */
2526 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2527 			    (mod_hash_key_t)ux_addr.soua_vp,
2528 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2529 
2530 			if (rc != 0) {
2531 				ASSERT(rc == MH_ERR_DUPLICATE);
2532 				/*
2533 				 * Violate O_T_BIND_REQ semantics and fail with
2534 				 * TADDRBUSY - sockets will not use any address
2535 				 * other than supplied one for explicit binds.
2536 				 */
2537 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2538 				    SL_TRACE | SL_ERROR,
2539 				    "tl_bind:requested addr %p is busy",
2540 				    ux_addr.soua_vp));
2541 				tli_err = TADDRBUSY;
2542 				unix_err = 0;
2543 				goto error;
2544 			}
2545 			tep->te_uxaddr = ux_addr;
2546 			tep->te_flag |= TL_ADDRHASHED;
2547 			tep->te_hash_hndl = NULL;
2548 		}
2549 	} else if (alen == 0) {
2550 		/*
2551 		 * assign any free address
2552 		 */
2553 		if (!tl_get_any_addr(tep, NULL)) {
2554 			(void) (STRLOG(TL_ID, tep->te_minor,
2555 			    1, SL_TRACE | SL_ERROR,
2556 			    "tl_bind:failed to get buffer for any "
2557 			    "address"));
2558 			tli_err = TSYSERR;
2559 			unix_err = ENOSR;
2560 			goto error;
2561 		}
2562 	} else {
2563 		addr_req.ta_alen = alen;
2564 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2565 		addr_req.ta_zoneid = tep->te_zoneid;
2566 
2567 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2568 		if (tep->te_abuf == NULL) {
2569 			tli_err = TSYSERR;
2570 			unix_err = ENOSR;
2571 			goto error;
2572 		}
2573 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2574 		tep->te_alen = alen;
2575 
2576 		if (mod_hash_insert_reserve(tep->te_addrhash,
2577 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2578 		    tep->te_hash_hndl) != 0) {
2579 			if (save_prim_type == T_BIND_REQ) {
2580 				/*
2581 				 * The bind semantics for this primitive
2582 				 * require a failure if the exact address
2583 				 * requested is busy
2584 				 */
2585 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2586 				    SL_TRACE | SL_ERROR,
2587 				    "tl_bind:requested addr is busy"));
2588 				tli_err = TADDRBUSY;
2589 				unix_err = 0;
2590 				goto error;
2591 			}
2592 
2593 			/*
2594 			 * O_T_BIND_REQ semantics say if address if requested
2595 			 * address is busy, bind to any available free address
2596 			 */
2597 			if (!tl_get_any_addr(tep, &addr_req)) {
2598 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2599 				    SL_TRACE | SL_ERROR,
2600 				    "tl_bind:unable to get any addr buf"));
2601 				tli_err = TSYSERR;
2602 				unix_err = ENOMEM;
2603 				goto error;
2604 			}
2605 		} else {
2606 			tep->te_flag |= TL_ADDRHASHED;
2607 			tep->te_hash_hndl = NULL;
2608 		}
2609 	}
2610 
2611 	ASSERT(tep->te_alen >= 0);
2612 
2613 skip_addr_bind:
2614 	/*
2615 	 * prepare T_BIND_ACK TPI message
2616 	 */
2617 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2618 	bamp = reallocb(mp, basize, 0);
2619 	if (bamp == NULL) {
2620 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2621 		    "tl_wput:tl_bind: allocb failed"));
2622 		/*
2623 		 * roll back state changes
2624 		 */
2625 		tl_addr_unbind(tep);
2626 		tep->te_state = TS_UNBND;
2627 		tl_memrecover(wq, mp, basize);
2628 		return;
2629 	}
2630 
2631 	DB_TYPE(bamp) = M_PCPROTO;
2632 	bamp->b_wptr = bamp->b_rptr + basize;
2633 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2634 	b_ack->PRIM_type = T_BIND_ACK;
2635 	b_ack->CONIND_number = qlen;
2636 	b_ack->ADDR_length = tep->te_alen;
2637 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2638 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2639 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2640 
2641 	if (IS_COTS(tep)) {
2642 		tep->te_qlen = qlen;
2643 		if (qlen > 0)
2644 			tep->te_flag |= TL_LISTENER;
2645 	}
2646 
2647 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2648 	/*
2649 	 * send T_BIND_ACK message
2650 	 */
2651 	(void) qreply(wq, bamp);
2652 	return;
2653 
2654 error:
2655 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2656 	if (ackmp == NULL) {
2657 		/*
2658 		 * roll back state changes
2659 		 */
2660 		tep->te_state = save_state;
2661 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2662 		return;
2663 	}
2664 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2665 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2666 }
2667 
2668 /*
2669  * Process T_UNBIND_REQ.
2670  * Called from serializer.
2671  */
2672 static void
2673 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2674 {
2675 	queue_t *wq;
2676 	mblk_t *ackmp;
2677 
2678 	if (tep->te_closing) {
2679 		freemsg(mp);
2680 		return;
2681 	}
2682 
2683 	wq = tep->te_wq;
2684 
2685 	/*
2686 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2687 	 * ==> allocate for T_ERROR_ACK (known max)
2688 	 */
2689 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2690 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2691 		return;
2692 	}
2693 	/*
2694 	 * memory resources committed
2695 	 * Note: no message validation. T_UNBIND_REQ message is
2696 	 * same size as PRIM_type field so already verified earlier.
2697 	 */
2698 
2699 	/*
2700 	 * validate state
2701 	 */
2702 	if (tep->te_state != TS_IDLE) {
2703 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2704 		    SL_TRACE | SL_ERROR,
2705 		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2706 		    tep->te_state));
2707 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2708 		return;
2709 	}
2710 	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2711 
2712 	/*
2713 	 * TPI says on T_UNBIND_REQ:
2714 	 *    send up a M_FLUSH to flush both
2715 	 *    read and write queues
2716 	 */
2717 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2718 
2719 	if (!IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2720 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2721 
2722 		/*
2723 		 * Sockets use bind with qlen==0 followed by bind() to
2724 		 * the same address with qlen > 0 for listeners.
2725 		 * We allow rebind with a new qlen value.
2726 		 */
2727 		tl_addr_unbind(tep);
2728 	}
2729 
2730 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2731 	/*
2732 	 * send  T_OK_ACK
2733 	 */
2734 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2735 }
2736 
2737 
2738 /*
2739  * Option management code from drv/ip is used here
2740  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2741  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2742  *	However, that is what we want as that option is 'unorthodox'
2743  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2744  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2745  * Note2: use of optcom_req means this routine is an exception to
2746  *	 recovery from allocb() failures.
2747  */
2748 
2749 static void
2750 tl_optmgmt(queue_t *wq, mblk_t *mp)
2751 {
2752 	tl_endpt_t *tep;
2753 	mblk_t *ackmp;
2754 	union T_primitives *prim;
2755 	cred_t *cr;
2756 
2757 	tep = (tl_endpt_t *)wq->q_ptr;
2758 	prim = (union T_primitives *)mp->b_rptr;
2759 
2760 	/*
2761 	 * All Solaris components should pass a db_credp
2762 	 * for this TPI message, hence we ASSERT.
2763 	 * But in case there is some other M_PROTO that looks
2764 	 * like a TPI message sent by some other kernel
2765 	 * component, we check and return an error.
2766 	 */
2767 	cr = msg_getcred(mp, NULL);
2768 	ASSERT(cr != NULL);
2769 	if (cr == NULL) {
2770 		tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2771 		return;
2772 	}
2773 
2774 	/*  all states OK for AF_UNIX options ? */
2775 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2776 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2777 		/*
2778 		 * Broken TLI semantics that options can only be managed
2779 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2780 		 * tests this TLI (mis)feature using this device driver.
2781 		 */
2782 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2783 		    SL_TRACE | SL_ERROR,
2784 		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2785 		    tep->te_state));
2786 		/*
2787 		 * preallocate memory for T_ERROR_ACK
2788 		 */
2789 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2790 		if (ackmp == NULL) {
2791 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2792 			return;
2793 		}
2794 
2795 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2796 		freemsg(mp);
2797 		return;
2798 	}
2799 
2800 	/*
2801 	 * call common option management routine from drv/ip
2802 	 */
2803 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2804 		svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2805 	} else {
2806 		ASSERT(prim->type == T_OPTMGMT_REQ);
2807 		tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2808 	}
2809 }
2810 
2811 /*
2812  * Handle T_conn_req - the driver part of accept().
2813  * If TL_SET[U]CRED generate the credentials options.
2814  * If this is a socket pass through options unmodified.
2815  * For sockets generate the T_CONN_CON here instead of
2816  * waiting for the T_CONN_RES.
2817  */
2818 static void
2819 tl_conn_req(queue_t *wq, mblk_t *mp)
2820 {
2821 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2822 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2823 	ssize_t			msz = MBLKL(mp);
2824 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2825 	tl_endpt_t		*peer_tep = NULL;
2826 	mblk_t			*ackmp;
2827 	mblk_t			*dimp;
2828 	struct T_discon_ind	*di;
2829 	soux_addr_t		ux_addr;
2830 	tl_addr_t		dst;
2831 
2832 	ASSERT(IS_COTS(tep));
2833 
2834 	if (tep->te_closing) {
2835 		freemsg(mp);
2836 		return;
2837 	}
2838 
2839 	/*
2840 	 * preallocate memory for:
2841 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2842 	 *	==> known max T_ERROR_ACK
2843 	 * 2. max of T_DISCON_IND and T_CONN_IND
2844 	 */
2845 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2846 	if (ackmp == NULL) {
2847 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2848 		return;
2849 	}
2850 	/*
2851 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2852 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2853 	 */
2854 
2855 	if (tep->te_state != TS_IDLE) {
2856 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2857 		    SL_TRACE | SL_ERROR,
2858 		    "tl_wput:T_CONN_REQ:out of state, state=%d",
2859 		    tep->te_state));
2860 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2861 		freemsg(mp);
2862 		return;
2863 	}
2864 
2865 	/*
2866 	 * validate the message
2867 	 * Note: dereference fields in struct inside message only
2868 	 * after validating the message length.
2869 	 */
2870 	if (msz < sizeof (struct T_conn_req)) {
2871 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2872 		    "tl_conn_req:invalid message length"));
2873 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2874 		freemsg(mp);
2875 		return;
2876 	}
2877 	alen = creq->DEST_length;
2878 	aoff = creq->DEST_offset;
2879 	olen = creq->OPT_length;
2880 	ooff = creq->OPT_offset;
2881 	if (olen == 0)
2882 		ooff = 0;
2883 
2884 	if (IS_SOCKET(tep)) {
2885 		if ((alen != TL_SOUX_ADDRLEN) ||
2886 		    (aoff < 0) ||
2887 		    (aoff + alen > msz) ||
2888 		    (alen > msz - sizeof (struct T_conn_req))) {
2889 			(void) (STRLOG(TL_ID, tep->te_minor,
2890 				    1, SL_TRACE | SL_ERROR,
2891 				    "tl_conn_req: invalid socket addr"));
2892 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2893 			freemsg(mp);
2894 			return;
2895 		}
2896 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2897 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2898 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2899 			(void) (STRLOG(TL_ID, tep->te_minor,
2900 			    1, SL_TRACE | SL_ERROR,
2901 			    "tl_conn_req: invalid socket magic"));
2902 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2903 			freemsg(mp);
2904 			return;
2905 		}
2906 	} else {
2907 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2908 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2909 		    ooff + olen < 0)) ||
2910 		    olen < 0 || ooff < 0) {
2911 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2912 			    SL_TRACE | SL_ERROR,
2913 			    "tl_conn_req:invalid message"));
2914 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2915 			freemsg(mp);
2916 			return;
2917 		}
2918 
2919 		if (alen <= 0 || aoff < 0 ||
2920 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2921 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2922 				    SL_TRACE | SL_ERROR,
2923 				    "tl_conn_req:bad addr in message, "
2924 				    "alen=%d, msz=%ld",
2925 				    alen, msz));
2926 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2927 			freemsg(mp);
2928 			return;
2929 		}
2930 #ifdef DEBUG
2931 		/*
2932 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2933 		 * if (!assertion)
2934 		 *	log warning;
2935 		 */
2936 		if (!(aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2937 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
2938 			    SL_TRACE | SL_ERROR,
2939 			    "tl_conn_req: addr overlaps TPI message"));
2940 		}
2941 #endif
2942 		if (olen) {
2943 			/*
2944 			 * no opts in connect req
2945 			 * supported in this provider except for sockets.
2946 			 */
2947 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2948 			    SL_TRACE | SL_ERROR,
2949 			    "tl_conn_req:options not supported "
2950 			    "in message"));
2951 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2952 			freemsg(mp);
2953 			return;
2954 		}
2955 	}
2956 
2957 	/*
2958 	 * Prevent tep from closing on us.
2959 	 */
2960 	if (!tl_noclose(tep)) {
2961 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2962 		    "tl_conn_req:endpoint is closing"));
2963 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2964 		freemsg(mp);
2965 		return;
2966 	}
2967 
2968 	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2969 	/*
2970 	 * get endpoint to connect to
2971 	 * check that peer with DEST addr is bound to addr
2972 	 * and has CONIND_number > 0
2973 	 */
2974 	dst.ta_alen = alen;
2975 	dst.ta_abuf = mp->b_rptr + aoff;
2976 	dst.ta_zoneid = tep->te_zoneid;
2977 
2978 	/*
2979 	 * Verify if remote addr is in use
2980 	 */
2981 	peer_tep = (IS_SOCKET(tep) ?
2982 	    tl_sock_find_peer(tep, &ux_addr) :
2983 	    tl_find_peer(tep, &dst));
2984 
2985 	if (peer_tep == NULL) {
2986 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2987 		    "tl_conn_req:no one at connect address"));
2988 		err = ECONNREFUSED;
2989 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2990 		/*
2991 		 * validate that number of incoming connection is
2992 		 * not to capacity on destination endpoint
2993 		 */
2994 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2995 		    "tl_conn_req: qlen overflow connection refused"));
2996 		err = ECONNREFUSED;
2997 	}
2998 
2999 	/*
3000 	 * Send T_DISCON_IND in case of error
3001 	 */
3002 	if (err != 0) {
3003 		if (peer_tep != NULL)
3004 			tl_refrele(peer_tep);
3005 		/* We are still expected to send T_OK_ACK */
3006 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3007 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
3008 		tl_closeok(tep);
3009 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
3010 		    M_PROTO, T_DISCON_IND);
3011 		if (dimp == NULL) {
3012 			tl_merror(wq, NULL, ENOSR);
3013 			return;
3014 		}
3015 		di = (struct T_discon_ind *)dimp->b_rptr;
3016 		di->DISCON_reason = err;
3017 		di->SEQ_number = BADSEQNUM;
3018 
3019 		tep->te_state = TS_IDLE;
3020 		/*
3021 		 * send T_DISCON_IND message
3022 		 */
3023 		putnext(tep->te_rq, dimp);
3024 		return;
3025 	}
3026 
3027 	ASSERT(IS_COTS(peer_tep));
3028 
3029 	/*
3030 	 * Found the listener. At this point processing will continue on
3031 	 * listener serializer. Close of the endpoint should be blocked while we
3032 	 * switch serializers.
3033 	 */
3034 	tl_serializer_refhold(peer_tep->te_ser);
3035 	tl_serializer_refrele(tep->te_ser);
3036 	tep->te_ser = peer_tep->te_ser;
3037 	ASSERT(tep->te_oconp == NULL);
3038 	tep->te_oconp = peer_tep;
3039 
3040 	/*
3041 	 * It is safe to close now. Close may continue on listener serializer.
3042 	 */
3043 	tl_closeok(tep);
3044 
3045 	/*
3046 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3047 	 * data, so we link mp to ackmp.
3048 	 */
3049 	ackmp->b_cont = mp;
3050 	mp = ackmp;
3051 
3052 	tl_refhold(tep);
3053 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3054 }
3055 
3056 /*
3057  * Finish T_CONN_REQ processing on listener serializer.
3058  */
3059 static void
3060 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3061 {
3062 	queue_t		*wq;
3063 	tl_endpt_t	*peer_tep = tep->te_oconp;
3064 	mblk_t		*confmp, *cimp, *indmp;
3065 	void		*opts = NULL;
3066 	mblk_t		*ackmp = mp;
3067 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3068 	struct T_conn_ind	*ci;
3069 	tl_icon_t	*tip;
3070 	void		*addr_startp;
3071 	t_scalar_t	olen = creq->OPT_length;
3072 	t_scalar_t	ooff = creq->OPT_offset;
3073 	size_t		ci_msz;
3074 	size_t		size;
3075 	cred_t		*cr = NULL;
3076 	pid_t		cpid;
3077 
3078 	if (tep->te_closing) {
3079 		TL_UNCONNECT(tep->te_oconp);
3080 		tl_serializer_exit(tep);
3081 		tl_refrele(tep);
3082 		freemsg(mp);
3083 		return;
3084 	}
3085 
3086 	wq = tep->te_wq;
3087 	tep->te_flag |= TL_EAGER;
3088 
3089 	/*
3090 	 * Extract preallocated ackmp from mp.
3091 	 */
3092 	mp = mp->b_cont;
3093 	ackmp->b_cont = NULL;
3094 
3095 	if (olen == 0)
3096 		ooff = 0;
3097 
3098 	if (peer_tep->te_closing ||
3099 	    !((peer_tep->te_state == TS_IDLE) ||
3100 	    (peer_tep->te_state == TS_WRES_CIND))) {
3101 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3102 		    "tl_conn_req:peer in bad state (%d)",
3103 		    peer_tep->te_state));
3104 		TL_UNCONNECT(tep->te_oconp);
3105 		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3106 		freemsg(ackmp);
3107 		tl_serializer_exit(tep);
3108 		tl_refrele(tep);
3109 		return;
3110 	}
3111 
3112 	/*
3113 	 * preallocate now for T_DISCON_IND or T_CONN_IND
3114 	 */
3115 	/*
3116 	 * calculate length of T_CONN_IND message
3117 	 */
3118 	if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
3119 		cr = msg_getcred(mp, &cpid);
3120 		ASSERT(cr != NULL);
3121 		if (peer_tep->te_flag & TL_SETCRED) {
3122 			ooff = 0;
3123 			olen = (t_scalar_t) sizeof (struct opthdr) +
3124 			    OPTLEN(sizeof (tl_credopt_t));
3125 			/* 1 option only */
3126 		} else {
3127 			ooff = 0;
3128 			olen = (t_scalar_t)sizeof (struct opthdr) +
3129 			    OPTLEN(ucredminsize(cr));
3130 			/* 1 option only */
3131 		}
3132 	}
3133 	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3134 	ci_msz = T_ALIGN(ci_msz) + olen;
3135 	size = max(ci_msz, sizeof (struct T_discon_ind));
3136 
3137 	/*
3138 	 * Save options from mp - we'll need them for T_CONN_IND.
3139 	 */
3140 	if (ooff != 0) {
3141 		opts = kmem_alloc(olen, KM_NOSLEEP);
3142 		if (opts == NULL) {
3143 			/*
3144 			 * roll back state changes
3145 			 */
3146 			tep->te_state = TS_IDLE;
3147 			tl_memrecover(wq, mp, size);
3148 			freemsg(ackmp);
3149 			TL_UNCONNECT(tep->te_oconp);
3150 			tl_serializer_exit(tep);
3151 			tl_refrele(tep);
3152 			return;
3153 		}
3154 		/* Copy options to a temp buffer */
3155 		bcopy(mp->b_rptr + ooff, opts, olen);
3156 	}
3157 
3158 	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3159 		/*
3160 		 * Generate a T_CONN_CON that has the identical address
3161 		 * (and options) as the T_CONN_REQ.
3162 		 * NOTE: assumes that the T_conn_req and T_conn_con structures
3163 		 * are isomorphic.
3164 		 */
3165 		confmp = copyb(mp);
3166 		if (confmp == NULL) {
3167 			/*
3168 			 * roll back state changes
3169 			 */
3170 			tep->te_state = TS_IDLE;
3171 			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3172 			freemsg(ackmp);
3173 			if (opts != NULL)
3174 				kmem_free(opts, olen);
3175 			TL_UNCONNECT(tep->te_oconp);
3176 			tl_serializer_exit(tep);
3177 			tl_refrele(tep);
3178 			return;
3179 		}
3180 		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3181 		    T_CONN_CON;
3182 	} else {
3183 		confmp = NULL;
3184 	}
3185 	if ((indmp = reallocb(mp, size, 0)) == NULL) {
3186 		/*
3187 		 * roll back state changes
3188 		 */
3189 		tep->te_state = TS_IDLE;
3190 		tl_memrecover(wq, mp, size);
3191 		freemsg(ackmp);
3192 		if (opts != NULL)
3193 			kmem_free(opts, olen);
3194 		freemsg(confmp);
3195 		TL_UNCONNECT(tep->te_oconp);
3196 		tl_serializer_exit(tep);
3197 		tl_refrele(tep);
3198 		return;
3199 	}
3200 
3201 	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3202 	if (tip == NULL) {
3203 		/*
3204 		 * roll back state changes
3205 		 */
3206 		tep->te_state = TS_IDLE;
3207 		tl_memrecover(wq, indmp, sizeof (*tip));
3208 		freemsg(ackmp);
3209 		if (opts != NULL)
3210 			kmem_free(opts, olen);
3211 		freemsg(confmp);
3212 		TL_UNCONNECT(tep->te_oconp);
3213 		tl_serializer_exit(tep);
3214 		tl_refrele(tep);
3215 		return;
3216 	}
3217 	tip->ti_mp = NULL;
3218 
3219 	/*
3220 	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3221 	 * and tl_icon_t cell.
3222 	 */
3223 
3224 	/*
3225 	 * ack validity of request and send the peer credential in the ACK.
3226 	 */
3227 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3228 
3229 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3230 	    confmp != NULL) {
3231 		mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3232 	}
3233 
3234 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3235 
3236 	/*
3237 	 * prepare message to send T_CONN_IND
3238 	 */
3239 	/*
3240 	 * allocate the message - original data blocks retained
3241 	 * in the returned mblk
3242 	 */
3243 	cimp = tl_resizemp(indmp, size);
3244 	if (cimp == NULL) {
3245 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
3246 		    "tl_conn_req:con_ind:allocb failure"));
3247 		tl_merror(wq, indmp, ENOMEM);
3248 		TL_UNCONNECT(tep->te_oconp);
3249 		tl_serializer_exit(tep);
3250 		tl_refrele(tep);
3251 		if (opts != NULL)
3252 			kmem_free(opts, olen);
3253 		freemsg(confmp);
3254 		ASSERT(tip->ti_mp == NULL);
3255 		kmem_free(tip, sizeof (*tip));
3256 		return;
3257 	}
3258 
3259 	DB_TYPE(cimp) = M_PROTO;
3260 	ci = (struct T_conn_ind *)cimp->b_rptr;
3261 	ci->PRIM_type  = T_CONN_IND;
3262 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3263 	ci->SRC_length = tep->te_alen;
3264 	ci->SEQ_number = tep->te_seqno;
3265 
3266 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3267 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3268 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3269 
3270 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3271 		    ci->SRC_length);
3272 		ci->OPT_length = olen; /* because only 1 option */
3273 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3274 		    cr, cpid,
3275 		    peer_tep->te_flag, peer_tep->te_credp);
3276 	} else if (ooff != 0) {
3277 		/* Copy option from T_CONN_REQ */
3278 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3279 		    ci->SRC_length);
3280 		ci->OPT_length = olen;
3281 		ASSERT(opts != NULL);
3282 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3283 	} else {
3284 		ci->OPT_offset = 0;
3285 		ci->OPT_length = 0;
3286 	}
3287 	if (opts != NULL)
3288 		kmem_free(opts, olen);
3289 
3290 	/*
3291 	 * register connection request with server peer
3292 	 * append to list of incoming connections
3293 	 * increment references for both peer_tep and tep: peer_tep is placed on
3294 	 * te_oconp and tep is placed on listeners queue.
3295 	 */
3296 	tip->ti_tep = tep;
3297 	tip->ti_seqno = tep->te_seqno;
3298 	list_insert_tail(&peer_tep->te_iconp, tip);
3299 	peer_tep->te_nicon++;
3300 
3301 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3302 	/*
3303 	 * send the T_CONN_IND message
3304 	 */
3305 	putnext(peer_tep->te_rq, cimp);
3306 
3307 	/*
3308 	 * Send a T_CONN_CON message for sockets.
3309 	 * Disable the queues until we have reached the correct state!
3310 	 */
3311 	if (confmp != NULL) {
3312 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3313 		noenable(wq);
3314 		putnext(tep->te_rq, confmp);
3315 	}
3316 	/*
3317 	 * Now we need to increment tep reference because tep is referenced by
3318 	 * server list of pending connections. We also need to decrement
3319 	 * reference before exiting serializer. Two operations void each other
3320 	 * so we don't modify reference at all.
3321 	 */
3322 	ASSERT(tep->te_refcnt >= 2);
3323 	ASSERT(peer_tep->te_refcnt >= 2);
3324 	tl_serializer_exit(tep);
3325 }
3326 
3327 
3328 
3329 /*
3330  * Handle T_conn_res on listener stream. Called on listener serializer.
3331  * tl_conn_req has already generated the T_CONN_CON.
3332  * tl_conn_res is called on listener serializer.
3333  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3334  * Switch eager serializer to acceptor's.
3335  *
3336  * If TL_SET[U]CRED generate the credentials options.
3337  * For sockets tl_conn_req has already generated the T_CONN_CON.
3338  */
3339 static void
3340 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3341 {
3342 	queue_t			*wq;
3343 	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
3344 	ssize_t			msz = MBLKL(mp);
3345 	t_scalar_t		olen, ooff, err = 0;
3346 	t_scalar_t		prim = cres->PRIM_type;
3347 	uchar_t			*addr_startp;
3348 	tl_endpt_t		*acc_ep = NULL, *cl_ep = NULL;
3349 	tl_icon_t		*tip;
3350 	size_t			size;
3351 	mblk_t			*ackmp, *respmp;
3352 	mblk_t			*dimp, *ccmp = NULL;
3353 	struct T_discon_ind	*di;
3354 	struct T_conn_con	*cc;
3355 	boolean_t		client_noclose_set = B_FALSE;
3356 	boolean_t		switch_client_serializer = B_TRUE;
3357 
3358 	ASSERT(IS_COTS(tep));
3359 
3360 	if (tep->te_closing) {
3361 		freemsg(mp);
3362 		return;
3363 	}
3364 
3365 	wq = tep->te_wq;
3366 
3367 	/*
3368 	 * preallocate memory for:
3369 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3370 	 *	==> known max T_ERROR_ACK
3371 	 * 2. max of T_DISCON_IND and T_CONN_CON
3372 	 */
3373 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3374 	if (ackmp == NULL) {
3375 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3376 		return;
3377 	}
3378 	/*
3379 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3380 	 * will be committed for T_DISCON_IND/T_CONN_CON later
3381 	 */
3382 
3383 
3384 	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3385 
3386 	/*
3387 	 * validate state
3388 	 */
3389 	if (tep->te_state != TS_WRES_CIND) {
3390 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3391 		    SL_TRACE | SL_ERROR,
3392 		    "tl_wput:T_CONN_RES:out of state, state=%d",
3393 		    tep->te_state));
3394 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3395 		freemsg(mp);
3396 		return;
3397 	}
3398 
3399 	/*
3400 	 * validate the message
3401 	 * Note: dereference fields in struct inside message only
3402 	 * after validating the message length.
3403 	 */
3404 	if (msz < sizeof (struct T_conn_res)) {
3405 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3406 		    "tl_conn_res:invalid message length"));
3407 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3408 		freemsg(mp);
3409 		return;
3410 	}
3411 	olen = cres->OPT_length;
3412 	ooff = cres->OPT_offset;
3413 	if (((olen > 0) && ((ooff + olen) > msz))) {
3414 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3415 		    "tl_conn_res:invalid message"));
3416 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3417 		freemsg(mp);
3418 		return;
3419 	}
3420 	if (olen) {
3421 		/*
3422 		 * no opts in connect res
3423 		 * supported in this provider
3424 		 */
3425 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3426 		    "tl_conn_res:options not supported in message"));
3427 		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3428 		freemsg(mp);
3429 		return;
3430 	}
3431 
3432 	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3433 	ASSERT(tep->te_state == TS_WACK_CRES);
3434 
3435 	if (cres->SEQ_number < TL_MINOR_START &&
3436 	    cres->SEQ_number >= BADSEQNUM) {
3437 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3438 		    "tl_conn_res:remote endpoint sequence number bad"));
3439 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3440 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3441 		freemsg(mp);
3442 		return;
3443 	}
3444 
3445 	/*
3446 	 * find accepting endpoint. Will have extra reference if found.
3447 	 */
3448 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3449 	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3450 	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3451 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3452 		    "tl_conn_res:bad accepting endpoint"));
3453 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3454 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3455 		freemsg(mp);
3456 		return;
3457 	}
3458 
3459 	/*
3460 	 * Prevent acceptor from closing.
3461 	 */
3462 	if (!tl_noclose(acc_ep)) {
3463 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3464 		    "tl_conn_res:bad accepting endpoint"));
3465 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3466 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3467 		tl_refrele(acc_ep);
3468 		freemsg(mp);
3469 		return;
3470 	}
3471 
3472 	acc_ep->te_flag |= TL_ACCEPTOR;
3473 
3474 	/*
3475 	 * validate that accepting endpoint, if different from listening
3476 	 * has address bound => state is TS_IDLE
3477 	 * TROUBLE in XPG4 !!?
3478 	 */
3479 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3480 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3481 		    "tl_conn_res:accepting endpoint has no address bound,"
3482 		    "state=%d", acc_ep->te_state));
3483 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3484 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3485 		freemsg(mp);
3486 		tl_closeok(acc_ep);
3487 		tl_refrele(acc_ep);
3488 		return;
3489 	}
3490 
3491 	/*
3492 	 * validate if accepting endpt same as listening, then
3493 	 * no other incoming connection should be on the queue
3494 	 */
3495 
3496 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3497 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
3498 		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3499 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3500 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3501 		freemsg(mp);
3502 		tl_closeok(acc_ep);
3503 		tl_refrele(acc_ep);
3504 		return;
3505 	}
3506 
3507 	/*
3508 	 * Mark for deletion, the entry corresponding to client
3509 	 * on list of pending connections made by the listener
3510 	 *  search list to see if client is one of the
3511 	 * recorded as a listener.
3512 	 */
3513 	tip = tl_icon_find(tep, cres->SEQ_number);
3514 	if (tip == NULL) {
3515 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3516 		    "tl_conn_res:no client in listener list"));
3517 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3518 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3519 		freemsg(mp);
3520 		tl_closeok(acc_ep);
3521 		tl_refrele(acc_ep);
3522 		return;
3523 	}
3524 
3525 	/*
3526 	 * If ti_tep is NULL the client has already closed. In this case
3527 	 * the code below will avoid any action on the client side
3528 	 * but complete the server and acceptor state transitions.
3529 	 */
3530 	ASSERT(tip->ti_tep == NULL ||
3531 	    tip->ti_tep->te_seqno == cres->SEQ_number);
3532 	cl_ep = tip->ti_tep;
3533 
3534 	/*
3535 	 * If the client is present it is switched from listener's to acceptor's
3536 	 * serializer. We should block client closes while serializers are
3537 	 * being switched.
3538 	 *
3539 	 * It is possible that the client is present but is currently being
3540 	 * closed. There are two possible cases:
3541 	 *
3542 	 * 1) The client has already entered tl_close_finish_ser() and sent
3543 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3544 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3545 	 *
3546 	 * 2) The client started the close but has not entered
3547 	 *    tl_close_finish_ser() yet. In this case, the client is already
3548 	 *    proceeding asynchronously on the listener's serializer, so we're
3549 	 *    forced to change the acceptor to use the listener's serializer to
3550 	 *    ensure that any operations on the acceptor are serialized with
3551 	 *    respect to the close that's in-progress.
3552 	 */
3553 	if (cl_ep != NULL) {
3554 		if (tl_noclose(cl_ep)) {
3555 			client_noclose_set = B_TRUE;
3556 		} else {
3557 			/*
3558 			 * Client is closing. If it it has sent the
3559 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3560 			 * we have to let let the client continue until it is
3561 			 * sent.
3562 			 *
3563 			 * If we do continue using the client, acceptor will
3564 			 * switch to client's serializer which is used by client
3565 			 * for its close.
3566 			 */
3567 			tl_client_closing_when_accepting++;
3568 			switch_client_serializer = B_FALSE;
3569 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3570 			    cl_ep->te_state == -1)
3571 				cl_ep = NULL;
3572 		}
3573 	}
3574 
3575 	if (cl_ep != NULL) {
3576 		/*
3577 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3578 		 * (latter for sockets only)
3579 		 */
3580 		if (cl_ep->te_state != TS_WCON_CREQ &&
3581 		    (cl_ep->te_state != TS_DATA_XFER &&
3582 		    IS_SOCKET(cl_ep))) {
3583 			err = ECONNREFUSED;
3584 			/*
3585 			 * T_DISCON_IND sent later after committing memory
3586 			 * and acking validity of request
3587 			 */
3588 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3589 			    "tl_conn_res:peer in bad state"));
3590 		}
3591 
3592 		/*
3593 		 * preallocate now for T_DISCON_IND or T_CONN_CONN
3594 		 * ack validity of request (T_OK_ACK) after memory committed
3595 		 */
3596 
3597 		if (err) {
3598 			size = sizeof (struct T_discon_ind);
3599 		} else {
3600 			/*
3601 			 * calculate length of T_CONN_CON message
3602 			 */
3603 			olen = 0;
3604 			if (cl_ep->te_flag & TL_SETCRED) {
3605 				olen = (t_scalar_t)sizeof (struct opthdr) +
3606 				    OPTLEN(sizeof (tl_credopt_t));
3607 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3608 				olen = (t_scalar_t)sizeof (struct opthdr) +
3609 				    OPTLEN(ucredminsize(acc_ep->te_credp));
3610 			}
3611 			size = T_ALIGN(sizeof (struct T_conn_con) +
3612 			    acc_ep->te_alen) + olen;
3613 		}
3614 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3615 			/*
3616 			 * roll back state changes
3617 			 */
3618 			tep->te_state = TS_WRES_CIND;
3619 			tl_memrecover(wq, mp, size);
3620 			freemsg(ackmp);
3621 			if (client_noclose_set)
3622 				tl_closeok(cl_ep);
3623 			tl_closeok(acc_ep);
3624 			tl_refrele(acc_ep);
3625 			return;
3626 		}
3627 		mp = NULL;
3628 	}
3629 
3630 	/*
3631 	 * Now ack validity of request
3632 	 */
3633 	if (tep->te_nicon == 1) {
3634 		if (tep == acc_ep)
3635 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3636 		else
3637 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3638 	} else {
3639 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3640 	}
3641 
3642 	/*
3643 	 * send T_DISCON_IND now if client state validation failed earlier
3644 	 */
3645 	if (err) {
3646 		tl_ok_ack(wq, ackmp, prim);
3647 		/*
3648 		 * flush the queues - why always ?
3649 		 */
3650 		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3651 
3652 		dimp = tl_resizemp(respmp, size);
3653 		if (dimp == NULL) {
3654 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3655 			    SL_TRACE | SL_ERROR,
3656 			    "tl_conn_res:con_ind:allocb failure"));
3657 			tl_merror(wq, respmp, ENOMEM);
3658 			tl_closeok(acc_ep);
3659 			if (client_noclose_set)
3660 				tl_closeok(cl_ep);
3661 			tl_refrele(acc_ep);
3662 			return;
3663 		}
3664 		if (dimp->b_cont) {
3665 			/* no user data in provider generated discon ind */
3666 			freemsg(dimp->b_cont);
3667 			dimp->b_cont = NULL;
3668 		}
3669 
3670 		DB_TYPE(dimp) = M_PROTO;
3671 		di = (struct T_discon_ind *)dimp->b_rptr;
3672 		di->PRIM_type  = T_DISCON_IND;
3673 		di->DISCON_reason = err;
3674 		di->SEQ_number = BADSEQNUM;
3675 
3676 		tep->te_state = TS_IDLE;
3677 		/*
3678 		 * send T_DISCON_IND message
3679 		 */
3680 		putnext(acc_ep->te_rq, dimp);
3681 		if (client_noclose_set)
3682 			tl_closeok(cl_ep);
3683 		tl_closeok(acc_ep);
3684 		tl_refrele(acc_ep);
3685 		return;
3686 	}
3687 
3688 	/*
3689 	 * now start connecting the accepting endpoint
3690 	 */
3691 	if (tep != acc_ep)
3692 		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3693 
3694 	if (cl_ep == NULL) {
3695 		/*
3696 		 * The client has already closed. Send up any queued messages
3697 		 * and change the state accordingly.
3698 		 */
3699 		tl_ok_ack(wq, ackmp, prim);
3700 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3701 
3702 		/*
3703 		 * remove endpoint from incoming connection
3704 		 * delete client from list of incoming connections
3705 		 */
3706 		tl_freetip(tep, tip);
3707 		freemsg(mp);
3708 		tl_closeok(acc_ep);
3709 		tl_refrele(acc_ep);
3710 		return;
3711 	} else if (tip->ti_mp != NULL) {
3712 		/*
3713 		 * The client could have queued a T_DISCON_IND which needs
3714 		 * to be sent up.
3715 		 * Note that t_discon_req can not operate the same as
3716 		 * t_data_req since it is not possible for it to putbq
3717 		 * the message and return -1 due to the use of qwriter.
3718 		 */
3719 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3720 	}
3721 
3722 	/*
3723 	 * prepare connect confirm T_CONN_CON message
3724 	 */
3725 
3726 	/*
3727 	 * allocate the message - original data blocks
3728 	 * retained in the returned mblk
3729 	 */
3730 	if (!IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3731 		ccmp = tl_resizemp(respmp, size);
3732 		if (ccmp == NULL) {
3733 			tl_ok_ack(wq, ackmp, prim);
3734 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3735 			    SL_TRACE | SL_ERROR,
3736 			    "tl_conn_res:conn_con:allocb failure"));
3737 			tl_merror(wq, respmp, ENOMEM);
3738 			tl_closeok(acc_ep);
3739 			if (client_noclose_set)
3740 				tl_closeok(cl_ep);
3741 			tl_refrele(acc_ep);
3742 			return;
3743 		}
3744 
3745 		DB_TYPE(ccmp) = M_PROTO;
3746 		cc = (struct T_conn_con *)ccmp->b_rptr;
3747 		cc->PRIM_type  = T_CONN_CON;
3748 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3749 		cc->RES_length = acc_ep->te_alen;
3750 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3751 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3752 		if (cl_ep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
3753 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3754 			    cc->RES_length);
3755 			cc->OPT_length = olen;
3756 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3757 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3758 			    cl_ep->te_credp);
3759 		} else {
3760 			cc->OPT_offset = 0;
3761 			cc->OPT_length = 0;
3762 		}
3763 		/*
3764 		 * Forward the credential in the packet so it can be picked up
3765 		 * at the higher layers for more complete credential processing
3766 		 */
3767 		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3768 	} else {
3769 		freemsg(respmp);
3770 		respmp = NULL;
3771 	}
3772 
3773 	/*
3774 	 * make connection linking
3775 	 * accepting and client endpoints
3776 	 * No need to increment references:
3777 	 *	on client: it should already have one from tip->ti_tep linkage.
3778 	 *	on acceptor is should already have one from the table lookup.
3779 	 *
3780 	 * At this point both client and acceptor can't close. Set client
3781 	 * serializer to acceptor's.
3782 	 */
3783 	ASSERT(cl_ep->te_refcnt >= 2);
3784 	ASSERT(acc_ep->te_refcnt >= 2);
3785 	ASSERT(cl_ep->te_conp == NULL);
3786 	ASSERT(acc_ep->te_conp == NULL);
3787 	cl_ep->te_conp = acc_ep;
3788 	acc_ep->te_conp = cl_ep;
3789 	ASSERT(cl_ep->te_ser == tep->te_ser);
3790 	if (switch_client_serializer) {
3791 		mutex_enter(&cl_ep->te_ser_lock);
3792 		if (cl_ep->te_ser_count > 0) {
3793 			switch_client_serializer = B_FALSE;
3794 			tl_serializer_noswitch++;
3795 		} else {
3796 			/*
3797 			 * Move client to the acceptor's serializer.
3798 			 */
3799 			tl_serializer_refhold(acc_ep->te_ser);
3800 			tl_serializer_refrele(cl_ep->te_ser);
3801 			cl_ep->te_ser = acc_ep->te_ser;
3802 		}
3803 		mutex_exit(&cl_ep->te_ser_lock);
3804 	}
3805 	if (!switch_client_serializer) {
3806 		/*
3807 		 * It is not possible to switch client to use acceptor's.
3808 		 * Move acceptor to client's serializer (which is the same as
3809 		 * listener's).
3810 		 */
3811 		tl_serializer_refhold(cl_ep->te_ser);
3812 		tl_serializer_refrele(acc_ep->te_ser);
3813 		acc_ep->te_ser = cl_ep->te_ser;
3814 	}
3815 
3816 	TL_REMOVE_PEER(cl_ep->te_oconp);
3817 	TL_REMOVE_PEER(acc_ep->te_oconp);
3818 
3819 	/*
3820 	 * remove endpoint from incoming connection
3821 	 * delete client from list of incoming connections
3822 	 */
3823 	tip->ti_tep = NULL;
3824 	tl_freetip(tep, tip);
3825 	tl_ok_ack(wq, ackmp, prim);
3826 
3827 	/*
3828 	 * data blocks already linked in reallocb()
3829 	 */
3830 
3831 	/*
3832 	 * link queues so that I_SENDFD will work
3833 	 */
3834 	if (!IS_SOCKET(tep)) {
3835 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3836 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3837 	}
3838 
3839 	/*
3840 	 * send T_CONN_CON up on client side unless it was already
3841 	 * done (for a socket). In cases any data or ordrel req has been
3842 	 * queued make sure that the service procedure runs.
3843 	 */
3844 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3845 		enableok(cl_ep->te_wq);
3846 		TL_QENABLE(cl_ep);
3847 		if (ccmp != NULL)
3848 			freemsg(ccmp);
3849 	} else {
3850 		/*
3851 		 * change client state on TE_CONN_CON event
3852 		 */
3853 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3854 		putnext(cl_ep->te_rq, ccmp);
3855 	}
3856 
3857 	/* Mark the both endpoints as accepted */
3858 	cl_ep->te_flag |= TL_ACCEPTED;
3859 	acc_ep->te_flag |= TL_ACCEPTED;
3860 
3861 	/*
3862 	 * Allow client and acceptor to close.
3863 	 */
3864 	tl_closeok(acc_ep);
3865 	if (client_noclose_set)
3866 		tl_closeok(cl_ep);
3867 }
3868 
3869 
3870 
3871 
3872 static void
3873 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3874 {
3875 	queue_t			*wq;
3876 	struct T_discon_req	*dr;
3877 	ssize_t			msz;
3878 	tl_endpt_t		*peer_tep = tep->te_conp;
3879 	tl_endpt_t		*srv_tep = tep->te_oconp;
3880 	tl_icon_t		*tip;
3881 	size_t			size;
3882 	mblk_t			*ackmp, *dimp, *respmp;
3883 	struct T_discon_ind	*di;
3884 	t_scalar_t		save_state, new_state;
3885 
3886 	if (tep->te_closing) {
3887 		freemsg(mp);
3888 		return;
3889 	}
3890 
3891 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3892 		TL_UNCONNECT(tep->te_conp);
3893 		peer_tep = NULL;
3894 	}
3895 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3896 		TL_UNCONNECT(tep->te_oconp);
3897 		srv_tep = NULL;
3898 	}
3899 
3900 	wq = tep->te_wq;
3901 
3902 	/*
3903 	 * preallocate memory for:
3904 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3905 	 *	==> known max T_ERROR_ACK
3906 	 * 2. for  T_DISCON_IND
3907 	 */
3908 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3909 	if (ackmp == NULL) {
3910 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3911 		return;
3912 	}
3913 	/*
3914 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3915 	 * will be committed for T_DISCON_IND  later
3916 	 */
3917 
3918 	dr = (struct T_discon_req *)mp->b_rptr;
3919 	msz = MBLKL(mp);
3920 
3921 	/*
3922 	 * validate the state
3923 	 */
3924 	save_state = new_state = tep->te_state;
3925 	if (!(save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3926 	    !(save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3927 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3928 		    SL_TRACE | SL_ERROR,
3929 		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
3930 		    tep->te_state));
3931 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3932 		freemsg(mp);
3933 		return;
3934 	}
3935 	/*
3936 	 * Defer committing the state change until it is determined if
3937 	 * the message will be queued with the tl_icon or not.
3938 	 */
3939 	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3940 
3941 	/* validate the message */
3942 	if (msz < sizeof (struct T_discon_req)) {
3943 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3944 		    "tl_discon_req:invalid message"));
3945 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3946 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3947 		freemsg(mp);
3948 		return;
3949 	}
3950 
3951 	/*
3952 	 * if server, then validate that client exists
3953 	 * by connection sequence number etc.
3954 	 */
3955 	if (tep->te_nicon > 0) { /* server */
3956 
3957 		/*
3958 		 * search server list for disconnect client
3959 		 */
3960 		tip = tl_icon_find(tep, dr->SEQ_number);
3961 		if (tip == NULL) {
3962 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
3963 			    SL_TRACE | SL_ERROR,
3964 			    "tl_discon_req:no disconnect endpoint"));
3965 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3966 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3967 			freemsg(mp);
3968 			return;
3969 		}
3970 		/*
3971 		 * If ti_tep is NULL the client has already closed. In this case
3972 		 * the code below will avoid any action on the client side.
3973 		 */
3974 
3975 		IMPLY(tip->ti_tep != NULL,
3976 		    tip->ti_tep->te_seqno == dr->SEQ_number);
3977 		peer_tep = tip->ti_tep;
3978 	}
3979 
3980 	/*
3981 	 * preallocate now for T_DISCON_IND
3982 	 * ack validity of request (T_OK_ACK) after memory committed
3983 	 */
3984 	size = sizeof (struct T_discon_ind);
3985 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
3986 		tl_memrecover(wq, mp, size);
3987 		freemsg(ackmp);
3988 		return;
3989 	}
3990 
3991 	/*
3992 	 * prepare message to ack validity of request
3993 	 */
3994 	if (tep->te_nicon == 0) {
3995 		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3996 	} else {
3997 		if (tep->te_nicon == 1)
3998 			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3999 		else
4000 			new_state = NEXTSTATE(TE_OK_ACK4, new_state);
4001 	}
4002 
4003 	/*
4004 	 * Flushing queues according to TPI. Using the old state.
4005 	 */
4006 	if ((tep->te_nicon <= 1) &&
4007 	    ((save_state == TS_DATA_XFER) ||
4008 	    (save_state == TS_WIND_ORDREL) ||
4009 	    (save_state == TS_WREQ_ORDREL)))
4010 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
4011 
4012 	/* send T_OK_ACK up  */
4013 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
4014 
4015 	/*
4016 	 * now do disconnect business
4017 	 */
4018 	if (tep->te_nicon > 0) { /* listener */
4019 		if (peer_tep != NULL && !peer_tep->te_closing) {
4020 			/*
4021 			 * disconnect incoming connect request pending to tep
4022 			 */
4023 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4024 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
4025 				    SL_TRACE | SL_ERROR,
4026 				    "tl_discon_req: reallocb failed"));
4027 				tep->te_state = new_state;
4028 				tl_merror(wq, respmp, ENOMEM);
4029 				return;
4030 			}
4031 			di = (struct T_discon_ind *)dimp->b_rptr;
4032 			di->SEQ_number = BADSEQNUM;
4033 			save_state = peer_tep->te_state;
4034 			peer_tep->te_state = TS_IDLE;
4035 
4036 			TL_REMOVE_PEER(peer_tep->te_oconp);
4037 			enableok(peer_tep->te_wq);
4038 			TL_QENABLE(peer_tep);
4039 		} else {
4040 			freemsg(respmp);
4041 			dimp = NULL;
4042 		}
4043 
4044 		/*
4045 		 * remove endpoint from incoming connection list
4046 		 * - remove disconnect client from list on server
4047 		 */
4048 		tl_freetip(tep, tip);
4049 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4050 		/*
4051 		 * disconnect an outgoing request pending from tep
4052 		 */
4053 
4054 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4055 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4056 			    SL_TRACE | SL_ERROR,
4057 			    "tl_discon_req: reallocb failed"));
4058 			tep->te_state = new_state;
4059 			tl_merror(wq, respmp, ENOMEM);
4060 			return;
4061 		}
4062 		di = (struct T_discon_ind *)dimp->b_rptr;
4063 		DB_TYPE(dimp) = M_PROTO;
4064 		di->PRIM_type  = T_DISCON_IND;
4065 		di->DISCON_reason = ECONNRESET;
4066 		di->SEQ_number = tep->te_seqno;
4067 
4068 		/*
4069 		 * If this is a socket the T_DISCON_IND is queued with
4070 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4071 		 * from the list of pending connections.
4072 		 * Note that when te_oconp is set the peer better have
4073 		 * a t_connind_t for the client.
4074 		 */
4075 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4076 			/*
4077 			 * No need to check that
4078 			 * ti_tep == NULL since the T_DISCON_IND
4079 			 * takes precedence over other queued
4080 			 * messages.
4081 			 */
4082 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4083 			peer_tep = NULL;
4084 			dimp = NULL;
4085 			/*
4086 			 * Can't clear te_oconp since tl_co_unconnect needs
4087 			 * it as a hint not to free the tep.
4088 			 * Keep the state unchanged since tl_conn_res inspects
4089 			 * it.
4090 			 */
4091 			new_state = tep->te_state;
4092 		} else {
4093 			/* Found - delete it */
4094 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4095 			if (tip != NULL) {
4096 				ASSERT(tep == tip->ti_tep);
4097 				save_state = peer_tep->te_state;
4098 				if (peer_tep->te_nicon == 1)
4099 					peer_tep->te_state =
4100 					    NEXTSTATE(TE_DISCON_IND2,
4101 					    peer_tep->te_state);
4102 				else
4103 					peer_tep->te_state =
4104 					    NEXTSTATE(TE_DISCON_IND3,
4105 					    peer_tep->te_state);
4106 				tl_freetip(peer_tep, tip);
4107 			}
4108 			ASSERT(tep->te_oconp != NULL);
4109 			TL_UNCONNECT(tep->te_oconp);
4110 		}
4111 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4112 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4113 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4114 			    SL_TRACE | SL_ERROR,
4115 			    "tl_discon_req: reallocb failed"));
4116 			tep->te_state = new_state;
4117 			tl_merror(wq, respmp, ENOMEM);
4118 			return;
4119 		}
4120 		di = (struct T_discon_ind *)dimp->b_rptr;
4121 		di->SEQ_number = BADSEQNUM;
4122 
4123 		save_state = peer_tep->te_state;
4124 		peer_tep->te_state = TS_IDLE;
4125 	} else {
4126 		/* Not connected */
4127 		tep->te_state = new_state;
4128 		freemsg(respmp);
4129 		return;
4130 	}
4131 
4132 	/* Commit state changes */
4133 	tep->te_state = new_state;
4134 
4135 	if (peer_tep == NULL) {
4136 		ASSERT(dimp == NULL);
4137 		goto done;
4138 	}
4139 	/*
4140 	 * Flush queues on peer before sending up
4141 	 * T_DISCON_IND according to TPI
4142 	 */
4143 
4144 	if ((save_state == TS_DATA_XFER) ||
4145 	    (save_state == TS_WIND_ORDREL) ||
4146 	    (save_state == TS_WREQ_ORDREL))
4147 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4148 
4149 	DB_TYPE(dimp) = M_PROTO;
4150 	di->PRIM_type  = T_DISCON_IND;
4151 	di->DISCON_reason = ECONNRESET;
4152 
4153 	/*
4154 	 * data blocks already linked into dimp by reallocb()
4155 	 */
4156 	/*
4157 	 * send indication message to peer user module
4158 	 */
4159 	ASSERT(dimp != NULL);
4160 	putnext(peer_tep->te_rq, dimp);
4161 done:
4162 	if (tep->te_conp) {	/* disconnect pointers if connected */
4163 		ASSERT(!peer_tep->te_closing);
4164 
4165 		/*
4166 		 * Messages may be queued on peer's write queue
4167 		 * waiting to be processed by its write service
4168 		 * procedure. Before the pointer to the peer transport
4169 		 * structure is set to NULL, qenable the peer's write
4170 		 * queue so that the queued up messages are processed.
4171 		 */
4172 		if ((save_state == TS_DATA_XFER) ||
4173 		    (save_state == TS_WIND_ORDREL) ||
4174 		    (save_state == TS_WREQ_ORDREL))
4175 			TL_QENABLE(peer_tep);
4176 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4177 		TL_UNCONNECT(peer_tep->te_conp);
4178 		if (!IS_SOCKET(tep)) {
4179 			/*
4180 			 * unlink the streams
4181 			 */
4182 			tep->te_wq->q_next = NULL;
4183 			peer_tep->te_wq->q_next = NULL;
4184 		}
4185 		TL_UNCONNECT(tep->te_conp);
4186 	}
4187 }
4188 
4189 static void
4190 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4191 {
4192 	if (!tep->te_closing)
4193 		tl_addr_req(mp, tep);
4194 	else
4195 		freemsg(mp);
4196 
4197 	tl_serializer_exit(tep);
4198 	tl_refrele(tep);
4199 }
4200 
4201 static void
4202 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4203 {
4204 	queue_t			*wq;
4205 	size_t			ack_sz;
4206 	mblk_t			*ackmp;
4207 	struct T_addr_ack	*taa;
4208 
4209 	if (tep->te_closing) {
4210 		freemsg(mp);
4211 		return;
4212 	}
4213 
4214 	wq = tep->te_wq;
4215 
4216 	/*
4217 	 * Note: T_ADDR_REQ message has only PRIM_type field
4218 	 * so it is already validated earlier.
4219 	 */
4220 
4221 	if (IS_CLTS(tep) ||
4222 	    (tep->te_state > TS_WREQ_ORDREL) ||
4223 	    (tep->te_state < TS_DATA_XFER)) {
4224 		/*
4225 		 * Either connectionless or connection oriented but not
4226 		 * in connected data transfer state or half-closed states.
4227 		 */
4228 		ack_sz = sizeof (struct T_addr_ack);
4229 		if (tep->te_state >= TS_IDLE)
4230 			/* is bound */
4231 			ack_sz += tep->te_alen;
4232 		ackmp = reallocb(mp, ack_sz, 0);
4233 		if (ackmp == NULL) {
4234 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4235 			    SL_TRACE | SL_ERROR,
4236 			    "tl_addr_req: reallocb failed"));
4237 			tl_memrecover(wq, mp, ack_sz);
4238 			return;
4239 		}
4240 
4241 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4242 
4243 		bzero(taa, sizeof (struct T_addr_ack));
4244 
4245 		taa->PRIM_type = T_ADDR_ACK;
4246 		ackmp->b_datap->db_type = M_PCPROTO;
4247 		ackmp->b_wptr = (uchar_t *)&taa[1];
4248 
4249 		if (tep->te_state >= TS_IDLE) {
4250 			/* endpoint is bound */
4251 			taa->LOCADDR_length = tep->te_alen;
4252 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4253 
4254 			bcopy(tep->te_abuf, ackmp->b_wptr,
4255 			    tep->te_alen);
4256 			ackmp->b_wptr += tep->te_alen;
4257 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4258 		}
4259 
4260 		(void) qreply(wq, ackmp);
4261 	} else {
4262 		ASSERT(tep->te_state == TS_DATA_XFER ||
4263 		    tep->te_state == TS_WIND_ORDREL ||
4264 		    tep->te_state == TS_WREQ_ORDREL);
4265 		/* connection oriented in data transfer */
4266 		tl_connected_cots_addr_req(mp, tep);
4267 	}
4268 }
4269 
4270 
4271 static void
4272 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4273 {
4274 	tl_endpt_t		*peer_tep = tep->te_conp;
4275 	size_t			ack_sz;
4276 	mblk_t			*ackmp;
4277 	struct T_addr_ack	*taa;
4278 	uchar_t			*addr_startp;
4279 
4280 	if (tep->te_closing) {
4281 		freemsg(mp);
4282 		return;
4283 	}
4284 
4285 	if (peer_tep == NULL || peer_tep->te_closing) {
4286 		tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4287 		return;
4288 	}
4289 
4290 	ASSERT(tep->te_state >= TS_IDLE);
4291 
4292 	ack_sz = sizeof (struct T_addr_ack);
4293 	ack_sz += T_ALIGN(tep->te_alen);
4294 	ack_sz += peer_tep->te_alen;
4295 
4296 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4297 	if (ackmp == NULL) {
4298 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4299 		    "tl_connected_cots_addr_req: reallocb failed"));
4300 		tl_memrecover(tep->te_wq, mp, ack_sz);
4301 		return;
4302 	}
4303 
4304 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4305 
4306 	/* endpoint is bound */
4307 	taa->LOCADDR_length = tep->te_alen;
4308 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4309 
4310 	addr_startp = (uchar_t *)&taa[1];
4311 
4312 	bcopy(tep->te_abuf, addr_startp,
4313 	    tep->te_alen);
4314 
4315 	taa->REMADDR_length = peer_tep->te_alen;
4316 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4317 	    taa->LOCADDR_length);
4318 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4319 	bcopy(peer_tep->te_abuf, addr_startp,
4320 	    peer_tep->te_alen);
4321 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4322 	    taa->REMADDR_offset + peer_tep->te_alen;
4323 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4324 
4325 	putnext(tep->te_rq, ackmp);
4326 }
4327 
4328 static void
4329 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4330 {
4331 	if (IS_CLTS(tep)) {
4332 		*ia = tl_clts_info_ack;
4333 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4334 	} else {
4335 		*ia = tl_cots_info_ack;
4336 		if (IS_COTSORD(tep))
4337 			ia->SERV_type = T_COTS_ORD;
4338 	}
4339 	ia->TIDU_size = tl_tidusz;
4340 	ia->CURRENT_state = tep->te_state;
4341 }
4342 
4343 /*
4344  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4345  * tl_wput.
4346  */
4347 static void
4348 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4349 {
4350 	mblk_t			*ackmp;
4351 	t_uscalar_t		cap_bits1;
4352 	struct T_capability_ack	*tcap;
4353 
4354 	if (tep->te_closing) {
4355 		freemsg(mp);
4356 		return;
4357 	}
4358 
4359 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4360 
4361 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4362 	    M_PCPROTO, T_CAPABILITY_ACK);
4363 	if (ackmp == NULL) {
4364 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4365 		    "tl_capability_req: reallocb failed"));
4366 		tl_memrecover(tep->te_wq, mp,
4367 		    sizeof (struct T_capability_ack));
4368 		return;
4369 	}
4370 
4371 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4372 	tcap->CAP_bits1 = 0;
4373 
4374 	if (cap_bits1 & TC1_INFO) {
4375 		tl_copy_info(&tcap->INFO_ack, tep);
4376 		tcap->CAP_bits1 |= TC1_INFO;
4377 	}
4378 
4379 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4380 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4381 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4382 	}
4383 
4384 	putnext(tep->te_rq, ackmp);
4385 }
4386 
4387 static void
4388 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4389 {
4390 	if (!tep->te_closing)
4391 		tl_info_req(mp, tep);
4392 	else
4393 		freemsg(mp);
4394 
4395 	tl_serializer_exit(tep);
4396 	tl_refrele(tep);
4397 }
4398 
4399 static void
4400 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4401 {
4402 	mblk_t *ackmp;
4403 
4404 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4405 	    M_PCPROTO, T_INFO_ACK);
4406 	if (ackmp == NULL) {
4407 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4408 		    "tl_info_req: reallocb failed"));
4409 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4410 		return;
4411 	}
4412 
4413 	/*
4414 	 * fill in T_INFO_ACK contents
4415 	 */
4416 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4417 
4418 	/*
4419 	 * send ack message
4420 	 */
4421 	putnext(tep->te_rq, ackmp);
4422 }
4423 
4424 /*
4425  * Handle M_DATA, T_data_req and T_optdata_req.
4426  * If this is a socket pass through T_optdata_req options unmodified.
4427  */
4428 static void
4429 tl_data(mblk_t *mp, tl_endpt_t *tep)
4430 {
4431 	queue_t			*wq = tep->te_wq;
4432 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4433 	ssize_t			msz = MBLKL(mp);
4434 	tl_endpt_t		*peer_tep;
4435 	queue_t			*peer_rq;
4436 	boolean_t		closing = tep->te_closing;
4437 
4438 	if (IS_CLTS(tep)) {
4439 		(void) (STRLOG(TL_ID, tep->te_minor, 2,
4440 		    SL_TRACE | SL_ERROR,
4441 		    "tl_wput:clts:unattached M_DATA"));
4442 		if (!closing) {
4443 			tl_merror(wq, mp, EPROTO);
4444 		} else {
4445 			freemsg(mp);
4446 		}
4447 		return;
4448 	}
4449 
4450 	/*
4451 	 * If the endpoint is closing it should still forward any data to the
4452 	 * peer (if it has one). If it is not allowed to forward it can just
4453 	 * free the message.
4454 	 */
4455 	if (closing &&
4456 	    (tep->te_state != TS_DATA_XFER) &&
4457 	    (tep->te_state != TS_WREQ_ORDREL)) {
4458 		freemsg(mp);
4459 		return;
4460 	}
4461 
4462 	if (DB_TYPE(mp) == M_PROTO) {
4463 		if (prim->type == T_DATA_REQ &&
4464 		    msz < sizeof (struct T_data_req)) {
4465 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4466 				SL_TRACE | SL_ERROR,
4467 				"tl_data:T_DATA_REQ:invalid message"));
4468 			if (!closing) {
4469 				tl_merror(wq, mp, EPROTO);
4470 			} else {
4471 				freemsg(mp);
4472 			}
4473 			return;
4474 		} else if (prim->type == T_OPTDATA_REQ &&
4475 		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4476 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4477 			    SL_TRACE | SL_ERROR,
4478 			    "tl_data:T_OPTDATA_REQ:invalid message"));
4479 			if (!closing) {
4480 				tl_merror(wq, mp, EPROTO);
4481 			} else {
4482 				freemsg(mp);
4483 			}
4484 			return;
4485 		}
4486 	}
4487 
4488 	/*
4489 	 * connection oriented provider
4490 	 */
4491 	switch (tep->te_state) {
4492 	case TS_IDLE:
4493 		/*
4494 		 * Other end not here - do nothing.
4495 		 */
4496 		freemsg(mp);
4497 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4498 		    "tl_data:cots with endpoint idle"));
4499 		return;
4500 
4501 	case TS_DATA_XFER:
4502 		/* valid states */
4503 		if (tep->te_conp != NULL)
4504 			break;
4505 
4506 		if (tep->te_oconp == NULL) {
4507 			if (!closing) {
4508 				tl_merror(wq, mp, EPROTO);
4509 			} else {
4510 				freemsg(mp);
4511 			}
4512 			return;
4513 		}
4514 		/*
4515 		 * For a socket the T_CONN_CON is sent early thus
4516 		 * the peer might not yet have accepted the connection.
4517 		 * If we are closing queue the packet with the T_CONN_IND.
4518 		 * Otherwise defer processing the packet until the peer
4519 		 * accepts the connection.
4520 		 * Note that the queue is noenabled when we go into this
4521 		 * state.
4522 		 */
4523 		if (!closing) {
4524 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4525 			    SL_TRACE | SL_ERROR,
4526 			    "tl_data: ocon"));
4527 			TL_PUTBQ(tep, mp);
4528 			return;
4529 		}
4530 		if (DB_TYPE(mp) == M_PROTO) {
4531 			if (msz < sizeof (t_scalar_t)) {
4532 				freemsg(mp);
4533 				return;
4534 			}
4535 			/* reuse message block - just change REQ to IND */
4536 			if (prim->type == T_DATA_REQ)
4537 				prim->type = T_DATA_IND;
4538 			else
4539 				prim->type = T_OPTDATA_IND;
4540 		}
4541 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4542 		return;
4543 
4544 	case TS_WREQ_ORDREL:
4545 		if (tep->te_conp == NULL) {
4546 			/*
4547 			 * Other end closed - generate discon_ind
4548 			 * with reason 0 to cause an EPIPE but no
4549 			 * read side error on AF_UNIX sockets.
4550 			 */
4551 			freemsg(mp);
4552 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4553 			    SL_TRACE | SL_ERROR,
4554 			    "tl_data: WREQ_ORDREL and no peer"));
4555 			tl_discon_ind(tep, 0);
4556 			return;
4557 		}
4558 		break;
4559 
4560 	default:
4561 		/* invalid state for event TE_DATA_REQ */
4562 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4563 		    "tl_data:cots:out of state"));
4564 		tl_merror(wq, mp, EPROTO);
4565 		return;
4566 	}
4567 	/*
4568 	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4569 	 * (State stays same on this event)
4570 	 */
4571 
4572 	/*
4573 	 * get connected endpoint
4574 	 */
4575 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4576 		freemsg(mp);
4577 		/* Peer closed */
4578 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4579 		    "tl_data: peer gone"));
4580 		return;
4581 	}
4582 
4583 	ASSERT(tep->te_serializer == peer_tep->te_serializer);
4584 	peer_rq = peer_tep->te_rq;
4585 
4586 	/*
4587 	 * Put it back if flow controlled
4588 	 * Note: Messages already on queue when we are closing is bounded
4589 	 * so we can ignore flow control.
4590 	 */
4591 	if (!canputnext(peer_rq) && !closing) {
4592 		TL_PUTBQ(tep, mp);
4593 		return;
4594 	}
4595 
4596 	/*
4597 	 * validate peer state
4598 	 */
4599 	switch (peer_tep->te_state) {
4600 	case TS_DATA_XFER:
4601 	case TS_WIND_ORDREL:
4602 		/* valid states */
4603 		break;
4604 	default:
4605 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4606 		    "tl_data:rx side:invalid state"));
4607 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4608 		return;
4609 	}
4610 	if (DB_TYPE(mp) == M_PROTO) {
4611 		/* reuse message block - just change REQ to IND */
4612 		if (prim->type == T_DATA_REQ)
4613 			prim->type = T_DATA_IND;
4614 		else
4615 			prim->type = T_OPTDATA_IND;
4616 	}
4617 	/*
4618 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4619 	 * (peer state stays same on this event)
4620 	 */
4621 	/*
4622 	 * send data to connected peer
4623 	 */
4624 	putnext(peer_rq, mp);
4625 }
4626 
4627 
4628 
4629 static void
4630 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4631 {
4632 	queue_t			*wq = tep->te_wq;
4633 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4634 	ssize_t			msz = MBLKL(mp);
4635 	tl_endpt_t		*peer_tep;
4636 	queue_t			*peer_rq;
4637 	boolean_t		closing = tep->te_closing;
4638 
4639 	if (msz < sizeof (struct T_exdata_req)) {
4640 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4641 		    "tl_exdata:invalid message"));
4642 		if (!closing) {
4643 			tl_merror(wq, mp, EPROTO);
4644 		} else {
4645 			freemsg(mp);
4646 		}
4647 		return;
4648 	}
4649 
4650 	/*
4651 	 * If the endpoint is closing it should still forward any data to the
4652 	 * peer (if it has one). If it is not allowed to forward it can just
4653 	 * free the message.
4654 	 */
4655 	if (closing &&
4656 	    (tep->te_state != TS_DATA_XFER) &&
4657 	    (tep->te_state != TS_WREQ_ORDREL)) {
4658 		freemsg(mp);
4659 		return;
4660 	}
4661 
4662 	/*
4663 	 * validate state
4664 	 */
4665 	switch (tep->te_state) {
4666 	case TS_IDLE:
4667 		/*
4668 		 * Other end not here - do nothing.
4669 		 */
4670 		freemsg(mp);
4671 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4672 		    "tl_exdata:cots with endpoint idle"));
4673 		return;
4674 
4675 	case TS_DATA_XFER:
4676 		/* valid states */
4677 		if (tep->te_conp != NULL)
4678 			break;
4679 
4680 		if (tep->te_oconp == NULL) {
4681 			if (!closing) {
4682 				tl_merror(wq, mp, EPROTO);
4683 			} else {
4684 				freemsg(mp);
4685 			}
4686 			return;
4687 		}
4688 		/*
4689 		 * For a socket the T_CONN_CON is sent early thus
4690 		 * the peer might not yet have accepted the connection.
4691 		 * If we are closing queue the packet with the T_CONN_IND.
4692 		 * Otherwise defer processing the packet until the peer
4693 		 * accepts the connection.
4694 		 * Note that the queue is noenabled when we go into this
4695 		 * state.
4696 		 */
4697 		if (!closing) {
4698 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4699 			    SL_TRACE | SL_ERROR,
4700 			    "tl_exdata: ocon"));
4701 			TL_PUTBQ(tep, mp);
4702 			return;
4703 		}
4704 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4705 		    "tl_exdata: closing socket ocon"));
4706 		prim->type = T_EXDATA_IND;
4707 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4708 		return;
4709 
4710 	case TS_WREQ_ORDREL:
4711 		if (tep->te_conp == NULL) {
4712 			/*
4713 			 * Other end closed - generate discon_ind
4714 			 * with reason 0 to cause an EPIPE but no
4715 			 * read side error on AF_UNIX sockets.
4716 			 */
4717 			freemsg(mp);
4718 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4719 			    SL_TRACE | SL_ERROR,
4720 			    "tl_exdata: WREQ_ORDREL and no peer"));
4721 			tl_discon_ind(tep, 0);
4722 			return;
4723 		}
4724 		break;
4725 
4726 	default:
4727 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4728 		    SL_TRACE | SL_ERROR,
4729 		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4730 		    tep->te_state));
4731 		tl_merror(wq, mp, EPROTO);
4732 		return;
4733 	}
4734 	/*
4735 	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4736 	 * (state stays same on this event)
4737 	 */
4738 
4739 	/*
4740 	 * get connected endpoint
4741 	 */
4742 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4743 		freemsg(mp);
4744 		/* Peer closed */
4745 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4746 		    "tl_exdata: peer gone"));
4747 		return;
4748 	}
4749 
4750 	peer_rq = peer_tep->te_rq;
4751 
4752 	/*
4753 	 * Put it back if flow controlled
4754 	 * Note: Messages already on queue when we are closing is bounded
4755 	 * so we can ignore flow control.
4756 	 */
4757 	if (!canputnext(peer_rq) && !closing) {
4758 		TL_PUTBQ(tep, mp);
4759 		return;
4760 	}
4761 
4762 	/*
4763 	 * validate state on peer
4764 	 */
4765 	switch (peer_tep->te_state) {
4766 	case TS_DATA_XFER:
4767 	case TS_WIND_ORDREL:
4768 		/* valid states */
4769 		break;
4770 	default:
4771 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4772 		    "tl_exdata:rx side:invalid state"));
4773 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4774 		return;
4775 	}
4776 	/*
4777 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4778 	 * (peer state stays same on this event)
4779 	 */
4780 	/*
4781 	 * reuse message block
4782 	 */
4783 	prim->type = T_EXDATA_IND;
4784 
4785 	/*
4786 	 * send data to connected peer
4787 	 */
4788 	putnext(peer_rq, mp);
4789 }
4790 
4791 
4792 
4793 static void
4794 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4795 {
4796 	queue_t			*wq = tep->te_wq;
4797 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4798 	ssize_t			msz = MBLKL(mp);
4799 	tl_endpt_t		*peer_tep;
4800 	queue_t			*peer_rq;
4801 	boolean_t		closing = tep->te_closing;
4802 
4803 	if (msz < sizeof (struct T_ordrel_req)) {
4804 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4805 		    "tl_ordrel:invalid message"));
4806 		if (!closing) {
4807 			tl_merror(wq, mp, EPROTO);
4808 		} else {
4809 			freemsg(mp);
4810 		}
4811 		return;
4812 	}
4813 
4814 	/*
4815 	 * validate state
4816 	 */
4817 	switch (tep->te_state) {
4818 	case TS_DATA_XFER:
4819 	case TS_WREQ_ORDREL:
4820 		/* valid states */
4821 		if (tep->te_conp != NULL)
4822 			break;
4823 
4824 		if (tep->te_oconp == NULL)
4825 			break;
4826 
4827 		/*
4828 		 * For a socket the T_CONN_CON is sent early thus
4829 		 * the peer might not yet have accepted the connection.
4830 		 * If we are closing queue the packet with the T_CONN_IND.
4831 		 * Otherwise defer processing the packet until the peer
4832 		 * accepts the connection.
4833 		 * Note that the queue is noenabled when we go into this
4834 		 * state.
4835 		 */
4836 		if (!closing) {
4837 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4838 			    SL_TRACE | SL_ERROR,
4839 			    "tl_ordlrel: ocon"));
4840 			TL_PUTBQ(tep, mp);
4841 			return;
4842 		}
4843 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4844 		    "tl_ordlrel: closing socket ocon"));
4845 		prim->type = T_ORDREL_IND;
4846 		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4847 		return;
4848 
4849 	default:
4850 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4851 		    SL_TRACE | SL_ERROR,
4852 		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4853 		    tep->te_state));
4854 		if (!closing) {
4855 			tl_merror(wq, mp, EPROTO);
4856 		} else {
4857 			freemsg(mp);
4858 		}
4859 		return;
4860 	}
4861 	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4862 
4863 	/*
4864 	 * get connected endpoint
4865 	 */
4866 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4867 		/* Peer closed */
4868 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4869 		    "tl_ordrel: peer gone"));
4870 		freemsg(mp);
4871 		return;
4872 	}
4873 
4874 	peer_rq = peer_tep->te_rq;
4875 
4876 	/*
4877 	 * Put it back if flow controlled except when we are closing.
4878 	 * Note: Messages already on queue when we are closing is bounded
4879 	 * so we can ignore flow control.
4880 	 */
4881 	if (!canputnext(peer_rq) && !closing) {
4882 		TL_PUTBQ(tep, mp);
4883 		return;
4884 	}
4885 
4886 	/*
4887 	 * validate state on peer
4888 	 */
4889 	switch (peer_tep->te_state) {
4890 	case TS_DATA_XFER:
4891 	case TS_WIND_ORDREL:
4892 		/* valid states */
4893 		break;
4894 	default:
4895 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4896 		    "tl_ordrel:rx side:invalid state"));
4897 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4898 		return;
4899 	}
4900 	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4901 
4902 	/*
4903 	 * reuse message block
4904 	 */
4905 	prim->type = T_ORDREL_IND;
4906 	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4907 	    "tl_ordrel: send ordrel_ind"));
4908 
4909 	/*
4910 	 * send data to connected peer
4911 	 */
4912 	putnext(peer_rq, mp);
4913 }
4914 
4915 
4916 /*
4917  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4918  */
4919 static void
4920 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4921 {
4922 	size_t			err_sz;
4923 	tl_endpt_t		*tep;
4924 	struct T_unitdata_req	*udreq;
4925 	mblk_t			*err_mp;
4926 	t_scalar_t		alen;
4927 	t_scalar_t		olen;
4928 	struct T_uderror_ind	*uderr;
4929 	uchar_t			*addr_startp;
4930 
4931 	err_sz = sizeof (struct T_uderror_ind);
4932 	tep = (tl_endpt_t *)wq->q_ptr;
4933 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4934 	alen = udreq->DEST_length;
4935 	olen = udreq->OPT_length;
4936 
4937 	if (alen > 0)
4938 		err_sz = T_ALIGN(err_sz + alen);
4939 	if (olen > 0)
4940 		err_sz += olen;
4941 
4942 	err_mp = allocb(err_sz, BPRI_MED);
4943 	if (err_mp == NULL) {
4944 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4945 		    "tl_uderr:allocb failure"));
4946 		/*
4947 		 * Note: no rollback of state needed as it does
4948 		 * not change in connectionless transport
4949 		 */
4950 		tl_memrecover(wq, mp, err_sz);
4951 		return;
4952 	}
4953 
4954 	DB_TYPE(err_mp) = M_PROTO;
4955 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
4956 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4957 	uderr->PRIM_type = T_UDERROR_IND;
4958 	uderr->ERROR_type = err;
4959 	uderr->DEST_length = alen;
4960 	uderr->OPT_length = olen;
4961 	if (alen <= 0) {
4962 		uderr->DEST_offset = 0;
4963 	} else {
4964 		uderr->DEST_offset =
4965 		    (t_scalar_t)sizeof (struct T_uderror_ind);
4966 		addr_startp = mp->b_rptr + udreq->DEST_offset;
4967 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4968 		    (size_t)alen);
4969 	}
4970 	if (olen <= 0) {
4971 		uderr->OPT_offset = 0;
4972 	} else {
4973 		uderr->OPT_offset =
4974 		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4975 		    uderr->DEST_length);
4976 		addr_startp = mp->b_rptr + udreq->OPT_offset;
4977 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4978 		    (size_t)olen);
4979 	}
4980 	freemsg(mp);
4981 
4982 	/*
4983 	 * send indication message
4984 	 */
4985 	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4986 
4987 	qreply(wq, err_mp);
4988 }
4989 
4990 static void
4991 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4992 {
4993 	queue_t *wq = tep->te_wq;
4994 
4995 	if (!tep->te_closing && (wq->q_first != NULL)) {
4996 		TL_PUTQ(tep, mp);
4997 	} else {
4998 		if (tep->te_rq != NULL)
4999 			tl_unitdata(mp, tep);
5000 		else
5001 			freemsg(mp);
5002 	}
5003 
5004 	tl_serializer_exit(tep);
5005 	tl_refrele(tep);
5006 }
5007 
5008 /*
5009  * Handle T_unitdata_req.
5010  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
5011  * If this is a socket pass through options unmodified.
5012  */
5013 static void
5014 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
5015 {
5016 	queue_t			*wq = tep->te_wq;
5017 	soux_addr_t		ux_addr;
5018 	tl_addr_t		destaddr;
5019 	uchar_t			*addr_startp;
5020 	tl_endpt_t		*peer_tep;
5021 	struct T_unitdata_ind	*udind;
5022 	struct T_unitdata_req	*udreq;
5023 	ssize_t			msz, ui_sz, reuse_mb_sz;
5024 	t_scalar_t		alen, aoff, olen, ooff;
5025 	t_scalar_t		oldolen = 0;
5026 	cred_t			*cr = NULL;
5027 	pid_t			cpid;
5028 
5029 	udreq = (struct T_unitdata_req *)mp->b_rptr;
5030 	msz = MBLKL(mp);
5031 
5032 	/*
5033 	 * validate the state
5034 	 */
5035 	if (tep->te_state != TS_IDLE) {
5036 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
5037 		    SL_TRACE | SL_ERROR,
5038 		    "tl_wput:T_CONN_REQ:out of state"));
5039 		tl_merror(wq, mp, EPROTO);
5040 		return;
5041 	}
5042 	/*
5043 	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5044 	 * (state does not change on this event)
5045 	 */
5046 
5047 	/*
5048 	 * validate the message
5049 	 * Note: dereference fields in struct inside message only
5050 	 * after validating the message length.
5051 	 */
5052 	if (msz < sizeof (struct T_unitdata_req)) {
5053 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5054 		    "tl_unitdata:invalid message length"));
5055 		tl_merror(wq, mp, EINVAL);
5056 		return;
5057 	}
5058 	alen = udreq->DEST_length;
5059 	aoff = udreq->DEST_offset;
5060 	oldolen = olen = udreq->OPT_length;
5061 	ooff = udreq->OPT_offset;
5062 	if (olen == 0)
5063 		ooff = 0;
5064 
5065 	if (IS_SOCKET(tep)) {
5066 		if ((alen != TL_SOUX_ADDRLEN) ||
5067 		    (aoff < 0) ||
5068 		    (aoff + alen > msz) ||
5069 		    (olen < 0) || (ooff < 0) ||
5070 		    ((olen > 0) && ((ooff + olen) > msz))) {
5071 			(void) (STRLOG(TL_ID, tep->te_minor,
5072 			    1, SL_TRACE | SL_ERROR,
5073 			    "tl_unitdata_req: invalid socket addr "
5074 			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5075 			    (int)msz, alen, aoff, olen, ooff));
5076 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5077 			return;
5078 		}
5079 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5080 
5081 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5082 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5083 			(void) (STRLOG(TL_ID, tep->te_minor,
5084 			    1, SL_TRACE | SL_ERROR,
5085 			    "tl_conn_req: invalid socket magic"));
5086 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5087 			return;
5088 		}
5089 	} else {
5090 		if ((alen < 0) ||
5091 		    (aoff < 0) ||
5092 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5093 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5094 		    ((aoff + alen) < 0) ||
5095 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5096 		    (olen < 0) ||
5097 		    (ooff < 0) ||
5098 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5099 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5100 				    SL_TRACE | SL_ERROR,
5101 				    "tl_unitdata:invalid unit data message"));
5102 			tl_merror(wq, mp, EINVAL);
5103 			return;
5104 		}
5105 	}
5106 
5107 	/* Options not supported unless it's a socket */
5108 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5109 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5110 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5111 		tl_uderr(wq, mp, EPROTO);
5112 		return;
5113 	}
5114 #ifdef DEBUG
5115 	/*
5116 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5117 	 * if (!assertion)
5118 	 *	log warning;
5119 	 */
5120 	if (!(aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5121 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5122 		    "tl_unitdata:addr overlaps TPI message"));
5123 	}
5124 #endif
5125 	/*
5126 	 * get destination endpoint
5127 	 */
5128 	destaddr.ta_alen = alen;
5129 	destaddr.ta_abuf = mp->b_rptr + aoff;
5130 	destaddr.ta_zoneid = tep->te_zoneid;
5131 
5132 	/*
5133 	 * Check whether the destination is the same that was used previously
5134 	 * and the destination endpoint is in the right state. If something is
5135 	 * wrong, find destination again and cache it.
5136 	 */
5137 	peer_tep = tep->te_lastep;
5138 
5139 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5140 	    (peer_tep->te_state != TS_IDLE) ||
5141 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5142 		/*
5143 		 * Not the same as cached destination , need to find the right
5144 		 * destination.
5145 		 */
5146 		peer_tep = (IS_SOCKET(tep) ?
5147 		    tl_sock_find_peer(tep, &ux_addr) :
5148 		    tl_find_peer(tep, &destaddr));
5149 
5150 		if (peer_tep == NULL) {
5151 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5152 			    SL_TRACE | SL_ERROR,
5153 			    "tl_unitdata:no one at destination address"));
5154 			tl_uderr(wq, mp, ECONNRESET);
5155 			return;
5156 		}
5157 
5158 		/*
5159 		 * Cache the new peer.
5160 		 */
5161 		if (tep->te_lastep != NULL)
5162 			tl_refrele(tep->te_lastep);
5163 
5164 		tep->te_lastep = peer_tep;
5165 	}
5166 
5167 	if (peer_tep->te_state != TS_IDLE) {
5168 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5169 		    "tl_unitdata:provider in invalid state"));
5170 		tl_uderr(wq, mp, EPROTO);
5171 		return;
5172 	}
5173 
5174 	ASSERT(peer_tep->te_rq != NULL);
5175 
5176 	/*
5177 	 * Put it back if flow controlled except when we are closing.
5178 	 * Note: Messages already on queue when we are closing is bounded
5179 	 * so we can ignore flow control.
5180 	 */
5181 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5182 		/* record what we are flow controlled on */
5183 		if (tep->te_flowq != NULL) {
5184 			list_remove(&tep->te_flowq->te_flowlist, tep);
5185 		}
5186 		list_insert_head(&peer_tep->te_flowlist, tep);
5187 		tep->te_flowq = peer_tep;
5188 		TL_PUTBQ(tep, mp);
5189 		return;
5190 	}
5191 	/*
5192 	 * prepare indication message
5193 	 */
5194 
5195 	/*
5196 	 * calculate length of message
5197 	 */
5198 	if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5199 		cr = msg_getcred(mp, &cpid);
5200 		ASSERT(cr != NULL);
5201 
5202 		if (peer_tep->te_flag & TL_SETCRED) {
5203 			ASSERT(olen == 0);
5204 			olen = (t_scalar_t)sizeof (struct opthdr) +
5205 			    OPTLEN(sizeof (tl_credopt_t));
5206 						/* 1 option only */
5207 		} else if (peer_tep->te_flag & TL_SETUCRED) {
5208 			ASSERT(olen == 0);
5209 			olen = (t_scalar_t)sizeof (struct opthdr) +
5210 			    OPTLEN(ucredminsize(cr));
5211 						/* 1 option only */
5212 		} else {
5213 			/* Possibly more than one option */
5214 			olen += (t_scalar_t)sizeof (struct T_opthdr) +
5215 			    OPTLEN(ucredminsize(cr));
5216 		}
5217 	}
5218 
5219 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5220 	reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5221 
5222 	/*
5223 	 * If the unitdata_ind fits and we are not adding options
5224 	 * reuse the udreq mblk.
5225 	 *
5226 	 * Otherwise, it is possible we need to append an option if one of the
5227 	 * te_flag bits is set. This requires extra space in the data block for
5228 	 * the additional option but the traditional technique used below to
5229 	 * allocate a new block and copy into it will not work when there is a
5230 	 * message block with a free pointer (since we don't know anything
5231 	 * about the layout of the data, pointers referencing or within the
5232 	 * data, etc.). To handle this possibility the upper layers may have
5233 	 * preallocated some space to use for appending an option. We check the
5234 	 * overall mblock size against the size we need ('reuse_mb_sz' with the
5235 	 * original address length [alen] to ensure we won't overrun the
5236 	 * current mblk data size) to see if there is free space and thus
5237 	 * avoid allocating a new message block.
5238 	 */
5239 	if (msz >= ui_sz && alen >= tep->te_alen &&
5240 	    !(peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED))) {
5241 		/*
5242 		 * Reuse the original mblk. Leave options in place.
5243 		 */
5244 		udind = (struct T_unitdata_ind *)mp->b_rptr;
5245 		udind->PRIM_type = T_UNITDATA_IND;
5246 		udind->SRC_length = tep->te_alen;
5247 		addr_startp = mp->b_rptr + udind->SRC_offset;
5248 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5249 
5250 	} else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5251 	    mp->b_datap->db_frtnp != NULL) {
5252 		/*
5253 		 * We have a message block with a free pointer, but extra space
5254 		 * has been pre-allocated for us in case we need to append an
5255 		 * option. Reuse the original mblk, leaving existing options in
5256 		 * place.
5257 		 */
5258 		udind = (struct T_unitdata_ind *)mp->b_rptr;
5259 		udind->PRIM_type = T_UNITDATA_IND;
5260 		udind->SRC_length = tep->te_alen;
5261 		addr_startp = mp->b_rptr + udind->SRC_offset;
5262 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5263 
5264 		if (peer_tep->te_flag &
5265 		    (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5266 			ASSERT(cr != NULL);
5267 			/*
5268 			 * We're appending one new option here after the
5269 			 * original ones.
5270 			 */
5271 			tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5272 			    cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5273 		}
5274 
5275 	} else if (mp->b_datap->db_frtnp != NULL) {
5276 		/*
5277 		 * The next block creates a new mp and tries to copy the data
5278 		 * block into it, but that cannot handle a message with a free
5279 		 * pointer (for more details see the comment in kstrputmsg()
5280 		 * where dupmsg() is called). Since we can never properly
5281 		 * duplicate the mp while also extending the data, just error
5282 		 * out now.
5283 		 */
5284 		tl_uderr(wq, mp, EPROTO);
5285 		return;
5286 	} else {
5287 		/* Allocate a new T_unitdata_ind message */
5288 		mblk_t *ui_mp;
5289 
5290 		ui_mp = allocb(ui_sz, BPRI_MED);
5291 		if (ui_mp == NULL) {
5292 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5293 			    "tl_unitdata:allocb failure:message queued"));
5294 			tl_memrecover(wq, mp, ui_sz);
5295 			return;
5296 		}
5297 
5298 		/*
5299 		 * fill in T_UNITDATA_IND contents
5300 		 */
5301 		DB_TYPE(ui_mp) = M_PROTO;
5302 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5303 		udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
5304 		udind->PRIM_type = T_UNITDATA_IND;
5305 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5306 		udind->SRC_length = tep->te_alen;
5307 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5308 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5309 		udind->OPT_offset =
5310 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5311 		udind->OPT_length = olen;
5312 		if (peer_tep->te_flag &
5313 		    (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5314 
5315 			if (oldolen != 0) {
5316 				bcopy((void *)((uintptr_t)udreq + ooff),
5317 				    (void *)((uintptr_t)udind +
5318 				    udind->OPT_offset),
5319 				    oldolen);
5320 			}
5321 			ASSERT(cr != NULL);
5322 
5323 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5324 			    oldolen, cr, cpid,
5325 			    peer_tep->te_flag, peer_tep->te_credp);
5326 		} else {
5327 			bcopy((void *)((uintptr_t)udreq + ooff),
5328 			    (void *)((uintptr_t)udind + udind->OPT_offset),
5329 			    olen);
5330 		}
5331 
5332 		/*
5333 		 * relink data blocks from mp to ui_mp
5334 		 */
5335 		ui_mp->b_cont = mp->b_cont;
5336 		freeb(mp);
5337 		mp = ui_mp;
5338 	}
5339 	/*
5340 	 * send indication message
5341 	 */
5342 	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5343 	putnext(peer_tep->te_rq, mp);
5344 }
5345 
5346 
5347 
5348 /*
5349  * Check if a given addr is in use.
5350  * Endpoint ptr returned or NULL if not found.
5351  * The name space is separate for each mode. This implies that
5352  * sockets get their own name space.
5353  */
5354 static tl_endpt_t *
5355 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5356 {
5357 	tl_endpt_t *peer_tep = NULL;
5358 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5359 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5360 
5361 	ASSERT(!IS_SOCKET(tep));
5362 
5363 	ASSERT(ap != NULL && ap->ta_alen > 0);
5364 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5365 	ASSERT(ap->ta_abuf != NULL);
5366 	EQUIV(rc == 0, peer_tep != NULL);
5367 	IMPLY(rc == 0,
5368 	    (tep->te_zoneid == peer_tep->te_zoneid) &&
5369 	    (tep->te_transport == peer_tep->te_transport));
5370 
5371 	if ((rc == 0) && (peer_tep->te_closing)) {
5372 		tl_refrele(peer_tep);
5373 		peer_tep = NULL;
5374 	}
5375 
5376 	return (peer_tep);
5377 }
5378 
5379 /*
5380  * Find peer for a socket based on unix domain address.
5381  * For implicit addresses our peer can be found by minor number in ai hash. For
5382  * explicit binds we look vnode address at addr_hash.
5383  */
5384 static tl_endpt_t *
5385 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5386 {
5387 	tl_endpt_t *peer_tep = NULL;
5388 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5389 	    tep->te_aihash : tep->te_addrhash;
5390 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5391 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5392 
5393 	ASSERT(IS_SOCKET(tep));
5394 	EQUIV(rc == 0, peer_tep != NULL);
5395 	IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5396 
5397 	if (peer_tep != NULL) {
5398 		/* Don't attempt to use closing peer. */
5399 		if (peer_tep->te_closing)
5400 			goto errout;
5401 
5402 		/*
5403 		 * Cross-zone unix sockets are permitted, but for Trusted
5404 		 * Extensions only, the "server" for these must be in the
5405 		 * global zone.
5406 		 */
5407 		if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5408 		    is_system_labeled() &&
5409 		    (peer_tep->te_zoneid != GLOBAL_ZONEID))
5410 			goto errout;
5411 	}
5412 
5413 	return (peer_tep);
5414 
5415 errout:
5416 	tl_refrele(peer_tep);
5417 	return (NULL);
5418 }
5419 
5420 /*
5421  * Generate a free addr and return it in struct pointed by ap
5422  * but allocating space for address buffer.
5423  * The generated address will be at least 4 bytes long and, if req->ta_alen
5424  * exceeds 4 bytes, be req->ta_alen bytes long.
5425  *
5426  * If address is found it will be inserted in the hash.
5427  *
5428  * If req->ta_alen is larger than the default alen (4 bytes) the last
5429  * alen-4 bytes will always be the same as in req.
5430  *
5431  * Return 0 for failure.
5432  * Return non-zero for success.
5433  */
5434 static boolean_t
5435 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5436 {
5437 	t_scalar_t	alen;
5438 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5439 
5440 	ASSERT(tep->te_hash_hndl != NULL);
5441 	ASSERT(!IS_SOCKET(tep));
5442 
5443 	if (tep->te_hash_hndl == NULL)
5444 		return (B_FALSE);
5445 
5446 	/*
5447 	 * check if default addr is in use
5448 	 * if it is - bump it and try again
5449 	 */
5450 	if (req == NULL) {
5451 		alen = sizeof (uint32_t);
5452 	} else {
5453 		alen = max(req->ta_alen, sizeof (uint32_t));
5454 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5455 	}
5456 
5457 	if (tep->te_alen < alen) {
5458 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5459 
5460 		/*
5461 		 * Not enough space in tep->ta_ap to hold the address,
5462 		 * allocate a bigger space.
5463 		 */
5464 		if (abuf == NULL)
5465 			return (B_FALSE);
5466 
5467 		if (tep->te_alen > 0)
5468 			kmem_free(tep->te_abuf, tep->te_alen);
5469 
5470 		tep->te_alen = alen;
5471 		tep->te_abuf = abuf;
5472 	}
5473 
5474 	/* Copy in the address in req */
5475 	if (req != NULL) {
5476 		ASSERT(alen >= req->ta_alen);
5477 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5478 	}
5479 
5480 	/*
5481 	 * First try minor number then try default addresses.
5482 	 */
5483 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5484 
5485 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5486 		if (mod_hash_insert_reserve(tep->te_addrhash,
5487 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5488 		    tep->te_hash_hndl) == 0) {
5489 			/*
5490 			 * found free address
5491 			 */
5492 			tep->te_flag |= TL_ADDRHASHED;
5493 			tep->te_hash_hndl = NULL;
5494 
5495 			return (B_TRUE); /* successful return */
5496 		}
5497 		/*
5498 		 * Use default address.
5499 		 */
5500 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5501 		atomic_inc_32(&tep->te_defaddr);
5502 	}
5503 
5504 	/*
5505 	 * Failed to find anything.
5506 	 */
5507 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5508 	    "tl_get_any_addr:looped 2^32 times"));
5509 	return (B_FALSE);
5510 }
5511 
5512 /*
5513  * reallocb + set r/w ptrs to reflect size.
5514  */
5515 static mblk_t *
5516 tl_resizemp(mblk_t *mp, ssize_t new_size)
5517 {
5518 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5519 		return (NULL);
5520 
5521 	mp->b_rptr = DB_BASE(mp);
5522 	mp->b_wptr = mp->b_rptr + new_size;
5523 	return (mp);
5524 }
5525 
5526 static void
5527 tl_cl_backenable(tl_endpt_t *tep)
5528 {
5529 	list_t *l = &tep->te_flowlist;
5530 	tl_endpt_t *elp;
5531 
5532 	ASSERT(IS_CLTS(tep));
5533 
5534 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5535 		ASSERT(tep->te_ser == elp->te_ser);
5536 		ASSERT(elp->te_flowq == tep);
5537 		if (!elp->te_closing)
5538 			TL_QENABLE(elp);
5539 		elp->te_flowq = NULL;
5540 		list_remove(l, elp);
5541 	}
5542 }
5543 
5544 /*
5545  * Unconnect endpoints.
5546  */
5547 static void
5548 tl_co_unconnect(tl_endpt_t *tep)
5549 {
5550 	tl_endpt_t	*peer_tep = tep->te_conp;
5551 	tl_endpt_t	*srv_tep = tep->te_oconp;
5552 	list_t		*l;
5553 	tl_icon_t	*tip;
5554 	tl_endpt_t	*cl_tep;
5555 	mblk_t		*d_mp;
5556 
5557 	ASSERT(IS_COTS(tep));
5558 	/*
5559 	 * If our peer is closing, don't use it.
5560 	 */
5561 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5562 		TL_UNCONNECT(tep->te_conp);
5563 		peer_tep = NULL;
5564 	}
5565 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5566 		TL_UNCONNECT(tep->te_oconp);
5567 		srv_tep = NULL;
5568 	}
5569 
5570 	if (tep->te_nicon > 0) {
5571 		l = &tep->te_iconp;
5572 		/*
5573 		 * If incoming requests pending, change state
5574 		 * of clients on disconnect ind event and send
5575 		 * discon_ind pdu to modules above them
5576 		 * for server: all clients get disconnect
5577 		 */
5578 
5579 		while (tep->te_nicon > 0) {
5580 			tip    = list_head(l);
5581 			cl_tep = tip->ti_tep;
5582 
5583 			if (cl_tep == NULL) {
5584 				tl_freetip(tep, tip);
5585 				continue;
5586 			}
5587 
5588 			if (cl_tep->te_oconp != NULL) {
5589 				ASSERT(cl_tep != cl_tep->te_oconp);
5590 				TL_UNCONNECT(cl_tep->te_oconp);
5591 			}
5592 
5593 			if (cl_tep->te_closing) {
5594 				tl_freetip(tep, tip);
5595 				continue;
5596 			}
5597 
5598 			enableok(cl_tep->te_wq);
5599 			TL_QENABLE(cl_tep);
5600 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5601 			if (d_mp != NULL) {
5602 				cl_tep->te_state = TS_IDLE;
5603 				putnext(cl_tep->te_rq, d_mp);
5604 			} else {
5605 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5606 				    SL_TRACE | SL_ERROR,
5607 				    "tl_co_unconnect:icmng: "
5608 				    "allocb failure"));
5609 			}
5610 			tl_freetip(tep, tip);
5611 		}
5612 	} else if (srv_tep != NULL) {
5613 		/*
5614 		 * If outgoing request pending, change state
5615 		 * of server on discon ind event
5616 		 */
5617 
5618 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5619 		    IS_COTSORD(srv_tep) &&
5620 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5621 			/*
5622 			 * Queue ordrel_ind for server to be picked up
5623 			 * when the connection is accepted.
5624 			 */
5625 			d_mp = tl_ordrel_ind_alloc();
5626 		} else {
5627 			/*
5628 			 * send discon_ind to server
5629 			 */
5630 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5631 		}
5632 		if (d_mp == NULL) {
5633 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5634 			    SL_TRACE | SL_ERROR,
5635 			    "tl_co_unconnect:outgoing:allocb failure"));
5636 			TL_UNCONNECT(tep->te_oconp);
5637 			goto discon_peer;
5638 		}
5639 
5640 		/*
5641 		 * If this is a socket the T_DISCON_IND is queued with
5642 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5643 		 * from the list of pending connections.
5644 		 * Note that when te_oconp is set the peer better have
5645 		 * a t_connind_t for the client.
5646 		 */
5647 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5648 			/*
5649 			 * Queue the disconnection message.
5650 			 */
5651 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5652 		} else {
5653 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5654 			if (tip == NULL) {
5655 				freemsg(d_mp);
5656 			} else {
5657 				ASSERT(tep == tip->ti_tep);
5658 				ASSERT(tep->te_ser == srv_tep->te_ser);
5659 				/*
5660 				 * Delete tip from the server list.
5661 				 */
5662 				if (srv_tep->te_nicon == 1) {
5663 					srv_tep->te_state =
5664 					    NEXTSTATE(TE_DISCON_IND2,
5665 					    srv_tep->te_state);
5666 				} else {
5667 					srv_tep->te_state =
5668 					    NEXTSTATE(TE_DISCON_IND3,
5669 					    srv_tep->te_state);
5670 				}
5671 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5672 				    T_DISCON_IND);
5673 				putnext(srv_tep->te_rq, d_mp);
5674 				tl_freetip(srv_tep, tip);
5675 			}
5676 			TL_UNCONNECT(tep->te_oconp);
5677 			srv_tep = NULL;
5678 		}
5679 	} else if (peer_tep != NULL) {
5680 		/*
5681 		 * unconnect existing connection
5682 		 * If connected, change state of peer on
5683 		 * discon ind event and send discon ind pdu
5684 		 * to module above it
5685 		 */
5686 
5687 		ASSERT(tep->te_ser == peer_tep->te_ser);
5688 		if (IS_COTSORD(peer_tep) &&
5689 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5690 		    peer_tep->te_state == TS_DATA_XFER)) {
5691 			/*
5692 			 * send ordrel ind
5693 			 */
5694 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5695 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5696 			    peer_tep->te_state,
5697 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5698 			d_mp = tl_ordrel_ind_alloc();
5699 			if (d_mp == NULL) {
5700 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5701 				    SL_TRACE | SL_ERROR,
5702 				    "tl_co_unconnect:connected:"
5703 				    "allocb failure"));
5704 				/*
5705 				 * Continue with cleaning up peer as
5706 				 * this side may go away with the close
5707 				 */
5708 				TL_QENABLE(peer_tep);
5709 				goto discon_peer;
5710 			}
5711 			peer_tep->te_state =
5712 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5713 
5714 			putnext(peer_tep->te_rq, d_mp);
5715 			/*
5716 			 * Handle flow control case.  This will generate
5717 			 * a t_discon_ind message with reason 0 if there
5718 			 * is data queued on the write side.
5719 			 */
5720 			TL_QENABLE(peer_tep);
5721 		} else if (IS_COTSORD(peer_tep) &&
5722 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5723 			/*
5724 			 * Sent an ordrel_ind. We send a discon with
5725 			 * with error 0 to inform that the peer is gone.
5726 			 */
5727 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5728 			    SL_TRACE | SL_ERROR,
5729 			    "tl_co_unconnect: discon in state %d",
5730 			    tep->te_state));
5731 			tl_discon_ind(peer_tep, 0);
5732 		} else {
5733 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5734 			    SL_TRACE | SL_ERROR,
5735 			    "tl_co_unconnect: state %d", tep->te_state));
5736 			tl_discon_ind(peer_tep, ECONNRESET);
5737 		}
5738 
5739 discon_peer:
5740 		/*
5741 		 * Disconnect cross-pointers only for close
5742 		 */
5743 		if (tep->te_closing) {
5744 			peer_tep = tep->te_conp;
5745 			TL_REMOVE_PEER(peer_tep->te_conp);
5746 			TL_REMOVE_PEER(tep->te_conp);
5747 		}
5748 	}
5749 }
5750 
5751 /*
5752  * Note: The following routine does not recover from allocb()
5753  * failures
5754  * The reason should be from the <sys/errno.h> space.
5755  */
5756 static void
5757 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5758 {
5759 	mblk_t *d_mp;
5760 
5761 	if (tep->te_closing)
5762 		return;
5763 
5764 	/*
5765 	 * flush the queues.
5766 	 */
5767 	flushq(tep->te_rq, FLUSHDATA);
5768 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5769 
5770 	/*
5771 	 * send discon ind
5772 	 */
5773 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5774 	if (d_mp == NULL) {
5775 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5776 		    "tl_discon_ind:allocb failure"));
5777 		return;
5778 	}
5779 	tep->te_state = TS_IDLE;
5780 	putnext(tep->te_rq, d_mp);
5781 }
5782 
5783 /*
5784  * Note: The following routine does not recover from allocb()
5785  * failures
5786  * The reason should be from the <sys/errno.h> space.
5787  */
5788 static mblk_t *
5789 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5790 {
5791 	mblk_t *mp;
5792 	struct T_discon_ind *tdi;
5793 
5794 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5795 		DB_TYPE(mp) = M_PROTO;
5796 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5797 		tdi = (struct T_discon_ind *)mp->b_rptr;
5798 		tdi->PRIM_type = T_DISCON_IND;
5799 		tdi->DISCON_reason = reason;
5800 		tdi->SEQ_number = seqnum;
5801 	}
5802 	return (mp);
5803 }
5804 
5805 
5806 /*
5807  * Note: The following routine does not recover from allocb()
5808  * failures
5809  */
5810 static mblk_t *
5811 tl_ordrel_ind_alloc(void)
5812 {
5813 	mblk_t *mp;
5814 	struct T_ordrel_ind *toi;
5815 
5816 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5817 		DB_TYPE(mp) = M_PROTO;
5818 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5819 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5820 		toi->PRIM_type = T_ORDREL_IND;
5821 	}
5822 	return (mp);
5823 }
5824 
5825 
5826 /*
5827  * Lookup the seqno in the list of queued connections.
5828  */
5829 static tl_icon_t *
5830 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5831 {
5832 	list_t *l = &tep->te_iconp;
5833 	tl_icon_t *tip = list_head(l);
5834 
5835 	ASSERT(seqno != 0);
5836 
5837 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5838 		;
5839 
5840 	return (tip);
5841 }
5842 
5843 /*
5844  * Queue data for a given T_CONN_IND while verifying that redundant
5845  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5846  * Used when the originator of the connection closes.
5847  */
5848 static void
5849 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5850 {
5851 	tl_icon_t		*tip;
5852 	mblk_t			**mpp, *mp;
5853 	int			prim, nprim;
5854 
5855 	if (nmp->b_datap->db_type == M_PROTO)
5856 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5857 	else
5858 		nprim = -1;	/* M_DATA */
5859 
5860 	tip = tl_icon_find(tep, seqno);
5861 	if (tip == NULL) {
5862 		freemsg(nmp);
5863 		return;
5864 	}
5865 
5866 	ASSERT(tip->ti_seqno != 0);
5867 	mpp = &tip->ti_mp;
5868 	while (*mpp != NULL) {
5869 		mp = *mpp;
5870 
5871 		if (mp->b_datap->db_type == M_PROTO)
5872 			prim = ((union T_primitives *)mp->b_rptr)->type;
5873 		else
5874 			prim = -1;	/* M_DATA */
5875 
5876 		/*
5877 		 * Allow nothing after a T_DISCON_IND
5878 		 */
5879 		if (prim == T_DISCON_IND) {
5880 			freemsg(nmp);
5881 			return;
5882 		}
5883 		/*
5884 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5885 		 */
5886 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5887 			freemsg(nmp);
5888 			return;
5889 		}
5890 		mpp = &(mp->b_next);
5891 	}
5892 	*mpp = nmp;
5893 }
5894 
5895 /*
5896  * Verify if a certain TPI primitive exists on the connind queue.
5897  * Use prim -1 for M_DATA.
5898  * Return non-zero if found.
5899  */
5900 static boolean_t
5901 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5902 {
5903 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5904 	boolean_t found = B_FALSE;
5905 
5906 	if (tip != NULL) {
5907 		mblk_t *mp;
5908 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5909 			found = (DB_TYPE(mp) == M_PROTO &&
5910 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5911 		}
5912 	}
5913 	return (found);
5914 }
5915 
5916 /*
5917  * Send the b_next mblk chain that has accumulated before the connection
5918  * was accepted. Perform the necessary state transitions.
5919  */
5920 static void
5921 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5922 {
5923 	mblk_t			*mp;
5924 	union T_primitives	*primp;
5925 
5926 	if (tep->te_closing) {
5927 		tl_icon_freemsgs(mpp);
5928 		return;
5929 	}
5930 
5931 	ASSERT(tep->te_state == TS_DATA_XFER);
5932 	ASSERT(tep->te_rq->q_first == NULL);
5933 
5934 	while ((mp = *mpp) != NULL) {
5935 		*mpp = mp->b_next;
5936 		mp->b_next = NULL;
5937 
5938 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5939 		switch (DB_TYPE(mp)) {
5940 		default:
5941 			freemsg(mp);
5942 			break;
5943 		case M_DATA:
5944 			putnext(tep->te_rq, mp);
5945 			break;
5946 		case M_PROTO:
5947 			primp = (union T_primitives *)mp->b_rptr;
5948 			switch (primp->type) {
5949 			case T_UNITDATA_IND:
5950 			case T_DATA_IND:
5951 			case T_OPTDATA_IND:
5952 			case T_EXDATA_IND:
5953 				putnext(tep->te_rq, mp);
5954 				break;
5955 			case T_ORDREL_IND:
5956 				tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5957 				    tep->te_state);
5958 				putnext(tep->te_rq, mp);
5959 				break;
5960 			case T_DISCON_IND:
5961 				tep->te_state = TS_IDLE;
5962 				putnext(tep->te_rq, mp);
5963 				break;
5964 			default:
5965 #ifdef DEBUG
5966 				cmn_err(CE_PANIC,
5967 				    "tl_icon_sendmsgs: unknown primitive");
5968 #endif /* DEBUG */
5969 				freemsg(mp);
5970 				break;
5971 			}
5972 			break;
5973 		}
5974 	}
5975 }
5976 
5977 /*
5978  * Free the b_next mblk chain that has accumulated before the connection
5979  * was accepted.
5980  */
5981 static void
5982 tl_icon_freemsgs(mblk_t **mpp)
5983 {
5984 	mblk_t *mp;
5985 
5986 	while ((mp = *mpp) != NULL) {
5987 		*mpp = mp->b_next;
5988 		mp->b_next = NULL;
5989 		freemsg(mp);
5990 	}
5991 }
5992 
5993 /*
5994  * Send M_ERROR
5995  * Note: assumes caller ensured enough space in mp or enough
5996  *	memory available. Does not attempt recovery from allocb()
5997  *	failures
5998  */
5999 
6000 static void
6001 tl_merror(queue_t *wq, mblk_t *mp, int error)
6002 {
6003 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6004 
6005 	if (tep->te_closing) {
6006 		freemsg(mp);
6007 		return;
6008 	}
6009 
6010 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
6011 	    SL_TRACE | SL_ERROR,
6012 	    "tl_merror: tep=%p, err=%d", (void *)tep, error));
6013 
6014 	/*
6015 	 * flush all messages on queue. we are shutting
6016 	 * the stream down on fatal error
6017 	 */
6018 	flushq(wq, FLUSHALL);
6019 	if (IS_COTS(tep)) {
6020 		/* connection oriented - unconnect endpoints */
6021 		tl_co_unconnect(tep);
6022 	}
6023 	if (mp->b_cont) {
6024 		freemsg(mp->b_cont);
6025 		mp->b_cont = NULL;
6026 	}
6027 
6028 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
6029 		freemsg(mp);
6030 		mp = allocb(1, BPRI_HI);
6031 		if (mp == NULL) {
6032 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6033 			    SL_TRACE | SL_ERROR,
6034 			    "tl_merror:M_PROTO: out of memory"));
6035 			return;
6036 		}
6037 	}
6038 	if (mp) {
6039 		DB_TYPE(mp) = M_ERROR;
6040 		mp->b_rptr = DB_BASE(mp);
6041 		*mp->b_rptr = (char)error;
6042 		mp->b_wptr = mp->b_rptr + sizeof (char);
6043 		qreply(wq, mp);
6044 	} else {
6045 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
6046 	}
6047 }
6048 
6049 static void
6050 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
6051 {
6052 	ASSERT(cr != NULL);
6053 
6054 	if (flag & TL_SETCRED) {
6055 		struct opthdr *opt = (struct opthdr *)buf;
6056 		tl_credopt_t *tlcred;
6057 
6058 		opt->level = TL_PROT_LEVEL;
6059 		opt->name = TL_OPT_PEER_CRED;
6060 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
6061 
6062 		tlcred = (tl_credopt_t *)(opt + 1);
6063 		tlcred->tc_uid = crgetuid(cr);
6064 		tlcred->tc_gid = crgetgid(cr);
6065 		tlcred->tc_ruid = crgetruid(cr);
6066 		tlcred->tc_rgid = crgetrgid(cr);
6067 		tlcred->tc_suid = crgetsuid(cr);
6068 		tlcred->tc_sgid = crgetsgid(cr);
6069 		tlcred->tc_ngroups = crgetngroups(cr);
6070 	} else if (flag & TL_SETUCRED) {
6071 		struct opthdr *opt = (struct opthdr *)buf;
6072 
6073 		opt->level = TL_PROT_LEVEL;
6074 		opt->name = TL_OPT_PEER_UCRED;
6075 		opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6076 
6077 		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6078 	} else {
6079 		struct T_opthdr *topt = (struct T_opthdr *)buf;
6080 		ASSERT(flag & TL_SOCKUCRED);
6081 
6082 		topt->level = SOL_SOCKET;
6083 		topt->name = SCM_UCRED;
6084 		topt->len = ucredminsize(cr) + sizeof (*topt);
6085 		topt->status = 0;
6086 		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6087 	}
6088 }
6089 
6090 /* ARGSUSED */
6091 static int
6092 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6093 {
6094 	/* no default value processed in protocol specific code currently */
6095 	return (-1);
6096 }
6097 
6098 /* ARGSUSED */
6099 static int
6100 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6101 {
6102 	int len;
6103 	tl_endpt_t *tep;
6104 	int *valp;
6105 
6106 	tep = (tl_endpt_t *)wq->q_ptr;
6107 
6108 	len = 0;
6109 
6110 	/*
6111 	 * Assumes: option level and name sanity check done elsewhere
6112 	 */
6113 
6114 	switch (level) {
6115 	case SOL_SOCKET:
6116 		if (!IS_SOCKET(tep))
6117 			break;
6118 		switch (name) {
6119 		case SO_RECVUCRED:
6120 			len = sizeof (int);
6121 			valp = (int *)ptr;
6122 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6123 			break;
6124 		default:
6125 			break;
6126 		}
6127 		break;
6128 	case TL_PROT_LEVEL:
6129 		switch (name) {
6130 		case TL_OPT_PEER_CRED:
6131 		case TL_OPT_PEER_UCRED:
6132 			/*
6133 			 * option not supposed to retrieved directly
6134 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6135 			 * when some internal flags set by other options
6136 			 * Direct retrieval always designed to fail(ignored)
6137 			 * for this option.
6138 			 */
6139 			break;
6140 		}
6141 	}
6142 	return (len);
6143 }
6144 
6145 /* ARGSUSED */
6146 static int
6147 tl_set_opt(queue_t *wq, uint_t mgmt_flags, int level, int name, uint_t inlen,
6148     uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs,
6149     cred_t *cr)
6150 {
6151 	int error;
6152 	tl_endpt_t *tep;
6153 
6154 	tep = (tl_endpt_t *)wq->q_ptr;
6155 
6156 	error = 0;		/* NOERROR */
6157 
6158 	/*
6159 	 * Assumes: option level and name sanity checks done elsewhere
6160 	 */
6161 
6162 	switch (level) {
6163 	case SOL_SOCKET:
6164 		if (!IS_SOCKET(tep)) {
6165 			error = EINVAL;
6166 			break;
6167 		}
6168 		/*
6169 		 * TBD: fill in other AF_UNIX socket options and then stop
6170 		 * returning error.
6171 		 */
6172 		switch (name) {
6173 		case SO_RECVUCRED:
6174 			/*
6175 			 * We only support this for datagram sockets;
6176 			 * getpeerucred handles the connection oriented
6177 			 * transports.
6178 			 */
6179 			if (!IS_CLTS(tep)) {
6180 				error = EINVAL;
6181 				break;
6182 			}
6183 			if (*(int *)invalp == 0)
6184 				tep->te_flag &= ~TL_SOCKUCRED;
6185 			else
6186 				tep->te_flag |= TL_SOCKUCRED;
6187 			break;
6188 		default:
6189 			error = EINVAL;
6190 			break;
6191 		}
6192 		break;
6193 	case TL_PROT_LEVEL:
6194 		switch (name) {
6195 		case TL_OPT_PEER_CRED:
6196 		case TL_OPT_PEER_UCRED:
6197 			/*
6198 			 * option not supposed to be set directly
6199 			 * Its value in initialized for each endpoint at
6200 			 * driver open time.
6201 			 * Direct setting always designed to fail for this
6202 			 * option.
6203 			 */
6204 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6205 			    SL_TRACE | SL_ERROR,
6206 			    "tl_set_opt: option is not supported"));
6207 			error = EPROTO;
6208 			break;
6209 		}
6210 	}
6211 	return (error);
6212 }
6213 
6214 
6215 static void
6216 tl_timer(void *arg)
6217 {
6218 	queue_t *wq = arg;
6219 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6220 
6221 	ASSERT(tep);
6222 
6223 	tep->te_timoutid = 0;
6224 
6225 	enableok(wq);
6226 	/*
6227 	 * Note: can call wsrv directly here and save context switch
6228 	 * Consider change when qtimeout (not timeout) is active
6229 	 */
6230 	qenable(wq);
6231 }
6232 
6233 static void
6234 tl_buffer(void *arg)
6235 {
6236 	queue_t *wq = arg;
6237 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6238 
6239 	ASSERT(tep);
6240 
6241 	tep->te_bufcid = 0;
6242 	tep->te_nowsrv = B_FALSE;
6243 
6244 	enableok(wq);
6245 	/*
6246 	 *  Note: can call wsrv directly here and save context switch
6247 	 * Consider change when qbufcall (not bufcall) is active
6248 	 */
6249 	qenable(wq);
6250 }
6251 
6252 static void
6253 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6254 {
6255 	tl_endpt_t *tep;
6256 
6257 	tep = (tl_endpt_t *)wq->q_ptr;
6258 
6259 	if (tep->te_closing) {
6260 		freemsg(mp);
6261 		return;
6262 	}
6263 	noenable(wq);
6264 
6265 	(void) insq(wq, wq->q_first, mp);
6266 
6267 	if (tep->te_bufcid || tep->te_timoutid) {
6268 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
6269 		    "tl_memrecover:recover %p pending", (void *)wq));
6270 		return;
6271 	}
6272 
6273 	tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq);
6274 	if (tep->te_bufcid == NULL) {
6275 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6276 		    drv_usectohz(TL_BUFWAIT));
6277 	}
6278 }
6279 
6280 static void
6281 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6282 {
6283 	ASSERT(tip->ti_seqno != 0);
6284 
6285 	if (tip->ti_mp != NULL) {
6286 		tl_icon_freemsgs(&tip->ti_mp);
6287 		tip->ti_mp = NULL;
6288 	}
6289 	if (tip->ti_tep != NULL) {
6290 		tl_refrele(tip->ti_tep);
6291 		tip->ti_tep = NULL;
6292 	}
6293 	list_remove(&tep->te_iconp, tip);
6294 	kmem_free(tip, sizeof (tl_icon_t));
6295 	tep->te_nicon--;
6296 }
6297 
6298 /*
6299  * Remove address from address hash.
6300  */
6301 static void
6302 tl_addr_unbind(tl_endpt_t *tep)
6303 {
6304 	tl_endpt_t *elp;
6305 
6306 	if (tep->te_flag & TL_ADDRHASHED) {
6307 		if (IS_SOCKET(tep)) {
6308 			(void) mod_hash_remove(tep->te_addrhash,
6309 			    (mod_hash_key_t)tep->te_vp,
6310 			    (mod_hash_val_t *)&elp);
6311 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6312 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6313 		} else {
6314 			(void) mod_hash_remove(tep->te_addrhash,
6315 			    (mod_hash_key_t)&tep->te_ap,
6316 			    (mod_hash_val_t *)&elp);
6317 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6318 			tep->te_alen = -1;
6319 			tep->te_abuf = NULL;
6320 		}
6321 		tep->te_flag &= ~TL_ADDRHASHED;
6322 	}
6323 }
6324