xref: /illumos-gate/usr/src/uts/common/io/tl.c (revision 1c8449e95a93a750df972545379490366b392934)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
27  * Copyright (c) 2012 by Delphix. All rights reserved.
28  * Copyright 2020 Joyent, Inc.
29  */
30 
31 /*
32  * Multithreaded STREAMS Local Transport Provider.
33  *
34  * OVERVIEW
35  * ========
36  *
37  * This driver provides TLI as well as socket semantics.  It provides
38  * connectionless, connection oriented, and connection oriented with orderly
39  * release transports for TLI and sockets. Each transport type has separate name
40  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
41  * this removes any name space conflicts when binding to socket style transport
42  * addresses.
43  *
44  * NOTE: There is one exception: Socket ticots and ticotsord transports share
45  * the same namespace. In fact, sockets always use ticotsord type transport.
46  *
47  * The driver mode is specified during open() by the minor number used for
48  * open.
49  *
50  *  The sockets in addition have the following semantic differences:
51  *  No support for passing up credentials (TL_SET[U]CRED).
52  *
53  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
54  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
55  *	T_OPTDATA_IND.
56  *
57  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
58  *	a T_CONN_RES is received from the acceptor. This means that a socket
59  *	connect will complete before the peer has called accept.
60  *
61  *
62  * MULTITHREADING
63  * ==============
64  *
65  * The driver does not use STREAMS protection mechanisms. Instead it uses a
66  * generic "serializer" abstraction. Most of the operations are executed behind
67  * the serializer and are, essentially single-threaded. All functions executed
68  * behind the same serializer are strictly serialized. So if one thread calls
69  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
70  * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
71  * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1)
72  * or bar(mp2, arg1); foo(mp1, arg1). But foo() and bar() will never run at the
73  * same time.
74  *
75  * Connectionless transports use a single serializer per transport type (one for
76  * TLI and one for sockets). Connection-oriented transports use finer-grained
77  * serializers.
78  *
79  * All COTS-type endpoints start their life with private serializers. During
80  * connection request processing the endpoint serializer is switched to the
81  * listener's serializer and the rest of T_CONN_REQ processing is done on the
82  * listener serializer. During T_CONN_RES processing the eager serializer is
83  * switched from listener to acceptor serializer and after that point all
84  * processing for eager and acceptor happens on this serializer. To avoid races
85  * with endpoint closes while its serializer may be changing closes are blocked
86  * while serializers are manipulated.
87  *
88  * References accounting
89  * ---------------------
90  *
91  * Endpoints are reference counted and freed when the last reference is
92  * dropped. Functions within the serializer may access an endpoint state even
93  * after an endpoint closed. The te_closing being set on the endpoint indicates
94  * that the endpoint entered its close routine.
95  *
96  * One reference is held for each opened endpoint instance. The reference
97  * counter is incremented when the endpoint is linked to another endpoint and
98  * decremented when the link disappears. It is also incremented when the
99  * endpoint is found by the hash table lookup. This increment is atomic with the
100  * lookup itself and happens while the hash table read lock is held.
101  *
102  * Close synchronization
103  * ---------------------
104  *
105  * During close the endpoint is marked as closing using te_closing flag. It is
106  * usually enough to check for te_closing flag since all other state changes
107  * happen after this flag is set and the close entered serializer. Immediately
108  * after setting te_closing flag tl_close() enters serializer and waits until
109  * the callback finishes. This allows all functions called within serializer to
110  * simply check te_closing without any locks.
111  *
112  * Serializer management.
113  * ---------------------
114  *
115  * For COTS transports serializers are created when the endpoint is constructed
116  * and destroyed when the endpoint is destructed. CLTS transports use global
117  * serializers - one for sockets and one for TLI.
118  *
119  * COTS serializers have separate reference counts to deal with several
120  * endpoints sharing the same serializer. There is a subtle problem related to
121  * the serializer destruction. The serializer should never be destroyed by any
122  * function executed inside serializer. This means that close has to wait till
123  * all serializer activity for this endpoint is finished before it can drop the
124  * last reference on the endpoint (which may as well free the serializer).  This
125  * is only relevant for COTS transports which manage serializers
126  * dynamically. For CLTS transports close may complete without waiting for all
127  * serializer activity to finish since serializer is only destroyed at driver
128  * detach time.
129  *
130  * COTS endpoints keep track of the number of outstanding requests on the
131  * serializer for the endpoint. The code handling accept() avoids changing
132  * client serializer if it has any pending messages on the serializer and
133  * instead moves acceptor to listener's serializer.
134  *
135  *
136  * Use of hash tables
137  * ------------------
138  *
139  * The driver uses modhash hash table implementation. Each transport uses two
140  * hash tables - one for finding endpoints by acceptor ID and another one for
141  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
142  * pair of hash tables since sockets only use TICOTSORD.
143  *
144  * All hash tables lookups increment a reference count for returned endpoints,
145  * so we may safely check the endpoint state even when the endpoint is removed
146  * from the hash by another thread immediately after it is found.
147  *
148  *
149  * CLOSE processing
150  * ================
151  *
152  * The driver enters serializer twice on close(). The close sequence is the
153  * following:
154  *
155  * 1) Wait until closing is safe (te_closewait becomes zero)
156  *	This step is needed to prevent close during serializer switches. In most
157  *	cases (close happening after connection establishment) te_closewait is
158  *	zero.
159  * 2) Set te_closing.
160  * 3) Call tl_close_ser() within serializer and wait for it to complete.
161  *
162  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
163  *	It also needs to clear write-side q_next pointers - this should be done
164  *	before qprocsoff().
165  *
166  *    This synchronous serializer entry during close is needed to ensure that
167  *    the queue is valid everywhere inside the serializer.
168  *
169  *    Note that in many cases close will execute tl_close_ser() synchronously,
170  *    so it will not wait at all.
171  *
172  * 4) Calls qprocsoff().
173  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
174  *	complete (for COTS transports). For CLTS transport there is no wait.
175  *
176  *	tl_close_finish_ser() finishes the close process and wakes up waiting
177  *	close if there is any.
178  *
179  *    Note that in most cases close will enter tl_close_finish_ser()
180  *    synchronously and will not wait at all.
181  *
182  *
183  * Flow Control
184  * ============
185  *
186  * The driver implements both read and write side service routines. No one calls
187  * putq() on the read queue. The read side service routine tl_rsrv() is called
188  * when the read side stream is back-enabled. It enters serializer synchronously
189  * (waits till serializer processing is complete). Within serializer it
190  * back-enables all endpoints blocked by the queue for connection-less
191  * transports and enables write side service processing for the peer for
192  * connection-oriented transports.
193  *
194  * Read and write side service routines use special mblk_sized space in the
195  * endpoint structure to enter perimeter.
196  *
197  * Write-side flow control
198  * -----------------------
199  *
200  * Write side flow control is a bit tricky. The driver needs to deal with two
201  * message queues - the explicit STREAMS message queue maintained by
202  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
203  * queues should be synchronized to preserve message ordering and should
204  * maintain a single order determined by the order in which messages enter
205  * tl_wput(). In order to maintain the ordering between these two queues the
206  * STREAMS queue is only manipulated within the serializer, so the ordering is
207  * provided by the serializer.
208  *
209  * Functions called from the tl_wsrv() sometimes may call putbq(). To
210  * immediately stop any further processing of the STREAMS message queues the
211  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
212  * side service processing stops when the flag is set.
213  *
214  * The tl_wsrv() function enters serializer synchronously and waits for it to
215  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
216  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
217  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
218  * always bounded by the amount of messages on the STREAMS queue at the time
219  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
220  * queue from another serialized entry which can't happen in parallel. This
221  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
222  * of it draining forever while writer places new messages on the STREAMS
223  * queue).
224  *
225  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
226  *
227  *
228  * Unix Domain Sockets
229  * ===================
230  *
231  * The driver knows the structure of Unix Domain sockets addresses and treats
232  * them differently from generic TLI addresses. For sockets implicit binds are
233  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
234  * instead of using address length of zero. Explicit binds specify
235  * SOU_MAGIC_EXPLICIT as magic.
236  *
237  * For implicit binds we always use minor number as soua_vp part of the address
238  * and avoid any hash table lookups. This saves two hash tables lookups per
239  * anonymous bind.
240  *
241  * For explicit address we hash the vnode pointer instead of hashing the
242  * full-scale address+zone+length. Hashing by pointer is more efficient than
243  * hashing by the full address.
244  *
245  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
246  * tep structure, so it should never be freed.
247  *
248  * Also for sockets the driver always uses minor number as acceptor id.
249  *
250  * TPI VIOLATIONS
251  * --------------
252  *
253  * This driver violates TPI in several respects for Unix Domain Sockets:
254  *
255  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
256  *	is requested and the endpoint is already in use. There is no point in
257  *	generating an unused address since this address will be rejected by
258  *	sockfs anyway. For implicit binds it always generates a new address
259  *	(sets soua_vp to its minor number).
260  *
261  * 2) It always uses minor number as acceptor ID and never uses queue
262  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
263  *	message and they do not use the queue pointer.
264  *
265  * 3) For Listener sockets the usual sequence is to issue bind() with zero
266  *	backlog followed by listen(). The listen() should be issued with
267  *	non-zero backlog, so sotpi_listen() issues unbind request followed by
268  *	bind request to the same address but with a non-zero qlen value. Both
269  *	tl_bind() and tl_unbind() require write lock on the hash table to
270  *	insert/remove the address. The driver does not remove the address from
271  *	the hash for endpoints that are bound to the explicit address and have
272  *	backlog of zero. During T_BIND_REQ processing if the address requested
273  *	is equal to the address the endpoint already has it updates the backlog
274  *	without reinserting the address in the hash table. This optimization
275  *	avoids two hash table updates for each listener created. It also
276  *	avoids the problem of a "stolen" address when another listener may use
277  *	the same address between the unbind and bind and suddenly listen() fails
278  *	because address is in use even though the bind() succeeded.
279  *
280  *
281  * CONNECTIONLESS TRANSPORTS
282  * =========================
283  *
284  * Connectionless transports all share the same serializer (one for TLI and one
285  * for Sockets). Functions executing behind serializer can check or modify state
286  * of any endpoint.
287  *
288  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
289  * te_lastep field. The next time X talks to some address A it checks whether A
290  * is the same as Y's address and if it is there is no need to lookup Y. If the
291  * address is different or the state of Y is not appropriate (e.g. closed or not
292  * idle) X does a lookup using tl_find_peer() and caches the new address.
293  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
294  * on the endpoint found.
295  *
296  * During close of endpoint Y it doesn't try to remove itself from other
297  * endpoints caches. They will detect that Y is gone and will search the peer
298  * endpoint again.
299  *
300  * Flow Control Handling.
301  * ----------------------
302  *
303  * Each connectionless endpoint keeps a list of endpoints which are
304  * flow-controlled by its queue. It also keeps a pointer to the queue which
305  * flow-controls itself.  Whenever flow control releases for endpoint X it
306  * enables all queues from the list. During close it also back-enables everyone
307  * in the list. If X is flow-controlled when it is closing it removes itself
308  * from the peer's list.
309  *
310  * DATA STRUCTURES
311  * ===============
312  *
313  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
314  * endpoint state. For connection-oriented transports it keeps a list
315  * of pending connections (tl_icon_t). For connectionless transports it keeps a
316  * list of endpoints flow controlled by this one.
317  *
318  * Each transport type is represented by a per-transport data structure
319  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
320  * endpoint address hash tables for each transport. It also contains pointer to
321  * transport serializer for connectionless transports.
322  *
323  * Each endpoint keeps a link to its transport structure, so the code can find
324  * all per-transport information quickly.
325  */
326 
327 #include	<sys/types.h>
328 #include	<sys/inttypes.h>
329 #include	<sys/stream.h>
330 #include	<sys/stropts.h>
331 #define	_SUN_TPI_VERSION 2
332 #include	<sys/tihdr.h>
333 #include	<sys/strlog.h>
334 #include	<sys/debug.h>
335 #include	<sys/cred.h>
336 #include	<sys/errno.h>
337 #include	<sys/kmem.h>
338 #include	<sys/id_space.h>
339 #include	<sys/modhash.h>
340 #include	<sys/mkdev.h>
341 #include	<sys/tl.h>
342 #include	<sys/stat.h>
343 #include	<sys/conf.h>
344 #include	<sys/modctl.h>
345 #include	<sys/strsun.h>
346 #include	<sys/socket.h>
347 #include	<sys/socketvar.h>
348 #include	<sys/sysmacros.h>
349 #include	<sys/xti_xtiopt.h>
350 #include	<sys/ddi.h>
351 #include	<sys/sunddi.h>
352 #include	<sys/zone.h>
353 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
354 #include	<inet/optcom.h>
355 #include	<sys/strsubr.h>
356 #include	<sys/ucred.h>
357 #include	<sys/suntpi.h>
358 #include	<sys/list.h>
359 #include	<sys/serializer.h>
360 
361 /*
362  * TBD List
363  * 14 Eliminate state changes through table
364  * 16. AF_UNIX socket options
365  * 17. connect() for ticlts
366  * 18. support for "netstat" to show AF_UNIX plus TLI local
367  *	transport connections
368  * 21. sanity check to flushing on sending M_ERROR
369  */
370 
371 /*
372  * CONSTANT DECLARATIONS
373  * --------------------
374  */
375 
376 /*
377  * Local declarations
378  */
/* Initial sequence number used by T_DISCON_IND. */
#define	BADSEQNUM	(-1)
/* How long (in usecs) to wait for the allocb buffer timeout. */
#define	TL_BUFWAIT	(10000)
/* TIDU size to use when "strmsgz" is unlimited (0). */
#define	TL_TIDUSZ	(64 * 1024)
/* Size of the hash tables. */
#define	TL_HASH_SIZE	311
386 
/*
 * Values plugged into the module_info structure (tl_minfo).
 */
#define	TL_ID		(104)		/* module ID number */
#define	TL_NAME		"tl"		/* module name */
#define	TL_MINPSZ	(0)		/* min packet size */
#define	TL_MAXPSZ	INFPSZ		/* max packet size */
#define	TL_HIWAT	(16 * 1024)	/* hi water mark */
#define	TL_LOWAT	(256)		/* lo water mark */
/*
 * Minor numbers double as transport provider modes.  Socket use is treated
 * as a mode of its own (TL_SOCKET or'ed onto the base mode) so that sockets
 * get a separate name space.
 */
#define	TL_TICOTS	0	/* connection oriented transport */
#define	TL_TICOTSORD	1	/* COTS w/ orderly release */
#define	TL_TICLTS	2	/* connectionless transport */
#define	TL_UNUSED	3
#define	TL_SOCKET	4	/* Socket */
#define	TL_SOCK_COTS	(TL_SOCKET | TL_TICOTS)
#define	TL_SOCK_COTSORD	(TL_SOCKET | TL_TICOTSORD)
#define	TL_SOCK_CLTS	(TL_SOCKET | TL_TICLTS)

/* Mask wide enough to hold every mode value defined above. */
#define	TL_MINOR_MASK	0x7
/* One past TL_TICLTS. */
#define	TL_MINOR_START	(TL_TICLTS + 1)
411 
/*
 * LOCAL MACROS
 *
 * T_ALIGN(p) hands p to P2ROUNDUP() with an alignment of
 * sizeof (t_scalar_t).
 */
#define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
416 
417 /*
418  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
419  */
420 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
421 static int tl_close(queue_t *, int, cred_t *);
422 static int tl_wput(queue_t *, mblk_t *);
423 static int tl_wsrv(queue_t *);
424 static int tl_rsrv(queue_t *);
425 
426 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
427 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
428 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
429 
430 
431 /*
432  * GLOBAL DATA STRUCTURES AND VARIABLES
433  * -----------------------------------
434  */
435 
436 /*
437  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
438  * For now, we only manage the SO_RECVUCRED option but we also have
439  * harmless dummy options to make things work with some common code we access.
440  */
441 opdes_t	tl_opt_arr[] = {
442 	/* The SO_TYPE is needed for the hack below */
443 	{
444 		SO_TYPE,
445 		SOL_SOCKET,
446 		OA_R,
447 		OA_R,
448 		OP_NP,
449 		0,
450 		sizeof (t_scalar_t),
451 		0
452 	},
453 	{
454 		SO_RECVUCRED,
455 		SOL_SOCKET,
456 		OA_RW,
457 		OA_RW,
458 		OP_NP,
459 		0,
460 		sizeof (int),
461 		0
462 	}
463 };
464 
465 /*
466  * Table of all supported levels
467  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
468  * any supported options so we need this info separately.
469  *
470  * This is needed only for topmost tpi providers.
471  */
472 optlevel_t	tl_valid_levels_arr[] = {
473 	XTI_GENERIC,
474 	SOL_SOCKET,
475 	TL_PROT_LEVEL
476 };
477 
478 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
/* Number of entries in the option table tl_opt_arr. */
#define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)

/*
 * Current upper bound on the amount of space needed to return all options:
 * per table entry four bytes (the "<< 2") plus one struct opthdr, then 64
 * more bytes and one struct T_optmgmt_ack.
 *
 * Additional options with data size of sizeof(long) are handled
 * automatically.  Any other option has to be accounted for here by hand.
 */
#define	TL_MAX_OPT_BUF_LEN						\
		((TL_OPT_ARR_CNT << 2) +				\
		(TL_OPT_ARR_CNT * sizeof (struct opthdr)) +		\
		64 + sizeof (struct T_optmgmt_ack))
490 
491 /*
492  *	transport addr structure
493  */
494 typedef struct tl_addr {
495 	zoneid_t	ta_zoneid;		/* Zone scope of address */
496 	t_scalar_t	ta_alen;		/* length of abuf */
497 	void		*ta_abuf;		/* the addr itself */
498 } tl_addr_t;
499 
500 /*
501  * Refcounted version of serializer.
502  */
503 typedef struct tl_serializer {
504 	uint_t		ts_refcnt;
505 	serializer_t	*ts_serializer;
506 } tl_serializer_t;
507 
508 /*
509  * Each transport type has a separate state.
510  * Per-transport state.
511  */
512 typedef struct tl_transport_state {
513 	char		*tr_name;
514 	minor_t		tr_minor;
515 	uint32_t	tr_defaddr;
516 	mod_hash_t	*tr_ai_hash;
517 	mod_hash_t	*tr_addr_hash;
518 	tl_serializer_t	*tr_serializer;
519 } tl_transport_state_t;
520 
521 #define	TL_DFADDR 0x1000
522 
523 static tl_transport_state_t tl_transports[] = {
524 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
525 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
526 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
527 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
528 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
529 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL },
530 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
531 };
532 
533 #define	TL_MAXTRANSPORT A_CNT(tl_transports)
534 
535 struct tl_endpt;
536 typedef struct tl_endpt tl_endpt_t;
537 
538 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
539 
540 /*
541  * Data structure used to represent pending connects.
542  * Records enough information so that the connecting peer can close
543  * before the connection gets accepted.
544  */
545 typedef struct tl_icon {
546 	list_node_t	ti_node;
547 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
548 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
549 	t_scalar_t	ti_seqno;	/* Sequence number */
550 } tl_icon_t;
551 
/* Socket address type and its length. */
typedef struct so_ux_addr soux_addr_t;
#define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)

/*
 * Upper limit on the number of unaccepted connection indications a listener
 * may have.
 */
#define	TL_MAXQLEN	4096
int tl_maxqlen = TL_MAXQLEN;
560 
561 /*
562  *	transport endpoint structure
563  */
564 struct tl_endpt {
565 	queue_t		*te_rq;		/* stream read queue */
566 	queue_t		*te_wq;		/* stream write queue */
567 	uint32_t	te_refcnt;
568 	int32_t		te_state;	/* TPI state of endpoint */
569 	minor_t		te_minor;	/* minor number */
570 #define	te_seqno	te_minor
571 	uint_t		te_flag;	/* flag field */
572 	boolean_t	te_nowsrv;
573 	tl_serializer_t	*te_ser;	/* Serializer to use */
574 #define	te_serializer	te_ser->ts_serializer
575 
576 	soux_addr_t	te_uxaddr;	/* Socket address */
577 #define	te_magic	te_uxaddr.soua_magic
578 #define	te_vp		te_uxaddr.soua_vp
579 	tl_addr_t	te_ap;		/* addr bound to this endpt */
580 #define	te_zoneid te_ap.ta_zoneid
581 #define	te_alen	te_ap.ta_alen
582 #define	te_abuf	te_ap.ta_abuf
583 
584 	tl_transport_state_t *te_transport;
585 #define	te_addrhash	te_transport->tr_addr_hash
586 #define	te_aihash	te_transport->tr_ai_hash
587 #define	te_defaddr	te_transport->tr_defaddr
588 	cred_t		*te_credp;	/* endpoint user credentials */
589 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
590 
591 	/*
592 	 * State specific for connection-oriented and connectionless transports.
593 	 */
594 	union {
595 		/* Connection-oriented state. */
596 		struct {
597 			t_uscalar_t _te_nicon;	/* count of conn requests */
598 			t_uscalar_t _te_qlen;	/* max conn requests */
599 			tl_endpt_t  *_te_oconp;	/* conn request pending */
600 			tl_endpt_t  *_te_conp;	/* connected endpt */
601 #ifndef _ILP32
602 			void	    *_te_pad;
603 #endif
604 			list_t	_te_iconp;	/* list of conn ind. pending */
605 		} _te_cots_state;
606 		/* Connection-less state. */
607 		struct {
608 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
609 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
610 			list_node_t _te_flows;	/* lists of connections */
611 			list_t  _te_flowlist;	/* Who flowcontrols on me */
612 		} _te_clts_state;
613 	} _te_transport_state;
614 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
615 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
616 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
617 #define	te_conp		_te_transport_state._te_cots_state._te_conp
618 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
619 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
620 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
621 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
622 #define	te_flows	_te_transport_state._te_clts_state._te_flows
623 
624 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
625 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
626 	pid_t		te_cpid;	/* cached pid of endpoint */
627 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
628 	/*
629 	 * Pieces of the endpoint state needed for closing.
630 	 */
631 	kmutex_t	te_closelock;
632 	kcondvar_t	te_closecv;
633 	uint8_t		te_closing;	/* The endpoint started closing */
634 	uint8_t		te_closewait;	/* Wait in close until zero */
635 	mblk_t		te_closemp;	/* for entering serializer on close */
636 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
637 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
638 	kmutex_t	te_srv_lock;
639 	kcondvar_t	te_srv_cv;
640 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
641 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
642 	/*
643 	 * Pieces of the endpoint state needed for serializer transitions.
644 	 */
645 	kmutex_t	te_ser_lock;	/* Protects the count below */
646 	uint_t		te_ser_count;	/* Number of messages on serializer */
647 };
648 
/*
 * Values for te_flag.  The lower 4 bits specify the transport used.
 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER exist for debugging
 * only: they make it easier to identify an endpoint.
 */
#define	TL_LISTENER	0x00010	/* the listener endpoint */
#define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
#define	TL_EAGER	0x00040	/* connecting endpoint */
#define	TL_ACCEPTED	0x00080	/* accepted connection */
#define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
#define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
#define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
#define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
#define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
/*
 * Endpoint type predicates.  Each one evaluates to 1 or 0 depending on the
 * transport mode bits found in te_flag.
 */
#define	IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
#define	IS_COTS(x)	(!IS_CLTS(x))
#define	IS_COTSORD(x)	(!!((x)->te_flag & TL_TICOTSORD))
#define	IS_SOCKET(x)	(!!((x)->te_flag & TL_SOCKET))
670 
/*
 * Operations that always go together, bundled up so that no part of a
 * combination gets forgotten.  Every macro expands to a brace-enclosed block
 * and evaluates its endpoint argument more than once.
 */

/* Drop the reference held through x and clear x. */
#define	TL_UNCONNECT(x) {		\
	tl_refrele(x);			\
	(x) = NULL;			\
}

/* TL_UNCONNECT(x), unless x is already NULL. */
#define	TL_REMOVE_PEER(x) {		\
	if ((x) != NULL)		\
		TL_UNCONNECT(x)		\
}

/*
 * Put mp back on the write queue and set te_nowsrv.  Must not be used once
 * close has entered the serializer.
 */
#define	TL_PUTBQ(x, mp) {				\
	ASSERT(!((x)->te_flag & TL_CLOSE_SER));		\
	(x)->te_nowsrv = B_TRUE;			\
	(void) putbq((x)->te_wq, (mp));			\
}

/* Clear te_nowsrv and enable the write queue. */
#define	TL_QENABLE(x) {			\
	(x)->te_nowsrv = B_FALSE;	\
	qenable((x)->te_wq);		\
}

/* Clear te_nowsrv and put mp on the write queue. */
#define	TL_PUTQ(x, mp) {		\
	(x)->te_nowsrv = B_FALSE;	\
	(void) putq((x)->te_wq, (mp));	\
}
686 
687 /*
688  * STREAMS driver glue data structures.
689  */
690 static	struct	module_info	tl_minfo = {
691 	TL_ID,			/* mi_idnum */
692 	TL_NAME,		/* mi_idname */
693 	TL_MINPSZ,		/* mi_minpsz */
694 	TL_MAXPSZ,		/* mi_maxpsz */
695 	TL_HIWAT,		/* mi_hiwat */
696 	TL_LOWAT		/* mi_lowat */
697 };
698 
699 static	struct	qinit	tl_rinit = {
700 	NULL,			/* qi_putp */
701 	tl_rsrv,		/* qi_srvp */
702 	tl_open,		/* qi_qopen */
703 	tl_close,		/* qi_qclose */
704 	NULL,			/* qi_qadmin */
705 	&tl_minfo,		/* qi_minfo */
706 	NULL			/* qi_mstat */
707 };
708 
709 static	struct	qinit	tl_winit = {
710 	tl_wput,		/* qi_putp */
711 	tl_wsrv,		/* qi_srvp */
712 	NULL,			/* qi_qopen */
713 	NULL,			/* qi_qclose */
714 	NULL,			/* qi_qadmin */
715 	&tl_minfo,		/* qi_minfo */
716 	NULL			/* qi_mstat */
717 };
718 
719 static	struct streamtab	tlinfo = {
720 	&tl_rinit,		/* st_rdinit */
721 	&tl_winit,		/* st_wrinit */
722 	NULL,			/* st_muxrinit */
723 	NULL			/* st_muxwrinit */
724 };
725 
726 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
727     nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
728 
729 static struct modldrv modldrv = {
730 	&mod_driverops,		/* Type of module -- pseudo driver here */
731 	"TPI Local Transport (tl)",
732 	&tl_devops,		/* driver ops */
733 };
734 
735 /*
736  * Module linkage information for the kernel.
737  */
738 static struct modlinkage modlinkage = {
739 	MODREV_1,
740 	&modldrv,
741 	NULL
742 };
743 
744 /*
745  * Templates for response to info request
746  * Check sanity of unlimited connect data etc.
747  */
748 
749 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1 | SENDZERO)
750 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1 | SENDZERO)
751 
752 static struct T_info_ack tl_cots_info_ack =
753 	{
754 		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
755 		T_INFINITE,	/* TSDU size */
756 		T_INFINITE,	/* ETSDU size */
757 		T_INFINITE,	/* CDATA_size */
758 		T_INFINITE,	/* DDATA_size */
759 		T_INFINITE,	/* ADDR_size  */
760 		T_INFINITE,	/* OPT_size */
761 		0,		/* TIDU_size - fill at run time */
762 		T_COTS,		/* SERV_type */
763 		-1,		/* CURRENT_state */
764 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
765 	};
766 
767 static struct T_info_ack tl_clts_info_ack =
768 	{
769 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
770 		0,		/* TSDU_size - fill at run time */
771 		-2,		/* ETSDU_size -2 => not supported */
772 		-2,		/* CDATA_size -2 => not supported */
773 		-2,		/* DDATA_size  -2 => not supported */
774 		-1,		/* ADDR_size -1 => infinite */
775 		-1,		/* OPT_size */
776 		0,		/* TIDU_size - fill at run time */
777 		T_CLTS,		/* SERV_type */
778 		-1,		/* CURRENT_state */
779 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
780 	};
781 
/*
 * private copy of devinfo pointer used in tl_info; recorded by tl_attach()
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.  Created in tl_attach(); tl_open() allocates tl_endpt_t
 * objects from it and tl_free() returns them.
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.  tl_open() allocates a per-open minor from it and
 * tl_free() gives the minor back.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.  Computed in tl_attach() from strmsgsz, or
 * TL_TIDUSZ when strmsgsz is 0.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.  Passed to the mod_hash_create_*() calls made by
 * tl_attach().
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
/* NOTE(review): not referenced in this part of the file; purpose unconfirmed */
static int tl_client_closing_when_accepting;

/* NOTE(review): not referenced in this part of the file; purpose unconfirmed */
static int tl_serializer_noswitch;
814 
/*
 * State transition table.
 *
 * nextstate[] is indexed first by event (the TE_* values defined next to
 * each row) and then by state (the column numbers in the header comment).
 * A cell holding `nr' marks an event/state combination that is not
 * reachable; any other cell is presumably the number of the resulting
 * state -- confirm against the TPI state definitions.
 */
#define	nr	127		/* not reachable */

#define	TE_NOEVENTS	28	/* number of rows (events) in nextstate[] */

static char nextstate[TE_NOEVENTS][TS_NOSTATES] = {
				/* STATES */
	/* 0  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16 */

/* Initialization events */

#define	TE_BIND_REQ	0	/* bind request				*/
	{ 1, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_UNBIND_REQ	1	/* unbind request			*/
	{nr, nr, nr,  2, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_OPTMGMT_REQ	2	/* manage options req			*/
	{nr, nr, nr,  4, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_BIND_ACK	3	/* bind acknowledgment			*/
	{nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_OPTMGMT_ACK	4	/* manage options ack			*/
	{nr, nr, nr, nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_ERROR_ACK	5	/* error acknowledgment			*/
	{nr,  0,  3, nr,  3,  3, nr, nr,  7, nr, nr, nr,  6,  7,  9, 10, 11},
#define	TE_OK_ACK1	6	/* ok ack  seqcnt == 0			*/
	{nr, nr,  0, nr, nr,  6, nr, nr, nr, nr, nr, nr,  3, nr,  3,  3,  3},
#define	TE_OK_ACK2	7	/* ok ack  seqcnt == 1, q == resq	*/
	{nr, nr, nr, nr, nr, nr, nr, nr,  9, nr, nr, nr, nr,  3, nr, nr, nr},
#define	TE_OK_ACK3	8	/* ok ack  seqcnt == 1, q != resq	*/
	{nr, nr, nr, nr, nr, nr, nr, nr,  3, nr, nr, nr, nr,  3, nr, nr, nr},
#define	TE_OK_ACK4	9	/* ok ack  seqcnt > 1			*/
	{nr, nr, nr, nr, nr, nr, nr, nr,  7, nr, nr, nr, nr,  7, nr, nr, nr},

/* Connection oriented events */
#define	TE_CONN_REQ	10	/* connection request			*/
	{nr, nr, nr,  5, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_CONN_RES	11	/* connection response			*/
	{nr, nr, nr, nr, nr, nr, nr,  8, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_DISCON_REQ	12	/* disconnect request			*/
	{nr, nr, nr, nr, nr, nr, 12, 13, nr, 14, 15, 16, nr, nr, nr, nr, nr},
#define	TE_DATA_REQ	13	/* data request				*/
	{nr, nr, nr, nr, nr, nr, nr, nr, nr,  9, nr, 11, nr, nr, nr, nr, nr},
#define	TE_EXDATA_REQ	14	/* expedited data request		*/
	{nr, nr, nr, nr, nr, nr, nr, nr, nr,  9, nr, 11, nr, nr, nr, nr, nr},
#define	TE_ORDREL_REQ	15	/* orderly release req			*/
	{nr, nr, nr, nr, nr, nr, nr, nr, nr, 10, nr,  3, nr, nr, nr, nr, nr},
#define	TE_CONN_IND	16	/* connection indication		*/
	{nr, nr, nr,  7, nr, nr, nr,  7, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_CONN_CON	17	/* connection confirmation		*/
	{nr, nr, nr, nr, nr, nr,  9, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_DATA_IND	18	/* data indication			*/
	{nr, nr, nr, nr, nr, nr, nr, nr, nr,  9, 10, nr, nr, nr, nr, nr, nr},
#define	TE_EXDATA_IND	19	/* expedited data indication		*/
	{nr, nr, nr, nr, nr, nr, nr, nr, nr,  9, 10, nr, nr, nr, nr, nr, nr},
#define	TE_ORDREL_IND	20	/* orderly release ind			*/
	{nr, nr, nr, nr, nr, nr, nr, nr, nr, 11,  3, nr, nr, nr, nr, nr, nr},
#define	TE_DISCON_IND1	21	/* disconnect indication seq == 0	*/
	{nr, nr, nr, nr, nr, nr,  3, nr, nr,  3,  3,  3, nr, nr, nr, nr, nr},
#define	TE_DISCON_IND2	22	/* disconnect indication seq == 1	*/
	{nr, nr, nr, nr, nr, nr, nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_DISCON_IND3	23	/* disconnect indication seq > 1	*/
	{nr, nr, nr, nr, nr, nr, nr,  7, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_PASS_CONN	24	/* pass connection			*/
	{nr, nr, nr,  9, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},


/* Unit data events */

#define	TE_UNITDATA_REQ	25	/* unitdata request			*/
	{nr, nr, nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_UNITDATA_IND	26	/* unitdata indication			*/
	{nr, nr, nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define	TE_UDERROR_IND	27	/* unitdata error indication		*/
	{nr, nr, nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
};
888 
889 
890 
/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 * Forward declarations for the file-local routines.
 *
 * NOTE(review): the routines with a `_ser' suffix all take
 * (mblk_t *, tl_endpt_t *); they appear to be the ones dispatched through
 * tl_serializer_enter() -- confirm against the tlproc_t definition.
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
    t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t  *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);
969 
/*
 * Initialize option database object for TL.  It bundles the driver's
 * default/get/set option callbacks with the option table and the table of
 * valid option levels.
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};
983 
984 /*
985  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
986  * ---------------------------------------
987  */
988 
989 /*
990  * Loadable module routines
991  */
992 int
_init(void)993 _init(void)
994 {
995 	return (mod_install(&modlinkage));
996 }
997 
998 int
_fini(void)999 _fini(void)
1000 {
1001 	return (mod_remove(&modlinkage));
1002 }
1003 
1004 int
_info(struct modinfo * modinfop)1005 _info(struct modinfo *modinfop)
1006 {
1007 	return (mod_info(&modlinkage, modinfop));
1008 }
1009 
1010 /*
1011  * Driver Entry Points and Other routines
1012  */
1013 static int
tl_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)1014 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1015 {
1016 	int i;
1017 	char name[32];
1018 
1019 	/*
1020 	 * Resume from a checkpoint state.
1021 	 */
1022 	if (cmd == DDI_RESUME)
1023 		return (DDI_SUCCESS);
1024 
1025 	if (cmd != DDI_ATTACH)
1026 		return (DDI_FAILURE);
1027 
1028 	/*
1029 	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
1030 	 * streams message sizes can be unlimited. We use a defined constant
1031 	 * instead.
1032 	 */
1033 	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
1034 
1035 	/*
1036 	 * Create subdevices for each transport.
1037 	 */
1038 	for (i = 0; i < TL_UNUSED; i++) {
1039 		if (ddi_create_minor_node(devi,
1040 		    tl_transports[i].tr_name,
1041 		    S_IFCHR, tl_transports[i].tr_minor,
1042 		    DDI_PSEUDO, 0) == DDI_FAILURE) {
1043 			ddi_remove_minor_node(devi, NULL);
1044 			return (DDI_FAILURE);
1045 		}
1046 	}
1047 
1048 	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
1049 	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
1050 
1051 	if (tl_cache == NULL) {
1052 		ddi_remove_minor_node(devi, NULL);
1053 		return (DDI_FAILURE);
1054 	}
1055 
1056 	tl_minors = id_space_create("tl_minor_space",
1057 	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
1058 
1059 	/*
1060 	 * Create ID space for minor numbers
1061 	 */
1062 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1063 		tl_transport_state_t *t = &tl_transports[i];
1064 
1065 		if (i == TL_UNUSED)
1066 			continue;
1067 
1068 		/* Socket COTSORD shares namespace with COTS */
1069 		if (i == TL_SOCK_COTSORD) {
1070 			t->tr_ai_hash =
1071 			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
1072 			ASSERT(t->tr_ai_hash != NULL);
1073 			t->tr_addr_hash =
1074 			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
1075 			ASSERT(t->tr_addr_hash != NULL);
1076 			continue;
1077 		}
1078 
1079 		/*
1080 		 * Create hash tables.
1081 		 */
1082 		(void) snprintf(name, sizeof (name), "%s_ai_hash",
1083 		    t->tr_name);
1084 #ifdef _ILP32
1085 		if (i & TL_SOCKET)
1086 			t->tr_ai_hash =
1087 			    mod_hash_create_idhash(name, tl_hash_size - 1,
1088 			    mod_hash_null_valdtor);
1089 		else
1090 			t->tr_ai_hash =
1091 			    mod_hash_create_ptrhash(name, tl_hash_size,
1092 			    mod_hash_null_valdtor, sizeof (queue_t));
1093 #else
1094 		t->tr_ai_hash =
1095 		    mod_hash_create_idhash(name, tl_hash_size - 1,
1096 		    mod_hash_null_valdtor);
1097 #endif /* _ILP32 */
1098 
1099 		if (i & TL_SOCKET) {
1100 			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1101 			    t->tr_name);
1102 			t->tr_addr_hash = mod_hash_create_ptrhash(name,
1103 			    tl_hash_size, mod_hash_null_valdtor,
1104 			    sizeof (uintptr_t));
1105 		} else {
1106 			(void) snprintf(name, sizeof (name), "%s_addr_hash",
1107 			    t->tr_name);
1108 			t->tr_addr_hash = mod_hash_create_extended(name,
1109 			    tl_hash_size, mod_hash_null_keydtor,
1110 			    mod_hash_null_valdtor,
1111 			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1112 		}
1113 
1114 		/* Create serializer for connectionless transports. */
1115 		if (i & TL_TICLTS)
1116 			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1117 	}
1118 
1119 	tl_dip = devi;
1120 
1121 	return (DDI_SUCCESS);
1122 }
1123 
1124 static int
tl_detach(dev_info_t * devi,ddi_detach_cmd_t cmd)1125 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1126 {
1127 	int i;
1128 
1129 	if (cmd == DDI_SUSPEND)
1130 		return (DDI_SUCCESS);
1131 
1132 	if (cmd != DDI_DETACH)
1133 		return (DDI_FAILURE);
1134 
1135 	/*
1136 	 * Destroy arenas and hash tables.
1137 	 */
1138 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1139 		tl_transport_state_t *t = &tl_transports[i];
1140 
1141 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1142 			continue;
1143 
1144 		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1145 		if (t->tr_serializer != NULL) {
1146 			tl_serializer_refrele(t->tr_serializer);
1147 			t->tr_serializer = NULL;
1148 		}
1149 
1150 #ifdef _ILP32
1151 		if (i & TL_SOCKET)
1152 			mod_hash_destroy_idhash(t->tr_ai_hash);
1153 		else
1154 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1155 #else
1156 		mod_hash_destroy_idhash(t->tr_ai_hash);
1157 #endif /* _ILP32 */
1158 		t->tr_ai_hash = NULL;
1159 		if (i & TL_SOCKET)
1160 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1161 		else
1162 			mod_hash_destroy_hash(t->tr_addr_hash);
1163 		t->tr_addr_hash = NULL;
1164 	}
1165 
1166 	kmem_cache_destroy(tl_cache);
1167 	tl_cache = NULL;
1168 	id_space_destroy(tl_minors);
1169 	tl_minors = NULL;
1170 	ddi_remove_minor_node(devi, NULL);
1171 	return (DDI_SUCCESS);
1172 }
1173 
1174 /* ARGSUSED */
1175 static int
tl_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)1176 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1177 {
1178 
1179 	int retcode = DDI_FAILURE;
1180 
1181 	switch (infocmd) {
1182 
1183 	case DDI_INFO_DEVT2DEVINFO:
1184 		if (tl_dip != NULL) {
1185 			*result = (void *)tl_dip;
1186 			retcode = DDI_SUCCESS;
1187 		}
1188 		break;
1189 
1190 	case DDI_INFO_DEVT2INSTANCE:
1191 		*result = NULL;
1192 		retcode = DDI_SUCCESS;
1193 		break;
1194 
1195 	default:
1196 		break;
1197 	}
1198 	return (retcode);
1199 }
1200 
1201 /*
1202  * Endpoint reference management.
1203  */
1204 static void
tl_refhold(tl_endpt_t * tep)1205 tl_refhold(tl_endpt_t *tep)
1206 {
1207 	atomic_inc_32(&tep->te_refcnt);
1208 }
1209 
1210 static void
tl_refrele(tl_endpt_t * tep)1211 tl_refrele(tl_endpt_t *tep)
1212 {
1213 	ASSERT(tep->te_refcnt != 0);
1214 
1215 	if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
1216 		tl_free(tep);
1217 }
1218 
1219 /*ARGSUSED*/
1220 static int
tl_constructor(void * buf,void * cdrarg,int kmflags)1221 tl_constructor(void *buf, void *cdrarg, int kmflags)
1222 {
1223 	tl_endpt_t *tep = buf;
1224 
1225 	bzero(tep, sizeof (tl_endpt_t));
1226 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1227 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1228 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1229 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1230 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1231 
1232 	return (0);
1233 }
1234 
1235 /*ARGSUSED*/
1236 static void
tl_destructor(void * buf,void * cdrarg)1237 tl_destructor(void *buf, void *cdrarg)
1238 {
1239 	tl_endpt_t *tep = buf;
1240 
1241 	mutex_destroy(&tep->te_closelock);
1242 	cv_destroy(&tep->te_closecv);
1243 	mutex_destroy(&tep->te_srv_lock);
1244 	cv_destroy(&tep->te_srv_cv);
1245 	mutex_destroy(&tep->te_ser_lock);
1246 }
1247 
1248 static void
tl_free(tl_endpt_t * tep)1249 tl_free(tl_endpt_t *tep)
1250 {
1251 	ASSERT(tep->te_refcnt == 0);
1252 	ASSERT(tep->te_transport != NULL);
1253 	ASSERT(tep->te_rq == NULL);
1254 	ASSERT(tep->te_wq == NULL);
1255 	ASSERT(tep->te_ser != NULL);
1256 	ASSERT(tep->te_ser_count == 0);
1257 	ASSERT(!(tep->te_flag & TL_ADDRHASHED));
1258 
1259 	if (IS_SOCKET(tep)) {
1260 		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1261 		ASSERT(tep->te_abuf == &tep->te_uxaddr);
1262 		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1263 		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1264 	} else if (tep->te_abuf != NULL) {
1265 		kmem_free(tep->te_abuf, tep->te_alen);
1266 		tep->te_alen = -1; /* uninitialized */
1267 		tep->te_abuf = NULL;
1268 	} else {
1269 		ASSERT(tep->te_alen == -1);
1270 	}
1271 
1272 	id_free(tl_minors, tep->te_minor);
1273 	ASSERT(tep->te_credp == NULL);
1274 
1275 	if (tep->te_hash_hndl != NULL)
1276 		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1277 
1278 	if (IS_COTS(tep)) {
1279 		TL_REMOVE_PEER(tep->te_conp);
1280 		TL_REMOVE_PEER(tep->te_oconp);
1281 		tl_serializer_refrele(tep->te_ser);
1282 		tep->te_ser = NULL;
1283 		ASSERT(tep->te_nicon == 0);
1284 		ASSERT(list_head(&tep->te_iconp) == NULL);
1285 	} else {
1286 		ASSERT(tep->te_lastep == NULL);
1287 		ASSERT(list_head(&tep->te_flowlist) == NULL);
1288 		ASSERT(tep->te_flowq == NULL);
1289 	}
1290 
1291 	ASSERT(tep->te_bufcid == 0);
1292 	ASSERT(tep->te_timoutid == 0);
1293 	bzero(&tep->te_ap, sizeof (tep->te_ap));
1294 	tep->te_acceptor_id = 0;
1295 
1296 	ASSERT(tep->te_closewait == 0);
1297 	ASSERT(!tep->te_rsrv_active);
1298 	ASSERT(!tep->te_wsrv_active);
1299 	tep->te_closing = 0;
1300 	tep->te_nowsrv = B_FALSE;
1301 	tep->te_flag = 0;
1302 
1303 	kmem_cache_free(tl_cache, tep);
1304 }
1305 
1306 /*
1307  * Allocate/free reference-counted wrappers for serializers.
1308  */
1309 static tl_serializer_t *
tl_serializer_alloc(int flags)1310 tl_serializer_alloc(int flags)
1311 {
1312 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1313 	serializer_t *ser;
1314 
1315 	if (s == NULL)
1316 		return (NULL);
1317 
1318 	ser = serializer_create(flags);
1319 
1320 	if (ser == NULL) {
1321 		kmem_free(s, sizeof (tl_serializer_t));
1322 		return (NULL);
1323 	}
1324 
1325 	s->ts_refcnt = 1;
1326 	s->ts_serializer = ser;
1327 	return (s);
1328 }
1329 
1330 static void
tl_serializer_refhold(tl_serializer_t * s)1331 tl_serializer_refhold(tl_serializer_t *s)
1332 {
1333 	atomic_inc_32(&s->ts_refcnt);
1334 }
1335 
1336 static void
tl_serializer_refrele(tl_serializer_t * s)1337 tl_serializer_refrele(tl_serializer_t *s)
1338 {
1339 	if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
1340 		serializer_destroy(s->ts_serializer);
1341 		kmem_free(s, sizeof (tl_serializer_t));
1342 	}
1343 }
1344 
1345 /*
1346  * Post a request on the endpoint serializer. For COTS transports keep track of
1347  * the number of pending requests.
1348  */
1349 static void
tl_serializer_enter(tl_endpt_t * tep,tlproc_t tlproc,mblk_t * mp)1350 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1351 {
1352 	if (IS_COTS(tep)) {
1353 		mutex_enter(&tep->te_ser_lock);
1354 		tep->te_ser_count++;
1355 		mutex_exit(&tep->te_ser_lock);
1356 	}
1357 	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1358 }
1359 
1360 /*
1361  * Complete processing the request on the serializer. Decrement the counter for
1362  * pending requests for COTS transports.
1363  */
1364 static void
tl_serializer_exit(tl_endpt_t * tep)1365 tl_serializer_exit(tl_endpt_t *tep)
1366 {
1367 	if (IS_COTS(tep)) {
1368 		mutex_enter(&tep->te_ser_lock);
1369 		ASSERT(tep->te_ser_count != 0);
1370 		tep->te_ser_count--;
1371 		mutex_exit(&tep->te_ser_lock);
1372 	}
1373 }
1374 
1375 /*
1376  * Hash management functions.
1377  */
1378 
1379 /*
1380  * Return TRUE if two addresses are equal, false otherwise.
1381  */
1382 static boolean_t
tl_eqaddr(tl_addr_t * ap1,tl_addr_t * ap2)1383 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1384 {
1385 	return ((ap1->ta_alen > 0) &&
1386 	    (ap1->ta_alen == ap2->ta_alen) &&
1387 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1388 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1389 }
1390 
1391 /*
1392  * This function is called whenever an endpoint is found in the hash table.
1393  */
1394 /* ARGSUSED0 */
1395 static void
tl_find_callback(mod_hash_key_t key,mod_hash_val_t val)1396 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1397 {
1398 	tl_refhold((tl_endpt_t *)val);
1399 }
1400 
1401 /*
1402  * Address hash function.
1403  */
1404 /* ARGSUSED */
1405 static uint_t
tl_hash_by_addr(void * hash_data,mod_hash_key_t key)1406 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1407 {
1408 	tl_addr_t *ap = (tl_addr_t *)key;
1409 	size_t	len = ap->ta_alen;
1410 	uchar_t *p = ap->ta_abuf;
1411 	uint_t i, g;
1412 
1413 	ASSERT((len > 0) && (p != NULL));
1414 
1415 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1416 		i = (i << 4) + (*p);
1417 		if ((g = (i & 0xf0000000U)) != 0) {
1418 			i ^= (g >> 24);
1419 			i ^= g;
1420 		}
1421 	}
1422 	return (i);
1423 }
1424 
1425 /*
1426  * This function is used by hash lookups. It compares two generic addresses.
1427  */
1428 static int
tl_hash_cmp_addr(mod_hash_key_t key1,mod_hash_key_t key2)1429 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1430 {
1431 #ifdef	DEBUG
1432 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1433 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1434 
1435 	ASSERT(key1 != NULL);
1436 	ASSERT(key2 != NULL);
1437 
1438 	ASSERT(ap1->ta_abuf != NULL);
1439 	ASSERT(ap2->ta_abuf != NULL);
1440 	ASSERT(ap1->ta_alen > 0);
1441 	ASSERT(ap2->ta_alen > 0);
1442 #endif
1443 
1444 	return (!tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1445 }
1446 
1447 /*
1448  * Prevent endpoint from closing if possible.
1449  * Return B_TRUE on success, B_FALSE on failure.
1450  */
1451 static boolean_t
tl_noclose(tl_endpt_t * tep)1452 tl_noclose(tl_endpt_t *tep)
1453 {
1454 	boolean_t rc = B_FALSE;
1455 
1456 	mutex_enter(&tep->te_closelock);
1457 	if (!tep->te_closing) {
1458 		ASSERT(tep->te_closewait == 0);
1459 		tep->te_closewait++;
1460 		rc = B_TRUE;
1461 	}
1462 	mutex_exit(&tep->te_closelock);
1463 	return (rc);
1464 }
1465 
1466 /*
1467  * Allow endpoint to close if needed.
1468  */
1469 static void
tl_closeok(tl_endpt_t * tep)1470 tl_closeok(tl_endpt_t *tep)
1471 {
1472 	ASSERT(tep->te_closewait > 0);
1473 	mutex_enter(&tep->te_closelock);
1474 	ASSERT(tep->te_closewait == 1);
1475 	tep->te_closewait--;
1476 	cv_signal(&tep->te_closecv);
1477 	mutex_exit(&tep->te_closelock);
1478 }
1479 
1480 /*
1481  * STREAMS open entry point.
1482  */
1483 /* ARGSUSED */
1484 static int
tl_open(queue_t * rq,dev_t * devp,int oflag,int sflag,cred_t * credp)1485 tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
1486 {
1487 	tl_endpt_t	*tep;
1488 	minor_t		minor = getminor(*devp);
1489 	id_t		inst_minor;
1490 
1491 	/*
1492 	 * Driver is called directly. Both CLONEOPEN and MODOPEN
1493 	 * are illegal
1494 	 */
1495 	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1496 		return (ENXIO);
1497 
1498 	if (rq->q_ptr != NULL)
1499 		return (0);
1500 
1501 	/* Minor number should specify the mode used for the driver. */
1502 	if ((minor >= TL_UNUSED))
1503 		return (ENXIO);
1504 
1505 	if (oflag & SO_SOCKSTR) {
1506 		minor |= TL_SOCKET;
1507 	}
1508 
1509 	/*
1510 	 * Attempt to allocate a unique minor number for this instance.
1511 	 * Avoid an uninterruptable sleep if none are available.
1512 	 */
1513 	if ((inst_minor = id_alloc_nosleep(tl_minors)) == -1) {
1514 		return (ENOMEM);
1515 	}
1516 
1517 	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1518 	tep->te_refcnt = 1;
1519 	tep->te_cpid = curproc->p_pid;
1520 	rq->q_ptr = WR(rq)->q_ptr = tep;
1521 	tep->te_state = TS_UNBND;
1522 	tep->te_credp = credp;
1523 	crhold(credp);
1524 	tep->te_zoneid = getzoneid();
1525 
1526 	tep->te_flag = minor & TL_MINOR_MASK;
1527 	tep->te_transport = &tl_transports[minor];
1528 	tep->te_minor = (minor_t)inst_minor;
1529 
1530 	/* Reserve hash handle for bind(). */
1531 	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1532 
1533 	/* Transport-specific initialization */
1534 	if (IS_COTS(tep)) {
1535 		/* Use private serializer */
1536 		tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1537 
1538 		/* Create list for pending connections */
1539 		list_create(&tep->te_iconp, sizeof (tl_icon_t),
1540 		    offsetof(tl_icon_t, ti_node));
1541 		tep->te_qlen = 0;
1542 		tep->te_nicon = 0;
1543 		tep->te_oconp = NULL;
1544 		tep->te_conp = NULL;
1545 	} else {
1546 		/* Use shared serializer */
1547 		tep->te_ser = tep->te_transport->tr_serializer;
1548 		bzero(&tep->te_flows, sizeof (list_node_t));
1549 		/* Create list for flow control */
1550 		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1551 		    offsetof(tl_endpt_t, te_flows));
1552 		tep->te_flowq = NULL;
1553 		tep->te_lastep = NULL;
1554 
1555 	}
1556 
1557 	/* Initialize endpoint address */
1558 	if (IS_SOCKET(tep)) {
1559 		/* Socket-specific address handling. */
1560 		tep->te_alen = TL_SOUX_ADDRLEN;
1561 		tep->te_abuf = &tep->te_uxaddr;
1562 		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1563 		tep->te_magic = SOU_MAGIC_IMPLICIT;
1564 	} else {
1565 		tep->te_alen = -1;
1566 		tep->te_abuf = NULL;
1567 	}
1568 
1569 	/* clone the driver */
1570 	*devp = makedevice(getmajor(*devp), tep->te_minor);
1571 
1572 	tep->te_rq = rq;
1573 	tep->te_wq = WR(rq);
1574 
1575 #ifdef	_ILP32
1576 	if (IS_SOCKET(tep))
1577 		tep->te_acceptor_id = tep->te_minor;
1578 	else
1579 		tep->te_acceptor_id = (t_uscalar_t)rq;
1580 #else
1581 	tep->te_acceptor_id = tep->te_minor;
1582 #endif	/* _ILP32 */
1583 
1584 
1585 	qprocson(rq);
1586 
1587 	/*
1588 	 * Insert acceptor ID in the hash. The AI hash always sleeps on
1589 	 * insertion so insertion can't fail.
1590 	 */
1591 	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1592 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1593 	    (mod_hash_val_t)tep);
1594 
1595 	return (0);
1596 }
1597 
/*
 * STREAMS close entry point.
 *
 * Close is a handshake between this thread and two routines that run behind
 * the endpoint's serializer:
 *
 *   1. The endpoint is removed from the acceptor ID hash.
 *   2. Once nobody holds off close any more (te_closewait is 0) the endpoint
 *      is marked as closing.
 *   3. tl_close_ser() is posted on the serializer and waited for; only then
 *      is qprocsoff() called and any pending bufcall/timeout cancelled.
 *   4. tl_close_finish_ser() is posted on the serializer.  Connection
 *      oriented endpoints wait for it to finish; connectionless endpoints
 *      take an extra reference instead and let it finish asynchronously.
 *   5. The credentials and this routine's reference on the endpoint are
 *      released and the queue pointers are cleared.
 *
 * Always returns 0.
 */
/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag,	cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	/* On non-DEBUG kernels an inconsistency is only logged. */
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	/* Cancel whatever bufcall or timeout is still outstanding. */
	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close processing
	 * with serializer protection. This processing may happen asynchronously
	 * with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}
1703 
1704 /*
1705  * First phase of close processing done behind the serializer.
1706  *
1707  * Do not drop the reference in the end - tl_close() wants this reference to
1708  * stay.
1709  */
1710 /* ARGSUSED0 */
1711 static void
tl_close_ser(mblk_t * mp,tl_endpt_t * tep)1712 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1713 {
1714 	ASSERT(tep->te_closing);
1715 	ASSERT(tep->te_closewait == 1);
1716 	ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1717 
1718 	tep->te_flag |= TL_CLOSE_SER;
1719 
1720 	/*
1721 	 * Drain out all messages on queue except for TL_TICOTS where the
1722 	 * abortive release semantics permit discarding of data on close
1723 	 */
1724 	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1725 		tl_wsrv_ser(NULL, tep);
1726 	}
1727 
1728 	/* Remove address from hash table. */
1729 	tl_addr_unbind(tep);
1730 	/*
1731 	 * qprocsoff() gets confused when q->q_next is not NULL on the write
1732 	 * queue of the driver, so clear these before qprocsoff() is called.
1733 	 * Also clear q_next for the peer since this queue is going away.
1734 	 */
1735 	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1736 		tl_endpt_t *peer_tep = tep->te_conp;
1737 
1738 		tep->te_wq->q_next = NULL;
1739 		if ((peer_tep != NULL) && !peer_tep->te_closing)
1740 			peer_tep->te_wq->q_next = NULL;
1741 	}
1742 
1743 	tep->te_rq = NULL;
1744 
1745 	/* wake up tl_close() */
1746 	tl_closeok(tep);
1747 	tl_serializer_exit(tep);
1748 }
1749 
1750 /*
1751  * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1752  * the reference for CLTS.
1753  *
1754  * Called from serializer. Should drop reference count for CLTS only.
1755  */
1756 /* ARGSUSED0 */
1757 static void
tl_close_finish_ser(mblk_t * mp,tl_endpt_t * tep)1758 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1759 {
1760 	ASSERT(tep->te_closing);
1761 	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1762 	IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1763 
1764 	tep->te_state = -1;	/* Uninitialized */
1765 	if (IS_COTS(tep)) {
1766 		tl_co_unconnect(tep);
1767 	} else {
1768 		/* Connectionless specific cleanup */
1769 		TL_REMOVE_PEER(tep->te_lastep);
1770 		/*
1771 		 * Backenable anybody that is flow controlled waiting for
1772 		 * this endpoint.
1773 		 */
1774 		tl_cl_backenable(tep);
1775 		if (tep->te_flowq != NULL) {
1776 			list_remove(&(tep->te_flowq->te_flowlist), tep);
1777 			tep->te_flowq = NULL;
1778 		}
1779 	}
1780 
1781 	tl_serializer_exit(tep);
1782 	if (IS_COTS(tep))
1783 		tl_closeok(tep);
1784 	else
1785 		tl_refrele(tep);
1786 }
1787 
1788 /*
1789  * STREAMS write-side put procedure.
1790  * Enter serializer for most of the processing.
1791  *
1792  * The T_CONN_REQ is processed outside of serializer.
1793  */
1794 static int
tl_wput(queue_t * wq,mblk_t * mp)1795 tl_wput(queue_t *wq, mblk_t *mp)
1796 {
1797 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
1798 	ssize_t			msz = MBLKL(mp);
1799 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
1800 	tlproc_t		*tl_proc = NULL;
1801 
1802 	switch (DB_TYPE(mp)) {
1803 	case M_DATA:
1804 		/* Only valid for connection-oriented transports */
1805 		if (IS_CLTS(tep)) {
1806 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1807 			    SL_TRACE | SL_ERROR,
1808 			    "tl_wput:M_DATA invalid for ticlts driver"));
1809 			tl_merror(wq, mp, EPROTO);
1810 			return (0);
1811 		}
1812 		tl_proc = tl_wput_data_ser;
1813 		break;
1814 
1815 	case M_IOCTL:
1816 		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1817 		case TL_IOC_CREDOPT:
1818 			/* FALLTHROUGH */
1819 		case TL_IOC_UCREDOPT:
1820 			/*
1821 			 * Serialize endpoint state change.
1822 			 */
1823 			tl_proc = tl_do_ioctl_ser;
1824 			break;
1825 
1826 		default:
1827 			miocnak(wq, mp, 0, EINVAL);
1828 			return (0);
1829 		}
1830 		break;
1831 
1832 	case M_FLUSH:
1833 		/*
1834 		 * do canonical M_FLUSH processing
1835 		 */
1836 		if (*mp->b_rptr & FLUSHW) {
1837 			flushq(wq, FLUSHALL);
1838 			*mp->b_rptr &= ~FLUSHW;
1839 		}
1840 		if (*mp->b_rptr & FLUSHR) {
1841 			flushq(RD(wq), FLUSHALL);
1842 			qreply(wq, mp);
1843 		} else {
1844 			freemsg(mp);
1845 		}
1846 		return (0);
1847 
1848 	case M_PROTO:
1849 		if (msz < sizeof (prim->type)) {
1850 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1851 			    SL_TRACE | SL_ERROR,
1852 			    "tl_wput:M_PROTO data too short"));
1853 			tl_merror(wq, mp, EPROTO);
1854 			return (0);
1855 		}
1856 		switch (prim->type) {
1857 		case T_OPTMGMT_REQ:
1858 		case T_SVR4_OPTMGMT_REQ:
1859 			/*
1860 			 * Process TPI option management requests immediately
1861 			 * in put procedure regardless of in-order processing
1862 			 * of already queued messages.
1863 			 * (Note: This driver supports AF_UNIX socket
1864 			 * implementation.  Unless we implement this processing,
1865 			 * setsockopt() on socket endpoint will block on flow
1866 			 * controlled endpoints which it should not. That is
1867 			 * required for successful execution of VSU socket tests
1868 			 * and is consistent with BSD socket behavior).
1869 			 */
1870 			tl_optmgmt(wq, mp);
1871 			return (0);
1872 		case O_T_BIND_REQ:
1873 		case T_BIND_REQ:
1874 			tl_proc = tl_bind_ser;
1875 			break;
1876 		case T_CONN_REQ:
1877 			if (IS_CLTS(tep)) {
1878 				tl_merror(wq, mp, EPROTO);
1879 				return (0);
1880 			}
1881 			tl_conn_req(wq, mp);
1882 			return (0);
1883 		case T_DATA_REQ:
1884 		case T_OPTDATA_REQ:
1885 		case T_EXDATA_REQ:
1886 		case T_ORDREL_REQ:
1887 			tl_proc = tl_putq_ser;
1888 			break;
1889 		case T_UNITDATA_REQ:
1890 			if (IS_COTS(tep) ||
1891 			    (msz < sizeof (struct T_unitdata_req))) {
1892 				tl_merror(wq, mp, EPROTO);
1893 				return (0);
1894 			}
1895 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1896 				tl_proc = tl_unitdata_ser;
1897 			} else {
1898 				tl_proc = tl_putq_ser;
1899 			}
1900 			break;
1901 		default:
1902 			/*
1903 			 * process in service procedure if message already
1904 			 * queued (maintain in-order processing)
1905 			 */
1906 			if (wq->q_first != NULL) {
1907 				tl_proc = tl_putq_ser;
1908 			} else {
1909 				tl_proc = tl_wput_ser;
1910 			}
1911 			break;
1912 		}
1913 		break;
1914 
1915 	case M_PCPROTO:
1916 		/*
1917 		 * Check that the message has enough data to figure out TPI
1918 		 * primitive.
1919 		 */
1920 		if (msz < sizeof (prim->type)) {
1921 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1922 			    SL_TRACE | SL_ERROR,
1923 			    "tl_wput:M_PCROTO data too short"));
1924 			tl_merror(wq, mp, EPROTO);
1925 			return (0);
1926 		}
1927 		switch (prim->type) {
1928 		case T_CAPABILITY_REQ:
1929 			tl_capability_req(mp, tep);
1930 			return (0);
1931 		case T_INFO_REQ:
1932 			tl_proc = tl_info_req_ser;
1933 			break;
1934 		case T_ADDR_REQ:
1935 			tl_proc = tl_addr_req_ser;
1936 			break;
1937 
1938 		default:
1939 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1940 			    SL_TRACE | SL_ERROR,
1941 			    "tl_wput:unknown TPI msg primitive"));
1942 			tl_merror(wq, mp, EPROTO);
1943 			return (0);
1944 		}
1945 		break;
1946 	default:
1947 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
1948 		    "tl_wput:default:unexpected Streams message"));
1949 		freemsg(mp);
1950 		return (0);
1951 	}
1952 
1953 	/*
1954 	 * Continue processing via serializer.
1955 	 */
1956 	ASSERT(tl_proc != NULL);
1957 	tl_refhold(tep);
1958 	tl_serializer_enter(tep, tl_proc, mp);
1959 	return (0);
1960 }
1961 
1962 /*
1963  * Place message on the queue while preserving order.
1964  */
1965 static void
tl_putq_ser(mblk_t * mp,tl_endpt_t * tep)1966 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1967 {
1968 	if (tep->te_closing) {
1969 		tl_wput_ser(mp, tep);
1970 	} else {
1971 		TL_PUTQ(tep, mp);
1972 		tl_serializer_exit(tep);
1973 		tl_refrele(tep);
1974 	}
1975 
1976 }
1977 
1978 static void
tl_wput_common_ser(mblk_t * mp,tl_endpt_t * tep)1979 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1980 {
1981 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1982 
1983 	switch (DB_TYPE(mp)) {
1984 	case M_DATA:
1985 		tl_data(mp, tep);
1986 		break;
1987 	case M_PROTO:
1988 		tl_do_proto(mp, tep);
1989 		break;
1990 	default:
1991 		freemsg(mp);
1992 		break;
1993 	}
1994 }
1995 
1996 /*
1997  * Write side put procedure called from serializer.
1998  */
1999 static void
tl_wput_ser(mblk_t * mp,tl_endpt_t * tep)2000 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
2001 {
2002 	tl_wput_common_ser(mp, tep);
2003 	tl_serializer_exit(tep);
2004 	tl_refrele(tep);
2005 }
2006 
2007 /*
2008  * M_DATA processing. Called from serializer.
2009  */
2010 static void
tl_wput_data_ser(mblk_t * mp,tl_endpt_t * tep)2011 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
2012 {
2013 	tl_endpt_t	*peer_tep = tep->te_conp;
2014 	queue_t		*peer_rq;
2015 
2016 	ASSERT(DB_TYPE(mp) == M_DATA);
2017 	ASSERT(IS_COTS(tep));
2018 
2019 	IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
2020 
2021 	/*
2022 	 * fastpath for data. Ignore flow control if tep is closing.
2023 	 */
2024 	if ((peer_tep != NULL) &&
2025 	    !peer_tep->te_closing &&
2026 	    ((tep->te_state == TS_DATA_XFER) ||
2027 	    (tep->te_state == TS_WREQ_ORDREL)) &&
2028 	    (tep->te_wq != NULL) &&
2029 	    (tep->te_wq->q_first == NULL) &&
2030 	    (peer_tep->te_state == TS_DATA_XFER ||
2031 	    peer_tep->te_state == TS_WIND_ORDREL ||
2032 	    peer_tep->te_state == TS_WREQ_ORDREL) &&
2033 	    ((peer_rq = peer_tep->te_rq) != NULL) &&
2034 	    (canputnext(peer_rq) || tep->te_closing)) {
2035 		putnext(peer_rq, mp);
2036 	} else if (tep->te_closing) {
2037 		/*
2038 		 * It is possible that by the time we got here tep started to
2039 		 * close. If the write queue is not empty, and the state is
2040 		 * TS_DATA_XFER the data should be delivered in order, so we
2041 		 * call putq() instead of freeing the data.
2042 		 */
2043 		if ((tep->te_wq != NULL) &&
2044 		    ((tep->te_state == TS_DATA_XFER) ||
2045 		    (tep->te_state == TS_WREQ_ORDREL))) {
2046 			TL_PUTQ(tep, mp);
2047 		} else {
2048 			freemsg(mp);
2049 		}
2050 	} else {
2051 		TL_PUTQ(tep, mp);
2052 	}
2053 
2054 	tl_serializer_exit(tep);
2055 	tl_refrele(tep);
2056 }
2057 
2058 /*
2059  * Write side service routine.
2060  *
2061  * All actual processing happens within serializer which is entered
2062  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
2063  * messages that need processing may have arrived, so tl_wsrv repeats until
2064  * queue is empty or te_nowsrv is set.
2065  */
2066 static int
tl_wsrv(queue_t * wq)2067 tl_wsrv(queue_t *wq)
2068 {
2069 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2070 
2071 	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
2072 		mutex_enter(&tep->te_srv_lock);
2073 		ASSERT(tep->te_wsrv_active == B_FALSE);
2074 		tep->te_wsrv_active = B_TRUE;
2075 		mutex_exit(&tep->te_srv_lock);
2076 
2077 		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2078 
2079 		/*
2080 		 * Wait for serializer job to complete.
2081 		 */
2082 		mutex_enter(&tep->te_srv_lock);
2083 		while (tep->te_wsrv_active) {
2084 			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2085 		}
2086 		cv_signal(&tep->te_srv_cv);
2087 		mutex_exit(&tep->te_srv_lock);
2088 	}
2089 	return (0);
2090 }
2091 
2092 /*
2093  * Serialized write side processing of the STREAMS queue.
2094  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2095  * is NULL.
2096  */
2097 static void
tl_wsrv_ser(mblk_t * ser_mp,tl_endpt_t * tep)2098 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2099 {
2100 	mblk_t *mp;
2101 	queue_t *wq = tep->te_wq;
2102 
2103 	ASSERT(wq != NULL);
2104 	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2105 		tl_wput_common_ser(mp, tep);
2106 	}
2107 
2108 	/*
2109 	 * Wakeup service routine unless called from close.
2110 	 * If ser_mp is specified, the caller is tl_wsrv().
2111 	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2112 	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2113 	 * be no matching tl_serializer_exit() in this case.
2114 	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2115 	 * waiting on te_srv_cv.
2116 	 */
2117 	if (ser_mp != NULL) {
2118 		/*
2119 		 * We are called from tl_wsrv.
2120 		 */
2121 		mutex_enter(&tep->te_srv_lock);
2122 		ASSERT(tep->te_wsrv_active);
2123 		tep->te_wsrv_active = B_FALSE;
2124 		cv_signal(&tep->te_srv_cv);
2125 		mutex_exit(&tep->te_srv_lock);
2126 		tl_serializer_exit(tep);
2127 	}
2128 }
2129 
2130 /*
2131  * Called when the stream is backenabled. Enter serializer and qenable everyone
2132  * flow controlled by tep.
2133  *
2134  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2135  * is possible that two instances of tl_rsrv will be running reusing the same
2136  * rsrv mblk.
2137  */
2138 static int
tl_rsrv(queue_t * rq)2139 tl_rsrv(queue_t *rq)
2140 {
2141 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2142 
2143 	ASSERT(rq->q_first == NULL);
2144 	ASSERT(tep->te_rsrv_active == 0);
2145 
2146 	tep->te_rsrv_active = B_TRUE;
2147 	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2148 	/*
2149 	 * Wait for serializer job to complete.
2150 	 */
2151 	mutex_enter(&tep->te_srv_lock);
2152 	while (tep->te_rsrv_active) {
2153 		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2154 	}
2155 	cv_signal(&tep->te_srv_cv);
2156 	mutex_exit(&tep->te_srv_lock);
2157 	return (0);
2158 }
2159 
2160 /* ARGSUSED */
2161 static void
tl_rsrv_ser(mblk_t * mp,tl_endpt_t * tep)2162 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2163 {
2164 	tl_endpt_t *peer_tep;
2165 
2166 	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2167 		tl_cl_backenable(tep);
2168 	} else if (
2169 	    IS_COTS(tep) &&
2170 	    ((peer_tep = tep->te_conp) != NULL) &&
2171 	    !peer_tep->te_closing &&
2172 	    ((tep->te_state == TS_DATA_XFER) ||
2173 	    (tep->te_state == TS_WIND_ORDREL)||
2174 	    (tep->te_state == TS_WREQ_ORDREL))) {
2175 		TL_QENABLE(peer_tep);
2176 	}
2177 
2178 	/*
2179 	 * Wakeup read side service routine.
2180 	 */
2181 	mutex_enter(&tep->te_srv_lock);
2182 	ASSERT(tep->te_rsrv_active);
2183 	tep->te_rsrv_active = B_FALSE;
2184 	cv_signal(&tep->te_srv_cv);
2185 	mutex_exit(&tep->te_srv_lock);
2186 	tl_serializer_exit(tep);
2187 }
2188 
2189 /*
2190  * process M_PROTO messages. Always called from serializer.
2191  */
2192 static void
tl_do_proto(mblk_t * mp,tl_endpt_t * tep)2193 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2194 {
2195 	ssize_t			msz = MBLKL(mp);
2196 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2197 
2198 	/* Message size was validated by tl_wput(). */
2199 	ASSERT(msz >= sizeof (prim->type));
2200 
2201 	switch (prim->type) {
2202 	case T_UNBIND_REQ:
2203 		tl_unbind(mp, tep);
2204 		break;
2205 
2206 	case T_ADDR_REQ:
2207 		tl_addr_req(mp, tep);
2208 		break;
2209 
2210 	case O_T_CONN_RES:
2211 	case T_CONN_RES:
2212 		if (IS_CLTS(tep)) {
2213 			tl_merror(tep->te_wq, mp, EPROTO);
2214 			break;
2215 		}
2216 		tl_conn_res(mp, tep);
2217 		break;
2218 
2219 	case T_DISCON_REQ:
2220 		if (IS_CLTS(tep)) {
2221 			tl_merror(tep->te_wq, mp, EPROTO);
2222 			break;
2223 		}
2224 		tl_discon_req(mp, tep);
2225 		break;
2226 
2227 	case T_DATA_REQ:
2228 		if (IS_CLTS(tep)) {
2229 			tl_merror(tep->te_wq, mp, EPROTO);
2230 			break;
2231 		}
2232 		tl_data(mp, tep);
2233 		break;
2234 
2235 	case T_OPTDATA_REQ:
2236 		if (IS_CLTS(tep)) {
2237 			tl_merror(tep->te_wq, mp, EPROTO);
2238 			break;
2239 		}
2240 		tl_data(mp, tep);
2241 		break;
2242 
2243 	case T_EXDATA_REQ:
2244 		if (IS_CLTS(tep)) {
2245 			tl_merror(tep->te_wq, mp, EPROTO);
2246 			break;
2247 		}
2248 		tl_exdata(mp, tep);
2249 		break;
2250 
2251 	case T_ORDREL_REQ:
2252 		if (!IS_COTSORD(tep)) {
2253 			tl_merror(tep->te_wq, mp, EPROTO);
2254 			break;
2255 		}
2256 		tl_ordrel(mp, tep);
2257 		break;
2258 
2259 	case T_UNITDATA_REQ:
2260 		if (IS_COTS(tep)) {
2261 			tl_merror(tep->te_wq, mp, EPROTO);
2262 			break;
2263 		}
2264 		tl_unitdata(mp, tep);
2265 		break;
2266 
2267 	default:
2268 		tl_merror(tep->te_wq, mp, EPROTO);
2269 		break;
2270 	}
2271 }
2272 
2273 /*
2274  * Process ioctl from serializer.
2275  * This is a wrapper around tl_do_ioctl().
2276  */
2277 static void
tl_do_ioctl_ser(mblk_t * mp,tl_endpt_t * tep)2278 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2279 {
2280 	if (!tep->te_closing)
2281 		tl_do_ioctl(mp, tep);
2282 	else
2283 		freemsg(mp);
2284 
2285 	tl_serializer_exit(tep);
2286 	tl_refrele(tep);
2287 }
2288 
2289 static void
tl_do_ioctl(mblk_t * mp,tl_endpt_t * tep)2290 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2291 {
2292 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2293 	int cmd = iocbp->ioc_cmd;
2294 	queue_t *wq = tep->te_wq;
2295 	int error;
2296 	int thisopt, otheropt;
2297 
2298 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2299 
2300 	switch (cmd) {
2301 	case TL_IOC_CREDOPT:
2302 		if (cmd == TL_IOC_CREDOPT) {
2303 			thisopt = TL_SETCRED;
2304 			otheropt = TL_SETUCRED;
2305 		} else {
2306 			/* FALLTHROUGH */
2307 	case TL_IOC_UCREDOPT:
2308 			thisopt = TL_SETUCRED;
2309 			otheropt = TL_SETCRED;
2310 		}
2311 		/*
2312 		 * The credentials passing does not apply to sockets.
2313 		 * Only one of the cred options can be set at a given time.
2314 		 */
2315 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2316 			miocnak(wq, mp, 0, EINVAL);
2317 			return;
2318 		}
2319 
2320 		/*
2321 		 * Turn on generation of credential options for
2322 		 * T_conn_req, T_conn_con, T_unidata_ind.
2323 		 */
2324 		error = miocpullup(mp, sizeof (uint32_t));
2325 		if (error != 0) {
2326 			miocnak(wq, mp, 0, error);
2327 			return;
2328 		}
2329 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2330 			miocnak(wq, mp, 0, EINVAL);
2331 			return;
2332 		}
2333 
2334 		if (*(uint32_t *)mp->b_cont->b_rptr)
2335 			tep->te_flag |= thisopt;
2336 		else
2337 			tep->te_flag &= ~thisopt;
2338 
2339 		miocack(wq, mp, 0, 0);
2340 		break;
2341 
2342 	default:
2343 		/* Should not be here */
2344 		miocnak(wq, mp, 0, EINVAL);
2345 		break;
2346 	}
2347 }
2348 
2349 
2350 /*
2351  * send T_ERROR_ACK
2352  * Note: assumes enough memory or caller passed big enough mp
2353  *	- no recovery from allocb failures
2354  */
2355 
2356 static void
tl_error_ack(queue_t * wq,mblk_t * mp,t_scalar_t tli_err,t_scalar_t unix_err,t_scalar_t type)2357 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2358     t_scalar_t unix_err, t_scalar_t type)
2359 {
2360 	struct T_error_ack *err_ack;
2361 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2362 	    M_PCPROTO, T_ERROR_ACK);
2363 
2364 	if (ackmp == NULL) {
2365 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE | SL_ERROR,
2366 		    "tl_error_ack:out of mblk memory"));
2367 		tl_merror(wq, NULL, ENOSR);
2368 		return;
2369 	}
2370 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2371 	err_ack->ERROR_prim = type;
2372 	err_ack->TLI_error = tli_err;
2373 	err_ack->UNIX_error = unix_err;
2374 
2375 	/*
2376 	 * send error ack message
2377 	 */
2378 	qreply(wq, ackmp);
2379 }
2380 
2381 
2382 
2383 /*
2384  * send T_OK_ACK
2385  * Note: assumes enough memory or caller passed big enough mp
2386  *	- no recovery from allocb failures
2387  */
2388 static void
tl_ok_ack(queue_t * wq,mblk_t * mp,t_scalar_t type)2389 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2390 {
2391 	struct T_ok_ack *ok_ack;
2392 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2393 	    M_PCPROTO, T_OK_ACK);
2394 
2395 	if (ackmp == NULL) {
2396 		tl_merror(wq, NULL, ENOMEM);
2397 		return;
2398 	}
2399 
2400 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2401 	ok_ack->CORRECT_prim = type;
2402 
2403 	(void) qreply(wq, ackmp);
2404 }
2405 
2406 /*
2407  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2408  * This is a wrapper around tl_bind().
2409  */
2410 static void
tl_bind_ser(mblk_t * mp,tl_endpt_t * tep)2411 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2412 {
2413 	if (!tep->te_closing)
2414 		tl_bind(mp, tep);
2415 	else
2416 		freemsg(mp);
2417 
2418 	tl_serializer_exit(tep);
2419 	tl_refrele(tep);
2420 }
2421 
2422 /*
2423  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2424  * Assumes that the endpoint is in the unbound.
2425  */
2426 static void
tl_bind(mblk_t * mp,tl_endpt_t * tep)2427 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2428 {
2429 	queue_t			*wq = tep->te_wq;
2430 	struct T_bind_ack	*b_ack;
2431 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2432 	mblk_t			*ackmp, *bamp;
2433 	soux_addr_t		ux_addr;
2434 	t_uscalar_t		qlen = 0;
2435 	t_scalar_t		alen, aoff;
2436 	tl_addr_t		addr_req;
2437 	void			*addr_startp;
2438 	ssize_t			msz = MBLKL(mp), basize;
2439 	t_scalar_t		tli_err = 0, unix_err = 0;
2440 	t_scalar_t		save_prim_type = bind->PRIM_type;
2441 	t_scalar_t		save_state = tep->te_state;
2442 
2443 	if (tep->te_state != TS_UNBND) {
2444 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2445 		    SL_TRACE | SL_ERROR,
2446 		    "tl_wput:bind_request:out of state, state=%d",
2447 		    tep->te_state));
2448 		tli_err = TOUTSTATE;
2449 		goto error;
2450 	}
2451 
2452 	if (msz < sizeof (struct T_bind_req)) {
2453 		tli_err = TSYSERR;
2454 		unix_err = EINVAL;
2455 		goto error;
2456 	}
2457 
2458 	tep->te_state = nextstate[TE_BIND_REQ][tep->te_state];
2459 
2460 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2461 	    (bind->PRIM_type == T_BIND_REQ));
2462 
2463 	alen = bind->ADDR_length;
2464 	aoff = bind->ADDR_offset;
2465 
2466 	/* negotiate max conn req pending */
2467 	if (IS_COTS(tep)) {
2468 		qlen = bind->CONIND_number;
2469 		if (qlen > tl_maxqlen)
2470 			qlen = tl_maxqlen;
2471 	}
2472 
2473 	/*
2474 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2475 	 * and bound again.
2476 	 */
2477 	if ((tep->te_hash_hndl == NULL) &&
2478 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2479 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2480 	    &tep->te_hash_hndl) != 0) {
2481 		tli_err = TSYSERR;
2482 		unix_err = ENOSR;
2483 		goto error;
2484 	}
2485 
2486 	/*
2487 	 * Verify address correctness.
2488 	 */
2489 	if (IS_SOCKET(tep)) {
2490 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2491 
2492 		if ((alen != TL_SOUX_ADDRLEN) ||
2493 		    (aoff < 0) ||
2494 		    (aoff + alen > msz)) {
2495 			(void) (STRLOG(TL_ID, tep->te_minor,
2496 			    1, SL_TRACE | SL_ERROR,
2497 			    "tl_bind: invalid socket addr"));
2498 			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2499 			tli_err = TSYSERR;
2500 			unix_err = EINVAL;
2501 			goto error;
2502 		}
2503 		/* Copy address from message to local buffer. */
2504 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2505 		/*
2506 		 * Check that we got correct address from sockets
2507 		 */
2508 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2509 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2510 			(void) (STRLOG(TL_ID, tep->te_minor,
2511 			    1, SL_TRACE | SL_ERROR,
2512 			    "tl_bind: invalid socket magic"));
2513 			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2514 			tli_err = TSYSERR;
2515 			unix_err = EINVAL;
2516 			goto error;
2517 		}
2518 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2519 		    (ux_addr.soua_vp != NULL)) {
2520 			(void) (STRLOG(TL_ID, tep->te_minor,
2521 			    1, SL_TRACE | SL_ERROR,
2522 			    "tl_bind: implicit addr non-empty"));
2523 			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2524 			tli_err = TSYSERR;
2525 			unix_err = EINVAL;
2526 			goto error;
2527 		}
2528 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2529 		    (ux_addr.soua_vp == NULL)) {
2530 			(void) (STRLOG(TL_ID, tep->te_minor,
2531 			    1, SL_TRACE | SL_ERROR,
2532 			    "tl_bind: explicit addr empty"));
2533 			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2534 			tli_err = TSYSERR;
2535 			unix_err = EINVAL;
2536 			goto error;
2537 		}
2538 	} else {
2539 		if ((alen > 0) && ((aoff < 0) ||
2540 		    ((ssize_t)(aoff + alen) > msz) ||
2541 		    ((aoff + alen) < 0))) {
2542 			(void) (STRLOG(TL_ID, tep->te_minor,
2543 			    1, SL_TRACE | SL_ERROR,
2544 			    "tl_bind: invalid message"));
2545 			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2546 			tli_err = TSYSERR;
2547 			unix_err = EINVAL;
2548 			goto error;
2549 		}
2550 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2551 			(void) (STRLOG(TL_ID, tep->te_minor,
2552 			    1, SL_TRACE | SL_ERROR,
2553 			    "tl_bind: bad addr in  message"));
2554 			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2555 			tli_err = TBADADDR;
2556 			goto error;
2557 		}
2558 #ifdef DEBUG
2559 		/*
2560 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2561 		 * if (!assertion)
2562 		 *	log warning;
2563 		 */
2564 		if (!((alen == 0 && aoff == 0) ||
2565 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2566 			(void) (STRLOG(TL_ID, tep->te_minor,
2567 				    3, SL_TRACE | SL_ERROR,
2568 				    "tl_bind: addr overlaps TPI message"));
2569 		}
2570 #endif
2571 	}
2572 
2573 	/*
2574 	 * Bind the address provided or allocate one if requested.
2575 	 * Allow rebinds with a new qlen value.
2576 	 */
2577 	if (IS_SOCKET(tep)) {
2578 		/*
2579 		 * For anonymous requests the te_ap is already set up properly
2580 		 * so use minor number as an address.
2581 		 * For explicit requests need to check whether the address is
2582 		 * already in use.
2583 		 */
2584 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2585 			int rc;
2586 
2587 			if (tep->te_flag & TL_ADDRHASHED) {
2588 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2589 				if (tep->te_vp == ux_addr.soua_vp)
2590 					goto skip_addr_bind;
2591 				else /* Rebind to a new address. */
2592 					tl_addr_unbind(tep);
2593 			}
2594 			/*
2595 			 * Insert address in the hash if it is not already
2596 			 * there.  Since we use preallocated handle, the insert
2597 			 * can fail only if the key is already present.
2598 			 */
2599 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2600 			    (mod_hash_key_t)ux_addr.soua_vp,
2601 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2602 
2603 			if (rc != 0) {
2604 				ASSERT(rc == MH_ERR_DUPLICATE);
2605 				/*
2606 				 * Violate O_T_BIND_REQ semantics and fail with
2607 				 * TADDRBUSY - sockets will not use any address
2608 				 * other than supplied one for explicit binds.
2609 				 */
2610 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2611 				    SL_TRACE | SL_ERROR,
2612 				    "tl_bind:requested addr %p is busy",
2613 				    ux_addr.soua_vp));
2614 				tli_err = TADDRBUSY;
2615 				unix_err = 0;
2616 				goto error;
2617 			}
2618 			tep->te_uxaddr = ux_addr;
2619 			tep->te_flag |= TL_ADDRHASHED;
2620 			tep->te_hash_hndl = NULL;
2621 		}
2622 	} else if (alen == 0) {
2623 		/*
2624 		 * assign any free address
2625 		 */
2626 		if (!tl_get_any_addr(tep, NULL)) {
2627 			(void) (STRLOG(TL_ID, tep->te_minor,
2628 			    1, SL_TRACE | SL_ERROR,
2629 			    "tl_bind:failed to get buffer for any "
2630 			    "address"));
2631 			tli_err = TSYSERR;
2632 			unix_err = ENOSR;
2633 			goto error;
2634 		}
2635 	} else {
2636 		addr_req.ta_alen = alen;
2637 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2638 		addr_req.ta_zoneid = tep->te_zoneid;
2639 
2640 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2641 		if (tep->te_abuf == NULL) {
2642 			tli_err = TSYSERR;
2643 			unix_err = ENOSR;
2644 			goto error;
2645 		}
2646 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2647 		tep->te_alen = alen;
2648 
2649 		if (mod_hash_insert_reserve(tep->te_addrhash,
2650 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2651 		    tep->te_hash_hndl) != 0) {
2652 			if (save_prim_type == T_BIND_REQ) {
2653 				/*
2654 				 * The bind semantics for this primitive
2655 				 * require a failure if the exact address
2656 				 * requested is busy
2657 				 */
2658 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2659 				    SL_TRACE | SL_ERROR,
2660 				    "tl_bind:requested addr is busy"));
2661 				tli_err = TADDRBUSY;
2662 				unix_err = 0;
2663 				goto error;
2664 			}
2665 
2666 			/*
2667 			 * O_T_BIND_REQ semantics say if address if requested
2668 			 * address is busy, bind to any available free address
2669 			 */
2670 			if (!tl_get_any_addr(tep, &addr_req)) {
2671 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2672 				    SL_TRACE | SL_ERROR,
2673 				    "tl_bind:unable to get any addr buf"));
2674 				tli_err = TSYSERR;
2675 				unix_err = ENOMEM;
2676 				goto error;
2677 			}
2678 		} else {
2679 			tep->te_flag |= TL_ADDRHASHED;
2680 			tep->te_hash_hndl = NULL;
2681 		}
2682 	}
2683 
2684 	ASSERT(tep->te_alen >= 0);
2685 
2686 skip_addr_bind:
2687 	/*
2688 	 * prepare T_BIND_ACK TPI message
2689 	 */
2690 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2691 	bamp = reallocb(mp, basize, 0);
2692 	if (bamp == NULL) {
2693 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2694 		    "tl_wput:tl_bind: allocb failed"));
2695 		/*
2696 		 * roll back state changes
2697 		 */
2698 		tl_addr_unbind(tep);
2699 		tep->te_state = TS_UNBND;
2700 		tl_memrecover(wq, mp, basize);
2701 		return;
2702 	}
2703 
2704 	DB_TYPE(bamp) = M_PCPROTO;
2705 	bamp->b_wptr = bamp->b_rptr + basize;
2706 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2707 	b_ack->PRIM_type = T_BIND_ACK;
2708 	b_ack->CONIND_number = qlen;
2709 	b_ack->ADDR_length = tep->te_alen;
2710 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2711 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2712 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2713 
2714 	if (IS_COTS(tep)) {
2715 		tep->te_qlen = qlen;
2716 		if (qlen > 0)
2717 			tep->te_flag |= TL_LISTENER;
2718 	}
2719 
2720 	tep->te_state = nextstate[TE_BIND_ACK][tep->te_state];
2721 	/*
2722 	 * send T_BIND_ACK message
2723 	 */
2724 	(void) qreply(wq, bamp);
2725 	return;
2726 
2727 error:
2728 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2729 	if (ackmp == NULL) {
2730 		/*
2731 		 * roll back state changes
2732 		 */
2733 		tep->te_state = save_state;
2734 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2735 		return;
2736 	}
2737 	tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2738 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2739 }
2740 
2741 /*
2742  * Process T_UNBIND_REQ.
2743  * Called from serializer.
2744  */
2745 static void
tl_unbind(mblk_t * mp,tl_endpt_t * tep)2746 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2747 {
2748 	queue_t *wq;
2749 	mblk_t *ackmp;
2750 
2751 	if (tep->te_closing) {
2752 		freemsg(mp);
2753 		return;
2754 	}
2755 
2756 	wq = tep->te_wq;
2757 
2758 	/*
2759 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2760 	 * ==> allocate for T_ERROR_ACK (known max)
2761 	 */
2762 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2763 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2764 		return;
2765 	}
2766 	/*
2767 	 * memory resources committed
2768 	 * Note: no message validation. T_UNBIND_REQ message is
2769 	 * same size as PRIM_type field so already verified earlier.
2770 	 */
2771 
2772 	/*
2773 	 * validate state
2774 	 */
2775 	if (tep->te_state != TS_IDLE) {
2776 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2777 		    SL_TRACE | SL_ERROR,
2778 		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2779 		    tep->te_state));
2780 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2781 		return;
2782 	}
2783 	tep->te_state = nextstate[TE_UNBIND_REQ][tep->te_state];
2784 
2785 	/*
2786 	 * TPI says on T_UNBIND_REQ:
2787 	 *    send up a M_FLUSH to flush both
2788 	 *    read and write queues
2789 	 */
2790 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2791 
2792 	if (!IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2793 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2794 
2795 		/*
2796 		 * Sockets use bind with qlen==0 followed by bind() to
2797 		 * the same address with qlen > 0 for listeners.
2798 		 * We allow rebind with a new qlen value.
2799 		 */
2800 		tl_addr_unbind(tep);
2801 	}
2802 
2803 	tep->te_state = nextstate[TE_OK_ACK1][tep->te_state];
2804 	/*
2805 	 * send  T_OK_ACK
2806 	 */
2807 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2808 }
2809 
2810 
2811 /*
2812  * Option management code from drv/ip is used here
2813  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2814  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2815  *	However, that is what we want as that option is 'unorthodox'
2816  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2817  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2818  * Note2: use of optcom_req means this routine is an exception to
2819  *	 recovery from allocb() failures.
2820  */
2821 
2822 static void
tl_optmgmt(queue_t * wq,mblk_t * mp)2823 tl_optmgmt(queue_t *wq, mblk_t *mp)
2824 {
2825 	tl_endpt_t *tep;
2826 	mblk_t *ackmp;
2827 	union T_primitives *prim;
2828 	cred_t *cr;
2829 
2830 	tep = (tl_endpt_t *)wq->q_ptr;
2831 	prim = (union T_primitives *)mp->b_rptr;
2832 
2833 	/*
2834 	 * All Solaris components should pass a db_credp
2835 	 * for this TPI message, hence we ASSERT.
2836 	 * But in case there is some other M_PROTO that looks
2837 	 * like a TPI message sent by some other kernel
2838 	 * component, we check and return an error.
2839 	 */
2840 	cr = msg_getcred(mp, NULL);
2841 	ASSERT(cr != NULL);
2842 	if (cr == NULL) {
2843 		tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2844 		return;
2845 	}
2846 
2847 	/*  all states OK for AF_UNIX options ? */
2848 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2849 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2850 		/*
2851 		 * Broken TLI semantics that options can only be managed
2852 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2853 		 * tests this TLI (mis)feature using this device driver.
2854 		 */
2855 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2856 		    SL_TRACE | SL_ERROR,
2857 		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2858 		    tep->te_state));
2859 		/*
2860 		 * preallocate memory for T_ERROR_ACK
2861 		 */
2862 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2863 		if (ackmp == NULL) {
2864 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2865 			return;
2866 		}
2867 
2868 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2869 		freemsg(mp);
2870 		return;
2871 	}
2872 
2873 	/*
2874 	 * call common option management routine from drv/ip
2875 	 */
2876 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2877 		svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2878 	} else {
2879 		ASSERT(prim->type == T_OPTMGMT_REQ);
2880 		tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2881 	}
2882 }
2883 
2884 /*
2885  * Handle T_conn_req - the driver part of accept().
2886  * If TL_SET[U]CRED generate the credentials options.
2887  * If this is a socket pass through options unmodified.
2888  * For sockets generate the T_CONN_CON here instead of
2889  * waiting for the T_CONN_RES.
2890  */
2891 static void
tl_conn_req(queue_t * wq,mblk_t * mp)2892 tl_conn_req(queue_t *wq, mblk_t *mp)
2893 {
2894 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2895 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2896 	ssize_t			msz = MBLKL(mp);
2897 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2898 	tl_endpt_t		*peer_tep = NULL;
2899 	mblk_t			*ackmp;
2900 	mblk_t			*dimp;
2901 	struct T_discon_ind	*di;
2902 	soux_addr_t		ux_addr;
2903 	tl_addr_t		dst;
2904 
2905 	ASSERT(IS_COTS(tep));
2906 
2907 	if (tep->te_closing) {
2908 		freemsg(mp);
2909 		return;
2910 	}
2911 
2912 	/*
2913 	 * preallocate memory for:
2914 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2915 	 *	==> known max T_ERROR_ACK
2916 	 * 2. max of T_DISCON_IND and T_CONN_IND
2917 	 */
2918 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2919 	if (ackmp == NULL) {
2920 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2921 		return;
2922 	}
2923 	/*
2924 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2925 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2926 	 */
2927 
2928 	if (tep->te_state != TS_IDLE) {
2929 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2930 		    SL_TRACE | SL_ERROR,
2931 		    "tl_wput:T_CONN_REQ:out of state, state=%d",
2932 		    tep->te_state));
2933 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2934 		freemsg(mp);
2935 		return;
2936 	}
2937 
2938 	/*
2939 	 * validate the message
2940 	 * Note: dereference fields in struct inside message only
2941 	 * after validating the message length.
2942 	 */
2943 	if (msz < sizeof (struct T_conn_req)) {
2944 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2945 		    "tl_conn_req:invalid message length"));
2946 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2947 		freemsg(mp);
2948 		return;
2949 	}
2950 	alen = creq->DEST_length;
2951 	aoff = creq->DEST_offset;
2952 	olen = creq->OPT_length;
2953 	ooff = creq->OPT_offset;
2954 	if (olen == 0)
2955 		ooff = 0;
2956 
2957 	if (IS_SOCKET(tep)) {
2958 		if ((alen != TL_SOUX_ADDRLEN) ||
2959 		    (aoff < 0) ||
2960 		    (aoff + alen > msz) ||
2961 		    (alen > msz - sizeof (struct T_conn_req))) {
2962 			(void) (STRLOG(TL_ID, tep->te_minor,
2963 				    1, SL_TRACE | SL_ERROR,
2964 				    "tl_conn_req: invalid socket addr"));
2965 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2966 			freemsg(mp);
2967 			return;
2968 		}
2969 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2970 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2971 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2972 			(void) (STRLOG(TL_ID, tep->te_minor,
2973 			    1, SL_TRACE | SL_ERROR,
2974 			    "tl_conn_req: invalid socket magic"));
2975 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2976 			freemsg(mp);
2977 			return;
2978 		}
2979 	} else {
2980 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2981 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2982 		    ooff + olen < 0)) ||
2983 		    olen < 0 || ooff < 0) {
2984 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2985 			    SL_TRACE | SL_ERROR,
2986 			    "tl_conn_req:invalid message"));
2987 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2988 			freemsg(mp);
2989 			return;
2990 		}
2991 
2992 		if (alen <= 0 || aoff < 0 ||
2993 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2994 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2995 				    SL_TRACE | SL_ERROR,
2996 				    "tl_conn_req:bad addr in message, "
2997 				    "alen=%d, msz=%ld",
2998 				    alen, msz));
2999 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
3000 			freemsg(mp);
3001 			return;
3002 		}
3003 #ifdef DEBUG
3004 		/*
3005 		 * Mild form of ASSERT()ion to detect broken TPI apps.
3006 		 * if (!assertion)
3007 		 *	log warning;
3008 		 */
3009 		if (!(aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
3010 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3011 			    SL_TRACE | SL_ERROR,
3012 			    "tl_conn_req: addr overlaps TPI message"));
3013 		}
3014 #endif
3015 		if (olen) {
3016 			/*
3017 			 * no opts in connect req
3018 			 * supported in this provider except for sockets.
3019 			 */
3020 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
3021 			    SL_TRACE | SL_ERROR,
3022 			    "tl_conn_req:options not supported "
3023 			    "in message"));
3024 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
3025 			freemsg(mp);
3026 			return;
3027 		}
3028 	}
3029 
3030 	/*
3031 	 * Prevent tep from closing on us.
3032 	 */
3033 	if (!tl_noclose(tep)) {
3034 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3035 		    "tl_conn_req:endpoint is closing"));
3036 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
3037 		freemsg(mp);
3038 		return;
3039 	}
3040 
3041 	tep->te_state = nextstate[TE_CONN_REQ][tep->te_state];
3042 	/*
3043 	 * get endpoint to connect to
3044 	 * check that peer with DEST addr is bound to addr
3045 	 * and has CONIND_number > 0
3046 	 */
3047 	dst.ta_alen = alen;
3048 	dst.ta_abuf = mp->b_rptr + aoff;
3049 	dst.ta_zoneid = tep->te_zoneid;
3050 
3051 	/*
3052 	 * Verify if remote addr is in use
3053 	 */
3054 	peer_tep = (IS_SOCKET(tep) ?
3055 	    tl_sock_find_peer(tep, &ux_addr) :
3056 	    tl_find_peer(tep, &dst));
3057 
3058 	if (peer_tep == NULL) {
3059 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3060 		    "tl_conn_req:no one at connect address"));
3061 		err = ECONNREFUSED;
3062 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
3063 		/*
3064 		 * validate that number of incoming connection is
3065 		 * not to capacity on destination endpoint
3066 		 */
3067 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3068 		    "tl_conn_req: qlen overflow connection refused"));
3069 		err = ECONNREFUSED;
3070 	}
3071 
3072 	/*
3073 	 * Send T_DISCON_IND in case of error
3074 	 */
3075 	if (err != 0) {
3076 		if (peer_tep != NULL)
3077 			tl_refrele(peer_tep);
3078 		/* We are still expected to send T_OK_ACK */
3079 		tep->te_state = nextstate[TE_OK_ACK1][tep->te_state];
3080 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
3081 		tl_closeok(tep);
3082 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
3083 		    M_PROTO, T_DISCON_IND);
3084 		if (dimp == NULL) {
3085 			tl_merror(wq, NULL, ENOSR);
3086 			return;
3087 		}
3088 		di = (struct T_discon_ind *)dimp->b_rptr;
3089 		di->DISCON_reason = err;
3090 		di->SEQ_number = BADSEQNUM;
3091 
3092 		tep->te_state = TS_IDLE;
3093 		/*
3094 		 * send T_DISCON_IND message
3095 		 */
3096 		putnext(tep->te_rq, dimp);
3097 		return;
3098 	}
3099 
3100 	ASSERT(IS_COTS(peer_tep));
3101 
3102 	/*
3103 	 * Found the listener. At this point processing will continue on
3104 	 * listener serializer. Close of the endpoint should be blocked while we
3105 	 * switch serializers.
3106 	 */
3107 	tl_serializer_refhold(peer_tep->te_ser);
3108 	tl_serializer_refrele(tep->te_ser);
3109 	tep->te_ser = peer_tep->te_ser;
3110 	ASSERT(tep->te_oconp == NULL);
3111 	tep->te_oconp = peer_tep;
3112 
3113 	/*
3114 	 * It is safe to close now. Close may continue on listener serializer.
3115 	 */
3116 	tl_closeok(tep);
3117 
3118 	/*
3119 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3120 	 * data, so we link mp to ackmp.
3121 	 */
3122 	ackmp->b_cont = mp;
3123 	mp = ackmp;
3124 
3125 	tl_refhold(tep);
3126 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3127 }
3128 
/*
 * Finish T_CONN_REQ processing on listener serializer.
 *
 * mp  - the preallocated ack mblk, with the original T_CONN_REQ message
 *	 linked on its b_cont (this is how tl_conn_req() hands both over).
 * tep - the connecting endpoint; tep->te_oconp is the listener it is
 *	 connecting to.
 *
 * On success the T_CONN_REQ is acknowledged with T_OK_ACK, a T_CONN_IND is
 * sent up the listener's read queue, a tl_icon_t for this connection is
 * appended to the listener's te_iconp list and, for sockets (unless
 * tl_disable_early_connect is set), a T_CONN_CON is sent up the connecting
 * endpoint right away.
 *
 * Every path leaves the serializer with tl_serializer_exit(). All failure
 * paths also call tl_refrele(tep); the success path deliberately does not
 * (see the comment at the end of the function).
 */
static void
tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t		*wq;
	tl_endpt_t	*peer_tep = tep->te_oconp;
	mblk_t		*confmp, *cimp, *indmp;
	void		*opts = NULL;
	mblk_t		*ackmp = mp;
	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
	struct T_conn_ind	*ci;
	tl_icon_t	*tip;
	void		*addr_startp;
	/*
	 * Option offset/length are re-read from the request message; they are
	 * not re-validated against the message length in this function.
	 */
	t_scalar_t	olen = creq->OPT_length;
	t_scalar_t	ooff = creq->OPT_offset;
	size_t		ci_msz;
	size_t		size;
	cred_t		*cr = NULL;
	pid_t		cpid;

	if (tep->te_closing) {
		/*
		 * The connecting endpoint started closing before we got here:
		 * undo the link to the listener and drop everything.
		 * freemsg(mp) releases both the ack mblk and the request
		 * chained on its b_cont.
		 */
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;
	tep->te_flag |= TL_EAGER;

	/*
	 * Extract preallocated ackmp from mp.
	 * From here on mp is the T_CONN_REQ and ackmp stands alone.
	 */
	mp = mp->b_cont;
	ackmp->b_cont = NULL;

	if (olen == 0)
		ooff = 0;

	if (peer_tep->te_closing ||
	    !((peer_tep->te_state == TS_IDLE) ||
	    (peer_tep->te_state == TS_WRES_CIND))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_req:peer in bad state (%d)",
		    peer_tep->te_state));
		TL_UNCONNECT(tep->te_oconp);
		/*
		 * The request mblk itself is passed to tl_error_ack() here,
		 * so the preallocated ackmp is not needed and is freed.
		 */
		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
		freemsg(ackmp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		return;
	}

	/*
	 * preallocate now for T_DISCON_IND or T_CONN_IND
	 */
	/*
	 * calculate length of T_CONN_IND message
	 */
	if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
		/*
		 * The listener asked for credentials: the T_CONN_IND carries
		 * exactly one generated credential option. ooff is zeroed so
		 * that the options from the request are neither saved nor
		 * copied below; olen becomes the size of the generated
		 * option.
		 */
		cr = msg_getcred(mp, &cpid);
		ASSERT(cr != NULL);
		if (peer_tep->te_flag & TL_SETCRED) {
			ooff = 0;
			olen = (t_scalar_t) sizeof (struct opthdr) +
			    OPTLEN(sizeof (tl_credopt_t));
			/* 1 option only */
		} else {
			ooff = 0;
			olen = (t_scalar_t)sizeof (struct opthdr) +
			    OPTLEN(ucredminsize(cr));
			/* 1 option only */
		}
	}
	/*
	 * T_CONN_IND layout: header, source address (this endpoint's bound
	 * address), then the options at the next T_ALIGN boundary. The same
	 * buffer must also be able to hold a T_DISCON_IND.
	 */
	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
	ci_msz = T_ALIGN(ci_msz) + olen;
	size = max(ci_msz, sizeof (struct T_discon_ind));

	/*
	 * Save options from mp - we'll need them for T_CONN_IND.
	 * They have to be copied out because mp is reallocated and
	 * overwritten with the T_CONN_IND further down.
	 */
	if (ooff != 0) {
		opts = kmem_alloc(olen, KM_NOSLEEP);
		if (opts == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_IDLE;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			TL_UNCONNECT(tep->te_oconp);
			tl_serializer_exit(tep);
			tl_refrele(tep);
			return;
		}
		/* Copy options to a temp buffer */
		bcopy(mp->b_rptr + ooff, opts, olen);
	}

	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
		/*
		 * Generate a T_CONN_CON that has the identical address
		 * (and options) as the T_CONN_REQ.
		 * NOTE: assumes that the T_conn_req and T_conn_con structures
		 * are isomorphic.
		 */
		confmp = copyb(mp);
		if (confmp == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_IDLE;
			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
			freemsg(ackmp);
			if (opts != NULL)
				kmem_free(opts, olen);
			TL_UNCONNECT(tep->te_oconp);
			tl_serializer_exit(tep);
			tl_refrele(tep);
			return;
		}
		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
		    T_CONN_CON;
	} else {
		/* No early T_CONN_CON; tl_conn_res() will generate it. */
		confmp = NULL;
	}
	/*
	 * Make sure the request mblk is big enough to be rewritten as the
	 * T_CONN_IND (or T_DISCON_IND). On failure mp is still the message
	 * to recover.
	 */
	if ((indmp = reallocb(mp, size, 0)) == NULL) {
		/*
		 * roll back state changes
		 */
		tep->te_state = TS_IDLE;
		tl_memrecover(wq, mp, size);
		freemsg(ackmp);
		if (opts != NULL)
			kmem_free(opts, olen);
		freemsg(confmp);
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		return;
	}

	/* Cell that will represent this connection on the listener's list. */
	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
	if (tip == NULL) {
		/*
		 * roll back state changes
		 * (after the successful reallocb() the request lives in
		 * indmp, so that is what is handed to tl_memrecover())
		 */
		tep->te_state = TS_IDLE;
		tl_memrecover(wq, indmp, sizeof (*tip));
		freemsg(ackmp);
		if (opts != NULL)
			kmem_free(opts, olen);
		freemsg(confmp);
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		return;
	}
	/* Already zero from kmem_zalloc(); set explicitly for clarity. */
	tip->ti_mp = NULL;

	/*
	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
	 * and tl_icon_t cell.
	 */

	/*
	 * ack validity of request and send the peer credential in the ACK.
	 */
	tep->te_state = nextstate[TE_OK_ACK1][tep->te_state];

	/*
	 * Attach the listener's credential to the early T_CONN_CON, if there
	 * is one. (peer_tep has already been dereferenced above, so the NULL
	 * test on it is purely defensive.)
	 */
	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
	    confmp != NULL) {
		mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
	}

	tl_ok_ack(wq, ackmp, T_CONN_REQ);

	/*
	 * prepare message to send T_CONN_IND
	 */
	/*
	 * allocate the message - original data blocks retained
	 * in the returned mblk
	 */
	cimp = tl_resizemp(indmp, size);
	if (cimp == NULL) {
		/*
		 * The T_OK_ACK has already gone up, so the request can no
		 * longer be retried; report a fatal error on the stream
		 * instead and release everything that was set aside.
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
		    "tl_conn_req:con_ind:allocb failure"));
		tl_merror(wq, indmp, ENOMEM);
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		if (opts != NULL)
			kmem_free(opts, olen);
		freemsg(confmp);
		ASSERT(tip->ti_mp == NULL);
		kmem_free(tip, sizeof (*tip));
		return;
	}

	/* Overwrite the old request in place with the T_CONN_IND. */
	DB_TYPE(cimp) = M_PROTO;
	ci = (struct T_conn_ind *)cimp->b_rptr;
	ci->PRIM_type  = T_CONN_IND;
	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
	ci->SRC_length = tep->te_alen;
	ci->SEQ_number = tep->te_seqno;

	addr_startp = cimp->b_rptr + ci->SRC_offset;
	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {

		/* Generated credential option (cr/cpid were fetched above). */
		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
		    ci->SRC_length);
		ci->OPT_length = olen; /* because only 1 option */
		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
		    cr, cpid,
		    peer_tep->te_flag, peer_tep->te_credp);
	} else if (ooff != 0) {
		/* Copy option from T_CONN_REQ */
		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
		    ci->SRC_length);
		ci->OPT_length = olen;
		ASSERT(opts != NULL);
		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
	} else {
		ci->OPT_offset = 0;
		ci->OPT_length = 0;
	}
	if (opts != NULL)
		kmem_free(opts, olen);

	/*
	 * register connection request with server peer
	 * append to list of incoming connections
	 * increment references for both peer_tep and tep: peer_tep is placed on
	 * te_oconp and tep is placed on listeners queue.
	 */
	tip->ti_tep = tep;
	tip->ti_seqno = tep->te_seqno;
	list_insert_tail(&peer_tep->te_iconp, tip);
	peer_tep->te_nicon++;

	peer_tep->te_state = nextstate[TE_CONN_IND][peer_tep->te_state];
	/*
	 * send the T_CONN_IND message
	 */
	putnext(peer_tep->te_rq, cimp);

	/*
	 * Send a T_CONN_CON message for sockets.
	 * Disable the queues until we have reached the correct state!
	 */
	if (confmp != NULL) {
		tep->te_state = nextstate[TE_CONN_CON][tep->te_state];
		noenable(wq);
		putnext(tep->te_rq, confmp);
	}
	/*
	 * Now we need to increment tep reference because tep is referenced by
	 * server list of pending connections. We also need to decrement
	 * reference before exiting serializer. Two operations void each other
	 * so we don't modify reference at all.
	 */
	ASSERT(tep->te_refcnt >= 2);
	ASSERT(peer_tep->te_refcnt >= 2);
	tl_serializer_exit(tep);
}
3399 
3400 
3401 
/*
 * Handle T_conn_res on listener stream. Called on listener serializer.
 * No one accesses acceptor at this point, so it is safe to modify acceptor.
 * Switch eager serializer to acceptor's.
 *
 * If TL_SET[U]CRED generate the credentials options.
 * For sockets tl_conn_req has already generated the T_CONN_CON.
 *
 * mp  - the T_CONN_RES (or O_T_CONN_RES) message.
 * tep - the listening endpoint the response arrived on.
 *
 * Outline:
 *	1. Validate listener state and the message; find the accepting
 *	   endpoint by ACCEPTOR_id and the pending connection (tl_icon_t) by
 *	   SEQ_number.
 *	2. Block close on the acceptor and, if possible, on the client.
 *	3. Acknowledge the request and advance the listener/acceptor state.
 *	4. Link client and acceptor together, put them on a common
 *	   serializer, remove the pending-connection entry and, unless it was
 *	   already sent early for a socket, send T_CONN_CON to the client.
 */
static void
tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	t_scalar_t		olen, ooff, err = 0;
	t_scalar_t		prim = cres->PRIM_type;
	uchar_t			*addr_startp;
	tl_endpt_t		*acc_ep = NULL, *cl_ep = NULL;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *respmp;
	mblk_t			*dimp, *ccmp = NULL;
	struct T_discon_ind	*di;
	struct T_conn_con	*cc;
	/* B_TRUE once tl_noclose(cl_ep) succeeded and must be undone. */
	boolean_t		client_noclose_set = B_FALSE;
	/* B_FALSE when the acceptor must move to the client's serializer. */
	boolean_t		switch_client_serializer = B_TRUE;

	ASSERT(IS_COTS(tep));

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_CON
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (ackmp == NULL) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_CON later
	 */


	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);

	/*
	 * validate state
	 */
	if (tep->te_state != TS_WRES_CIND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_wput:T_CONN_RES:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_res)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_res:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	olen = cres->OPT_length;
	ooff = cres->OPT_offset;
	if (((olen > 0) && ((ooff + olen) > msz))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_res:invalid message"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	if (olen) {
		/*
		 * no opts in connect res
		 * supported in this provider
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_res:options not supported in message"));
		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * From here on the listener is in TS_WACK_CRES; every error return
	 * below first applies the TE_ERROR_ACK transition to leave it again.
	 */
	tep->te_state = nextstate[TE_CONN_RES][tep->te_state];
	ASSERT(tep->te_state == TS_WACK_CRES);

	if (cres->SEQ_number < TL_MINOR_START &&
	    cres->SEQ_number >= BADSEQNUM) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:remote endpoint sequence number bad"));
		tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * find accepting endpoint. Will have extra reference if found.
	 */
	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * Prevent acceptor from closing.
	 * Every return after this point must pair it with tl_closeok(acc_ep)
	 * and drop the lookup reference with tl_refrele(acc_ep).
	 */
	if (!tl_noclose(acc_ep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		tl_refrele(acc_ep);
		freemsg(mp);
		return;
	}

	acc_ep->te_flag |= TL_ACCEPTOR;

	/*
	 * validate that accepting endpoint, if different from listening
	 * has address bound => state is TS_IDLE
	 * TROUBLE in XPG4 !!?
	 */
	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:accepting endpoint has no address bound,"
		    "state=%d", acc_ep->te_state));
		tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * validate if accepting endpt same as listening, then
	 * no other incoming connection should be on the queue
	 */

	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
		tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * Look up the entry for this client on the listener's list of
	 * pending connections, keyed by the sequence number from the
	 * T_CONN_RES. The entry is removed from the list further down,
	 * once the connection has been accepted.
	 */
	tip = tl_icon_find(tep, cres->SEQ_number);
	if (tip == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:no client in listener list"));
		tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * If ti_tep is NULL the client has already closed. In this case
	 * the code below will avoid any action on the client side
	 * but complete the server and acceptor state transitions.
	 */
	ASSERT(tip->ti_tep == NULL ||
	    tip->ti_tep->te_seqno == cres->SEQ_number);
	cl_ep = tip->ti_tep;

	/*
	 * If the client is present it is switched from listener's to acceptor's
	 * serializer. We should block client closes while serializers are
	 * being switched.
	 *
	 * It is possible that the client is present but is currently being
	 * closed. There are two possible cases:
	 *
	 * 1) The client has already entered tl_close_finish_ser() and sent
	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
	 *    still need to send all messages from tip->ti_mp to the acceptor).
	 *
	 * 2) The client started the close but has not entered
	 *    tl_close_finish_ser() yet. In this case, the client is already
	 *    proceeding asynchronously on the listener's serializer, so we're
	 *    forced to change the acceptor to use the listener's serializer to
	 *    ensure that any operations on the acceptor are serialized with
	 *    respect to the close that's in-progress.
	 */
	if (cl_ep != NULL) {
		if (tl_noclose(cl_ep)) {
			client_noclose_set = B_TRUE;
		} else {
			/*
			 * Client is closing. If it has sent the
			 * T_ORDREL_IND, we can simply ignore it - otherwise,
			 * we have to let the client continue until it is
			 * sent.
			 *
			 * If we do continue using the client, acceptor will
			 * switch to client's serializer which is used by client
			 * for its close.
			 */
			tl_client_closing_when_accepting++;
			switch_client_serializer = B_FALSE;
			/*
			 * Only an early-connected socket client that has not
			 * yet reached state -1 is kept; in every other case
			 * the client is treated as already gone.
			 */
			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
			    cl_ep->te_state == -1)
				cl_ep = NULL;
		}
	}

	if (cl_ep != NULL) {
		/*
		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, the condition below can only be
		 * true for sockets; a non-socket client in a state other
		 * than TS_WCON_CREQ is not rejected, which does not match
		 * the intent stated above. Confirm whether
		 * "!(state == TS_DATA_XFER && IS_SOCKET(cl_ep))" was meant.
		 */
		if (cl_ep->te_state != TS_WCON_CREQ &&
		    (cl_ep->te_state != TS_DATA_XFER &&
		    IS_SOCKET(cl_ep))) {
			err = ECONNREFUSED;
			/*
			 * T_DISCON_IND sent later after committing memory
			 * and acking validity of request
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
			    "tl_conn_res:peer in bad state"));
		}

		/*
		 * preallocate now for T_DISCON_IND or T_CONN_CON
		 * ack validity of request (T_OK_ACK) after memory committed
		 */

		if (err) {
			size = sizeof (struct T_discon_ind);
		} else {
			/*
			 * calculate length of T_CONN_CON message
			 */
			olen = 0;
			if (cl_ep->te_flag & TL_SETCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(sizeof (tl_credopt_t));
			} else if (cl_ep->te_flag & TL_SETUCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(ucredminsize(acc_ep->te_credp));
			}
			size = T_ALIGN(sizeof (struct T_conn_con) +
			    acc_ep->te_alen) + olen;
		}
		if ((respmp = reallocb(mp, size, 0)) == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_WRES_CIND;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_closeok(acc_ep);
			tl_refrele(acc_ep);
			return;
		}
		/* The request now lives in respmp; mp must not be used. */
		mp = NULL;
	}

	/*
	 * Now ack validity of request
	 * (which OK_ACK event applies depends on whether this was the last
	 * pending connection and whether the listener accepts on itself)
	 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = nextstate[TE_OK_ACK2][tep->te_state];
		else
			tep->te_state = nextstate[TE_OK_ACK3][tep->te_state];
	} else {
		tep->te_state = nextstate[TE_OK_ACK4][tep->te_state];
	}

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 *
	 * NOTE(review): neither return in this branch calls tl_freetip();
	 * confirm the pending-connection entry is cleaned up elsewhere.
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always ?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (dimp == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE | SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type  = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = nextstate[TE_PASS_CONN][acc_ep->te_state];

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		/*
		 * mp is still the original request here: the reallocb() block
		 * above is only entered when cl_ep is non-NULL.
		 */
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req can not operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 */
	if (!IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			/*
			 * NOTE(review): this path also returns without
			 * tl_freetip(); confirm cleanup happens elsewhere.
			 */
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE | SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		/*
		 * T_CONN_CON layout: header, responding (acceptor) address,
		 * then the optional credential option at the next T_ALIGN
		 * boundary.
		 */
		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type  = T_CONN_CON;
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential processing
		 */
		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
	} else {
		/*
		 * Socket with early connect: the T_CONN_CON was already sent
		 * by tl_conn_req, so the preallocated response is not needed.
		 */
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * make connection linking
	 * accepting and client endpoints
	 * No need to increment references:
	 *	on client: it should already have one from tip->ti_tep linkage.
	 *	on acceptor is should already have one from the table lookup.
	 *
	 * At this point both client and acceptor can't close. Set client
	 * serializer to acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			/*
			 * The client's te_ser_count is non-zero, so its
			 * serializer is not replaced; fall back to moving
			 * the acceptor instead.
			 */
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch client to use acceptor's.
		 * Move acceptor to client's serializer (which is the same as
		 * listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 * (ti_tep is cleared first; the client is now linked through
	 * te_conp instead)
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (!IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In cases any data or ordrel req has been
	 * queued make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		/*
		 * NOTE(review): ccmp is only set in the branch above whose
		 * condition is the negation of this one, so it looks like it
		 * is always NULL here; the free appears purely defensive.
		 */
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = nextstate[TE_CONN_CON][cl_ep->te_state];
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.
	 */
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}
3941 
3942 
3943 
3944 
3945 static void
tl_discon_req(mblk_t * mp,tl_endpt_t * tep)3946 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3947 {
3948 	queue_t			*wq;
3949 	struct T_discon_req	*dr;
3950 	ssize_t			msz;
3951 	tl_endpt_t		*peer_tep = tep->te_conp;
3952 	tl_endpt_t		*srv_tep = tep->te_oconp;
3953 	tl_icon_t		*tip;
3954 	size_t			size;
3955 	mblk_t			*ackmp, *dimp, *respmp;
3956 	struct T_discon_ind	*di;
3957 	t_scalar_t		save_state, new_state;
3958 
3959 	if (tep->te_closing) {
3960 		freemsg(mp);
3961 		return;
3962 	}
3963 
3964 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3965 		TL_UNCONNECT(tep->te_conp);
3966 		peer_tep = NULL;
3967 	}
3968 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3969 		TL_UNCONNECT(tep->te_oconp);
3970 		srv_tep = NULL;
3971 	}
3972 
3973 	wq = tep->te_wq;
3974 
3975 	/*
3976 	 * preallocate memory for:
3977 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3978 	 *	==> known max T_ERROR_ACK
3979 	 * 2. for  T_DISCON_IND
3980 	 */
3981 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3982 	if (ackmp == NULL) {
3983 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3984 		return;
3985 	}
3986 	/*
3987 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3988 	 * will be committed for T_DISCON_IND  later
3989 	 */
3990 
3991 	dr = (struct T_discon_req *)mp->b_rptr;
3992 	msz = MBLKL(mp);
3993 
3994 	/*
3995 	 * validate the state
3996 	 */
3997 	save_state = new_state = tep->te_state;
3998 	if (!(save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3999 	    !(save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
4000 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4001 		    SL_TRACE | SL_ERROR,
4002 		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
4003 		    tep->te_state));
4004 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
4005 		freemsg(mp);
4006 		return;
4007 	}
4008 	/*
4009 	 * Defer committing the state change until it is determined if
4010 	 * the message will be queued with the tl_icon or not.
4011 	 */
4012 	new_state  = nextstate[TE_DISCON_REQ][tep->te_state];
4013 
4014 	/* validate the message */
4015 	if (msz < sizeof (struct T_discon_req)) {
4016 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4017 		    "tl_discon_req:invalid message"));
4018 		tep->te_state = nextstate[TE_ERROR_ACK][new_state];
4019 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
4020 		freemsg(mp);
4021 		return;
4022 	}
4023 
4024 	/*
4025 	 * if server, then validate that client exists
4026 	 * by connection sequence number etc.
4027 	 */
4028 	if (tep->te_nicon > 0) { /* server */
4029 
4030 		/*
4031 		 * search server list for disconnect client
4032 		 */
4033 		tip = tl_icon_find(tep, dr->SEQ_number);
4034 		if (tip == NULL) {
4035 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4036 			    SL_TRACE | SL_ERROR,
4037 			    "tl_discon_req:no disconnect endpoint"));
4038 			tep->te_state = nextstate[TE_ERROR_ACK][new_state];
4039 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
4040 			freemsg(mp);
4041 			return;
4042 		}
4043 		/*
4044 		 * If ti_tep is NULL the client has already closed. In this case
4045 		 * the code below will avoid any action on the client side.
4046 		 */
4047 
4048 		IMPLY(tip->ti_tep != NULL,
4049 		    tip->ti_tep->te_seqno == dr->SEQ_number);
4050 		peer_tep = tip->ti_tep;
4051 	}
4052 
4053 	/*
4054 	 * preallocate now for T_DISCON_IND
4055 	 * ack validity of request (T_OK_ACK) after memory committed
4056 	 */
4057 	size = sizeof (struct T_discon_ind);
4058 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
4059 		tl_memrecover(wq, mp, size);
4060 		freemsg(ackmp);
4061 		return;
4062 	}
4063 
4064 	/*
4065 	 * prepare message to ack validity of request
4066 	 */
4067 	if (tep->te_nicon == 0) {
4068 		new_state = nextstate[TE_OK_ACK1][new_state];
4069 	} else {
4070 		if (tep->te_nicon == 1)
4071 			new_state = nextstate[TE_OK_ACK2][new_state];
4072 		else
4073 			new_state = nextstate[TE_OK_ACK4][new_state];
4074 	}
4075 
4076 	/*
4077 	 * Flushing queues according to TPI. Using the old state.
4078 	 */
4079 	if ((tep->te_nicon <= 1) &&
4080 	    ((save_state == TS_DATA_XFER) ||
4081 	    (save_state == TS_WIND_ORDREL) ||
4082 	    (save_state == TS_WREQ_ORDREL)))
4083 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
4084 
4085 	/* send T_OK_ACK up  */
4086 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
4087 
4088 	/*
4089 	 * now do disconnect business
4090 	 */
4091 	if (tep->te_nicon > 0) { /* listener */
4092 		if (peer_tep != NULL && !peer_tep->te_closing) {
4093 			/*
4094 			 * disconnect incoming connect request pending to tep
4095 			 */
4096 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4097 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
4098 				    SL_TRACE | SL_ERROR,
4099 				    "tl_discon_req: reallocb failed"));
4100 				tep->te_state = new_state;
4101 				tl_merror(wq, respmp, ENOMEM);
4102 				return;
4103 			}
4104 			di = (struct T_discon_ind *)dimp->b_rptr;
4105 			di->SEQ_number = BADSEQNUM;
4106 			save_state = peer_tep->te_state;
4107 			peer_tep->te_state = TS_IDLE;
4108 
4109 			TL_REMOVE_PEER(peer_tep->te_oconp);
4110 			enableok(peer_tep->te_wq);
4111 			TL_QENABLE(peer_tep);
4112 		} else {
4113 			freemsg(respmp);
4114 			dimp = NULL;
4115 		}
4116 
4117 		/*
4118 		 * remove endpoint from incoming connection list
4119 		 * - remove disconnect client from list on server
4120 		 */
4121 		tl_freetip(tep, tip);
4122 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4123 		/*
4124 		 * disconnect an outgoing request pending from tep
4125 		 */
4126 
4127 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4128 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4129 			    SL_TRACE | SL_ERROR,
4130 			    "tl_discon_req: reallocb failed"));
4131 			tep->te_state = new_state;
4132 			tl_merror(wq, respmp, ENOMEM);
4133 			return;
4134 		}
4135 		di = (struct T_discon_ind *)dimp->b_rptr;
4136 		DB_TYPE(dimp) = M_PROTO;
4137 		di->PRIM_type  = T_DISCON_IND;
4138 		di->DISCON_reason = ECONNRESET;
4139 		di->SEQ_number = tep->te_seqno;
4140 
4141 		/*
4142 		 * If this is a socket the T_DISCON_IND is queued with
4143 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4144 		 * from the list of pending connections.
4145 		 * Note that when te_oconp is set the peer better have
4146 		 * a t_connind_t for the client.
4147 		 */
4148 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4149 			/*
4150 			 * No need to check that
4151 			 * ti_tep == NULL since the T_DISCON_IND
4152 			 * takes precedence over other queued
4153 			 * messages.
4154 			 */
4155 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4156 			peer_tep = NULL;
4157 			dimp = NULL;
4158 			/*
4159 			 * Can't clear te_oconp since tl_co_unconnect needs
4160 			 * it as a hint not to free the tep.
4161 			 * Keep the state unchanged since tl_conn_res inspects
4162 			 * it.
4163 			 */
4164 			new_state = tep->te_state;
4165 		} else {
4166 			/* Found - delete it */
4167 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4168 			if (tip != NULL) {
4169 				ASSERT(tep == tip->ti_tep);
4170 				save_state = peer_tep->te_state;
4171 				if (peer_tep->te_nicon == 1)
4172 					peer_tep->te_state =
4173 					    nextstate[TE_DISCON_IND2]
4174 					    [peer_tep->te_state];
4175 				else
4176 					peer_tep->te_state =
4177 					    nextstate[TE_DISCON_IND3]
4178 					    [peer_tep->te_state];
4179 				tl_freetip(peer_tep, tip);
4180 			}
4181 			ASSERT(tep->te_oconp != NULL);
4182 			TL_UNCONNECT(tep->te_oconp);
4183 		}
4184 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4185 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4186 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4187 			    SL_TRACE | SL_ERROR,
4188 			    "tl_discon_req: reallocb failed"));
4189 			tep->te_state = new_state;
4190 			tl_merror(wq, respmp, ENOMEM);
4191 			return;
4192 		}
4193 		di = (struct T_discon_ind *)dimp->b_rptr;
4194 		di->SEQ_number = BADSEQNUM;
4195 
4196 		save_state = peer_tep->te_state;
4197 		peer_tep->te_state = TS_IDLE;
4198 	} else {
4199 		/* Not connected */
4200 		tep->te_state = new_state;
4201 		freemsg(respmp);
4202 		return;
4203 	}
4204 
4205 	/* Commit state changes */
4206 	tep->te_state = new_state;
4207 
4208 	if (peer_tep == NULL) {
4209 		ASSERT(dimp == NULL);
4210 		goto done;
4211 	}
4212 	/*
4213 	 * Flush queues on peer before sending up
4214 	 * T_DISCON_IND according to TPI
4215 	 */
4216 
4217 	if ((save_state == TS_DATA_XFER) ||
4218 	    (save_state == TS_WIND_ORDREL) ||
4219 	    (save_state == TS_WREQ_ORDREL))
4220 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4221 
4222 	DB_TYPE(dimp) = M_PROTO;
4223 	di->PRIM_type  = T_DISCON_IND;
4224 	di->DISCON_reason = ECONNRESET;
4225 
4226 	/*
4227 	 * data blocks already linked into dimp by reallocb()
4228 	 */
4229 	/*
4230 	 * send indication message to peer user module
4231 	 */
4232 	ASSERT(dimp != NULL);
4233 	putnext(peer_tep->te_rq, dimp);
4234 done:
4235 	if (tep->te_conp) {	/* disconnect pointers if connected */
4236 		ASSERT(!peer_tep->te_closing);
4237 
4238 		/*
4239 		 * Messages may be queued on peer's write queue
4240 		 * waiting to be processed by its write service
4241 		 * procedure. Before the pointer to the peer transport
4242 		 * structure is set to NULL, qenable the peer's write
4243 		 * queue so that the queued up messages are processed.
4244 		 */
4245 		if ((save_state == TS_DATA_XFER) ||
4246 		    (save_state == TS_WIND_ORDREL) ||
4247 		    (save_state == TS_WREQ_ORDREL))
4248 			TL_QENABLE(peer_tep);
4249 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4250 		TL_UNCONNECT(peer_tep->te_conp);
4251 		if (!IS_SOCKET(tep)) {
4252 			/*
4253 			 * unlink the streams
4254 			 */
4255 			tep->te_wq->q_next = NULL;
4256 			peer_tep->te_wq->q_next = NULL;
4257 		}
4258 		TL_UNCONNECT(tep->te_conp);
4259 	}
4260 }
4261 
4262 static void
tl_addr_req_ser(mblk_t * mp,tl_endpt_t * tep)4263 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4264 {
4265 	if (!tep->te_closing)
4266 		tl_addr_req(mp, tep);
4267 	else
4268 		freemsg(mp);
4269 
4270 	tl_serializer_exit(tep);
4271 	tl_refrele(tep);
4272 }
4273 
4274 static void
tl_addr_req(mblk_t * mp,tl_endpt_t * tep)4275 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4276 {
4277 	queue_t			*wq;
4278 	size_t			ack_sz;
4279 	mblk_t			*ackmp;
4280 	struct T_addr_ack	*taa;
4281 
4282 	if (tep->te_closing) {
4283 		freemsg(mp);
4284 		return;
4285 	}
4286 
4287 	wq = tep->te_wq;
4288 
4289 	/*
4290 	 * Note: T_ADDR_REQ message has only PRIM_type field
4291 	 * so it is already validated earlier.
4292 	 */
4293 
4294 	if (IS_CLTS(tep) ||
4295 	    (tep->te_state > TS_WREQ_ORDREL) ||
4296 	    (tep->te_state < TS_DATA_XFER)) {
4297 		/*
4298 		 * Either connectionless or connection oriented but not
4299 		 * in connected data transfer state or half-closed states.
4300 		 */
4301 		ack_sz = sizeof (struct T_addr_ack);
4302 		if (tep->te_state >= TS_IDLE)
4303 			/* is bound */
4304 			ack_sz += tep->te_alen;
4305 		ackmp = reallocb(mp, ack_sz, 0);
4306 		if (ackmp == NULL) {
4307 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4308 			    SL_TRACE | SL_ERROR,
4309 			    "tl_addr_req: reallocb failed"));
4310 			tl_memrecover(wq, mp, ack_sz);
4311 			return;
4312 		}
4313 
4314 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4315 
4316 		bzero(taa, sizeof (struct T_addr_ack));
4317 
4318 		taa->PRIM_type = T_ADDR_ACK;
4319 		ackmp->b_datap->db_type = M_PCPROTO;
4320 		ackmp->b_wptr = (uchar_t *)&taa[1];
4321 
4322 		if (tep->te_state >= TS_IDLE) {
4323 			/* endpoint is bound */
4324 			taa->LOCADDR_length = tep->te_alen;
4325 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4326 
4327 			bcopy(tep->te_abuf, ackmp->b_wptr,
4328 			    tep->te_alen);
4329 			ackmp->b_wptr += tep->te_alen;
4330 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4331 		}
4332 
4333 		(void) qreply(wq, ackmp);
4334 	} else {
4335 		ASSERT(tep->te_state == TS_DATA_XFER ||
4336 		    tep->te_state == TS_WIND_ORDREL ||
4337 		    tep->te_state == TS_WREQ_ORDREL);
4338 		/* connection oriented in data transfer */
4339 		tl_connected_cots_addr_req(mp, tep);
4340 	}
4341 }
4342 
4343 
4344 static void
tl_connected_cots_addr_req(mblk_t * mp,tl_endpt_t * tep)4345 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4346 {
4347 	tl_endpt_t		*peer_tep = tep->te_conp;
4348 	size_t			ack_sz;
4349 	mblk_t			*ackmp;
4350 	struct T_addr_ack	*taa;
4351 	uchar_t			*addr_startp;
4352 
4353 	if (tep->te_closing) {
4354 		freemsg(mp);
4355 		return;
4356 	}
4357 
4358 	if (peer_tep == NULL || peer_tep->te_closing) {
4359 		tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4360 		return;
4361 	}
4362 
4363 	ASSERT(tep->te_state >= TS_IDLE);
4364 
4365 	ack_sz = sizeof (struct T_addr_ack);
4366 	ack_sz += T_ALIGN(tep->te_alen);
4367 	ack_sz += peer_tep->te_alen;
4368 
4369 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4370 	if (ackmp == NULL) {
4371 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4372 		    "tl_connected_cots_addr_req: reallocb failed"));
4373 		tl_memrecover(tep->te_wq, mp, ack_sz);
4374 		return;
4375 	}
4376 
4377 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4378 
4379 	/* endpoint is bound */
4380 	taa->LOCADDR_length = tep->te_alen;
4381 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4382 
4383 	addr_startp = (uchar_t *)&taa[1];
4384 
4385 	bcopy(tep->te_abuf, addr_startp,
4386 	    tep->te_alen);
4387 
4388 	taa->REMADDR_length = peer_tep->te_alen;
4389 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4390 	    taa->LOCADDR_length);
4391 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4392 	bcopy(peer_tep->te_abuf, addr_startp,
4393 	    peer_tep->te_alen);
4394 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4395 	    taa->REMADDR_offset + peer_tep->te_alen;
4396 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4397 
4398 	putnext(tep->te_rq, ackmp);
4399 }
4400 
4401 static void
tl_copy_info(struct T_info_ack * ia,tl_endpt_t * tep)4402 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4403 {
4404 	if (IS_CLTS(tep)) {
4405 		*ia = tl_clts_info_ack;
4406 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4407 	} else {
4408 		*ia = tl_cots_info_ack;
4409 		if (IS_COTSORD(tep))
4410 			ia->SERV_type = T_COTS_ORD;
4411 	}
4412 	ia->TIDU_size = tl_tidusz;
4413 	ia->CURRENT_state = tep->te_state;
4414 }
4415 
4416 /*
4417  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4418  * tl_wput.
4419  */
4420 static void
tl_capability_req(mblk_t * mp,tl_endpt_t * tep)4421 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4422 {
4423 	mblk_t			*ackmp;
4424 	t_uscalar_t		cap_bits1;
4425 	struct T_capability_ack	*tcap;
4426 
4427 	if (tep->te_closing) {
4428 		freemsg(mp);
4429 		return;
4430 	}
4431 
4432 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4433 
4434 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4435 	    M_PCPROTO, T_CAPABILITY_ACK);
4436 	if (ackmp == NULL) {
4437 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4438 		    "tl_capability_req: reallocb failed"));
4439 		tl_memrecover(tep->te_wq, mp,
4440 		    sizeof (struct T_capability_ack));
4441 		return;
4442 	}
4443 
4444 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4445 	tcap->CAP_bits1 = 0;
4446 
4447 	if (cap_bits1 & TC1_INFO) {
4448 		tl_copy_info(&tcap->INFO_ack, tep);
4449 		tcap->CAP_bits1 |= TC1_INFO;
4450 	}
4451 
4452 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4453 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4454 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4455 	}
4456 
4457 	putnext(tep->te_rq, ackmp);
4458 }
4459 
4460 static void
tl_info_req_ser(mblk_t * mp,tl_endpt_t * tep)4461 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4462 {
4463 	if (!tep->te_closing)
4464 		tl_info_req(mp, tep);
4465 	else
4466 		freemsg(mp);
4467 
4468 	tl_serializer_exit(tep);
4469 	tl_refrele(tep);
4470 }
4471 
4472 static void
tl_info_req(mblk_t * mp,tl_endpt_t * tep)4473 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4474 {
4475 	mblk_t *ackmp;
4476 
4477 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4478 	    M_PCPROTO, T_INFO_ACK);
4479 	if (ackmp == NULL) {
4480 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4481 		    "tl_info_req: reallocb failed"));
4482 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4483 		return;
4484 	}
4485 
4486 	/*
4487 	 * fill in T_INFO_ACK contents
4488 	 */
4489 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4490 
4491 	/*
4492 	 * send ack message
4493 	 */
4494 	putnext(tep->te_rq, ackmp);
4495 }
4496 
4497 /*
4498  * Handle M_DATA, T_data_req and T_optdata_req.
4499  * If this is a socket pass through T_optdata_req options unmodified.
4500  */
4501 static void
tl_data(mblk_t * mp,tl_endpt_t * tep)4502 tl_data(mblk_t *mp, tl_endpt_t *tep)
4503 {
4504 	queue_t			*wq = tep->te_wq;
4505 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4506 	ssize_t			msz = MBLKL(mp);
4507 	tl_endpt_t		*peer_tep;
4508 	queue_t			*peer_rq;
4509 	boolean_t		closing = tep->te_closing;
4510 
4511 	if (IS_CLTS(tep)) {
4512 		(void) (STRLOG(TL_ID, tep->te_minor, 2,
4513 		    SL_TRACE | SL_ERROR,
4514 		    "tl_wput:clts:unattached M_DATA"));
4515 		if (!closing) {
4516 			tl_merror(wq, mp, EPROTO);
4517 		} else {
4518 			freemsg(mp);
4519 		}
4520 		return;
4521 	}
4522 
4523 	/*
4524 	 * If the endpoint is closing it should still forward any data to the
4525 	 * peer (if it has one). If it is not allowed to forward it can just
4526 	 * free the message.
4527 	 */
4528 	if (closing &&
4529 	    (tep->te_state != TS_DATA_XFER) &&
4530 	    (tep->te_state != TS_WREQ_ORDREL)) {
4531 		freemsg(mp);
4532 		return;
4533 	}
4534 
4535 	if (DB_TYPE(mp) == M_PROTO) {
4536 		if (prim->type == T_DATA_REQ &&
4537 		    msz < sizeof (struct T_data_req)) {
4538 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4539 				SL_TRACE | SL_ERROR,
4540 				"tl_data:T_DATA_REQ:invalid message"));
4541 			if (!closing) {
4542 				tl_merror(wq, mp, EPROTO);
4543 			} else {
4544 				freemsg(mp);
4545 			}
4546 			return;
4547 		} else if (prim->type == T_OPTDATA_REQ &&
4548 		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4549 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4550 			    SL_TRACE | SL_ERROR,
4551 			    "tl_data:T_OPTDATA_REQ:invalid message"));
4552 			if (!closing) {
4553 				tl_merror(wq, mp, EPROTO);
4554 			} else {
4555 				freemsg(mp);
4556 			}
4557 			return;
4558 		}
4559 	}
4560 
4561 	/*
4562 	 * connection oriented provider
4563 	 */
4564 	switch (tep->te_state) {
4565 	case TS_IDLE:
4566 		/*
4567 		 * Other end not here - do nothing.
4568 		 */
4569 		freemsg(mp);
4570 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4571 		    "tl_data:cots with endpoint idle"));
4572 		return;
4573 
4574 	case TS_DATA_XFER:
4575 		/* valid states */
4576 		if (tep->te_conp != NULL)
4577 			break;
4578 
4579 		if (tep->te_oconp == NULL) {
4580 			if (!closing) {
4581 				tl_merror(wq, mp, EPROTO);
4582 			} else {
4583 				freemsg(mp);
4584 			}
4585 			return;
4586 		}
4587 		/*
4588 		 * For a socket the T_CONN_CON is sent early thus
4589 		 * the peer might not yet have accepted the connection.
4590 		 * If we are closing queue the packet with the T_CONN_IND.
4591 		 * Otherwise defer processing the packet until the peer
4592 		 * accepts the connection.
4593 		 * Note that the queue is noenabled when we go into this
4594 		 * state.
4595 		 */
4596 		if (!closing) {
4597 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4598 			    SL_TRACE | SL_ERROR,
4599 			    "tl_data: ocon"));
4600 			TL_PUTBQ(tep, mp);
4601 			return;
4602 		}
4603 		if (DB_TYPE(mp) == M_PROTO) {
4604 			if (msz < sizeof (t_scalar_t)) {
4605 				freemsg(mp);
4606 				return;
4607 			}
4608 			/* reuse message block - just change REQ to IND */
4609 			if (prim->type == T_DATA_REQ)
4610 				prim->type = T_DATA_IND;
4611 			else
4612 				prim->type = T_OPTDATA_IND;
4613 		}
4614 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4615 		return;
4616 
4617 	case TS_WREQ_ORDREL:
4618 		if (tep->te_conp == NULL) {
4619 			/*
4620 			 * Other end closed - generate discon_ind
4621 			 * with reason 0 to cause an EPIPE but no
4622 			 * read side error on AF_UNIX sockets.
4623 			 */
4624 			freemsg(mp);
4625 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4626 			    SL_TRACE | SL_ERROR,
4627 			    "tl_data: WREQ_ORDREL and no peer"));
4628 			tl_discon_ind(tep, 0);
4629 			return;
4630 		}
4631 		break;
4632 
4633 	default:
4634 		/* invalid state for event TE_DATA_REQ */
4635 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4636 		    "tl_data:cots:out of state"));
4637 		tl_merror(wq, mp, EPROTO);
4638 		return;
4639 	}
4640 	/*
4641 	 * tep->te_state = nextstate[TE_DATA_REQ][tep->te_state];
4642 	 * (State stays same on this event)
4643 	 */
4644 
4645 	/*
4646 	 * get connected endpoint
4647 	 */
4648 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4649 		freemsg(mp);
4650 		/* Peer closed */
4651 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4652 		    "tl_data: peer gone"));
4653 		return;
4654 	}
4655 
4656 	ASSERT(tep->te_serializer == peer_tep->te_serializer);
4657 	peer_rq = peer_tep->te_rq;
4658 
4659 	/*
4660 	 * Put it back if flow controlled
4661 	 * Note: Messages already on queue when we are closing is bounded
4662 	 * so we can ignore flow control.
4663 	 */
4664 	if (!canputnext(peer_rq) && !closing) {
4665 		TL_PUTBQ(tep, mp);
4666 		return;
4667 	}
4668 
4669 	/*
4670 	 * validate peer state
4671 	 */
4672 	switch (peer_tep->te_state) {
4673 	case TS_DATA_XFER:
4674 	case TS_WIND_ORDREL:
4675 		/* valid states */
4676 		break;
4677 	default:
4678 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4679 		    "tl_data:rx side:invalid state"));
4680 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4681 		return;
4682 	}
4683 	if (DB_TYPE(mp) == M_PROTO) {
4684 		/* reuse message block - just change REQ to IND */
4685 		if (prim->type == T_DATA_REQ)
4686 			prim->type = T_DATA_IND;
4687 		else
4688 			prim->type = T_OPTDATA_IND;
4689 	}
4690 	/*
4691 	 * peer_tep->te_state = nextstate[TE_DATA_IND][peer_tep->te_state];
4692 	 * (peer state stays same on this event)
4693 	 */
4694 	/*
4695 	 * send data to connected peer
4696 	 */
4697 	putnext(peer_rq, mp);
4698 }
4699 
4700 
4701 
4702 static void
tl_exdata(mblk_t * mp,tl_endpt_t * tep)4703 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4704 {
4705 	queue_t			*wq = tep->te_wq;
4706 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4707 	ssize_t			msz = MBLKL(mp);
4708 	tl_endpt_t		*peer_tep;
4709 	queue_t			*peer_rq;
4710 	boolean_t		closing = tep->te_closing;
4711 
4712 	if (msz < sizeof (struct T_exdata_req)) {
4713 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4714 		    "tl_exdata:invalid message"));
4715 		if (!closing) {
4716 			tl_merror(wq, mp, EPROTO);
4717 		} else {
4718 			freemsg(mp);
4719 		}
4720 		return;
4721 	}
4722 
4723 	/*
4724 	 * If the endpoint is closing it should still forward any data to the
4725 	 * peer (if it has one). If it is not allowed to forward it can just
4726 	 * free the message.
4727 	 */
4728 	if (closing &&
4729 	    (tep->te_state != TS_DATA_XFER) &&
4730 	    (tep->te_state != TS_WREQ_ORDREL)) {
4731 		freemsg(mp);
4732 		return;
4733 	}
4734 
4735 	/*
4736 	 * validate state
4737 	 */
4738 	switch (tep->te_state) {
4739 	case TS_IDLE:
4740 		/*
4741 		 * Other end not here - do nothing.
4742 		 */
4743 		freemsg(mp);
4744 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4745 		    "tl_exdata:cots with endpoint idle"));
4746 		return;
4747 
4748 	case TS_DATA_XFER:
4749 		/* valid states */
4750 		if (tep->te_conp != NULL)
4751 			break;
4752 
4753 		if (tep->te_oconp == NULL) {
4754 			if (!closing) {
4755 				tl_merror(wq, mp, EPROTO);
4756 			} else {
4757 				freemsg(mp);
4758 			}
4759 			return;
4760 		}
4761 		/*
4762 		 * For a socket the T_CONN_CON is sent early thus
4763 		 * the peer might not yet have accepted the connection.
4764 		 * If we are closing queue the packet with the T_CONN_IND.
4765 		 * Otherwise defer processing the packet until the peer
4766 		 * accepts the connection.
4767 		 * Note that the queue is noenabled when we go into this
4768 		 * state.
4769 		 */
4770 		if (!closing) {
4771 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4772 			    SL_TRACE | SL_ERROR,
4773 			    "tl_exdata: ocon"));
4774 			TL_PUTBQ(tep, mp);
4775 			return;
4776 		}
4777 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4778 		    "tl_exdata: closing socket ocon"));
4779 		prim->type = T_EXDATA_IND;
4780 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4781 		return;
4782 
4783 	case TS_WREQ_ORDREL:
4784 		if (tep->te_conp == NULL) {
4785 			/*
4786 			 * Other end closed - generate discon_ind
4787 			 * with reason 0 to cause an EPIPE but no
4788 			 * read side error on AF_UNIX sockets.
4789 			 */
4790 			freemsg(mp);
4791 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4792 			    SL_TRACE | SL_ERROR,
4793 			    "tl_exdata: WREQ_ORDREL and no peer"));
4794 			tl_discon_ind(tep, 0);
4795 			return;
4796 		}
4797 		break;
4798 
4799 	default:
4800 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4801 		    SL_TRACE | SL_ERROR,
4802 		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4803 		    tep->te_state));
4804 		tl_merror(wq, mp, EPROTO);
4805 		return;
4806 	}
4807 	/*
4808 	 * tep->te_state = nextstate[TE_EXDATA_REQ][tep->te_state];
4809 	 * (state stays same on this event)
4810 	 */
4811 
4812 	/*
4813 	 * get connected endpoint
4814 	 */
4815 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4816 		freemsg(mp);
4817 		/* Peer closed */
4818 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4819 		    "tl_exdata: peer gone"));
4820 		return;
4821 	}
4822 
4823 	peer_rq = peer_tep->te_rq;
4824 
4825 	/*
4826 	 * Put it back if flow controlled
4827 	 * Note: Messages already on queue when we are closing is bounded
4828 	 * so we can ignore flow control.
4829 	 */
4830 	if (!canputnext(peer_rq) && !closing) {
4831 		TL_PUTBQ(tep, mp);
4832 		return;
4833 	}
4834 
4835 	/*
4836 	 * validate state on peer
4837 	 */
4838 	switch (peer_tep->te_state) {
4839 	case TS_DATA_XFER:
4840 	case TS_WIND_ORDREL:
4841 		/* valid states */
4842 		break;
4843 	default:
4844 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4845 		    "tl_exdata:rx side:invalid state"));
4846 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4847 		return;
4848 	}
4849 	/*
4850 	 * peer_tep->te_state = nextstate[TE_DATA_IND][peer_tep->te_state];
4851 	 * (peer state stays same on this event)
4852 	 */
4853 	/*
4854 	 * reuse message block
4855 	 */
4856 	prim->type = T_EXDATA_IND;
4857 
4858 	/*
4859 	 * send data to connected peer
4860 	 */
4861 	putnext(peer_rq, mp);
4862 }
4863 
4864 
4865 
4866 static void
tl_ordrel(mblk_t * mp,tl_endpt_t * tep)4867 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4868 {
4869 	queue_t			*wq = tep->te_wq;
4870 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4871 	ssize_t			msz = MBLKL(mp);
4872 	tl_endpt_t		*peer_tep;
4873 	queue_t			*peer_rq;
4874 	boolean_t		closing = tep->te_closing;
4875 
4876 	if (msz < sizeof (struct T_ordrel_req)) {
4877 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4878 		    "tl_ordrel:invalid message"));
4879 		if (!closing) {
4880 			tl_merror(wq, mp, EPROTO);
4881 		} else {
4882 			freemsg(mp);
4883 		}
4884 		return;
4885 	}
4886 
4887 	/*
4888 	 * validate state
4889 	 */
4890 	switch (tep->te_state) {
4891 	case TS_DATA_XFER:
4892 	case TS_WREQ_ORDREL:
4893 		/* valid states */
4894 		if (tep->te_conp != NULL)
4895 			break;
4896 
4897 		if (tep->te_oconp == NULL)
4898 			break;
4899 
4900 		/*
4901 		 * For a socket the T_CONN_CON is sent early thus
4902 		 * the peer might not yet have accepted the connection.
4903 		 * If we are closing queue the packet with the T_CONN_IND.
4904 		 * Otherwise defer processing the packet until the peer
4905 		 * accepts the connection.
4906 		 * Note that the queue is noenabled when we go into this
4907 		 * state.
4908 		 */
4909 		if (!closing) {
4910 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4911 			    SL_TRACE | SL_ERROR,
4912 			    "tl_ordlrel: ocon"));
4913 			TL_PUTBQ(tep, mp);
4914 			return;
4915 		}
4916 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4917 		    "tl_ordlrel: closing socket ocon"));
4918 		prim->type = T_ORDREL_IND;
4919 		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4920 		return;
4921 
4922 	default:
4923 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4924 		    SL_TRACE | SL_ERROR,
4925 		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4926 		    tep->te_state));
4927 		if (!closing) {
4928 			tl_merror(wq, mp, EPROTO);
4929 		} else {
4930 			freemsg(mp);
4931 		}
4932 		return;
4933 	}
4934 	tep->te_state = nextstate[TE_ORDREL_REQ][tep->te_state];
4935 
4936 	/*
4937 	 * get connected endpoint
4938 	 */
4939 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4940 		/* Peer closed */
4941 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4942 		    "tl_ordrel: peer gone"));
4943 		freemsg(mp);
4944 		return;
4945 	}
4946 
4947 	peer_rq = peer_tep->te_rq;
4948 
4949 	/*
4950 	 * Put it back if flow controlled except when we are closing.
4951 	 * Note: Messages already on queue when we are closing is bounded
4952 	 * so we can ignore flow control.
4953 	 */
4954 	if (!canputnext(peer_rq) && !closing) {
4955 		TL_PUTBQ(tep, mp);
4956 		return;
4957 	}
4958 
4959 	/*
4960 	 * validate state on peer
4961 	 */
4962 	switch (peer_tep->te_state) {
4963 	case TS_DATA_XFER:
4964 	case TS_WIND_ORDREL:
4965 		/* valid states */
4966 		break;
4967 	default:
4968 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4969 		    "tl_ordrel:rx side:invalid state"));
4970 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4971 		return;
4972 	}
4973 	peer_tep->te_state = nextstate[TE_ORDREL_IND][peer_tep->te_state];
4974 
4975 	/*
4976 	 * reuse message block
4977 	 */
4978 	prim->type = T_ORDREL_IND;
4979 	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4980 	    "tl_ordrel: send ordrel_ind"));
4981 
4982 	/*
4983 	 * send data to connected peer
4984 	 */
4985 	putnext(peer_rq, mp);
4986 }
4987 
4988 
4989 /*
4990  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4991  */
4992 static void
tl_uderr(queue_t * wq,mblk_t * mp,t_scalar_t err)4993 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4994 {
4995 	size_t			err_sz;
4996 	tl_endpt_t		*tep;
4997 	struct T_unitdata_req	*udreq;
4998 	mblk_t			*err_mp;
4999 	t_scalar_t		alen;
5000 	t_scalar_t		olen;
5001 	struct T_uderror_ind	*uderr;
5002 	uchar_t			*addr_startp;
5003 
5004 	err_sz = sizeof (struct T_uderror_ind);
5005 	tep = (tl_endpt_t *)wq->q_ptr;
5006 	udreq = (struct T_unitdata_req *)mp->b_rptr;
5007 	alen = udreq->DEST_length;
5008 	olen = udreq->OPT_length;
5009 
5010 	if (alen > 0)
5011 		err_sz = T_ALIGN(err_sz + alen);
5012 	if (olen > 0)
5013 		err_sz += olen;
5014 
5015 	err_mp = allocb(err_sz, BPRI_MED);
5016 	if (err_mp == NULL) {
5017 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5018 		    "tl_uderr:allocb failure"));
5019 		/*
5020 		 * Note: no rollback of state needed as it does
5021 		 * not change in connectionless transport
5022 		 */
5023 		tl_memrecover(wq, mp, err_sz);
5024 		return;
5025 	}
5026 
5027 	DB_TYPE(err_mp) = M_PROTO;
5028 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
5029 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
5030 	uderr->PRIM_type = T_UDERROR_IND;
5031 	uderr->ERROR_type = err;
5032 	uderr->DEST_length = alen;
5033 	uderr->OPT_length = olen;
5034 	if (alen <= 0) {
5035 		uderr->DEST_offset = 0;
5036 	} else {
5037 		uderr->DEST_offset =
5038 		    (t_scalar_t)sizeof (struct T_uderror_ind);
5039 		addr_startp = mp->b_rptr + udreq->DEST_offset;
5040 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
5041 		    (size_t)alen);
5042 	}
5043 	if (olen <= 0) {
5044 		uderr->OPT_offset = 0;
5045 	} else {
5046 		uderr->OPT_offset =
5047 		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
5048 		    uderr->DEST_length);
5049 		addr_startp = mp->b_rptr + udreq->OPT_offset;
5050 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
5051 		    (size_t)olen);
5052 	}
5053 	freemsg(mp);
5054 
5055 	/*
5056 	 * send indication message
5057 	 */
5058 	tep->te_state = nextstate[TE_UDERROR_IND][tep->te_state];
5059 
5060 	qreply(wq, err_mp);
5061 }
5062 
5063 static void
tl_unitdata_ser(mblk_t * mp,tl_endpt_t * tep)5064 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
5065 {
5066 	queue_t *wq = tep->te_wq;
5067 
5068 	if (!tep->te_closing && (wq->q_first != NULL)) {
5069 		TL_PUTQ(tep, mp);
5070 	} else {
5071 		if (tep->te_rq != NULL)
5072 			tl_unitdata(mp, tep);
5073 		else
5074 			freemsg(mp);
5075 	}
5076 
5077 	tl_serializer_exit(tep);
5078 	tl_refrele(tep);
5079 }
5080 
5081 /*
5082  * Handle T_unitdata_req.
5083  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
5084  * If this is a socket pass through options unmodified.
5085  */
5086 static void
tl_unitdata(mblk_t * mp,tl_endpt_t * tep)5087 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
5088 {
5089 	queue_t			*wq = tep->te_wq;
5090 	soux_addr_t		ux_addr;
5091 	tl_addr_t		destaddr;
5092 	uchar_t			*addr_startp;
5093 	tl_endpt_t		*peer_tep;
5094 	struct T_unitdata_ind	*udind;
5095 	struct T_unitdata_req	*udreq;
5096 	ssize_t			msz, ui_sz, reuse_mb_sz;
5097 	t_scalar_t		alen, aoff, olen, ooff;
5098 	t_scalar_t		oldolen = 0;
5099 	cred_t			*cr = NULL;
5100 	pid_t			cpid;
5101 
5102 	udreq = (struct T_unitdata_req *)mp->b_rptr;
5103 	msz = MBLKL(mp);
5104 
5105 	/*
5106 	 * validate the state
5107 	 */
5108 	if (tep->te_state != TS_IDLE) {
5109 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
5110 		    SL_TRACE | SL_ERROR,
5111 		    "tl_wput:T_CONN_REQ:out of state"));
5112 		tl_merror(wq, mp, EPROTO);
5113 		return;
5114 	}
5115 	/*
5116 	 * tep->te_state = nextstate[TE_UNITDATA_REQ][tep->te_state];
5117 	 * (state does not change on this event)
5118 	 */
5119 
5120 	/*
5121 	 * validate the message
5122 	 * Note: dereference fields in struct inside message only
5123 	 * after validating the message length.
5124 	 */
5125 	if (msz < sizeof (struct T_unitdata_req)) {
5126 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5127 		    "tl_unitdata:invalid message length"));
5128 		tl_merror(wq, mp, EINVAL);
5129 		return;
5130 	}
5131 	alen = udreq->DEST_length;
5132 	aoff = udreq->DEST_offset;
5133 	oldolen = olen = udreq->OPT_length;
5134 	ooff = udreq->OPT_offset;
5135 	if (olen == 0)
5136 		ooff = 0;
5137 
5138 	if (IS_SOCKET(tep)) {
5139 		if ((alen != TL_SOUX_ADDRLEN) ||
5140 		    (aoff < 0) ||
5141 		    (aoff + alen > msz) ||
5142 		    (olen < 0) || (ooff < 0) ||
5143 		    ((olen > 0) && ((ooff + olen) > msz))) {
5144 			(void) (STRLOG(TL_ID, tep->te_minor,
5145 			    1, SL_TRACE | SL_ERROR,
5146 			    "tl_unitdata_req: invalid socket addr "
5147 			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5148 			    (int)msz, alen, aoff, olen, ooff));
5149 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5150 			return;
5151 		}
5152 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5153 
5154 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5155 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5156 			(void) (STRLOG(TL_ID, tep->te_minor,
5157 			    1, SL_TRACE | SL_ERROR,
5158 			    "tl_conn_req: invalid socket magic"));
5159 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5160 			return;
5161 		}
5162 	} else {
5163 		if ((alen < 0) ||
5164 		    (aoff < 0) ||
5165 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5166 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5167 		    ((aoff + alen) < 0) ||
5168 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5169 		    (olen < 0) ||
5170 		    (ooff < 0) ||
5171 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5172 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5173 				    SL_TRACE | SL_ERROR,
5174 				    "tl_unitdata:invalid unit data message"));
5175 			tl_merror(wq, mp, EINVAL);
5176 			return;
5177 		}
5178 	}
5179 
5180 	/* Options not supported unless it's a socket */
5181 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5182 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5183 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5184 		tl_uderr(wq, mp, EPROTO);
5185 		return;
5186 	}
5187 #ifdef DEBUG
5188 	/*
5189 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5190 	 * if (!assertion)
5191 	 *	log warning;
5192 	 */
5193 	if (!(aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5194 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5195 		    "tl_unitdata:addr overlaps TPI message"));
5196 	}
5197 #endif
5198 	/*
5199 	 * get destination endpoint
5200 	 */
5201 	destaddr.ta_alen = alen;
5202 	destaddr.ta_abuf = mp->b_rptr + aoff;
5203 	destaddr.ta_zoneid = tep->te_zoneid;
5204 
5205 	/*
5206 	 * Check whether the destination is the same that was used previously
5207 	 * and the destination endpoint is in the right state. If something is
5208 	 * wrong, find destination again and cache it.
5209 	 */
5210 	peer_tep = tep->te_lastep;
5211 
5212 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5213 	    (peer_tep->te_state != TS_IDLE) ||
5214 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5215 		/*
5216 		 * Not the same as cached destination , need to find the right
5217 		 * destination.
5218 		 */
5219 		peer_tep = (IS_SOCKET(tep) ?
5220 		    tl_sock_find_peer(tep, &ux_addr) :
5221 		    tl_find_peer(tep, &destaddr));
5222 
5223 		if (peer_tep == NULL) {
5224 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5225 			    SL_TRACE | SL_ERROR,
5226 			    "tl_unitdata:no one at destination address"));
5227 			tl_uderr(wq, mp, ECONNRESET);
5228 			return;
5229 		}
5230 
5231 		/*
5232 		 * Cache the new peer.
5233 		 */
5234 		if (tep->te_lastep != NULL)
5235 			tl_refrele(tep->te_lastep);
5236 
5237 		tep->te_lastep = peer_tep;
5238 	}
5239 
5240 	if (peer_tep->te_state != TS_IDLE) {
5241 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5242 		    "tl_unitdata:provider in invalid state"));
5243 		tl_uderr(wq, mp, EPROTO);
5244 		return;
5245 	}
5246 
5247 	ASSERT(peer_tep->te_rq != NULL);
5248 
5249 	/*
5250 	 * Put it back if flow controlled except when we are closing.
5251 	 * Note: Messages already on queue when we are closing is bounded
5252 	 * so we can ignore flow control.
5253 	 */
5254 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5255 		/* record what we are flow controlled on */
5256 		if (tep->te_flowq != NULL) {
5257 			list_remove(&tep->te_flowq->te_flowlist, tep);
5258 		}
5259 		list_insert_head(&peer_tep->te_flowlist, tep);
5260 		tep->te_flowq = peer_tep;
5261 		TL_PUTBQ(tep, mp);
5262 		return;
5263 	}
5264 	/*
5265 	 * prepare indication message
5266 	 */
5267 
5268 	/*
5269 	 * calculate length of message
5270 	 */
5271 	if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5272 		cr = msg_getcred(mp, &cpid);
5273 		ASSERT(cr != NULL);
5274 
5275 		if (peer_tep->te_flag & TL_SETCRED) {
5276 			ASSERT(olen == 0);
5277 			olen = (t_scalar_t)sizeof (struct opthdr) +
5278 			    OPTLEN(sizeof (tl_credopt_t));
5279 						/* 1 option only */
5280 		} else if (peer_tep->te_flag & TL_SETUCRED) {
5281 			ASSERT(olen == 0);
5282 			olen = (t_scalar_t)sizeof (struct opthdr) +
5283 			    OPTLEN(ucredminsize(cr));
5284 						/* 1 option only */
5285 		} else {
5286 			/* Possibly more than one option */
5287 			olen += (t_scalar_t)sizeof (struct T_opthdr) +
5288 			    OPTLEN(ucredminsize(cr));
5289 		}
5290 	}
5291 
5292 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5293 	reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5294 
5295 	/*
5296 	 * If the unitdata_ind fits and we are not adding options
5297 	 * reuse the udreq mblk.
5298 	 *
5299 	 * Otherwise, it is possible we need to append an option if one of the
5300 	 * te_flag bits is set. This requires extra space in the data block for
5301 	 * the additional option but the traditional technique used below to
5302 	 * allocate a new block and copy into it will not work when there is a
5303 	 * message block with a free pointer (since we don't know anything
5304 	 * about the layout of the data, pointers referencing or within the
5305 	 * data, etc.). To handle this possibility the upper layers may have
5306 	 * preallocated some space to use for appending an option. We check the
5307 	 * overall mblock size against the size we need ('reuse_mb_sz' with the
5308 	 * original address length [alen] to ensure we won't overrun the
5309 	 * current mblk data size) to see if there is free space and thus
5310 	 * avoid allocating a new message block.
5311 	 */
5312 	if (msz >= ui_sz && alen >= tep->te_alen &&
5313 	    !(peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED))) {
5314 		/*
5315 		 * Reuse the original mblk. Leave options in place.
5316 		 */
5317 		udind = (struct T_unitdata_ind *)mp->b_rptr;
5318 		udind->PRIM_type = T_UNITDATA_IND;
5319 		udind->SRC_length = tep->te_alen;
5320 		addr_startp = mp->b_rptr + udind->SRC_offset;
5321 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5322 
5323 	} else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5324 	    mp->b_datap->db_frtnp != NULL) {
5325 		/*
5326 		 * We have a message block with a free pointer, but extra space
5327 		 * has been pre-allocated for us in case we need to append an
5328 		 * option. Reuse the original mblk, leaving existing options in
5329 		 * place.
5330 		 */
5331 		udind = (struct T_unitdata_ind *)mp->b_rptr;
5332 		udind->PRIM_type = T_UNITDATA_IND;
5333 		udind->SRC_length = tep->te_alen;
5334 		addr_startp = mp->b_rptr + udind->SRC_offset;
5335 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5336 
5337 		if (peer_tep->te_flag &
5338 		    (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5339 			ASSERT(cr != NULL);
5340 			/*
5341 			 * We're appending one new option here after the
5342 			 * original ones.
5343 			 */
5344 			tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5345 			    cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5346 		}
5347 
5348 	} else if (mp->b_datap->db_frtnp != NULL) {
5349 		/*
5350 		 * The next block creates a new mp and tries to copy the data
5351 		 * block into it, but that cannot handle a message with a free
5352 		 * pointer (for more details see the comment in kstrputmsg()
5353 		 * where dupmsg() is called). Since we can never properly
5354 		 * duplicate the mp while also extending the data, just error
5355 		 * out now.
5356 		 */
5357 		tl_uderr(wq, mp, EPROTO);
5358 		return;
5359 	} else {
5360 		/* Allocate a new T_unitdata_ind message */
5361 		mblk_t *ui_mp;
5362 
5363 		ui_mp = allocb(ui_sz, BPRI_MED);
5364 		if (ui_mp == NULL) {
5365 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5366 			    "tl_unitdata:allocb failure:message queued"));
5367 			tl_memrecover(wq, mp, ui_sz);
5368 			return;
5369 		}
5370 
5371 		/*
5372 		 * fill in T_UNITDATA_IND contents
5373 		 */
5374 		DB_TYPE(ui_mp) = M_PROTO;
5375 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5376 		udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
5377 		udind->PRIM_type = T_UNITDATA_IND;
5378 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5379 		udind->SRC_length = tep->te_alen;
5380 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5381 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5382 		udind->OPT_offset =
5383 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5384 		udind->OPT_length = olen;
5385 		if (peer_tep->te_flag &
5386 		    (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5387 
5388 			if (oldolen != 0) {
5389 				bcopy((void *)((uintptr_t)udreq + ooff),
5390 				    (void *)((uintptr_t)udind +
5391 				    udind->OPT_offset),
5392 				    oldolen);
5393 			}
5394 			ASSERT(cr != NULL);
5395 
5396 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5397 			    oldolen, cr, cpid,
5398 			    peer_tep->te_flag, peer_tep->te_credp);
5399 		} else {
5400 			bcopy((void *)((uintptr_t)udreq + ooff),
5401 			    (void *)((uintptr_t)udind + udind->OPT_offset),
5402 			    olen);
5403 		}
5404 
5405 		/*
5406 		 * relink data blocks from mp to ui_mp
5407 		 */
5408 		ui_mp->b_cont = mp->b_cont;
5409 		freeb(mp);
5410 		mp = ui_mp;
5411 	}
5412 	/*
5413 	 * send indication message
5414 	 */
5415 	peer_tep->te_state = nextstate[TE_UNITDATA_IND][peer_tep->te_state];
5416 	putnext(peer_tep->te_rq, mp);
5417 }
5418 
5419 
5420 
5421 /*
5422  * Check if a given addr is in use.
5423  * Endpoint ptr returned or NULL if not found.
5424  * The name space is separate for each mode. This implies that
5425  * sockets get their own name space.
5426  */
5427 static tl_endpt_t *
tl_find_peer(tl_endpt_t * tep,tl_addr_t * ap)5428 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5429 {
5430 	tl_endpt_t *peer_tep = NULL;
5431 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5432 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5433 
5434 	ASSERT(!IS_SOCKET(tep));
5435 
5436 	ASSERT(ap != NULL && ap->ta_alen > 0);
5437 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5438 	ASSERT(ap->ta_abuf != NULL);
5439 	EQUIV(rc == 0, peer_tep != NULL);
5440 	IMPLY(rc == 0,
5441 	    (tep->te_zoneid == peer_tep->te_zoneid) &&
5442 	    (tep->te_transport == peer_tep->te_transport));
5443 
5444 	if ((rc == 0) && (peer_tep->te_closing)) {
5445 		tl_refrele(peer_tep);
5446 		peer_tep = NULL;
5447 	}
5448 
5449 	return (peer_tep);
5450 }
5451 
5452 /*
5453  * Find peer for a socket based on unix domain address.
5454  * For implicit addresses our peer can be found by minor number in ai hash. For
5455  * explicit binds we look vnode address at addr_hash.
5456  */
5457 static tl_endpt_t *
tl_sock_find_peer(tl_endpt_t * tep,soux_addr_t * ux_addr)5458 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5459 {
5460 	tl_endpt_t *peer_tep = NULL;
5461 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5462 	    tep->te_aihash : tep->te_addrhash;
5463 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5464 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5465 
5466 	ASSERT(IS_SOCKET(tep));
5467 	EQUIV(rc == 0, peer_tep != NULL);
5468 	IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5469 
5470 	if (peer_tep != NULL) {
5471 		/* Don't attempt to use closing peer. */
5472 		if (peer_tep->te_closing)
5473 			goto errout;
5474 
5475 		/*
5476 		 * Cross-zone unix sockets are permitted, but for Trusted
5477 		 * Extensions only, the "server" for these must be in the
5478 		 * global zone.
5479 		 */
5480 		if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5481 		    is_system_labeled() &&
5482 		    (peer_tep->te_zoneid != GLOBAL_ZONEID))
5483 			goto errout;
5484 	}
5485 
5486 	return (peer_tep);
5487 
5488 errout:
5489 	tl_refrele(peer_tep);
5490 	return (NULL);
5491 }
5492 
5493 /*
5494  * Generate a free addr and return it in struct pointed by ap
5495  * but allocating space for address buffer.
5496  * The generated address will be at least 4 bytes long and, if req->ta_alen
5497  * exceeds 4 bytes, be req->ta_alen bytes long.
5498  *
5499  * If address is found it will be inserted in the hash.
5500  *
5501  * If req->ta_alen is larger than the default alen (4 bytes) the last
5502  * alen-4 bytes will always be the same as in req.
5503  *
5504  * Return 0 for failure.
5505  * Return non-zero for success.
5506  */
5507 static boolean_t
tl_get_any_addr(tl_endpt_t * tep,tl_addr_t * req)5508 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5509 {
5510 	t_scalar_t	alen;
5511 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5512 
5513 	ASSERT(tep->te_hash_hndl != NULL);
5514 	ASSERT(!IS_SOCKET(tep));
5515 
5516 	if (tep->te_hash_hndl == NULL)
5517 		return (B_FALSE);
5518 
5519 	/*
5520 	 * check if default addr is in use
5521 	 * if it is - bump it and try again
5522 	 */
5523 	if (req == NULL) {
5524 		alen = sizeof (uint32_t);
5525 	} else {
5526 		alen = max(req->ta_alen, sizeof (uint32_t));
5527 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5528 	}
5529 
5530 	if (tep->te_alen < alen) {
5531 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5532 
5533 		/*
5534 		 * Not enough space in tep->ta_ap to hold the address,
5535 		 * allocate a bigger space.
5536 		 */
5537 		if (abuf == NULL)
5538 			return (B_FALSE);
5539 
5540 		if (tep->te_alen > 0)
5541 			kmem_free(tep->te_abuf, tep->te_alen);
5542 
5543 		tep->te_alen = alen;
5544 		tep->te_abuf = abuf;
5545 	}
5546 
5547 	/* Copy in the address in req */
5548 	if (req != NULL) {
5549 		ASSERT(alen >= req->ta_alen);
5550 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5551 	}
5552 
5553 	/*
5554 	 * First try minor number then try default addresses.
5555 	 */
5556 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5557 
5558 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5559 		if (mod_hash_insert_reserve(tep->te_addrhash,
5560 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5561 		    tep->te_hash_hndl) == 0) {
5562 			/*
5563 			 * found free address
5564 			 */
5565 			tep->te_flag |= TL_ADDRHASHED;
5566 			tep->te_hash_hndl = NULL;
5567 
5568 			return (B_TRUE); /* successful return */
5569 		}
5570 		/*
5571 		 * Use default address.
5572 		 */
5573 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5574 		atomic_inc_32(&tep->te_defaddr);
5575 	}
5576 
5577 	/*
5578 	 * Failed to find anything.
5579 	 */
5580 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5581 	    "tl_get_any_addr:looped 2^32 times"));
5582 	return (B_FALSE);
5583 }
5584 
5585 /*
5586  * reallocb + set r/w ptrs to reflect size.
5587  */
5588 static mblk_t *
tl_resizemp(mblk_t * mp,ssize_t new_size)5589 tl_resizemp(mblk_t *mp, ssize_t new_size)
5590 {
5591 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5592 		return (NULL);
5593 
5594 	mp->b_rptr = DB_BASE(mp);
5595 	mp->b_wptr = mp->b_rptr + new_size;
5596 	return (mp);
5597 }
5598 
5599 static void
tl_cl_backenable(tl_endpt_t * tep)5600 tl_cl_backenable(tl_endpt_t *tep)
5601 {
5602 	list_t *l = &tep->te_flowlist;
5603 	tl_endpt_t *elp;
5604 
5605 	ASSERT(IS_CLTS(tep));
5606 
5607 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5608 		ASSERT(tep->te_ser == elp->te_ser);
5609 		ASSERT(elp->te_flowq == tep);
5610 		if (!elp->te_closing)
5611 			TL_QENABLE(elp);
5612 		elp->te_flowq = NULL;
5613 		list_remove(l, elp);
5614 	}
5615 }
5616 
5617 /*
5618  * Unconnect endpoints.
5619  */
5620 static void
tl_co_unconnect(tl_endpt_t * tep)5621 tl_co_unconnect(tl_endpt_t *tep)
5622 {
5623 	tl_endpt_t	*peer_tep = tep->te_conp;
5624 	tl_endpt_t	*srv_tep = tep->te_oconp;
5625 	list_t		*l;
5626 	tl_icon_t	*tip;
5627 	tl_endpt_t	*cl_tep;
5628 	mblk_t		*d_mp;
5629 
5630 	ASSERT(IS_COTS(tep));
5631 	/*
5632 	 * If our peer is closing, don't use it.
5633 	 */
5634 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5635 		TL_UNCONNECT(tep->te_conp);
5636 		peer_tep = NULL;
5637 	}
5638 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5639 		TL_UNCONNECT(tep->te_oconp);
5640 		srv_tep = NULL;
5641 	}
5642 
5643 	if (tep->te_nicon > 0) {
5644 		l = &tep->te_iconp;
5645 		/*
5646 		 * If incoming requests pending, change state
5647 		 * of clients on disconnect ind event and send
5648 		 * discon_ind pdu to modules above them
5649 		 * for server: all clients get disconnect
5650 		 */
5651 
5652 		while (tep->te_nicon > 0) {
5653 			tip    = list_head(l);
5654 			cl_tep = tip->ti_tep;
5655 
5656 			if (cl_tep == NULL) {
5657 				tl_freetip(tep, tip);
5658 				continue;
5659 			}
5660 
5661 			if (cl_tep->te_oconp != NULL) {
5662 				ASSERT(cl_tep != cl_tep->te_oconp);
5663 				TL_UNCONNECT(cl_tep->te_oconp);
5664 			}
5665 
5666 			if (cl_tep->te_closing) {
5667 				tl_freetip(tep, tip);
5668 				continue;
5669 			}
5670 
5671 			enableok(cl_tep->te_wq);
5672 			TL_QENABLE(cl_tep);
5673 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5674 			if (d_mp != NULL) {
5675 				cl_tep->te_state = TS_IDLE;
5676 				putnext(cl_tep->te_rq, d_mp);
5677 			} else {
5678 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5679 				    SL_TRACE | SL_ERROR,
5680 				    "tl_co_unconnect:icmng: "
5681 				    "allocb failure"));
5682 			}
5683 			tl_freetip(tep, tip);
5684 		}
5685 	} else if (srv_tep != NULL) {
5686 		/*
5687 		 * If outgoing request pending, change state
5688 		 * of server on discon ind event
5689 		 */
5690 
5691 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5692 		    IS_COTSORD(srv_tep) &&
5693 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5694 			/*
5695 			 * Queue ordrel_ind for server to be picked up
5696 			 * when the connection is accepted.
5697 			 */
5698 			d_mp = tl_ordrel_ind_alloc();
5699 		} else {
5700 			/*
5701 			 * send discon_ind to server
5702 			 */
5703 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5704 		}
5705 		if (d_mp == NULL) {
5706 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5707 			    SL_TRACE | SL_ERROR,
5708 			    "tl_co_unconnect:outgoing:allocb failure"));
5709 			TL_UNCONNECT(tep->te_oconp);
5710 			goto discon_peer;
5711 		}
5712 
5713 		/*
5714 		 * If this is a socket the T_DISCON_IND is queued with
5715 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5716 		 * from the list of pending connections.
5717 		 * Note that when te_oconp is set the peer better have
5718 		 * a t_connind_t for the client.
5719 		 */
5720 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5721 			/*
5722 			 * Queue the disconnection message.
5723 			 */
5724 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5725 		} else {
5726 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5727 			if (tip == NULL) {
5728 				freemsg(d_mp);
5729 			} else {
5730 				ASSERT(tep == tip->ti_tep);
5731 				ASSERT(tep->te_ser == srv_tep->te_ser);
5732 				/*
5733 				 * Delete tip from the server list.
5734 				 */
5735 				if (srv_tep->te_nicon == 1) {
5736 					srv_tep->te_state =
5737 					    nextstate[TE_DISCON_IND2]
5738 					    [srv_tep->te_state];
5739 				} else {
5740 					srv_tep->te_state =
5741 					    nextstate[TE_DISCON_IND3]
5742 					    [srv_tep->te_state];
5743 				}
5744 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5745 				    T_DISCON_IND);
5746 				putnext(srv_tep->te_rq, d_mp);
5747 				tl_freetip(srv_tep, tip);
5748 			}
5749 			TL_UNCONNECT(tep->te_oconp);
5750 			srv_tep = NULL;
5751 		}
5752 	} else if (peer_tep != NULL) {
5753 		/*
5754 		 * unconnect existing connection
5755 		 * If connected, change state of peer on
5756 		 * discon ind event and send discon ind pdu
5757 		 * to module above it
5758 		 */
5759 
5760 		ASSERT(tep->te_ser == peer_tep->te_ser);
5761 		if (IS_COTSORD(peer_tep) &&
5762 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5763 		    peer_tep->te_state == TS_DATA_XFER)) {
5764 			/*
5765 			 * send ordrel ind
5766 			 */
5767 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5768 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5769 			    peer_tep->te_state,
5770 			    nextstate[TE_ORDREL_IND][peer_tep->te_state]));
5771 			d_mp = tl_ordrel_ind_alloc();
5772 			if (d_mp == NULL) {
5773 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5774 				    SL_TRACE | SL_ERROR,
5775 				    "tl_co_unconnect:connected:"
5776 				    "allocb failure"));
5777 				/*
5778 				 * Continue with cleaning up peer as
5779 				 * this side may go away with the close
5780 				 */
5781 				TL_QENABLE(peer_tep);
5782 				goto discon_peer;
5783 			}
5784 			peer_tep->te_state =
5785 			    nextstate[TE_ORDREL_IND][peer_tep->te_state];
5786 
5787 			putnext(peer_tep->te_rq, d_mp);
5788 			/*
5789 			 * Handle flow control case.  This will generate
5790 			 * a t_discon_ind message with reason 0 if there
5791 			 * is data queued on the write side.
5792 			 */
5793 			TL_QENABLE(peer_tep);
5794 		} else if (IS_COTSORD(peer_tep) &&
5795 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5796 			/*
5797 			 * Sent an ordrel_ind. We send a discon with
5798 			 * with error 0 to inform that the peer is gone.
5799 			 */
5800 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5801 			    SL_TRACE | SL_ERROR,
5802 			    "tl_co_unconnect: discon in state %d",
5803 			    tep->te_state));
5804 			tl_discon_ind(peer_tep, 0);
5805 		} else {
5806 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5807 			    SL_TRACE | SL_ERROR,
5808 			    "tl_co_unconnect: state %d", tep->te_state));
5809 			tl_discon_ind(peer_tep, ECONNRESET);
5810 		}
5811 
5812 discon_peer:
5813 		/*
5814 		 * Disconnect cross-pointers only for close
5815 		 */
5816 		if (tep->te_closing) {
5817 			peer_tep = tep->te_conp;
5818 			TL_REMOVE_PEER(peer_tep->te_conp);
5819 			TL_REMOVE_PEER(tep->te_conp);
5820 		}
5821 	}
5822 }
5823 
5824 /*
5825  * Note: The following routine does not recover from allocb()
5826  * failures
5827  * The reason should be from the <sys/errno.h> space.
5828  */
5829 static void
tl_discon_ind(tl_endpt_t * tep,uint32_t reason)5830 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5831 {
5832 	mblk_t *d_mp;
5833 
5834 	if (tep->te_closing)
5835 		return;
5836 
5837 	/*
5838 	 * flush the queues.
5839 	 */
5840 	flushq(tep->te_rq, FLUSHDATA);
5841 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5842 
5843 	/*
5844 	 * send discon ind
5845 	 */
5846 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5847 	if (d_mp == NULL) {
5848 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5849 		    "tl_discon_ind:allocb failure"));
5850 		return;
5851 	}
5852 	tep->te_state = TS_IDLE;
5853 	putnext(tep->te_rq, d_mp);
5854 }
5855 
5856 /*
5857  * Note: The following routine does not recover from allocb()
5858  * failures
5859  * The reason should be from the <sys/errno.h> space.
5860  */
5861 static mblk_t *
tl_discon_ind_alloc(uint32_t reason,t_scalar_t seqnum)5862 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5863 {
5864 	mblk_t *mp;
5865 	struct T_discon_ind *tdi;
5866 
5867 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5868 		DB_TYPE(mp) = M_PROTO;
5869 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5870 		tdi = (struct T_discon_ind *)mp->b_rptr;
5871 		tdi->PRIM_type = T_DISCON_IND;
5872 		tdi->DISCON_reason = reason;
5873 		tdi->SEQ_number = seqnum;
5874 	}
5875 	return (mp);
5876 }
5877 
5878 
5879 /*
5880  * Note: The following routine does not recover from allocb()
5881  * failures
5882  */
5883 static mblk_t *
tl_ordrel_ind_alloc(void)5884 tl_ordrel_ind_alloc(void)
5885 {
5886 	mblk_t *mp;
5887 	struct T_ordrel_ind *toi;
5888 
5889 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5890 		DB_TYPE(mp) = M_PROTO;
5891 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5892 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5893 		toi->PRIM_type = T_ORDREL_IND;
5894 	}
5895 	return (mp);
5896 }
5897 
5898 
5899 /*
5900  * Lookup the seqno in the list of queued connections.
5901  */
5902 static tl_icon_t *
tl_icon_find(tl_endpt_t * tep,t_scalar_t seqno)5903 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5904 {
5905 	list_t *l = &tep->te_iconp;
5906 	tl_icon_t *tip = list_head(l);
5907 
5908 	ASSERT(seqno != 0);
5909 
5910 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5911 		;
5912 
5913 	return (tip);
5914 }
5915 
5916 /*
5917  * Queue data for a given T_CONN_IND while verifying that redundant
5918  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5919  * Used when the originator of the connection closes.
5920  */
5921 static void
tl_icon_queuemsg(tl_endpt_t * tep,t_scalar_t seqno,mblk_t * nmp)5922 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5923 {
5924 	tl_icon_t		*tip;
5925 	mblk_t			**mpp, *mp;
5926 	int			prim, nprim;
5927 
5928 	if (nmp->b_datap->db_type == M_PROTO)
5929 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5930 	else
5931 		nprim = -1;	/* M_DATA */
5932 
5933 	tip = tl_icon_find(tep, seqno);
5934 	if (tip == NULL) {
5935 		freemsg(nmp);
5936 		return;
5937 	}
5938 
5939 	ASSERT(tip->ti_seqno != 0);
5940 	mpp = &tip->ti_mp;
5941 	while (*mpp != NULL) {
5942 		mp = *mpp;
5943 
5944 		if (mp->b_datap->db_type == M_PROTO)
5945 			prim = ((union T_primitives *)mp->b_rptr)->type;
5946 		else
5947 			prim = -1;	/* M_DATA */
5948 
5949 		/*
5950 		 * Allow nothing after a T_DISCON_IND
5951 		 */
5952 		if (prim == T_DISCON_IND) {
5953 			freemsg(nmp);
5954 			return;
5955 		}
5956 		/*
5957 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5958 		 */
5959 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5960 			freemsg(nmp);
5961 			return;
5962 		}
5963 		mpp = &(mp->b_next);
5964 	}
5965 	*mpp = nmp;
5966 }
5967 
5968 /*
5969  * Verify if a certain TPI primitive exists on the connind queue.
5970  * Use prim -1 for M_DATA.
5971  * Return non-zero if found.
5972  */
5973 static boolean_t
tl_icon_hasprim(tl_endpt_t * tep,t_scalar_t seqno,t_scalar_t prim)5974 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5975 {
5976 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5977 	boolean_t found = B_FALSE;
5978 
5979 	if (tip != NULL) {
5980 		mblk_t *mp;
5981 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5982 			found = (DB_TYPE(mp) == M_PROTO &&
5983 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5984 		}
5985 	}
5986 	return (found);
5987 }
5988 
5989 /*
5990  * Send the b_next mblk chain that has accumulated before the connection
5991  * was accepted. Perform the necessary state transitions.
5992  */
5993 static void
tl_icon_sendmsgs(tl_endpt_t * tep,mblk_t ** mpp)5994 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5995 {
5996 	mblk_t			*mp;
5997 	union T_primitives	*primp;
5998 
5999 	if (tep->te_closing) {
6000 		tl_icon_freemsgs(mpp);
6001 		return;
6002 	}
6003 
6004 	ASSERT(tep->te_state == TS_DATA_XFER);
6005 	ASSERT(tep->te_rq->q_first == NULL);
6006 
6007 	while ((mp = *mpp) != NULL) {
6008 		*mpp = mp->b_next;
6009 		mp->b_next = NULL;
6010 
6011 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
6012 		switch (DB_TYPE(mp)) {
6013 		default:
6014 			freemsg(mp);
6015 			break;
6016 		case M_DATA:
6017 			putnext(tep->te_rq, mp);
6018 			break;
6019 		case M_PROTO:
6020 			primp = (union T_primitives *)mp->b_rptr;
6021 			switch (primp->type) {
6022 			case T_UNITDATA_IND:
6023 			case T_DATA_IND:
6024 			case T_OPTDATA_IND:
6025 			case T_EXDATA_IND:
6026 				putnext(tep->te_rq, mp);
6027 				break;
6028 			case T_ORDREL_IND:
6029 				tep->te_state = nextstate[TE_ORDREL_IND]
6030 				    [tep->te_state];
6031 				putnext(tep->te_rq, mp);
6032 				break;
6033 			case T_DISCON_IND:
6034 				tep->te_state = TS_IDLE;
6035 				putnext(tep->te_rq, mp);
6036 				break;
6037 			default:
6038 #ifdef DEBUG
6039 				cmn_err(CE_PANIC,
6040 				    "tl_icon_sendmsgs: unknown primitive");
6041 #endif /* DEBUG */
6042 				freemsg(mp);
6043 				break;
6044 			}
6045 			break;
6046 		}
6047 	}
6048 }
6049 
6050 /*
6051  * Free the b_next mblk chain that has accumulated before the connection
6052  * was accepted.
6053  */
6054 static void
tl_icon_freemsgs(mblk_t ** mpp)6055 tl_icon_freemsgs(mblk_t **mpp)
6056 {
6057 	mblk_t *mp;
6058 
6059 	while ((mp = *mpp) != NULL) {
6060 		*mpp = mp->b_next;
6061 		mp->b_next = NULL;
6062 		freemsg(mp);
6063 	}
6064 }
6065 
6066 /*
6067  * Send M_ERROR
6068  * Note: assumes caller ensured enough space in mp or enough
6069  *	memory available. Does not attempt recovery from allocb()
6070  *	failures
6071  */
6072 
6073 static void
tl_merror(queue_t * wq,mblk_t * mp,int error)6074 tl_merror(queue_t *wq, mblk_t *mp, int error)
6075 {
6076 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6077 
6078 	if (tep->te_closing) {
6079 		freemsg(mp);
6080 		return;
6081 	}
6082 
6083 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
6084 	    SL_TRACE | SL_ERROR,
6085 	    "tl_merror: tep=%p, err=%d", (void *)tep, error));
6086 
6087 	/*
6088 	 * flush all messages on queue. we are shutting
6089 	 * the stream down on fatal error
6090 	 */
6091 	flushq(wq, FLUSHALL);
6092 	if (IS_COTS(tep)) {
6093 		/* connection oriented - unconnect endpoints */
6094 		tl_co_unconnect(tep);
6095 	}
6096 	if (mp->b_cont) {
6097 		freemsg(mp->b_cont);
6098 		mp->b_cont = NULL;
6099 	}
6100 
6101 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
6102 		freemsg(mp);
6103 		mp = allocb(1, BPRI_HI);
6104 		if (mp == NULL) {
6105 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6106 			    SL_TRACE | SL_ERROR,
6107 			    "tl_merror:M_PROTO: out of memory"));
6108 			return;
6109 		}
6110 	}
6111 	if (mp) {
6112 		DB_TYPE(mp) = M_ERROR;
6113 		mp->b_rptr = DB_BASE(mp);
6114 		*mp->b_rptr = (char)error;
6115 		mp->b_wptr = mp->b_rptr + sizeof (char);
6116 		qreply(wq, mp);
6117 	} else {
6118 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
6119 	}
6120 }
6121 
6122 static void
tl_fill_option(uchar_t * buf,cred_t * cr,pid_t cpid,int flag,cred_t * pcr)6123 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
6124 {
6125 	ASSERT(cr != NULL);
6126 
6127 	if (flag & TL_SETCRED) {
6128 		struct opthdr *opt = (struct opthdr *)buf;
6129 		tl_credopt_t *tlcred;
6130 
6131 		opt->level = TL_PROT_LEVEL;
6132 		opt->name = TL_OPT_PEER_CRED;
6133 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
6134 
6135 		tlcred = (tl_credopt_t *)(opt + 1);
6136 		tlcred->tc_uid = crgetuid(cr);
6137 		tlcred->tc_gid = crgetgid(cr);
6138 		tlcred->tc_ruid = crgetruid(cr);
6139 		tlcred->tc_rgid = crgetrgid(cr);
6140 		tlcred->tc_suid = crgetsuid(cr);
6141 		tlcred->tc_sgid = crgetsgid(cr);
6142 		tlcred->tc_ngroups = crgetngroups(cr);
6143 	} else if (flag & TL_SETUCRED) {
6144 		struct opthdr *opt = (struct opthdr *)buf;
6145 
6146 		opt->level = TL_PROT_LEVEL;
6147 		opt->name = TL_OPT_PEER_UCRED;
6148 		opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6149 
6150 		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6151 	} else {
6152 		struct T_opthdr *topt = (struct T_opthdr *)buf;
6153 		ASSERT(flag & TL_SOCKUCRED);
6154 
6155 		topt->level = SOL_SOCKET;
6156 		topt->name = SCM_UCRED;
6157 		topt->len = ucredminsize(cr) + sizeof (*topt);
6158 		topt->status = 0;
6159 		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6160 	}
6161 }
6162 
6163 /* ARGSUSED */
6164 static int
tl_default_opt(queue_t * wq,int level,int name,uchar_t * ptr)6165 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6166 {
6167 	/* no default value processed in protocol specific code currently */
6168 	return (-1);
6169 }
6170 
6171 /* ARGSUSED */
6172 static int
tl_get_opt(queue_t * wq,int level,int name,uchar_t * ptr)6173 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6174 {
6175 	int len;
6176 	tl_endpt_t *tep;
6177 	int *valp;
6178 
6179 	tep = (tl_endpt_t *)wq->q_ptr;
6180 
6181 	len = 0;
6182 
6183 	/*
6184 	 * Assumes: option level and name sanity check done elsewhere
6185 	 */
6186 
6187 	switch (level) {
6188 	case SOL_SOCKET:
6189 		if (!IS_SOCKET(tep))
6190 			break;
6191 		switch (name) {
6192 		case SO_RECVUCRED:
6193 			len = sizeof (int);
6194 			valp = (int *)ptr;
6195 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6196 			break;
6197 		default:
6198 			break;
6199 		}
6200 		break;
6201 	case TL_PROT_LEVEL:
6202 		switch (name) {
6203 		case TL_OPT_PEER_CRED:
6204 		case TL_OPT_PEER_UCRED:
6205 			/*
6206 			 * option not supposed to retrieved directly
6207 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6208 			 * when some internal flags set by other options
6209 			 * Direct retrieval always designed to fail(ignored)
6210 			 * for this option.
6211 			 */
6212 			break;
6213 		}
6214 	}
6215 	return (len);
6216 }
6217 
6218 /* ARGSUSED */
6219 static int
tl_set_opt(queue_t * wq,uint_t mgmt_flags,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)6220 tl_set_opt(queue_t *wq, uint_t mgmt_flags, int level, int name, uint_t inlen,
6221     uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs,
6222     cred_t *cr)
6223 {
6224 	int error;
6225 	tl_endpt_t *tep;
6226 
6227 	tep = (tl_endpt_t *)wq->q_ptr;
6228 
6229 	error = 0;		/* NOERROR */
6230 
6231 	/*
6232 	 * Assumes: option level and name sanity checks done elsewhere
6233 	 */
6234 
6235 	switch (level) {
6236 	case SOL_SOCKET:
6237 		if (!IS_SOCKET(tep)) {
6238 			error = EINVAL;
6239 			break;
6240 		}
6241 		/*
6242 		 * TBD: fill in other AF_UNIX socket options and then stop
6243 		 * returning error.
6244 		 */
6245 		switch (name) {
6246 		case SO_RECVUCRED:
6247 			/*
6248 			 * We only support this for datagram sockets;
6249 			 * getpeerucred handles the connection oriented
6250 			 * transports.
6251 			 */
6252 			if (!IS_CLTS(tep)) {
6253 				error = EINVAL;
6254 				break;
6255 			}
6256 			if (*(int *)invalp == 0)
6257 				tep->te_flag &= ~TL_SOCKUCRED;
6258 			else
6259 				tep->te_flag |= TL_SOCKUCRED;
6260 			break;
6261 		default:
6262 			error = EINVAL;
6263 			break;
6264 		}
6265 		break;
6266 	case TL_PROT_LEVEL:
6267 		switch (name) {
6268 		case TL_OPT_PEER_CRED:
6269 		case TL_OPT_PEER_UCRED:
6270 			/*
6271 			 * option not supposed to be set directly
6272 			 * Its value in initialized for each endpoint at
6273 			 * driver open time.
6274 			 * Direct setting always designed to fail for this
6275 			 * option.
6276 			 */
6277 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6278 			    SL_TRACE | SL_ERROR,
6279 			    "tl_set_opt: option is not supported"));
6280 			error = EPROTO;
6281 			break;
6282 		}
6283 	}
6284 	return (error);
6285 }
6286 
6287 
6288 static void
tl_timer(void * arg)6289 tl_timer(void *arg)
6290 {
6291 	queue_t *wq = arg;
6292 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6293 
6294 	ASSERT(tep);
6295 
6296 	tep->te_timoutid = 0;
6297 
6298 	enableok(wq);
6299 	/*
6300 	 * Note: can call wsrv directly here and save context switch
6301 	 * Consider change when qtimeout (not timeout) is active
6302 	 */
6303 	qenable(wq);
6304 }
6305 
6306 static void
tl_buffer(void * arg)6307 tl_buffer(void *arg)
6308 {
6309 	queue_t *wq = arg;
6310 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6311 
6312 	ASSERT(tep);
6313 
6314 	tep->te_bufcid = 0;
6315 	tep->te_nowsrv = B_FALSE;
6316 
6317 	enableok(wq);
6318 	/*
6319 	 *  Note: can call wsrv directly here and save context switch
6320 	 * Consider change when qbufcall (not bufcall) is active
6321 	 */
6322 	qenable(wq);
6323 }
6324 
6325 static void
tl_memrecover(queue_t * wq,mblk_t * mp,size_t size)6326 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6327 {
6328 	tl_endpt_t *tep;
6329 
6330 	tep = (tl_endpt_t *)wq->q_ptr;
6331 
6332 	if (tep->te_closing) {
6333 		freemsg(mp);
6334 		return;
6335 	}
6336 	noenable(wq);
6337 
6338 	(void) insq(wq, wq->q_first, mp);
6339 
6340 	if (tep->te_bufcid || tep->te_timoutid) {
6341 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
6342 		    "tl_memrecover:recover %p pending", (void *)wq));
6343 		return;
6344 	}
6345 
6346 	tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq);
6347 	if (tep->te_bufcid == NULL) {
6348 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6349 		    drv_usectohz(TL_BUFWAIT));
6350 	}
6351 }
6352 
6353 static void
tl_freetip(tl_endpt_t * tep,tl_icon_t * tip)6354 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6355 {
6356 	ASSERT(tip->ti_seqno != 0);
6357 
6358 	if (tip->ti_mp != NULL) {
6359 		tl_icon_freemsgs(&tip->ti_mp);
6360 		tip->ti_mp = NULL;
6361 	}
6362 	if (tip->ti_tep != NULL) {
6363 		tl_refrele(tip->ti_tep);
6364 		tip->ti_tep = NULL;
6365 	}
6366 	list_remove(&tep->te_iconp, tip);
6367 	kmem_free(tip, sizeof (tl_icon_t));
6368 	tep->te_nicon--;
6369 }
6370 
6371 /*
6372  * Remove address from address hash.
6373  */
6374 static void
tl_addr_unbind(tl_endpt_t * tep)6375 tl_addr_unbind(tl_endpt_t *tep)
6376 {
6377 	tl_endpt_t *elp;
6378 
6379 	if (tep->te_flag & TL_ADDRHASHED) {
6380 		if (IS_SOCKET(tep)) {
6381 			(void) mod_hash_remove(tep->te_addrhash,
6382 			    (mod_hash_key_t)tep->te_vp,
6383 			    (mod_hash_val_t *)&elp);
6384 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6385 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6386 		} else {
6387 			(void) mod_hash_remove(tep->te_addrhash,
6388 			    (mod_hash_key_t)&tep->te_ap,
6389 			    (mod_hash_val_t *)&elp);
6390 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6391 			tep->te_alen = -1;
6392 			tep->te_abuf = NULL;
6393 		}
6394 		tep->te_flag &= ~TL_ADDRHASHED;
6395 	}
6396 }
6397