xref: /titanic_52/usr/src/uts/common/inet/tcp/tcp_tpi.c (revision dd49f125507979bb2ab505a8daf2a46d1be27051)
1439b3deaSKacheong Poon /*
2439b3deaSKacheong Poon  * CDDL HEADER START
3439b3deaSKacheong Poon  *
4439b3deaSKacheong Poon  * The contents of this file are subject to the terms of the
5439b3deaSKacheong Poon  * Common Development and Distribution License (the "License").
6439b3deaSKacheong Poon  * You may not use this file except in compliance with the License.
7439b3deaSKacheong Poon  *
8439b3deaSKacheong Poon  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9439b3deaSKacheong Poon  * or http://www.opensolaris.org/os/licensing.
10439b3deaSKacheong Poon  * See the License for the specific language governing permissions
11439b3deaSKacheong Poon  * and limitations under the License.
12439b3deaSKacheong Poon  *
13439b3deaSKacheong Poon  * When distributing Covered Code, include this CDDL HEADER in each
14439b3deaSKacheong Poon  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15439b3deaSKacheong Poon  * If applicable, add the following below this CDDL HEADER, with the
16439b3deaSKacheong Poon  * fields enclosed by brackets "[]" replaced with your own identifying
17439b3deaSKacheong Poon  * information: Portions Copyright [yyyy] [name of copyright owner]
18439b3deaSKacheong Poon  *
19439b3deaSKacheong Poon  * CDDL HEADER END
20439b3deaSKacheong Poon  */
21439b3deaSKacheong Poon 
22439b3deaSKacheong Poon /*
233e95bd4aSAnders Persson  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24439b3deaSKacheong Poon  */
25439b3deaSKacheong Poon 
26439b3deaSKacheong Poon /* This file contains all TCP TLI/TPI related functions */
27439b3deaSKacheong Poon 
28439b3deaSKacheong Poon #include <sys/types.h>
29439b3deaSKacheong Poon #include <sys/stream.h>
30439b3deaSKacheong Poon #include <sys/strsun.h>
31439b3deaSKacheong Poon #include <sys/strsubr.h>
32439b3deaSKacheong Poon #include <sys/stropts.h>
33439b3deaSKacheong Poon #include <sys/strlog.h>
34439b3deaSKacheong Poon #define	_SUN_TPI_VERSION 2
35439b3deaSKacheong Poon #include <sys/tihdr.h>
36439b3deaSKacheong Poon #include <sys/suntpi.h>
37439b3deaSKacheong Poon #include <sys/xti_inet.h>
38439b3deaSKacheong Poon #include <sys/squeue_impl.h>
39439b3deaSKacheong Poon #include <sys/squeue.h>
40439b3deaSKacheong Poon 
41439b3deaSKacheong Poon #include <inet/common.h>
42439b3deaSKacheong Poon #include <inet/ip.h>
43439b3deaSKacheong Poon #include <inet/tcp.h>
44439b3deaSKacheong Poon #include <inet/tcp_impl.h>
45439b3deaSKacheong Poon #include <inet/proto_set.h>
46439b3deaSKacheong Poon 
47439b3deaSKacheong Poon static void	tcp_accept_swap(tcp_t *, tcp_t *, tcp_t *);
48439b3deaSKacheong Poon static int	tcp_conprim_opt_process(tcp_t *, mblk_t *, int *, int *, int *);
49439b3deaSKacheong Poon 
50439b3deaSKacheong Poon void
51439b3deaSKacheong Poon tcp_use_pure_tpi(tcp_t *tcp)
52439b3deaSKacheong Poon {
53439b3deaSKacheong Poon 	conn_t		*connp = tcp->tcp_connp;
54439b3deaSKacheong Poon 
55439b3deaSKacheong Poon #ifdef	_ILP32
56439b3deaSKacheong Poon 	tcp->tcp_acceptor_id = (t_uscalar_t)connp->conn_rq;
57439b3deaSKacheong Poon #else
58439b3deaSKacheong Poon 	tcp->tcp_acceptor_id = connp->conn_dev;
59439b3deaSKacheong Poon #endif
60439b3deaSKacheong Poon 	/*
61439b3deaSKacheong Poon 	 * Insert this socket into the acceptor hash.
62439b3deaSKacheong Poon 	 * We might need it for T_CONN_RES message
63439b3deaSKacheong Poon 	 */
64439b3deaSKacheong Poon 	tcp_acceptor_hash_insert(tcp->tcp_acceptor_id, tcp);
65439b3deaSKacheong Poon 
66439b3deaSKacheong Poon 	tcp->tcp_issocket = B_FALSE;
67439b3deaSKacheong Poon 	TCP_STAT(tcp->tcp_tcps, tcp_sock_fallback);
68439b3deaSKacheong Poon }
69439b3deaSKacheong Poon 
70439b3deaSKacheong Poon /* Shorthand to generate and send TPI error acks to our client */
71439b3deaSKacheong Poon void
72439b3deaSKacheong Poon tcp_err_ack(tcp_t *tcp, mblk_t *mp, int t_error, int sys_error)
73439b3deaSKacheong Poon {
74439b3deaSKacheong Poon 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
75439b3deaSKacheong Poon 		putnext(tcp->tcp_connp->conn_rq, mp);
76439b3deaSKacheong Poon }
77439b3deaSKacheong Poon 
78439b3deaSKacheong Poon /* Shorthand to generate and send TPI error acks to our client */
79439b3deaSKacheong Poon void
80439b3deaSKacheong Poon tcp_err_ack_prim(tcp_t *tcp, mblk_t *mp, int primitive,
81439b3deaSKacheong Poon     int t_error, int sys_error)
82439b3deaSKacheong Poon {
83439b3deaSKacheong Poon 	struct T_error_ack	*teackp;
84439b3deaSKacheong Poon 
85439b3deaSKacheong Poon 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
86439b3deaSKacheong Poon 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
87439b3deaSKacheong Poon 		teackp = (struct T_error_ack *)mp->b_rptr;
88439b3deaSKacheong Poon 		teackp->ERROR_prim = primitive;
89439b3deaSKacheong Poon 		teackp->TLI_error = t_error;
90439b3deaSKacheong Poon 		teackp->UNIX_error = sys_error;
91439b3deaSKacheong Poon 		putnext(tcp->tcp_connp->conn_rq, mp);
92439b3deaSKacheong Poon 	}
93439b3deaSKacheong Poon }
94439b3deaSKacheong Poon 
95439b3deaSKacheong Poon /*
96439b3deaSKacheong Poon  * TCP routine to get the values of options.
97439b3deaSKacheong Poon  */
98439b3deaSKacheong Poon int
99439b3deaSKacheong Poon tcp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
100439b3deaSKacheong Poon {
101439b3deaSKacheong Poon 	return (tcp_opt_get(Q_TO_CONN(q), level, name, ptr));
102439b3deaSKacheong Poon }
103439b3deaSKacheong Poon 
104439b3deaSKacheong Poon /* ARGSUSED */
105439b3deaSKacheong Poon int
106439b3deaSKacheong Poon tcp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
107439b3deaSKacheong Poon     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
108439b3deaSKacheong Poon     void *thisdg_attrs, cred_t *cr)
109439b3deaSKacheong Poon {
110439b3deaSKacheong Poon 	conn_t	*connp =  Q_TO_CONN(q);
111439b3deaSKacheong Poon 
112439b3deaSKacheong Poon 	return (tcp_opt_set(connp, optset_context, level, name, inlen, invalp,
113439b3deaSKacheong Poon 	    outlenp, outvalp, thisdg_attrs, cr));
114439b3deaSKacheong Poon }
115439b3deaSKacheong Poon 
116439b3deaSKacheong Poon static int
117439b3deaSKacheong Poon tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp, int *do_disconnectp,
118439b3deaSKacheong Poon     int *t_errorp, int *sys_errorp)
119439b3deaSKacheong Poon {
120439b3deaSKacheong Poon 	int error;
121439b3deaSKacheong Poon 	int is_absreq_failure;
122439b3deaSKacheong Poon 	t_scalar_t *opt_lenp;
123439b3deaSKacheong Poon 	t_scalar_t opt_offset;
124439b3deaSKacheong Poon 	int prim_type;
125439b3deaSKacheong Poon 	struct T_conn_req *tcreqp;
126439b3deaSKacheong Poon 	struct T_conn_res *tcresp;
127439b3deaSKacheong Poon 	cred_t *cr;
128439b3deaSKacheong Poon 
129439b3deaSKacheong Poon 	/*
130439b3deaSKacheong Poon 	 * All Solaris components should pass a db_credp
131439b3deaSKacheong Poon 	 * for this TPI message, hence we ASSERT.
132439b3deaSKacheong Poon 	 * But in case there is some other M_PROTO that looks
133439b3deaSKacheong Poon 	 * like a TPI message sent by some other kernel
134439b3deaSKacheong Poon 	 * component, we check and return an error.
135439b3deaSKacheong Poon 	 */
136439b3deaSKacheong Poon 	cr = msg_getcred(mp, NULL);
137439b3deaSKacheong Poon 	ASSERT(cr != NULL);
138439b3deaSKacheong Poon 	if (cr == NULL)
139439b3deaSKacheong Poon 		return (-1);
140439b3deaSKacheong Poon 
141439b3deaSKacheong Poon 	prim_type = ((union T_primitives *)mp->b_rptr)->type;
142439b3deaSKacheong Poon 	ASSERT(prim_type == T_CONN_REQ || prim_type == O_T_CONN_RES ||
143439b3deaSKacheong Poon 	    prim_type == T_CONN_RES);
144439b3deaSKacheong Poon 
145439b3deaSKacheong Poon 	switch (prim_type) {
146439b3deaSKacheong Poon 	case T_CONN_REQ:
147439b3deaSKacheong Poon 		tcreqp = (struct T_conn_req *)mp->b_rptr;
148439b3deaSKacheong Poon 		opt_offset = tcreqp->OPT_offset;
149439b3deaSKacheong Poon 		opt_lenp = (t_scalar_t *)&tcreqp->OPT_length;
150439b3deaSKacheong Poon 		break;
151439b3deaSKacheong Poon 	case O_T_CONN_RES:
152439b3deaSKacheong Poon 	case T_CONN_RES:
153439b3deaSKacheong Poon 		tcresp = (struct T_conn_res *)mp->b_rptr;
154439b3deaSKacheong Poon 		opt_offset = tcresp->OPT_offset;
155439b3deaSKacheong Poon 		opt_lenp = (t_scalar_t *)&tcresp->OPT_length;
156439b3deaSKacheong Poon 		break;
157439b3deaSKacheong Poon 	}
158439b3deaSKacheong Poon 
159439b3deaSKacheong Poon 	*t_errorp = 0;
160439b3deaSKacheong Poon 	*sys_errorp = 0;
161439b3deaSKacheong Poon 	*do_disconnectp = 0;
162439b3deaSKacheong Poon 
163439b3deaSKacheong Poon 	error = tpi_optcom_buf(tcp->tcp_connp->conn_wq, mp, opt_lenp,
164439b3deaSKacheong Poon 	    opt_offset, cr, &tcp_opt_obj,
165439b3deaSKacheong Poon 	    NULL, &is_absreq_failure);
166439b3deaSKacheong Poon 
167439b3deaSKacheong Poon 	switch (error) {
168439b3deaSKacheong Poon 	case  0:		/* no error */
169439b3deaSKacheong Poon 		ASSERT(is_absreq_failure == 0);
170439b3deaSKacheong Poon 		return (0);
171439b3deaSKacheong Poon 	case ENOPROTOOPT:
172439b3deaSKacheong Poon 		*t_errorp = TBADOPT;
173439b3deaSKacheong Poon 		break;
174439b3deaSKacheong Poon 	case EACCES:
175439b3deaSKacheong Poon 		*t_errorp = TACCES;
176439b3deaSKacheong Poon 		break;
177439b3deaSKacheong Poon 	default:
178439b3deaSKacheong Poon 		*t_errorp = TSYSERR; *sys_errorp = error;
179439b3deaSKacheong Poon 		break;
180439b3deaSKacheong Poon 	}
181439b3deaSKacheong Poon 	if (is_absreq_failure != 0) {
182439b3deaSKacheong Poon 		/*
183439b3deaSKacheong Poon 		 * The connection request should get the local ack
184439b3deaSKacheong Poon 		 * T_OK_ACK and then a T_DISCON_IND.
185439b3deaSKacheong Poon 		 */
186439b3deaSKacheong Poon 		*do_disconnectp = 1;
187439b3deaSKacheong Poon 	}
188439b3deaSKacheong Poon 	return (-1);
189439b3deaSKacheong Poon }
190439b3deaSKacheong Poon 
191439b3deaSKacheong Poon void
192439b3deaSKacheong Poon tcp_tpi_bind(tcp_t *tcp, mblk_t *mp)
193439b3deaSKacheong Poon {
194439b3deaSKacheong Poon 	int	error;
195439b3deaSKacheong Poon 	conn_t	*connp = tcp->tcp_connp;
196439b3deaSKacheong Poon 	struct sockaddr	*sa;
197439b3deaSKacheong Poon 	mblk_t  *mp1;
198439b3deaSKacheong Poon 	struct T_bind_req *tbr;
199439b3deaSKacheong Poon 	int	backlog;
200439b3deaSKacheong Poon 	socklen_t	len;
201439b3deaSKacheong Poon 	sin_t	*sin;
202439b3deaSKacheong Poon 	sin6_t	*sin6;
203439b3deaSKacheong Poon 	cred_t		*cr;
204439b3deaSKacheong Poon 
205439b3deaSKacheong Poon 	/*
206439b3deaSKacheong Poon 	 * All Solaris components should pass a db_credp
207439b3deaSKacheong Poon 	 * for this TPI message, hence we ASSERT.
208439b3deaSKacheong Poon 	 * But in case there is some other M_PROTO that looks
209439b3deaSKacheong Poon 	 * like a TPI message sent by some other kernel
210439b3deaSKacheong Poon 	 * component, we check and return an error.
211439b3deaSKacheong Poon 	 */
212439b3deaSKacheong Poon 	cr = msg_getcred(mp, NULL);
213439b3deaSKacheong Poon 	ASSERT(cr != NULL);
214439b3deaSKacheong Poon 	if (cr == NULL) {
215439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
216439b3deaSKacheong Poon 		return;
217439b3deaSKacheong Poon 	}
218439b3deaSKacheong Poon 
219439b3deaSKacheong Poon 	ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
220439b3deaSKacheong Poon 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
221439b3deaSKacheong Poon 		if (connp->conn_debug) {
222439b3deaSKacheong Poon 			(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
223439b3deaSKacheong Poon 			    "tcp_tpi_bind: bad req, len %u",
224439b3deaSKacheong Poon 			    (uint_t)(mp->b_wptr - mp->b_rptr));
225439b3deaSKacheong Poon 		}
226439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TPROTO, 0);
227439b3deaSKacheong Poon 		return;
228439b3deaSKacheong Poon 	}
229439b3deaSKacheong Poon 	/* Make sure the largest address fits */
230439b3deaSKacheong Poon 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
231439b3deaSKacheong Poon 	if (mp1 == NULL) {
232439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
233439b3deaSKacheong Poon 		return;
234439b3deaSKacheong Poon 	}
235439b3deaSKacheong Poon 	mp = mp1;
236439b3deaSKacheong Poon 	tbr = (struct T_bind_req *)mp->b_rptr;
237439b3deaSKacheong Poon 
238439b3deaSKacheong Poon 	backlog = tbr->CONIND_number;
239439b3deaSKacheong Poon 	len = tbr->ADDR_length;
240439b3deaSKacheong Poon 
241439b3deaSKacheong Poon 	switch (len) {
242439b3deaSKacheong Poon 	case 0:		/* request for a generic port */
243439b3deaSKacheong Poon 		tbr->ADDR_offset = sizeof (struct T_bind_req);
244439b3deaSKacheong Poon 		if (connp->conn_family == AF_INET) {
245439b3deaSKacheong Poon 			tbr->ADDR_length = sizeof (sin_t);
246439b3deaSKacheong Poon 			sin = (sin_t *)&tbr[1];
247439b3deaSKacheong Poon 			*sin = sin_null;
248439b3deaSKacheong Poon 			sin->sin_family = AF_INET;
249439b3deaSKacheong Poon 			sa = (struct sockaddr *)sin;
250439b3deaSKacheong Poon 			len = sizeof (sin_t);
251439b3deaSKacheong Poon 			mp->b_wptr = (uchar_t *)&sin[1];
252439b3deaSKacheong Poon 		} else {
253439b3deaSKacheong Poon 			ASSERT(connp->conn_family == AF_INET6);
254439b3deaSKacheong Poon 			tbr->ADDR_length = sizeof (sin6_t);
255439b3deaSKacheong Poon 			sin6 = (sin6_t *)&tbr[1];
256439b3deaSKacheong Poon 			*sin6 = sin6_null;
257439b3deaSKacheong Poon 			sin6->sin6_family = AF_INET6;
258439b3deaSKacheong Poon 			sa = (struct sockaddr *)sin6;
259439b3deaSKacheong Poon 			len = sizeof (sin6_t);
260439b3deaSKacheong Poon 			mp->b_wptr = (uchar_t *)&sin6[1];
261439b3deaSKacheong Poon 		}
262439b3deaSKacheong Poon 		break;
263439b3deaSKacheong Poon 
264439b3deaSKacheong Poon 	case sizeof (sin_t):    /* Complete IPv4 address */
265439b3deaSKacheong Poon 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
266439b3deaSKacheong Poon 		    sizeof (sin_t));
267439b3deaSKacheong Poon 		break;
268439b3deaSKacheong Poon 
269439b3deaSKacheong Poon 	case sizeof (sin6_t): /* Complete IPv6 address */
270439b3deaSKacheong Poon 		sa = (struct sockaddr *)mi_offset_param(mp,
271439b3deaSKacheong Poon 		    tbr->ADDR_offset, sizeof (sin6_t));
272439b3deaSKacheong Poon 		break;
273439b3deaSKacheong Poon 
274439b3deaSKacheong Poon 	default:
275439b3deaSKacheong Poon 		if (connp->conn_debug) {
276439b3deaSKacheong Poon 			(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
277439b3deaSKacheong Poon 			    "tcp_tpi_bind: bad address length, %d",
278439b3deaSKacheong Poon 			    tbr->ADDR_length);
279439b3deaSKacheong Poon 		}
280439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TBADADDR, 0);
281439b3deaSKacheong Poon 		return;
282439b3deaSKacheong Poon 	}
283439b3deaSKacheong Poon 
284439b3deaSKacheong Poon 	if (backlog > 0) {
285439b3deaSKacheong Poon 		error = tcp_do_listen(connp, sa, len, backlog, DB_CRED(mp),
286439b3deaSKacheong Poon 		    tbr->PRIM_type != O_T_BIND_REQ);
287439b3deaSKacheong Poon 	} else {
288439b3deaSKacheong Poon 		error = tcp_do_bind(connp, sa, len, DB_CRED(mp),
289439b3deaSKacheong Poon 		    tbr->PRIM_type != O_T_BIND_REQ);
290439b3deaSKacheong Poon 	}
291439b3deaSKacheong Poon done:
292439b3deaSKacheong Poon 	if (error > 0) {
293439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TSYSERR, error);
294439b3deaSKacheong Poon 	} else if (error < 0) {
295439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, -error, 0);
296439b3deaSKacheong Poon 	} else {
297439b3deaSKacheong Poon 		/*
298439b3deaSKacheong Poon 		 * Update port information as sockfs/tpi needs it for checking
299439b3deaSKacheong Poon 		 */
300439b3deaSKacheong Poon 		if (connp->conn_family == AF_INET) {
301439b3deaSKacheong Poon 			sin = (sin_t *)sa;
302439b3deaSKacheong Poon 			sin->sin_port = connp->conn_lport;
303439b3deaSKacheong Poon 		} else {
304439b3deaSKacheong Poon 			sin6 = (sin6_t *)sa;
305439b3deaSKacheong Poon 			sin6->sin6_port = connp->conn_lport;
306439b3deaSKacheong Poon 		}
307439b3deaSKacheong Poon 		mp->b_datap->db_type = M_PCPROTO;
308439b3deaSKacheong Poon 		tbr->PRIM_type = T_BIND_ACK;
309439b3deaSKacheong Poon 		putnext(connp->conn_rq, mp);
310439b3deaSKacheong Poon 	}
311439b3deaSKacheong Poon }
312439b3deaSKacheong Poon 
313439b3deaSKacheong Poon /* tcp_unbind is called by tcp_wput_proto to handle T_UNBIND_REQ messages. */
314439b3deaSKacheong Poon void
315439b3deaSKacheong Poon tcp_tpi_unbind(tcp_t *tcp, mblk_t *mp)
316439b3deaSKacheong Poon {
317439b3deaSKacheong Poon 	conn_t *connp = tcp->tcp_connp;
318439b3deaSKacheong Poon 	int error;
319439b3deaSKacheong Poon 
320439b3deaSKacheong Poon 	error = tcp_do_unbind(connp);
321439b3deaSKacheong Poon 	if (error > 0) {
322439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TSYSERR, error);
323439b3deaSKacheong Poon 	} else if (error < 0) {
324439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, -error, 0);
325439b3deaSKacheong Poon 	} else {
326439b3deaSKacheong Poon 		/* Send M_FLUSH according to TPI */
327439b3deaSKacheong Poon 		(void) putnextctl1(connp->conn_rq, M_FLUSH, FLUSHRW);
328439b3deaSKacheong Poon 
329439b3deaSKacheong Poon 		mp = mi_tpi_ok_ack_alloc(mp);
330439b3deaSKacheong Poon 		if (mp != NULL)
331439b3deaSKacheong Poon 			putnext(connp->conn_rq, mp);
332439b3deaSKacheong Poon 	}
333439b3deaSKacheong Poon }
334439b3deaSKacheong Poon 
335439b3deaSKacheong Poon int
336439b3deaSKacheong Poon tcp_tpi_close(queue_t *q, int flags)
337439b3deaSKacheong Poon {
338439b3deaSKacheong Poon 	conn_t		*connp;
339439b3deaSKacheong Poon 
340439b3deaSKacheong Poon 	ASSERT(WR(q)->q_next == NULL);
341439b3deaSKacheong Poon 
342439b3deaSKacheong Poon 	if (flags & SO_FALLBACK) {
343439b3deaSKacheong Poon 		/*
344439b3deaSKacheong Poon 		 * stream is being closed while in fallback
345439b3deaSKacheong Poon 		 * simply free the resources that were allocated
346439b3deaSKacheong Poon 		 */
347439b3deaSKacheong Poon 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
348439b3deaSKacheong Poon 		qprocsoff(q);
349439b3deaSKacheong Poon 		goto done;
350439b3deaSKacheong Poon 	}
351439b3deaSKacheong Poon 
352439b3deaSKacheong Poon 	connp = Q_TO_CONN(q);
353439b3deaSKacheong Poon 	/*
354439b3deaSKacheong Poon 	 * We are being closed as /dev/tcp or /dev/tcp6.
355439b3deaSKacheong Poon 	 */
356439b3deaSKacheong Poon 	tcp_close_common(connp, flags);
357439b3deaSKacheong Poon 
358439b3deaSKacheong Poon 	qprocsoff(q);
359439b3deaSKacheong Poon 	inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
360439b3deaSKacheong Poon 
361439b3deaSKacheong Poon 	/*
362439b3deaSKacheong Poon 	 * Drop IP's reference on the conn. This is the last reference
363439b3deaSKacheong Poon 	 * on the connp if the state was less than established. If the
364439b3deaSKacheong Poon 	 * connection has gone into timewait state, then we will have
365439b3deaSKacheong Poon 	 * one ref for the TCP and one more ref (total of two) for the
366439b3deaSKacheong Poon 	 * classifier connected hash list (a timewait connections stays
367439b3deaSKacheong Poon 	 * in connected hash till closed).
368439b3deaSKacheong Poon 	 *
369439b3deaSKacheong Poon 	 * We can't assert the references because there might be other
370439b3deaSKacheong Poon 	 * transient reference places because of some walkers or queued
371439b3deaSKacheong Poon 	 * packets in squeue for the timewait state.
372439b3deaSKacheong Poon 	 */
373439b3deaSKacheong Poon 	CONN_DEC_REF(connp);
374439b3deaSKacheong Poon done:
375439b3deaSKacheong Poon 	q->q_ptr = WR(q)->q_ptr = NULL;
376439b3deaSKacheong Poon 	return (0);
377439b3deaSKacheong Poon }
378439b3deaSKacheong Poon 
379439b3deaSKacheong Poon int
380439b3deaSKacheong Poon tcp_tpi_close_accept(queue_t *q)
381439b3deaSKacheong Poon {
382439b3deaSKacheong Poon 	vmem_t	*minor_arena;
383439b3deaSKacheong Poon 	dev_t	conn_dev;
384439b3deaSKacheong Poon 	extern struct qinit tcp_acceptor_winit;
385439b3deaSKacheong Poon 
386439b3deaSKacheong Poon 	ASSERT(WR(q)->q_qinfo == &tcp_acceptor_winit);
387439b3deaSKacheong Poon 
388439b3deaSKacheong Poon 	/*
389439b3deaSKacheong Poon 	 * We had opened an acceptor STREAM for sockfs which is
390439b3deaSKacheong Poon 	 * now being closed due to some error.
391439b3deaSKacheong Poon 	 */
392439b3deaSKacheong Poon 	qprocsoff(q);
393439b3deaSKacheong Poon 
394439b3deaSKacheong Poon 	minor_arena = (vmem_t *)WR(q)->q_ptr;
395439b3deaSKacheong Poon 	conn_dev = (dev_t)RD(q)->q_ptr;
396439b3deaSKacheong Poon 	ASSERT(minor_arena != NULL);
397439b3deaSKacheong Poon 	ASSERT(conn_dev != 0);
398439b3deaSKacheong Poon 	inet_minor_free(minor_arena, conn_dev);
399439b3deaSKacheong Poon 	q->q_ptr = WR(q)->q_ptr = NULL;
400439b3deaSKacheong Poon 	return (0);
401439b3deaSKacheong Poon }
402439b3deaSKacheong Poon 
403439b3deaSKacheong Poon /*
404439b3deaSKacheong Poon  * Put a connection confirmation message upstream built from the
405439b3deaSKacheong Poon  * address/flowid information with the conn and iph. Report our success or
406439b3deaSKacheong Poon  * failure.
407439b3deaSKacheong Poon  */
408439b3deaSKacheong Poon boolean_t
409439b3deaSKacheong Poon tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, mblk_t *idmp,
410439b3deaSKacheong Poon     mblk_t **defermp, ip_recv_attr_t *ira)
411439b3deaSKacheong Poon {
412439b3deaSKacheong Poon 	sin_t	sin;
413439b3deaSKacheong Poon 	sin6_t	sin6;
414439b3deaSKacheong Poon 	mblk_t	*mp;
415439b3deaSKacheong Poon 	char	*optp = NULL;
416439b3deaSKacheong Poon 	int	optlen = 0;
417439b3deaSKacheong Poon 	conn_t	*connp = tcp->tcp_connp;
418439b3deaSKacheong Poon 
419439b3deaSKacheong Poon 	if (defermp != NULL)
420439b3deaSKacheong Poon 		*defermp = NULL;
421439b3deaSKacheong Poon 
422439b3deaSKacheong Poon 	if (tcp->tcp_conn.tcp_opts_conn_req != NULL) {
423439b3deaSKacheong Poon 		/*
424439b3deaSKacheong Poon 		 * Return in T_CONN_CON results of option negotiation through
425439b3deaSKacheong Poon 		 * the T_CONN_REQ. Note: If there is an real end-to-end option
426439b3deaSKacheong Poon 		 * negotiation, then what is received from remote end needs
427439b3deaSKacheong Poon 		 * to be taken into account but there is no such thing (yet?)
428439b3deaSKacheong Poon 		 * in our TCP/IP.
429439b3deaSKacheong Poon 		 * Note: We do not use mi_offset_param() here as
430439b3deaSKacheong Poon 		 * tcp_opts_conn_req contents do not directly come from
431439b3deaSKacheong Poon 		 * an application and are either generated in kernel or
432439b3deaSKacheong Poon 		 * from user input that was already verified.
433439b3deaSKacheong Poon 		 */
434439b3deaSKacheong Poon 		mp = tcp->tcp_conn.tcp_opts_conn_req;
435439b3deaSKacheong Poon 		optp = (char *)(mp->b_rptr +
436439b3deaSKacheong Poon 		    ((struct T_conn_req *)mp->b_rptr)->OPT_offset);
437439b3deaSKacheong Poon 		optlen = (int)
438439b3deaSKacheong Poon 		    ((struct T_conn_req *)mp->b_rptr)->OPT_length;
439439b3deaSKacheong Poon 	}
440439b3deaSKacheong Poon 
441439b3deaSKacheong Poon 	if (IPH_HDR_VERSION(iphdr) == IPV4_VERSION) {
442439b3deaSKacheong Poon 
443439b3deaSKacheong Poon 		/* packet is IPv4 */
444439b3deaSKacheong Poon 		if (connp->conn_family == AF_INET) {
445439b3deaSKacheong Poon 			sin = sin_null;
446439b3deaSKacheong Poon 			sin.sin_addr.s_addr = connp->conn_faddr_v4;
447439b3deaSKacheong Poon 			sin.sin_port = connp->conn_fport;
448439b3deaSKacheong Poon 			sin.sin_family = AF_INET;
449439b3deaSKacheong Poon 			mp = mi_tpi_conn_con(NULL, (char *)&sin,
450439b3deaSKacheong Poon 			    (int)sizeof (sin_t), optp, optlen);
451439b3deaSKacheong Poon 		} else {
452439b3deaSKacheong Poon 			sin6 = sin6_null;
453439b3deaSKacheong Poon 			sin6.sin6_addr = connp->conn_faddr_v6;
454439b3deaSKacheong Poon 			sin6.sin6_port = connp->conn_fport;
455439b3deaSKacheong Poon 			sin6.sin6_family = AF_INET6;
456439b3deaSKacheong Poon 			mp = mi_tpi_conn_con(NULL, (char *)&sin6,
457439b3deaSKacheong Poon 			    (int)sizeof (sin6_t), optp, optlen);
458439b3deaSKacheong Poon 
459439b3deaSKacheong Poon 		}
460439b3deaSKacheong Poon 	} else {
461439b3deaSKacheong Poon 		ip6_t	*ip6h = (ip6_t *)iphdr;
462439b3deaSKacheong Poon 
463439b3deaSKacheong Poon 		ASSERT(IPH_HDR_VERSION(iphdr) == IPV6_VERSION);
464439b3deaSKacheong Poon 		ASSERT(connp->conn_family == AF_INET6);
465439b3deaSKacheong Poon 		sin6 = sin6_null;
466439b3deaSKacheong Poon 		sin6.sin6_addr = connp->conn_faddr_v6;
467439b3deaSKacheong Poon 		sin6.sin6_port = connp->conn_fport;
468439b3deaSKacheong Poon 		sin6.sin6_family = AF_INET6;
469439b3deaSKacheong Poon 		sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
470439b3deaSKacheong Poon 		mp = mi_tpi_conn_con(NULL, (char *)&sin6,
471439b3deaSKacheong Poon 		    (int)sizeof (sin6_t), optp, optlen);
472439b3deaSKacheong Poon 	}
473439b3deaSKacheong Poon 
474439b3deaSKacheong Poon 	if (!mp)
475439b3deaSKacheong Poon 		return (B_FALSE);
476439b3deaSKacheong Poon 
477439b3deaSKacheong Poon 	mblk_copycred(mp, idmp);
478439b3deaSKacheong Poon 
479439b3deaSKacheong Poon 	if (defermp == NULL) {
480439b3deaSKacheong Poon 		conn_t *connp = tcp->tcp_connp;
481439b3deaSKacheong Poon 		if (IPCL_IS_NONSTR(connp)) {
482439b3deaSKacheong Poon 			(*connp->conn_upcalls->su_connected)
483439b3deaSKacheong Poon 			    (connp->conn_upper_handle, tcp->tcp_connid,
484439b3deaSKacheong Poon 			    ira->ira_cred, ira->ira_cpid);
485439b3deaSKacheong Poon 			freemsg(mp);
486439b3deaSKacheong Poon 		} else {
487439b3deaSKacheong Poon 			if (ira->ira_cred != NULL) {
488439b3deaSKacheong Poon 				/* So that getpeerucred works for TPI sockfs */
489439b3deaSKacheong Poon 				mblk_setcred(mp, ira->ira_cred, ira->ira_cpid);
490439b3deaSKacheong Poon 			}
491439b3deaSKacheong Poon 			putnext(connp->conn_rq, mp);
492439b3deaSKacheong Poon 		}
493439b3deaSKacheong Poon 	} else {
494439b3deaSKacheong Poon 		*defermp = mp;
495439b3deaSKacheong Poon 	}
496439b3deaSKacheong Poon 
497439b3deaSKacheong Poon 	if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
498439b3deaSKacheong Poon 		tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
499439b3deaSKacheong Poon 	return (B_TRUE);
500439b3deaSKacheong Poon }
501439b3deaSKacheong Poon 
502439b3deaSKacheong Poon /*
503439b3deaSKacheong Poon  * Successful connect request processing begins when our client passes
504439b3deaSKacheong Poon  * a T_CONN_REQ message into tcp_wput(), which performs function calls into
505439b3deaSKacheong Poon  * IP and then passes a T_OK_ACK (or T_ERROR_ACK) upstream.
506439b3deaSKacheong Poon  *
507439b3deaSKacheong Poon  * After various error checks are completed, tcp_tpi_connect() lays
508439b3deaSKacheong Poon  * the target address and port into the composite header template.
509439b3deaSKacheong Poon  * Then we ask IP for information, including a source address if we didn't
510439b3deaSKacheong Poon  * already have one. Finally we prepare to send the SYN packet, and then
511439b3deaSKacheong Poon  * send up the T_OK_ACK reply message.
512439b3deaSKacheong Poon  */
513439b3deaSKacheong Poon void
514439b3deaSKacheong Poon tcp_tpi_connect(tcp_t *tcp, mblk_t *mp)
515439b3deaSKacheong Poon {
516439b3deaSKacheong Poon 	sin_t		*sin;
517439b3deaSKacheong Poon 	struct T_conn_req	*tcr;
518439b3deaSKacheong Poon 	struct sockaddr	*sa;
519439b3deaSKacheong Poon 	socklen_t	len;
520439b3deaSKacheong Poon 	int		error;
521439b3deaSKacheong Poon 	cred_t		*cr;
522439b3deaSKacheong Poon 	pid_t		cpid;
523439b3deaSKacheong Poon 	conn_t		*connp = tcp->tcp_connp;
524439b3deaSKacheong Poon 	queue_t		*q = connp->conn_wq;
525439b3deaSKacheong Poon 
526439b3deaSKacheong Poon 	/*
527439b3deaSKacheong Poon 	 * All Solaris components should pass a db_credp
528439b3deaSKacheong Poon 	 * for this TPI message, hence we ASSERT.
529439b3deaSKacheong Poon 	 * But in case there is some other M_PROTO that looks
530439b3deaSKacheong Poon 	 * like a TPI message sent by some other kernel
531439b3deaSKacheong Poon 	 * component, we check and return an error.
532439b3deaSKacheong Poon 	 */
533439b3deaSKacheong Poon 	cr = msg_getcred(mp, &cpid);
534439b3deaSKacheong Poon 	ASSERT(cr != NULL);
535439b3deaSKacheong Poon 	if (cr == NULL) {
536439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
537439b3deaSKacheong Poon 		return;
538439b3deaSKacheong Poon 	}
539439b3deaSKacheong Poon 
540439b3deaSKacheong Poon 	tcr = (struct T_conn_req *)mp->b_rptr;
541439b3deaSKacheong Poon 
542439b3deaSKacheong Poon 	ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
543439b3deaSKacheong Poon 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tcr)) {
544439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TPROTO, 0);
545439b3deaSKacheong Poon 		return;
546439b3deaSKacheong Poon 	}
547439b3deaSKacheong Poon 
548439b3deaSKacheong Poon 	/*
549439b3deaSKacheong Poon 	 * Pre-allocate the T_ordrel_ind mblk so that at close time, we
550439b3deaSKacheong Poon 	 * will always have that to send up.  Otherwise, we need to do
551439b3deaSKacheong Poon 	 * special handling in case the allocation fails at that time.
552439b3deaSKacheong Poon 	 * If the end point is TPI, the tcp_t can be reused and the
553439b3deaSKacheong Poon 	 * tcp_ordrel_mp may be allocated already.
554439b3deaSKacheong Poon 	 */
555439b3deaSKacheong Poon 	if (tcp->tcp_ordrel_mp == NULL) {
556439b3deaSKacheong Poon 		if ((tcp->tcp_ordrel_mp = mi_tpi_ordrel_ind()) == NULL) {
557439b3deaSKacheong Poon 			tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
558439b3deaSKacheong Poon 			return;
559439b3deaSKacheong Poon 		}
560439b3deaSKacheong Poon 	}
561439b3deaSKacheong Poon 
562439b3deaSKacheong Poon 	/*
563439b3deaSKacheong Poon 	 * Determine packet type based on type of address passed in
564439b3deaSKacheong Poon 	 * the request should contain an IPv4 or IPv6 address.
565439b3deaSKacheong Poon 	 * Make sure that address family matches the type of
566439b3deaSKacheong Poon 	 * family of the address passed down.
567439b3deaSKacheong Poon 	 */
568439b3deaSKacheong Poon 	switch (tcr->DEST_length) {
569439b3deaSKacheong Poon 	default:
570439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TBADADDR, 0);
571439b3deaSKacheong Poon 		return;
572439b3deaSKacheong Poon 
573439b3deaSKacheong Poon 	case (sizeof (sin_t) - sizeof (sin->sin_zero)): {
574439b3deaSKacheong Poon 		/*
575439b3deaSKacheong Poon 		 * XXX: The check for valid DEST_length was not there
576439b3deaSKacheong Poon 		 * in earlier releases and some buggy
577439b3deaSKacheong Poon 		 * TLI apps (e.g Sybase) got away with not feeding
578439b3deaSKacheong Poon 		 * in sin_zero part of address.
579439b3deaSKacheong Poon 		 * We allow that bug to keep those buggy apps humming.
580439b3deaSKacheong Poon 		 * Test suites require the check on DEST_length.
581439b3deaSKacheong Poon 		 * We construct a new mblk with valid DEST_length
582439b3deaSKacheong Poon 		 * free the original so the rest of the code does
583439b3deaSKacheong Poon 		 * not have to keep track of this special shorter
584439b3deaSKacheong Poon 		 * length address case.
585439b3deaSKacheong Poon 		 */
586439b3deaSKacheong Poon 		mblk_t *nmp;
587439b3deaSKacheong Poon 		struct T_conn_req *ntcr;
588439b3deaSKacheong Poon 		sin_t *nsin;
589439b3deaSKacheong Poon 
590439b3deaSKacheong Poon 		nmp = allocb(sizeof (struct T_conn_req) + sizeof (sin_t) +
591439b3deaSKacheong Poon 		    tcr->OPT_length, BPRI_HI);
592439b3deaSKacheong Poon 		if (nmp == NULL) {
593439b3deaSKacheong Poon 			tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
594439b3deaSKacheong Poon 			return;
595439b3deaSKacheong Poon 		}
596439b3deaSKacheong Poon 		ntcr = (struct T_conn_req *)nmp->b_rptr;
597439b3deaSKacheong Poon 		bzero(ntcr, sizeof (struct T_conn_req)); /* zero fill */
598439b3deaSKacheong Poon 		ntcr->PRIM_type = T_CONN_REQ;
599439b3deaSKacheong Poon 		ntcr->DEST_length = sizeof (sin_t);
600439b3deaSKacheong Poon 		ntcr->DEST_offset = sizeof (struct T_conn_req);
601439b3deaSKacheong Poon 
602439b3deaSKacheong Poon 		nsin = (sin_t *)((uchar_t *)ntcr + ntcr->DEST_offset);
603439b3deaSKacheong Poon 		*nsin = sin_null;
604439b3deaSKacheong Poon 		/* Get pointer to shorter address to copy from original mp */
605439b3deaSKacheong Poon 		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
606439b3deaSKacheong Poon 		    tcr->DEST_length); /* extract DEST_length worth of sin_t */
607439b3deaSKacheong Poon 		if (sin == NULL || !OK_32PTR((char *)sin)) {
608439b3deaSKacheong Poon 			freemsg(nmp);
609439b3deaSKacheong Poon 			tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
610439b3deaSKacheong Poon 			return;
611439b3deaSKacheong Poon 		}
612439b3deaSKacheong Poon 		nsin->sin_family = sin->sin_family;
613439b3deaSKacheong Poon 		nsin->sin_port = sin->sin_port;
614439b3deaSKacheong Poon 		nsin->sin_addr = sin->sin_addr;
615439b3deaSKacheong Poon 		/* Note:nsin->sin_zero zero-fill with sin_null assign above */
616439b3deaSKacheong Poon 		nmp->b_wptr = (uchar_t *)&nsin[1];
617439b3deaSKacheong Poon 		if (tcr->OPT_length != 0) {
618439b3deaSKacheong Poon 			ntcr->OPT_length = tcr->OPT_length;
619439b3deaSKacheong Poon 			ntcr->OPT_offset = nmp->b_wptr - nmp->b_rptr;
620439b3deaSKacheong Poon 			bcopy((uchar_t *)tcr + tcr->OPT_offset,
621439b3deaSKacheong Poon 			    (uchar_t *)ntcr + ntcr->OPT_offset,
622439b3deaSKacheong Poon 			    tcr->OPT_length);
623439b3deaSKacheong Poon 			nmp->b_wptr += tcr->OPT_length;
624439b3deaSKacheong Poon 		}
625439b3deaSKacheong Poon 		freemsg(mp);	/* original mp freed */
626439b3deaSKacheong Poon 		mp = nmp;	/* re-initialize original variables */
627439b3deaSKacheong Poon 		tcr = ntcr;
628439b3deaSKacheong Poon 	}
629439b3deaSKacheong Poon 	/* FALLTHRU */
630439b3deaSKacheong Poon 
631439b3deaSKacheong Poon 	case sizeof (sin_t):
632439b3deaSKacheong Poon 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
633439b3deaSKacheong Poon 		    sizeof (sin_t));
634439b3deaSKacheong Poon 		len = sizeof (sin_t);
635439b3deaSKacheong Poon 		break;
636439b3deaSKacheong Poon 
637439b3deaSKacheong Poon 	case sizeof (sin6_t):
638439b3deaSKacheong Poon 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
639439b3deaSKacheong Poon 		    sizeof (sin6_t));
640439b3deaSKacheong Poon 		len = sizeof (sin6_t);
641439b3deaSKacheong Poon 		break;
642439b3deaSKacheong Poon 	}
643439b3deaSKacheong Poon 
644439b3deaSKacheong Poon 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
645439b3deaSKacheong Poon 	if (error != 0) {
646439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TSYSERR, error);
647439b3deaSKacheong Poon 		return;
648439b3deaSKacheong Poon 	}
649439b3deaSKacheong Poon 
650439b3deaSKacheong Poon 	/*
651439b3deaSKacheong Poon 	 * TODO: If someone in TCPS_TIME_WAIT has this dst/port we
652439b3deaSKacheong Poon 	 * should key on their sequence number and cut them loose.
653439b3deaSKacheong Poon 	 */
654439b3deaSKacheong Poon 
655439b3deaSKacheong Poon 	/*
656439b3deaSKacheong Poon 	 * If options passed in, feed it for verification and handling
657439b3deaSKacheong Poon 	 */
658439b3deaSKacheong Poon 	if (tcr->OPT_length != 0) {
659439b3deaSKacheong Poon 		mblk_t	*ok_mp;
660439b3deaSKacheong Poon 		mblk_t	*discon_mp;
661439b3deaSKacheong Poon 		mblk_t  *conn_opts_mp;
662439b3deaSKacheong Poon 		int t_error, sys_error, do_disconnect;
663439b3deaSKacheong Poon 
664439b3deaSKacheong Poon 		conn_opts_mp = NULL;
665439b3deaSKacheong Poon 
666439b3deaSKacheong Poon 		if (tcp_conprim_opt_process(tcp, mp,
667439b3deaSKacheong Poon 		    &do_disconnect, &t_error, &sys_error) < 0) {
668439b3deaSKacheong Poon 			if (do_disconnect) {
669439b3deaSKacheong Poon 				ASSERT(t_error == 0 && sys_error == 0);
670439b3deaSKacheong Poon 				discon_mp = mi_tpi_discon_ind(NULL,
671439b3deaSKacheong Poon 				    ECONNREFUSED, 0);
672439b3deaSKacheong Poon 				if (!discon_mp) {
673439b3deaSKacheong Poon 					tcp_err_ack_prim(tcp, mp, T_CONN_REQ,
674439b3deaSKacheong Poon 					    TSYSERR, ENOMEM);
675439b3deaSKacheong Poon 					return;
676439b3deaSKacheong Poon 				}
677439b3deaSKacheong Poon 				ok_mp = mi_tpi_ok_ack_alloc(mp);
678439b3deaSKacheong Poon 				if (!ok_mp) {
679439b3deaSKacheong Poon 					tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
680439b3deaSKacheong Poon 					    TSYSERR, ENOMEM);
681439b3deaSKacheong Poon 					return;
682439b3deaSKacheong Poon 				}
683439b3deaSKacheong Poon 				qreply(q, ok_mp);
684439b3deaSKacheong Poon 				qreply(q, discon_mp); /* no flush! */
685439b3deaSKacheong Poon 			} else {
686439b3deaSKacheong Poon 				ASSERT(t_error != 0);
687439b3deaSKacheong Poon 				tcp_err_ack_prim(tcp, mp, T_CONN_REQ, t_error,
688439b3deaSKacheong Poon 				    sys_error);
689439b3deaSKacheong Poon 			}
690439b3deaSKacheong Poon 			return;
691439b3deaSKacheong Poon 		}
692439b3deaSKacheong Poon 		/*
693439b3deaSKacheong Poon 		 * Success in setting options, the mp option buffer represented
694439b3deaSKacheong Poon 		 * by OPT_length/offset has been potentially modified and
695439b3deaSKacheong Poon 		 * contains results of option processing. We copy it in
696439b3deaSKacheong Poon 		 * another mp to save it for potentially influencing returning
697439b3deaSKacheong Poon 		 * it in T_CONN_CONN.
698439b3deaSKacheong Poon 		 */
699439b3deaSKacheong Poon 		if (tcr->OPT_length != 0) { /* there are resulting options */
700439b3deaSKacheong Poon 			conn_opts_mp = copyb(mp);
701439b3deaSKacheong Poon 			if (!conn_opts_mp) {
702439b3deaSKacheong Poon 				tcp_err_ack_prim(tcp, mp, T_CONN_REQ,
703439b3deaSKacheong Poon 				    TSYSERR, ENOMEM);
704439b3deaSKacheong Poon 				return;
705439b3deaSKacheong Poon 			}
706439b3deaSKacheong Poon 			ASSERT(tcp->tcp_conn.tcp_opts_conn_req == NULL);
707439b3deaSKacheong Poon 			tcp->tcp_conn.tcp_opts_conn_req = conn_opts_mp;
708439b3deaSKacheong Poon 			/*
709439b3deaSKacheong Poon 			 * Note:
710439b3deaSKacheong Poon 			 * These resulting option negotiation can include any
711439b3deaSKacheong Poon 			 * end-to-end negotiation options but there no such
712439b3deaSKacheong Poon 			 * thing (yet?) in our TCP/IP.
713439b3deaSKacheong Poon 			 */
714439b3deaSKacheong Poon 		}
715439b3deaSKacheong Poon 	}
716439b3deaSKacheong Poon 
717439b3deaSKacheong Poon 	/* call the non-TPI version */
718439b3deaSKacheong Poon 	error = tcp_do_connect(tcp->tcp_connp, sa, len, cr, cpid);
719439b3deaSKacheong Poon 	if (error < 0) {
720439b3deaSKacheong Poon 		mp = mi_tpi_err_ack_alloc(mp, -error, 0);
721439b3deaSKacheong Poon 	} else if (error > 0) {
722439b3deaSKacheong Poon 		mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
723439b3deaSKacheong Poon 	} else {
724439b3deaSKacheong Poon 		mp = mi_tpi_ok_ack_alloc(mp);
725439b3deaSKacheong Poon 	}
726439b3deaSKacheong Poon 
727439b3deaSKacheong Poon 	/*
728439b3deaSKacheong Poon 	 * Note: Code below is the "failure" case
729439b3deaSKacheong Poon 	 */
730439b3deaSKacheong Poon 	/* return error ack and blow away saved option results if any */
731439b3deaSKacheong Poon connect_failed:
732439b3deaSKacheong Poon 	if (mp != NULL)
733439b3deaSKacheong Poon 		putnext(connp->conn_rq, mp);
734439b3deaSKacheong Poon 	else {
735439b3deaSKacheong Poon 		tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
736439b3deaSKacheong Poon 		    TSYSERR, ENOMEM);
737439b3deaSKacheong Poon 	}
738439b3deaSKacheong Poon }
739439b3deaSKacheong Poon 
740439b3deaSKacheong Poon /* Return the TPI/TLI equivalent of our current tcp_state */
741439b3deaSKacheong Poon static int
742439b3deaSKacheong Poon tcp_tpistate(tcp_t *tcp)
743439b3deaSKacheong Poon {
744439b3deaSKacheong Poon 	switch (tcp->tcp_state) {
745439b3deaSKacheong Poon 	case TCPS_IDLE:
746439b3deaSKacheong Poon 		return (TS_UNBND);
747439b3deaSKacheong Poon 	case TCPS_LISTEN:
748439b3deaSKacheong Poon 		/*
749439b3deaSKacheong Poon 		 * Return whether there are outstanding T_CONN_IND waiting
750439b3deaSKacheong Poon 		 * for the matching T_CONN_RES. Therefore don't count q0.
751439b3deaSKacheong Poon 		 */
752439b3deaSKacheong Poon 		if (tcp->tcp_conn_req_cnt_q > 0)
753439b3deaSKacheong Poon 			return (TS_WRES_CIND);
754439b3deaSKacheong Poon 		else
755439b3deaSKacheong Poon 			return (TS_IDLE);
756439b3deaSKacheong Poon 	case TCPS_BOUND:
757439b3deaSKacheong Poon 		return (TS_IDLE);
758439b3deaSKacheong Poon 	case TCPS_SYN_SENT:
759439b3deaSKacheong Poon 		return (TS_WCON_CREQ);
760439b3deaSKacheong Poon 	case TCPS_SYN_RCVD:
761439b3deaSKacheong Poon 		/*
762439b3deaSKacheong Poon 		 * Note: assumption: this has to the active open SYN_RCVD.
763439b3deaSKacheong Poon 		 * The passive instance is detached in SYN_RCVD stage of
764439b3deaSKacheong Poon 		 * incoming connection processing so we cannot get request
765439b3deaSKacheong Poon 		 * for T_info_ack on it.
766439b3deaSKacheong Poon 		 */
767439b3deaSKacheong Poon 		return (TS_WACK_CRES);
768439b3deaSKacheong Poon 	case TCPS_ESTABLISHED:
769439b3deaSKacheong Poon 		return (TS_DATA_XFER);
770439b3deaSKacheong Poon 	case TCPS_CLOSE_WAIT:
771439b3deaSKacheong Poon 		return (TS_WREQ_ORDREL);
772439b3deaSKacheong Poon 	case TCPS_FIN_WAIT_1:
773439b3deaSKacheong Poon 		return (TS_WIND_ORDREL);
774439b3deaSKacheong Poon 	case TCPS_FIN_WAIT_2:
775439b3deaSKacheong Poon 		return (TS_WIND_ORDREL);
776439b3deaSKacheong Poon 
777439b3deaSKacheong Poon 	case TCPS_CLOSING:
778439b3deaSKacheong Poon 	case TCPS_LAST_ACK:
779439b3deaSKacheong Poon 	case TCPS_TIME_WAIT:
780439b3deaSKacheong Poon 	case TCPS_CLOSED:
781439b3deaSKacheong Poon 		/*
782439b3deaSKacheong Poon 		 * Following TS_WACK_DREQ7 is a rendition of "not
783439b3deaSKacheong Poon 		 * yet TS_IDLE" TPI state. There is no best match to any
784439b3deaSKacheong Poon 		 * TPI state for TCPS_{CLOSING, LAST_ACK, TIME_WAIT} but we
785439b3deaSKacheong Poon 		 * choose a value chosen that will map to TLI/XTI level
786439b3deaSKacheong Poon 		 * state of TSTATECHNG (state is process of changing) which
787439b3deaSKacheong Poon 		 * captures what this dummy state represents.
788439b3deaSKacheong Poon 		 */
789439b3deaSKacheong Poon 		return (TS_WACK_DREQ7);
790439b3deaSKacheong Poon 	default:
791439b3deaSKacheong Poon 		cmn_err(CE_WARN, "tcp_tpistate: strange state (%d) %s",
792439b3deaSKacheong Poon 		    tcp->tcp_state, tcp_display(tcp, NULL,
793439b3deaSKacheong Poon 		    DISP_PORT_ONLY));
794439b3deaSKacheong Poon 		return (TS_UNBND);
795439b3deaSKacheong Poon 	}
796439b3deaSKacheong Poon }
797439b3deaSKacheong Poon 
798439b3deaSKacheong Poon static void
799439b3deaSKacheong Poon tcp_copy_info(struct T_info_ack *tia, tcp_t *tcp)
800439b3deaSKacheong Poon {
801439b3deaSKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
802439b3deaSKacheong Poon 	conn_t		*connp = tcp->tcp_connp;
803439b3deaSKacheong Poon 	extern struct T_info_ack tcp_g_t_info_ack;
804439b3deaSKacheong Poon 	extern struct T_info_ack tcp_g_t_info_ack_v6;
805439b3deaSKacheong Poon 
806439b3deaSKacheong Poon 	if (connp->conn_family == AF_INET6)
807439b3deaSKacheong Poon 		*tia = tcp_g_t_info_ack_v6;
808439b3deaSKacheong Poon 	else
809439b3deaSKacheong Poon 		*tia = tcp_g_t_info_ack;
810439b3deaSKacheong Poon 	tia->CURRENT_state = tcp_tpistate(tcp);
811439b3deaSKacheong Poon 	tia->OPT_size = tcp_max_optsize;
812439b3deaSKacheong Poon 	if (tcp->tcp_mss == 0) {
813439b3deaSKacheong Poon 		/* Not yet set - tcp_open does not set mss */
814439b3deaSKacheong Poon 		if (connp->conn_ipversion == IPV4_VERSION)
815439b3deaSKacheong Poon 			tia->TIDU_size = tcps->tcps_mss_def_ipv4;
816439b3deaSKacheong Poon 		else
817439b3deaSKacheong Poon 			tia->TIDU_size = tcps->tcps_mss_def_ipv6;
818439b3deaSKacheong Poon 	} else {
819439b3deaSKacheong Poon 		tia->TIDU_size = tcp->tcp_mss;
820439b3deaSKacheong Poon 	}
821439b3deaSKacheong Poon 	/* TODO: Default ETSDU is 1.  Is that correct for tcp? */
822439b3deaSKacheong Poon }
823439b3deaSKacheong Poon 
8243e95bd4aSAnders Persson void
825439b3deaSKacheong Poon tcp_do_capability_ack(tcp_t *tcp, struct T_capability_ack *tcap,
826439b3deaSKacheong Poon     t_uscalar_t cap_bits1)
827439b3deaSKacheong Poon {
828439b3deaSKacheong Poon 	tcap->CAP_bits1 = 0;
829439b3deaSKacheong Poon 
830439b3deaSKacheong Poon 	if (cap_bits1 & TC1_INFO) {
831439b3deaSKacheong Poon 		tcp_copy_info(&tcap->INFO_ack, tcp);
832439b3deaSKacheong Poon 		tcap->CAP_bits1 |= TC1_INFO;
833439b3deaSKacheong Poon 	}
834439b3deaSKacheong Poon 
835439b3deaSKacheong Poon 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
836439b3deaSKacheong Poon 		tcap->ACCEPTOR_id = tcp->tcp_acceptor_id;
837439b3deaSKacheong Poon 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
838439b3deaSKacheong Poon 	}
839439b3deaSKacheong Poon 
840439b3deaSKacheong Poon }
841439b3deaSKacheong Poon 
842439b3deaSKacheong Poon /*
843439b3deaSKacheong Poon  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
844439b3deaSKacheong Poon  * tcp_wput.  Much of the T_CAPABILITY_ACK information is copied from
845439b3deaSKacheong Poon  * tcp_g_t_info_ack.  The current state of the stream is copied from
846439b3deaSKacheong Poon  * tcp_state.
847439b3deaSKacheong Poon  */
848439b3deaSKacheong Poon void
849439b3deaSKacheong Poon tcp_capability_req(tcp_t *tcp, mblk_t *mp)
850439b3deaSKacheong Poon {
851439b3deaSKacheong Poon 	t_uscalar_t		cap_bits1;
852439b3deaSKacheong Poon 	struct T_capability_ack	*tcap;
853439b3deaSKacheong Poon 
854439b3deaSKacheong Poon 	if (MBLKL(mp) < sizeof (struct T_capability_req)) {
855439b3deaSKacheong Poon 		freemsg(mp);
856439b3deaSKacheong Poon 		return;
857439b3deaSKacheong Poon 	}
858439b3deaSKacheong Poon 
859439b3deaSKacheong Poon 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
860439b3deaSKacheong Poon 
861439b3deaSKacheong Poon 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
862439b3deaSKacheong Poon 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
863439b3deaSKacheong Poon 	if (mp == NULL)
864439b3deaSKacheong Poon 		return;
865439b3deaSKacheong Poon 
866439b3deaSKacheong Poon 	tcap = (struct T_capability_ack *)mp->b_rptr;
867439b3deaSKacheong Poon 	tcp_do_capability_ack(tcp, tcap, cap_bits1);
868439b3deaSKacheong Poon 
869439b3deaSKacheong Poon 	putnext(tcp->tcp_connp->conn_rq, mp);
870439b3deaSKacheong Poon }
871439b3deaSKacheong Poon 
872439b3deaSKacheong Poon /*
873439b3deaSKacheong Poon  * This routine responds to T_INFO_REQ messages.  It is called by tcp_wput.
874439b3deaSKacheong Poon  * Most of the T_INFO_ACK information is copied from tcp_g_t_info_ack.
875439b3deaSKacheong Poon  * The current state of the stream is copied from tcp_state.
876439b3deaSKacheong Poon  */
877439b3deaSKacheong Poon void
878439b3deaSKacheong Poon tcp_info_req(tcp_t *tcp, mblk_t *mp)
879439b3deaSKacheong Poon {
880439b3deaSKacheong Poon 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
881439b3deaSKacheong Poon 	    T_INFO_ACK);
882439b3deaSKacheong Poon 	if (!mp) {
883439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
884439b3deaSKacheong Poon 		return;
885439b3deaSKacheong Poon 	}
886439b3deaSKacheong Poon 	tcp_copy_info((struct T_info_ack *)mp->b_rptr, tcp);
887439b3deaSKacheong Poon 	putnext(tcp->tcp_connp->conn_rq, mp);
888439b3deaSKacheong Poon }
889439b3deaSKacheong Poon 
890439b3deaSKacheong Poon /* Respond to the TPI addr request */
891439b3deaSKacheong Poon void
892439b3deaSKacheong Poon tcp_addr_req(tcp_t *tcp, mblk_t *mp)
893439b3deaSKacheong Poon {
894439b3deaSKacheong Poon 	struct sockaddr *sa;
895439b3deaSKacheong Poon 	mblk_t	*ackmp;
896439b3deaSKacheong Poon 	struct T_addr_ack *taa;
897439b3deaSKacheong Poon 	conn_t	*connp = tcp->tcp_connp;
898439b3deaSKacheong Poon 	uint_t	addrlen;
899439b3deaSKacheong Poon 
900439b3deaSKacheong Poon 	/* Make it large enough for worst case */
901439b3deaSKacheong Poon 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
902439b3deaSKacheong Poon 	    2 * sizeof (sin6_t), 1);
903439b3deaSKacheong Poon 	if (ackmp == NULL) {
904439b3deaSKacheong Poon 		tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
905439b3deaSKacheong Poon 		return;
906439b3deaSKacheong Poon 	}
907439b3deaSKacheong Poon 
908439b3deaSKacheong Poon 	taa = (struct T_addr_ack *)ackmp->b_rptr;
909439b3deaSKacheong Poon 
910439b3deaSKacheong Poon 	bzero(taa, sizeof (struct T_addr_ack));
911439b3deaSKacheong Poon 	ackmp->b_wptr = (uchar_t *)&taa[1];
912439b3deaSKacheong Poon 
913439b3deaSKacheong Poon 	taa->PRIM_type = T_ADDR_ACK;
914439b3deaSKacheong Poon 	ackmp->b_datap->db_type = M_PCPROTO;
915439b3deaSKacheong Poon 
916439b3deaSKacheong Poon 	if (connp->conn_family == AF_INET)
917439b3deaSKacheong Poon 		addrlen = sizeof (sin_t);
918439b3deaSKacheong Poon 	else
919439b3deaSKacheong Poon 		addrlen = sizeof (sin6_t);
920439b3deaSKacheong Poon 
921439b3deaSKacheong Poon 	/*
922439b3deaSKacheong Poon 	 * Note: Following code assumes 32 bit alignment of basic
923439b3deaSKacheong Poon 	 * data structures like sin_t and struct T_addr_ack.
924439b3deaSKacheong Poon 	 */
925439b3deaSKacheong Poon 	if (tcp->tcp_state >= TCPS_BOUND) {
926439b3deaSKacheong Poon 		/*
927439b3deaSKacheong Poon 		 * Fill in local address first
928439b3deaSKacheong Poon 		 */
929439b3deaSKacheong Poon 		taa->LOCADDR_offset = sizeof (*taa);
930439b3deaSKacheong Poon 		taa->LOCADDR_length = addrlen;
931439b3deaSKacheong Poon 		sa = (struct sockaddr *)&taa[1];
932439b3deaSKacheong Poon 		(void) conn_getsockname(connp, sa, &addrlen);
933439b3deaSKacheong Poon 		ackmp->b_wptr += addrlen;
934439b3deaSKacheong Poon 	}
935439b3deaSKacheong Poon 	if (tcp->tcp_state >= TCPS_SYN_RCVD) {
936439b3deaSKacheong Poon 		/*
937439b3deaSKacheong Poon 		 * Fill in Remote address
938439b3deaSKacheong Poon 		 */
939439b3deaSKacheong Poon 		taa->REMADDR_length = addrlen;
940439b3deaSKacheong Poon 		/* assumed 32-bit alignment */
941439b3deaSKacheong Poon 		taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
942439b3deaSKacheong Poon 		sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
943439b3deaSKacheong Poon 		(void) conn_getpeername(connp, sa, &addrlen);
944439b3deaSKacheong Poon 		ackmp->b_wptr += addrlen;
945439b3deaSKacheong Poon 	}
946439b3deaSKacheong Poon 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
947439b3deaSKacheong Poon 	putnext(tcp->tcp_connp->conn_rq, ackmp);
948439b3deaSKacheong Poon }
949439b3deaSKacheong Poon 
950439b3deaSKacheong Poon /*
951439b3deaSKacheong Poon  * Swap information between the eager and acceptor for a TLI/XTI client.
952439b3deaSKacheong Poon  * The sockfs accept is done on the acceptor stream and control goes
953439b3deaSKacheong Poon  * through tcp_tli_accept() and tcp_accept()/tcp_accept_swap() is not
954439b3deaSKacheong Poon  * called. In either case, both the eager and listener are in their own
955439b3deaSKacheong Poon  * perimeter (squeue) and the code has to deal with potential race.
956439b3deaSKacheong Poon  *
957439b3deaSKacheong Poon  * See the block comment on top of tcp_accept() and tcp_tli_accept().
958439b3deaSKacheong Poon  */
959439b3deaSKacheong Poon static void
960439b3deaSKacheong Poon tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, tcp_t *eager)
961439b3deaSKacheong Poon {
962439b3deaSKacheong Poon 	conn_t	*econnp, *aconnp;
963439b3deaSKacheong Poon 
964439b3deaSKacheong Poon 	ASSERT(eager->tcp_connp->conn_rq == listener->tcp_connp->conn_rq);
965439b3deaSKacheong Poon 	ASSERT(eager->tcp_detached && !acceptor->tcp_detached);
966439b3deaSKacheong Poon 	ASSERT(!TCP_IS_SOCKET(acceptor));
967439b3deaSKacheong Poon 	ASSERT(!TCP_IS_SOCKET(eager));
968439b3deaSKacheong Poon 	ASSERT(!TCP_IS_SOCKET(listener));
969439b3deaSKacheong Poon 
970439b3deaSKacheong Poon 	/*
971439b3deaSKacheong Poon 	 * Trusted Extensions may need to use a security label that is
972439b3deaSKacheong Poon 	 * different from the acceptor's label on MLP and MAC-Exempt
973439b3deaSKacheong Poon 	 * sockets. If this is the case, the required security label
974439b3deaSKacheong Poon 	 * already exists in econnp->conn_ixa->ixa_tsl. Since we make the
975439b3deaSKacheong Poon 	 * acceptor stream refer to econnp we atomatically get that label.
976439b3deaSKacheong Poon 	 */
977439b3deaSKacheong Poon 
978439b3deaSKacheong Poon 	acceptor->tcp_detached = B_TRUE;
979439b3deaSKacheong Poon 	/*
980439b3deaSKacheong Poon 	 * To permit stream re-use by TLI/XTI, the eager needs a copy of
981439b3deaSKacheong Poon 	 * the acceptor id.
982439b3deaSKacheong Poon 	 */
983439b3deaSKacheong Poon 	eager->tcp_acceptor_id = acceptor->tcp_acceptor_id;
984439b3deaSKacheong Poon 
985439b3deaSKacheong Poon 	/* remove eager from listen list... */
986439b3deaSKacheong Poon 	mutex_enter(&listener->tcp_eager_lock);
987439b3deaSKacheong Poon 	tcp_eager_unlink(eager);
988439b3deaSKacheong Poon 	ASSERT(eager->tcp_eager_next_q == NULL &&
989439b3deaSKacheong Poon 	    eager->tcp_eager_last_q == NULL);
990439b3deaSKacheong Poon 	ASSERT(eager->tcp_eager_next_q0 == NULL &&
991439b3deaSKacheong Poon 	    eager->tcp_eager_prev_q0 == NULL);
992439b3deaSKacheong Poon 	mutex_exit(&listener->tcp_eager_lock);
993439b3deaSKacheong Poon 
994439b3deaSKacheong Poon 	econnp = eager->tcp_connp;
995439b3deaSKacheong Poon 	aconnp = acceptor->tcp_connp;
996439b3deaSKacheong Poon 	econnp->conn_rq = aconnp->conn_rq;
997439b3deaSKacheong Poon 	econnp->conn_wq = aconnp->conn_wq;
998439b3deaSKacheong Poon 	econnp->conn_rq->q_ptr = econnp;
999439b3deaSKacheong Poon 	econnp->conn_wq->q_ptr = econnp;
1000439b3deaSKacheong Poon 
1001439b3deaSKacheong Poon 	/*
1002439b3deaSKacheong Poon 	 * In the TLI/XTI loopback case, we are inside the listener's squeue,
1003439b3deaSKacheong Poon 	 * which might be a different squeue from our peer TCP instance.
1004439b3deaSKacheong Poon 	 * For TCP Fusion, the peer expects that whenever tcp_detached is
1005439b3deaSKacheong Poon 	 * clear, our TCP queues point to the acceptor's queues.  Thus, use
1006439b3deaSKacheong Poon 	 * membar_producer() to ensure that the assignments of conn_rq/conn_wq
1007439b3deaSKacheong Poon 	 * above reach global visibility prior to the clearing of tcp_detached.
1008439b3deaSKacheong Poon 	 */
1009439b3deaSKacheong Poon 	membar_producer();
1010439b3deaSKacheong Poon 	eager->tcp_detached = B_FALSE;
1011439b3deaSKacheong Poon 
1012439b3deaSKacheong Poon 	ASSERT(eager->tcp_ack_tid == 0);
1013439b3deaSKacheong Poon 
1014439b3deaSKacheong Poon 	econnp->conn_dev = aconnp->conn_dev;
1015439b3deaSKacheong Poon 	econnp->conn_minor_arena = aconnp->conn_minor_arena;
1016439b3deaSKacheong Poon 
1017439b3deaSKacheong Poon 	ASSERT(econnp->conn_minor_arena != NULL);
1018439b3deaSKacheong Poon 	if (econnp->conn_cred != NULL)
1019439b3deaSKacheong Poon 		crfree(econnp->conn_cred);
1020439b3deaSKacheong Poon 	econnp->conn_cred = aconnp->conn_cred;
1021be4c8f74SErik Nordmark 	ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
1022439b3deaSKacheong Poon 	econnp->conn_ixa->ixa_cred = econnp->conn_cred;
1023439b3deaSKacheong Poon 	aconnp->conn_cred = NULL;
1024439b3deaSKacheong Poon 	econnp->conn_cpid = aconnp->conn_cpid;
1025439b3deaSKacheong Poon 	ASSERT(econnp->conn_netstack == aconnp->conn_netstack);
1026439b3deaSKacheong Poon 	ASSERT(eager->tcp_tcps == acceptor->tcp_tcps);
1027439b3deaSKacheong Poon 
1028439b3deaSKacheong Poon 	econnp->conn_zoneid = aconnp->conn_zoneid;
1029439b3deaSKacheong Poon 	econnp->conn_allzones = aconnp->conn_allzones;
1030439b3deaSKacheong Poon 	econnp->conn_ixa->ixa_zoneid = aconnp->conn_ixa->ixa_zoneid;
1031439b3deaSKacheong Poon 
1032439b3deaSKacheong Poon 	econnp->conn_mac_mode = aconnp->conn_mac_mode;
1033439b3deaSKacheong Poon 	econnp->conn_zone_is_global = aconnp->conn_zone_is_global;
1034439b3deaSKacheong Poon 	aconnp->conn_mac_mode = CONN_MAC_DEFAULT;
1035439b3deaSKacheong Poon 
1036439b3deaSKacheong Poon 	/* Do the IPC initialization */
1037439b3deaSKacheong Poon 	CONN_INC_REF(econnp);
1038439b3deaSKacheong Poon 
1039439b3deaSKacheong Poon 	/* Done with old IPC. Drop its ref on its connp */
1040439b3deaSKacheong Poon 	CONN_DEC_REF(aconnp);
1041439b3deaSKacheong Poon }
1042439b3deaSKacheong Poon 
1043439b3deaSKacheong Poon /*
10443e95bd4aSAnders Persson  * This runs at the tail end of accept processing on the squeue of the
10453e95bd4aSAnders Persson  * new connection.
10463e95bd4aSAnders Persson  */
10473e95bd4aSAnders Persson /* ARGSUSED */
10483e95bd4aSAnders Persson static void
10493e95bd4aSAnders Persson tcp_accept_finish(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
10503e95bd4aSAnders Persson {
10513e95bd4aSAnders Persson 	conn_t			*connp = (conn_t *)arg;
10523e95bd4aSAnders Persson 	tcp_t			*tcp = connp->conn_tcp;
10533e95bd4aSAnders Persson 	queue_t			*q = connp->conn_rq;
10543e95bd4aSAnders Persson 	tcp_stack_t		*tcps = tcp->tcp_tcps;
10553e95bd4aSAnders Persson 	struct stroptions 	*stropt;
10563e95bd4aSAnders Persson 	struct sock_proto_props sopp;
10573e95bd4aSAnders Persson 
10583e95bd4aSAnders Persson 	/* Should never be called for non-STREAMS sockets */
10593e95bd4aSAnders Persson 	ASSERT(!IPCL_IS_NONSTR(connp));
10603e95bd4aSAnders Persson 
10613e95bd4aSAnders Persson 	/* We should just receive a single mblk that fits a T_discon_ind */
10623e95bd4aSAnders Persson 	ASSERT(mp->b_cont == NULL);
10633e95bd4aSAnders Persson 
10643e95bd4aSAnders Persson 	/*
10653e95bd4aSAnders Persson 	 * Drop the eager's ref on the listener, that was placed when
10663e95bd4aSAnders Persson 	 * this eager began life in tcp_input_listener.
10673e95bd4aSAnders Persson 	 */
10683e95bd4aSAnders Persson 	CONN_DEC_REF(tcp->tcp_saved_listener->tcp_connp);
10693e95bd4aSAnders Persson 
10703e95bd4aSAnders Persson 	tcp->tcp_detached = B_FALSE;
10713e95bd4aSAnders Persson 
10723e95bd4aSAnders Persson 	if (tcp->tcp_state <= TCPS_BOUND || tcp->tcp_accept_error) {
10733e95bd4aSAnders Persson 		/*
10743e95bd4aSAnders Persson 		 * Someone blewoff the eager before we could finish
10753e95bd4aSAnders Persson 		 * the accept.
10763e95bd4aSAnders Persson 		 *
10773e95bd4aSAnders Persson 		 * The only reason eager exists it because we put in
10783e95bd4aSAnders Persson 		 * a ref on it when conn ind went up. We need to send
10793e95bd4aSAnders Persson 		 * a disconnect indication up while the last reference
10803e95bd4aSAnders Persson 		 * on the eager will be dropped by the squeue when we
10813e95bd4aSAnders Persson 		 * return.
10823e95bd4aSAnders Persson 		 */
10833e95bd4aSAnders Persson 		ASSERT(tcp->tcp_listener == NULL);
10843e95bd4aSAnders Persson 		if (tcp->tcp_issocket || tcp->tcp_send_discon_ind) {
10853e95bd4aSAnders Persson 			struct	T_discon_ind	*tdi;
10863e95bd4aSAnders Persson 
10873e95bd4aSAnders Persson 			(void) putnextctl1(q, M_FLUSH, FLUSHRW);
10883e95bd4aSAnders Persson 			/*
10893e95bd4aSAnders Persson 			 * Let us reuse the incoming mblk to avoid
10903e95bd4aSAnders Persson 			 * memory allocation failure problems. We know
10913e95bd4aSAnders Persson 			 * that the size of the incoming mblk i.e.
10923e95bd4aSAnders Persson 			 * stroptions is greater than sizeof
10933e95bd4aSAnders Persson 			 * T_discon_ind.
10943e95bd4aSAnders Persson 			 */
10953e95bd4aSAnders Persson 			ASSERT(DB_REF(mp) == 1);
10963e95bd4aSAnders Persson 			ASSERT(MBLKSIZE(mp) >=
10973e95bd4aSAnders Persson 			    sizeof (struct T_discon_ind));
10983e95bd4aSAnders Persson 
10993e95bd4aSAnders Persson 			DB_TYPE(mp) = M_PROTO;
11003e95bd4aSAnders Persson 			((union T_primitives *)mp->b_rptr)->type =
11013e95bd4aSAnders Persson 			    T_DISCON_IND;
11023e95bd4aSAnders Persson 			tdi = (struct T_discon_ind *)mp->b_rptr;
11033e95bd4aSAnders Persson 			if (tcp->tcp_issocket) {
11043e95bd4aSAnders Persson 				tdi->DISCON_reason = ECONNREFUSED;
11053e95bd4aSAnders Persson 				tdi->SEQ_number = 0;
11063e95bd4aSAnders Persson 			} else {
11073e95bd4aSAnders Persson 				tdi->DISCON_reason = ENOPROTOOPT;
11083e95bd4aSAnders Persson 				tdi->SEQ_number =
11093e95bd4aSAnders Persson 				    tcp->tcp_conn_req_seqnum;
11103e95bd4aSAnders Persson 			}
11113e95bd4aSAnders Persson 			mp->b_wptr = mp->b_rptr +
11123e95bd4aSAnders Persson 			    sizeof (struct T_discon_ind);
11133e95bd4aSAnders Persson 			putnext(q, mp);
11143e95bd4aSAnders Persson 		}
11153e95bd4aSAnders Persson 		tcp->tcp_hard_binding = B_FALSE;
11163e95bd4aSAnders Persson 		return;
11173e95bd4aSAnders Persson 	}
11183e95bd4aSAnders Persson 
11193e95bd4aSAnders Persson 	/*
11203e95bd4aSAnders Persson 	 * This is the first time we run on the correct
11213e95bd4aSAnders Persson 	 * queue after tcp_accept. So fix all the q parameters
11223e95bd4aSAnders Persson 	 * here.
11233e95bd4aSAnders Persson 	 *
11243e95bd4aSAnders Persson 	 * Let us reuse the incoming mblk to avoid
11253e95bd4aSAnders Persson 	 * memory allocation failure problems. We know
11263e95bd4aSAnders Persson 	 * that the size of the incoming mblk is at least
11273e95bd4aSAnders Persson 	 * stroptions
11283e95bd4aSAnders Persson 	 */
11293e95bd4aSAnders Persson 	tcp_get_proto_props(tcp, &sopp);
11303e95bd4aSAnders Persson 
11313e95bd4aSAnders Persson 	ASSERT(DB_REF(mp) == 1);
11323e95bd4aSAnders Persson 	ASSERT(MBLKSIZE(mp) >= sizeof (struct stroptions));
11333e95bd4aSAnders Persson 
11343e95bd4aSAnders Persson 	DB_TYPE(mp) = M_SETOPTS;
11353e95bd4aSAnders Persson 	stropt = (struct stroptions *)mp->b_rptr;
11363e95bd4aSAnders Persson 	mp->b_wptr = mp->b_rptr + sizeof (struct stroptions);
11373e95bd4aSAnders Persson 	stropt = (struct stroptions *)mp->b_rptr;
11383e95bd4aSAnders Persson 	ASSERT(sopp.sopp_flags & (SO_HIWAT|SO_WROFF|SO_MAXBLK));
11393e95bd4aSAnders Persson 	stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK;
11403e95bd4aSAnders Persson 	stropt->so_hiwat = sopp.sopp_rxhiwat;
11413e95bd4aSAnders Persson 	stropt->so_wroff = sopp.sopp_wroff;
11423e95bd4aSAnders Persson 	stropt->so_maxblk = sopp.sopp_maxblk;
11433e95bd4aSAnders Persson 
11443e95bd4aSAnders Persson 	/* Send the options up */
11453e95bd4aSAnders Persson 	putnext(q, mp);
11463e95bd4aSAnders Persson 
11473e95bd4aSAnders Persson 	/*
11483e95bd4aSAnders Persson 	 * Pass up any data and/or a fin that has been received.
11493e95bd4aSAnders Persson 	 *
11503e95bd4aSAnders Persson 	 * Adjust receive window in case it had decreased
11513e95bd4aSAnders Persson 	 * (because there is data <=> tcp_rcv_list != NULL)
11523e95bd4aSAnders Persson 	 * while the connection was detached. Note that
11533e95bd4aSAnders Persson 	 * in case the eager was flow-controlled, w/o this
11543e95bd4aSAnders Persson 	 * code, the rwnd may never open up again!
11553e95bd4aSAnders Persson 	 */
11563e95bd4aSAnders Persson 	if (tcp->tcp_rcv_list != NULL) {
11573e95bd4aSAnders Persson 		/* We drain directly in case of fused tcp loopback */
11583e95bd4aSAnders Persson 
11593e95bd4aSAnders Persson 		if (!tcp->tcp_fused && canputnext(q)) {
11603e95bd4aSAnders Persson 			tcp->tcp_rwnd = connp->conn_rcvbuf;
11613e95bd4aSAnders Persson 			if (tcp->tcp_state >= TCPS_ESTABLISHED &&
11623e95bd4aSAnders Persson 			    tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) {
11633e95bd4aSAnders Persson 				tcp_xmit_ctl(NULL,
11643e95bd4aSAnders Persson 				    tcp, (tcp->tcp_swnd == 0) ?
11653e95bd4aSAnders Persson 				    tcp->tcp_suna : tcp->tcp_snxt,
11663e95bd4aSAnders Persson 				    tcp->tcp_rnxt, TH_ACK);
11673e95bd4aSAnders Persson 			}
11683e95bd4aSAnders Persson 		}
11693e95bd4aSAnders Persson 
11703e95bd4aSAnders Persson 		(void) tcp_rcv_drain(tcp);
11713e95bd4aSAnders Persson 
11723e95bd4aSAnders Persson 		/*
11733e95bd4aSAnders Persson 		 * For fused tcp loopback, back-enable peer endpoint
11743e95bd4aSAnders Persson 		 * if it's currently flow-controlled.
11753e95bd4aSAnders Persson 		 */
11763e95bd4aSAnders Persson 		if (tcp->tcp_fused) {
11773e95bd4aSAnders Persson 			tcp_t *peer_tcp = tcp->tcp_loopback_peer;
11783e95bd4aSAnders Persson 
11793e95bd4aSAnders Persson 			ASSERT(peer_tcp != NULL);
11803e95bd4aSAnders Persson 			ASSERT(peer_tcp->tcp_fused);
11813e95bd4aSAnders Persson 
11823e95bd4aSAnders Persson 			mutex_enter(&peer_tcp->tcp_non_sq_lock);
11833e95bd4aSAnders Persson 			if (peer_tcp->tcp_flow_stopped) {
11843e95bd4aSAnders Persson 				tcp_clrqfull(peer_tcp);
11853e95bd4aSAnders Persson 				TCP_STAT(tcps, tcp_fusion_backenabled);
11863e95bd4aSAnders Persson 			}
11873e95bd4aSAnders Persson 			mutex_exit(&peer_tcp->tcp_non_sq_lock);
11883e95bd4aSAnders Persson 		}
11893e95bd4aSAnders Persson 	}
11903e95bd4aSAnders Persson 	ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
11913e95bd4aSAnders Persson 	if (tcp->tcp_fin_rcvd && !tcp->tcp_ordrel_done) {
11923e95bd4aSAnders Persson 		tcp->tcp_ordrel_done = B_TRUE;
11933e95bd4aSAnders Persson 		mp = tcp->tcp_ordrel_mp;
11943e95bd4aSAnders Persson 		tcp->tcp_ordrel_mp = NULL;
11953e95bd4aSAnders Persson 		putnext(q, mp);
11963e95bd4aSAnders Persson 	}
11973e95bd4aSAnders Persson 	tcp->tcp_hard_binding = B_FALSE;
11983e95bd4aSAnders Persson 
11993e95bd4aSAnders Persson 	if (connp->conn_keepalive) {
12003e95bd4aSAnders Persson 		tcp->tcp_ka_last_intrvl = 0;
12013e95bd4aSAnders Persson 		tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer,
12023e95bd4aSAnders Persson 		    tcp->tcp_ka_interval);
12033e95bd4aSAnders Persson 	}
12043e95bd4aSAnders Persson 
12053e95bd4aSAnders Persson 	/*
12063e95bd4aSAnders Persson 	 * At this point, eager is fully established and will
12073e95bd4aSAnders Persson 	 * have the following references -
12083e95bd4aSAnders Persson 	 *
12093e95bd4aSAnders Persson 	 * 2 references for connection to exist (1 for TCP and 1 for IP).
12103e95bd4aSAnders Persson 	 * 1 reference for the squeue which will be dropped by the squeue as
12113e95bd4aSAnders Persson 	 *	soon as this function returns.
12123e95bd4aSAnders Persson 	 * There will be 1 additional reference for being in classifier
12133e95bd4aSAnders Persson 	 *	hash list provided something bad hasn't happened.
12143e95bd4aSAnders Persson 	 */
12153e95bd4aSAnders Persson 	ASSERT((connp->conn_fanout != NULL && connp->conn_ref >= 4) ||
12163e95bd4aSAnders Persson 	    (connp->conn_fanout == NULL && connp->conn_ref >= 3));
12173e95bd4aSAnders Persson }
12183e95bd4aSAnders Persson 
1219*dd49f125SAnders Persson /*
1220*dd49f125SAnders Persson  * Pull a deferred connection indication off of the listener. The caller
1221*dd49f125SAnders Persson  * must verify that there is a deferred conn ind under eager_lock before
1222*dd49f125SAnders Persson  * calling this function.
 *
 * The eager to promote is found by walking tcp_eager_prev_q0 links back
 * from listener->tcp_eager_prev_q0 until an entry is reached whose
 * predecessor is not marked tcp_conn_def_q0. That eager is unlinked from
 * the doubly linked q0 list, appended to the tail of the listener's q
 * list (tcp_eager_next_q / tcp_eager_last_q), and the listener's
 * tcp_conn_req_cnt_q0 / tcp_conn_req_cnt_q counters are adjusted.
 *
 * listener - listening endpoint; listener->tcp_eager_lock must be held
 *            (asserted).
 *
 * Returns the mblk that was saved in the eager's
 * tcp_conn.tcp_eager_conn_ind; that field is set to NULL here. This
 * function does not send the mblk upstream itself.
1223*dd49f125SAnders Persson  */
1224*dd49f125SAnders Persson static mblk_t *
1225*dd49f125SAnders Persson tcp_get_def_conn_ind(tcp_t *listener)
1226*dd49f125SAnders Persson {
1227*dd49f125SAnders Persson 	tcp_t *tail;
1228*dd49f125SAnders Persson 	tcp_t *tcp;
1229*dd49f125SAnders Persson 	mblk_t *conn_ind;
1230*dd49f125SAnders Persson 
1231*dd49f125SAnders Persson 	ASSERT(MUTEX_HELD(&listener->tcp_eager_lock));
1232*dd49f125SAnders Persson 	ASSERT(listener->tcp_eager_prev_q0->tcp_conn_def_q0);
1233*dd49f125SAnders Persson 
1234*dd49f125SAnders Persson 	tcp = listener->tcp_eager_prev_q0;
1235*dd49f125SAnders Persson 	/*
1236*dd49f125SAnders Persson 	 * listener->tcp_eager_prev_q0 points to the TAIL of the
1237*dd49f125SAnders Persson 	 * deferred T_conn_ind queue. We need to get to the head
1238*dd49f125SAnders Persson 	 * of the queue in order to send up T_conn_ind in the same
1239*dd49f125SAnders Persson 	 * order in which the 3WHS completed.
	 *
	 * The walk stops at the first entry whose predecessor is not
	 * marked deferred; the tcp != listener test ends the loop if the
	 * walk ever arrives back at the listener.
1240*dd49f125SAnders Persson 	 */
1241*dd49f125SAnders Persson 	while (tcp != listener) {
1242*dd49f125SAnders Persson 		if (!tcp->tcp_eager_prev_q0->tcp_conn_def_q0)
1243*dd49f125SAnders Persson 			break;
1244*dd49f125SAnders Persson 		else
1245*dd49f125SAnders Persson 			tcp = tcp->tcp_eager_prev_q0;
1246*dd49f125SAnders Persson 	}
1247*dd49f125SAnders Persson 
	/* Detach the saved conn ind from the eager; it is returned below. */
1248*dd49f125SAnders Persson 	conn_ind = tcp->tcp_conn.tcp_eager_conn_ind;
1249*dd49f125SAnders Persson 	tcp->tcp_conn.tcp_eager_conn_ind = NULL;
1250*dd49f125SAnders Persson 	/* Move from q0 to q */
1251*dd49f125SAnders Persson 	ASSERT(listener->tcp_conn_req_cnt_q0 > 0);
1252*dd49f125SAnders Persson 	listener->tcp_conn_req_cnt_q0--;
1253*dd49f125SAnders Persson 	listener->tcp_conn_req_cnt_q++;
	/* Splice tcp out of the doubly linked q0 list. */
1254*dd49f125SAnders Persson 	tcp->tcp_eager_next_q0->tcp_eager_prev_q0 =
1255*dd49f125SAnders Persson 	    tcp->tcp_eager_prev_q0;
1256*dd49f125SAnders Persson 	tcp->tcp_eager_prev_q0->tcp_eager_next_q0 =
1257*dd49f125SAnders Persson 	    tcp->tcp_eager_next_q0;
1258*dd49f125SAnders Persson 	tcp->tcp_eager_prev_q0 = NULL;
1259*dd49f125SAnders Persson 	tcp->tcp_eager_next_q0 = NULL;
1260*dd49f125SAnders Persson 	tcp->tcp_conn_def_q0 = B_FALSE;
1261*dd49f125SAnders Persson 
1262*dd49f125SAnders Persson 	/* Make sure the tcp isn't in the list of droppables */
1263*dd49f125SAnders Persson 	ASSERT(tcp->tcp_eager_next_drop_q0 == NULL &&
1264*dd49f125SAnders Persson 	    tcp->tcp_eager_prev_drop_q0 == NULL);
1265*dd49f125SAnders Persson 
1266*dd49f125SAnders Persson 	/*
1267*dd49f125SAnders Persson 	 * Insert at end of the queue because sockfs sends
1268*dd49f125SAnders Persson 	 * down T_CONN_RES in chronological order. Leaving
1269*dd49f125SAnders Persson 	 * the older conn indications at front of the queue
1270*dd49f125SAnders Persson 	 * helps reduce search time.
	 *
	 * A NULL tcp_eager_last_q means the q list is empty, in which
	 * case tcp becomes both its first and last element.
1271*dd49f125SAnders Persson 	 */
1272*dd49f125SAnders Persson 	tail = listener->tcp_eager_last_q;
1273*dd49f125SAnders Persson 	if (tail != NULL) {
1274*dd49f125SAnders Persson 		tail->tcp_eager_next_q = tcp;
1275*dd49f125SAnders Persson 	} else {
1276*dd49f125SAnders Persson 		listener->tcp_eager_next_q = tcp;
1277*dd49f125SAnders Persson 	}
1278*dd49f125SAnders Persson 	listener->tcp_eager_last_q = tcp;
1279*dd49f125SAnders Persson 	tcp->tcp_eager_next_q = NULL;
1280*dd49f125SAnders Persson 
1281*dd49f125SAnders Persson 	return (conn_ind);
1282*dd49f125SAnders Persson }
1283*dd49f125SAnders Persson 
12843e95bd4aSAnders Persson 
12853e95bd4aSAnders Persson /*
1286439b3deaSKacheong Poon  * Reply to a client's T_CONN_RES TPI message. This function
1287439b3deaSKacheong Poon  * is used only for TLI/XTI listener. Sockfs sends T_CONN_RES
1288439b3deaSKacheong Poon  * on the acceptor STREAM and processed in tcp_accept_common().
1289439b3deaSKacheong Poon  * Read the block comment on top of tcp_input_listener().
 *
 * listener - the endpoint the T_CONN_RES was sent down on; it must be in
 *            TCPS_LISTEN.
 * mp       - the T_CONN_RES message. It is handed to tcp_err_ack() on the
 *            early error paths, or to mi_tpi_ok_ack_alloc_extra() to
 *            build the T_OK_ACK; the caller must not use it afterwards.
 *
 * Outline: locate the acceptor (the listener itself, or the endpoint
 * found via tcp_acceptor_hash_lookup()), locate the eager on the
 * listener's q list whose tcp_conn_req_seqnum matches SEQ_number, build
 * a T_OK_ACK carrying the eager's local address, swap the eager onto the
 * acceptor's queues with tcp_accept_swap(), process any T_CONN_RES
 * options, send the T_OK_ACK up the listener, hand up one deferred
 * T_CONN_IND if any is pending, tear down the old acceptor tcp_t, and
 * finally queue tcp_accept_finish() on the eager's squeue.
 *
 * Errors are reported with tcp_err_ack() on the listener:
 *   TPROTO         message shorter than struct T_conn_res
 *   TBADF          accepting on the listener itself while
 *                  tcp_conn_req_cnt_q != 1 or the sequence number of the
 *                  single queued eager does not match
 *   TPROVMISMATCH  no endpoint registered under ACCEPTOR_id
 *   TOUTSTATE      acceptor not in TCPS_IDLE/TCPS_BOUND, or listener not
 *                  in TCPS_LISTEN
 *   TBADSEQ        no eager on the q list with the given SEQ_number
 *   TSYSERR/ENOMEM an mblk allocation or copy failed
 *   other          the t_error/sys_error pair reported by
 *                  tcp_conprim_opt_process()
1290439b3deaSKacheong Poon  */
1291439b3deaSKacheong Poon void
1292439b3deaSKacheong Poon tcp_tli_accept(tcp_t *listener, mblk_t *mp)
1293439b3deaSKacheong Poon {
1294439b3deaSKacheong Poon 	tcp_t		*acceptor;
1295439b3deaSKacheong Poon 	tcp_t		*eager;
1296439b3deaSKacheong Poon 	struct T_conn_res	*tcr;
1297439b3deaSKacheong Poon 	t_uscalar_t	acceptor_id;
1298439b3deaSKacheong Poon 	t_scalar_t	seqnum;
1299439b3deaSKacheong Poon 	mblk_t		*discon_mp = NULL;
1300439b3deaSKacheong Poon 	mblk_t		*ok_mp;
1301439b3deaSKacheong Poon 	mblk_t		*mp1;
1302439b3deaSKacheong Poon 	tcp_stack_t	*tcps = listener->tcp_tcps;
1303439b3deaSKacheong Poon 	conn_t		*econnp;
1304439b3deaSKacheong Poon 
	/* Reject a message too short to hold a struct T_conn_res. */
1305439b3deaSKacheong Poon 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tcr)) {
1306439b3deaSKacheong Poon 		tcp_err_ack(listener, mp, TPROTO, 0);
1307439b3deaSKacheong Poon 		return;
1308439b3deaSKacheong Poon 	}
1309439b3deaSKacheong Poon 	tcr = (struct T_conn_res *)mp->b_rptr;
1310439b3deaSKacheong Poon 
1311439b3deaSKacheong Poon 	/*
1312439b3deaSKacheong Poon 	 * Under ILP32 the stream head points tcr->ACCEPTOR_id at the
1313439b3deaSKacheong Poon 	 * read side queue of the streams device underneath us i.e. the
1314439b3deaSKacheong Poon 	 * read side queue of 'ip'. Since we can't dereference QUEUE_ptr we
1315439b3deaSKacheong Poon 	 * look it up in the queue_hash.  Under LP64 it sends down the
1316439b3deaSKacheong Poon 	 * minor_t of the accepting endpoint.
1317439b3deaSKacheong Poon 	 *
1318439b3deaSKacheong Poon 	 * Once the acceptor/eager are modified (in tcp_accept_swap) the
1319439b3deaSKacheong Poon 	 * fanout hash lock is held.
1320439b3deaSKacheong Poon 	 * This prevents any thread from entering the acceptor queue from
1321439b3deaSKacheong Poon 	 * below (since it has not been hard bound yet i.e. any inbound
1322439b3deaSKacheong Poon 	 * packets will arrive on the listener conn_t and
1323439b3deaSKacheong Poon 	 * go through the classifier).
1324439b3deaSKacheong Poon 	 * The CONN_INC_REF will prevent the acceptor from closing.
1325439b3deaSKacheong Poon 	 *
1326439b3deaSKacheong Poon 	 * XXX It is still possible for a tli application to send down data
1327439b3deaSKacheong Poon 	 * on the accepting stream while another thread calls t_accept.
1328439b3deaSKacheong Poon 	 * This should not be a problem for well-behaved applications since
1329439b3deaSKacheong Poon 	 * the T_OK_ACK is sent after the queue swapping is completed.
1330439b3deaSKacheong Poon 	 *
1331439b3deaSKacheong Poon 	 * If the accepting fd is the same as the listening fd, avoid
1332439b3deaSKacheong Poon 	 * queue hash lookup since that will return an eager listener in an
1333439b3deaSKacheong Poon 	 * already established state.
1334439b3deaSKacheong Poon 	 */
1335439b3deaSKacheong Poon 	acceptor_id = tcr->ACCEPTOR_id;
1336439b3deaSKacheong Poon 	mutex_enter(&listener->tcp_eager_lock);
1337439b3deaSKacheong Poon 	if (listener->tcp_acceptor_id == acceptor_id) {
		/*
		 * Accepting on the listening stream itself. eager is only
		 * dereferenced below when tcp_conn_req_cnt_q == 1, because
		 * the || short-circuits otherwise.
		 */
1338439b3deaSKacheong Poon 		eager = listener->tcp_eager_next_q;
1339439b3deaSKacheong Poon 		/* only count how many T_CONN_INDs so don't count q0 */
1340439b3deaSKacheong Poon 		if ((listener->tcp_conn_req_cnt_q != 1) ||
1341439b3deaSKacheong Poon 		    (eager->tcp_conn_req_seqnum != tcr->SEQ_number)) {
1342439b3deaSKacheong Poon 			mutex_exit(&listener->tcp_eager_lock);
1343439b3deaSKacheong Poon 			tcp_err_ack(listener, mp, TBADF, 0);
1344439b3deaSKacheong Poon 			return;
1345439b3deaSKacheong Poon 		}
1346439b3deaSKacheong Poon 		if (listener->tcp_conn_req_cnt_q0 != 0) {
1347439b3deaSKacheong Poon 			/* Throw away all the eagers on q0. */
1348439b3deaSKacheong Poon 			tcp_eager_cleanup(listener, 1);
1349439b3deaSKacheong Poon 		}
		/* Turn SYN defense off and release its address cache. */
1350439b3deaSKacheong Poon 		if (listener->tcp_syn_defense) {
1351439b3deaSKacheong Poon 			listener->tcp_syn_defense = B_FALSE;
1352439b3deaSKacheong Poon 			if (listener->tcp_ip_addr_cache != NULL) {
1353439b3deaSKacheong Poon 				kmem_free(listener->tcp_ip_addr_cache,
1354439b3deaSKacheong Poon 				    IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t));
1355439b3deaSKacheong Poon 				listener->tcp_ip_addr_cache = NULL;
1356439b3deaSKacheong Poon 			}
1357439b3deaSKacheong Poon 		}
1358439b3deaSKacheong Poon 		/*
1359439b3deaSKacheong Poon 		 * Transfer tcp_conn_req_max to the eager so that when
1360439b3deaSKacheong Poon 		 * a disconnect occurs we can revert the endpoint to the
1361439b3deaSKacheong Poon 		 * listen state.
1362439b3deaSKacheong Poon 		 */
1363439b3deaSKacheong Poon 		eager->tcp_conn_req_max = listener->tcp_conn_req_max;
1364439b3deaSKacheong Poon 		ASSERT(listener->tcp_conn_req_cnt_q0 == 0);
1365439b3deaSKacheong Poon 		/*
1366439b3deaSKacheong Poon 		 * Get a reference on the acceptor just like the
1367439b3deaSKacheong Poon 		 * tcp_acceptor_hash_lookup below.
1368439b3deaSKacheong Poon 		 */
1369439b3deaSKacheong Poon 		acceptor = listener;
1370439b3deaSKacheong Poon 		CONN_INC_REF(acceptor->tcp_connp);
1371439b3deaSKacheong Poon 	} else {
		/*
		 * Separate acceptor stream. Per the comment above, the
		 * lookup hands the acceptor back with a reference held;
		 * every error return from here on drops it with
		 * CONN_DEC_REF.
		 */
1372439b3deaSKacheong Poon 		acceptor = tcp_acceptor_hash_lookup(acceptor_id, tcps);
1373439b3deaSKacheong Poon 		if (acceptor == NULL) {
1374439b3deaSKacheong Poon 			if (listener->tcp_connp->conn_debug) {
1375439b3deaSKacheong Poon 				(void) strlog(TCP_MOD_ID, 0, 1,
1376439b3deaSKacheong Poon 				    SL_ERROR|SL_TRACE,
1377439b3deaSKacheong Poon 				    "tcp_accept: did not find acceptor 0x%x\n",
1378439b3deaSKacheong Poon 				    acceptor_id);
1379439b3deaSKacheong Poon 			}
1380439b3deaSKacheong Poon 			mutex_exit(&listener->tcp_eager_lock);
1381439b3deaSKacheong Poon 			tcp_err_ack(listener, mp, TPROVMISMATCH, 0);
1382439b3deaSKacheong Poon 			return;
1383439b3deaSKacheong Poon 		}
1384439b3deaSKacheong Poon 		/*
1385439b3deaSKacheong Poon 		 * Verify acceptor state. The acceptable states for an acceptor
1386439b3deaSKacheong Poon 		 * include TCPS_IDLE and TCPS_BOUND.
1387439b3deaSKacheong Poon 		 */
1388439b3deaSKacheong Poon 		switch (acceptor->tcp_state) {
1389439b3deaSKacheong Poon 		case TCPS_IDLE:
1390439b3deaSKacheong Poon 			/* FALLTHRU */
1391439b3deaSKacheong Poon 		case TCPS_BOUND:
1392439b3deaSKacheong Poon 			break;
1393439b3deaSKacheong Poon 		default:
1394439b3deaSKacheong Poon 			CONN_DEC_REF(acceptor->tcp_connp);
1395439b3deaSKacheong Poon 			mutex_exit(&listener->tcp_eager_lock);
1396439b3deaSKacheong Poon 			tcp_err_ack(listener, mp, TOUTSTATE, 0);
1397439b3deaSKacheong Poon 			return;
1398439b3deaSKacheong Poon 		}
1399439b3deaSKacheong Poon 	}
1400439b3deaSKacheong Poon 
1401439b3deaSKacheong Poon 	/* The listener must be in TCPS_LISTEN */
1402439b3deaSKacheong Poon 	if (listener->tcp_state != TCPS_LISTEN) {
1403439b3deaSKacheong Poon 		CONN_DEC_REF(acceptor->tcp_connp);
1404439b3deaSKacheong Poon 		mutex_exit(&listener->tcp_eager_lock);
1405439b3deaSKacheong Poon 		tcp_err_ack(listener, mp, TOUTSTATE, 0);
1406439b3deaSKacheong Poon 		return;
1407439b3deaSKacheong Poon 	}
1408439b3deaSKacheong Poon 
1409439b3deaSKacheong Poon 	/*
1410439b3deaSKacheong Poon 	 * Rendezvous with an eager connection request packet hanging off
1411439b3deaSKacheong Poon 	 * 'tcp' that has the 'seqnum' tag.  We tagged the detached open
1412439b3deaSKacheong Poon 	 * tcp structure when the connection packet arrived in
1413439b3deaSKacheong Poon 	 * tcp_input_listener().
	 *
	 * The search starts at the listener and follows tcp_eager_next_q;
	 * reaching NULL means no queued eager carries this sequence
	 * number and TBADSEQ is returned.
1414439b3deaSKacheong Poon 	 */
1415439b3deaSKacheong Poon 	seqnum = tcr->SEQ_number;
1416439b3deaSKacheong Poon 	eager = listener;
1417439b3deaSKacheong Poon 	do {
1418439b3deaSKacheong Poon 		eager = eager->tcp_eager_next_q;
1419439b3deaSKacheong Poon 		if (eager == NULL) {
1420439b3deaSKacheong Poon 			CONN_DEC_REF(acceptor->tcp_connp);
1421439b3deaSKacheong Poon 			mutex_exit(&listener->tcp_eager_lock);
1422439b3deaSKacheong Poon 			tcp_err_ack(listener, mp, TBADSEQ, 0);
1423439b3deaSKacheong Poon 			return;
1424439b3deaSKacheong Poon 		}
1425439b3deaSKacheong Poon 	} while (eager->tcp_conn_req_seqnum != seqnum);
1426439b3deaSKacheong Poon 	mutex_exit(&listener->tcp_eager_lock);
1427439b3deaSKacheong Poon 
1428439b3deaSKacheong Poon 	/*
1429439b3deaSKacheong Poon 	 * At this point, both acceptor and listener have 2 ref
1430439b3deaSKacheong Poon 	 * that they begin with. Acceptor has one additional ref
1431439b3deaSKacheong Poon 	 * we placed in lookup while listener has 3 additional
1432439b3deaSKacheong Poon 	 * ref for being behind the squeue (tcp_accept() is
1433439b3deaSKacheong Poon 	 * done on listener's squeue); being in classifier hash;
1434439b3deaSKacheong Poon 	 * and eager's ref on listener.
1435439b3deaSKacheong Poon 	 */
1436439b3deaSKacheong Poon 	ASSERT(listener->tcp_connp->conn_ref >= 5);
1437439b3deaSKacheong Poon 	ASSERT(acceptor->tcp_connp->conn_ref >= 3);
1438439b3deaSKacheong Poon 
1439439b3deaSKacheong Poon 	/*
1440439b3deaSKacheong Poon 	 * The eager at this point is set in its own squeue and
1441439b3deaSKacheong Poon 	 * could easily have been killed (tcp_accept_finish will
1442439b3deaSKacheong Poon 	 * deal with that) because of a TH_RST so we can only
1443439b3deaSKacheong Poon 	 * ASSERT for a single ref.
1444439b3deaSKacheong Poon 	 */
1445439b3deaSKacheong Poon 	ASSERT(eager->tcp_connp->conn_ref >= 1);
1446439b3deaSKacheong Poon 
1447439b3deaSKacheong Poon 	/*
1448439b3deaSKacheong Poon 	 * Pre allocate the discon_ind mblk also. tcp_accept_finish will
1449439b3deaSKacheong Poon 	 * use it if something failed.
1450439b3deaSKacheong Poon 	 */
1451439b3deaSKacheong Poon 	discon_mp = allocb(MAX(sizeof (struct T_discon_ind),
1452439b3deaSKacheong Poon 	    sizeof (struct stroptions)), BPRI_HI);
1453439b3deaSKacheong Poon 	if (discon_mp == NULL) {
1454439b3deaSKacheong Poon 		CONN_DEC_REF(acceptor->tcp_connp);
1455439b3deaSKacheong Poon 		CONN_DEC_REF(eager->tcp_connp);
1456439b3deaSKacheong Poon 		tcp_err_ack(listener, mp, TSYSERR, ENOMEM);
1457439b3deaSKacheong Poon 		return;
1458439b3deaSKacheong Poon 	}
1459439b3deaSKacheong Poon 
1460439b3deaSKacheong Poon 	econnp = eager->tcp_connp;
1461439b3deaSKacheong Poon 
1462439b3deaSKacheong Poon 	/* Hold a copy of mp, in case reallocb fails */
1463439b3deaSKacheong Poon 	if ((mp1 = copymsg(mp)) == NULL) {
1464439b3deaSKacheong Poon 		CONN_DEC_REF(acceptor->tcp_connp);
1465439b3deaSKacheong Poon 		CONN_DEC_REF(eager->tcp_connp);
1466439b3deaSKacheong Poon 		freemsg(discon_mp);
1467439b3deaSKacheong Poon 		tcp_err_ack(listener, mp, TSYSERR, ENOMEM);
1468439b3deaSKacheong Poon 		return;
1469439b3deaSKacheong Poon 	}
1470439b3deaSKacheong Poon 
	/*
	 * Re-point tcr at the copy: the original mp is about to be
	 * consumed while building the T_OK_ACK, and the option fields are
	 * still needed afterwards.
	 */
1471439b3deaSKacheong Poon 	tcr = (struct T_conn_res *)mp1->b_rptr;
1472439b3deaSKacheong Poon 
1473439b3deaSKacheong Poon 	/*
1474439b3deaSKacheong Poon 	 * This is an expanded version of mi_tpi_ok_ack_alloc()
1475439b3deaSKacheong Poon 	 * which allocates a larger mblk and appends the new
1476439b3deaSKacheong Poon 	 * local address to the ok_ack.  The address is copied by
1477439b3deaSKacheong Poon 	 * soaccept() for getsockname().
1478439b3deaSKacheong Poon 	 */
1479439b3deaSKacheong Poon 	{
1480439b3deaSKacheong Poon 		int extra;
1481439b3deaSKacheong Poon 
1482439b3deaSKacheong Poon 		extra = (econnp->conn_family == AF_INET) ?
1483439b3deaSKacheong Poon 		    sizeof (sin_t) : sizeof (sin6_t);
1484439b3deaSKacheong Poon 
1485439b3deaSKacheong Poon 		/*
1486439b3deaSKacheong Poon 		 * Try to re-use mp, if possible.  Otherwise, allocate
1487439b3deaSKacheong Poon 		 * an mblk and return it as ok_mp.  In any case, mp
1488439b3deaSKacheong Poon 		 * is no longer usable upon return.
1489439b3deaSKacheong Poon 		 */
1490439b3deaSKacheong Poon 		if ((ok_mp = mi_tpi_ok_ack_alloc_extra(mp, extra)) == NULL) {
1491439b3deaSKacheong Poon 			CONN_DEC_REF(acceptor->tcp_connp);
1492439b3deaSKacheong Poon 			CONN_DEC_REF(eager->tcp_connp);
1493439b3deaSKacheong Poon 			freemsg(discon_mp);
1494439b3deaSKacheong Poon 			/* Original mp has been freed by now, so use mp1 */
1495439b3deaSKacheong Poon 			tcp_err_ack(listener, mp1, TSYSERR, ENOMEM);
1496439b3deaSKacheong Poon 			return;
1497439b3deaSKacheong Poon 		}
1498439b3deaSKacheong Poon 
1499439b3deaSKacheong Poon 		mp = NULL;	/* We should never use mp after this point */
1500439b3deaSKacheong Poon 
		/*
		 * extra holds one of the two sizeof values chosen above,
		 * so it also selects which sockaddr layout to append after
		 * the T_ok_ack.
		 */
1501439b3deaSKacheong Poon 		switch (extra) {
1502439b3deaSKacheong Poon 		case sizeof (sin_t): {
1503439b3deaSKacheong Poon 			sin_t *sin = (sin_t *)ok_mp->b_wptr;
1504439b3deaSKacheong Poon 
1505439b3deaSKacheong Poon 			ok_mp->b_wptr += extra;
1506439b3deaSKacheong Poon 			sin->sin_family = AF_INET;
1507439b3deaSKacheong Poon 			sin->sin_port = econnp->conn_lport;
1508439b3deaSKacheong Poon 			sin->sin_addr.s_addr = econnp->conn_laddr_v4;
1509439b3deaSKacheong Poon 			break;
1510439b3deaSKacheong Poon 		}
1511439b3deaSKacheong Poon 		case sizeof (sin6_t): {
1512439b3deaSKacheong Poon 			sin6_t *sin6 = (sin6_t *)ok_mp->b_wptr;
1513439b3deaSKacheong Poon 
1514439b3deaSKacheong Poon 			ok_mp->b_wptr += extra;
1515439b3deaSKacheong Poon 			sin6->sin6_family = AF_INET6;
1516439b3deaSKacheong Poon 			sin6->sin6_port = econnp->conn_lport;
1517439b3deaSKacheong Poon 			sin6->sin6_addr = econnp->conn_laddr_v6;
1518439b3deaSKacheong Poon 			sin6->sin6_flowinfo = econnp->conn_flowinfo;
			/* A scope id is reported only for link-scope addresses. */
1519439b3deaSKacheong Poon 			if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6) &&
1520439b3deaSKacheong Poon 			    (econnp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
1521439b3deaSKacheong Poon 				sin6->sin6_scope_id =
1522439b3deaSKacheong Poon 				    econnp->conn_ixa->ixa_scopeid;
1523439b3deaSKacheong Poon 			} else {
1524439b3deaSKacheong Poon 				sin6->sin6_scope_id = 0;
1525439b3deaSKacheong Poon 			}
1526439b3deaSKacheong Poon 			sin6->__sin6_src_id = 0;
1527439b3deaSKacheong Poon 			break;
1528439b3deaSKacheong Poon 		}
1529439b3deaSKacheong Poon 		default:
1530439b3deaSKacheong Poon 			break;
1531439b3deaSKacheong Poon 		}
1532439b3deaSKacheong Poon 		ASSERT(ok_mp->b_wptr <= ok_mp->b_datap->db_lim);
1533439b3deaSKacheong Poon 	}
1534439b3deaSKacheong Poon 
1535439b3deaSKacheong Poon 	/*
1536439b3deaSKacheong Poon 	 * If there are no options we know that the T_CONN_RES will
1537439b3deaSKacheong Poon 	 * succeed. However, we can't send the T_OK_ACK upstream until
1538439b3deaSKacheong Poon 	 * the tcp_accept_swap is done since it would be dangerous to
1539439b3deaSKacheong Poon 	 * let the application start using the new fd prior to the swap.
1540439b3deaSKacheong Poon 	 */
1541439b3deaSKacheong Poon 	tcp_accept_swap(listener, acceptor, eager);
1542439b3deaSKacheong Poon 
1543439b3deaSKacheong Poon 	/*
1544439b3deaSKacheong Poon 	 * tcp_accept_swap unlinks eager from listener but does not drop
1545439b3deaSKacheong Poon 	 * the eager's reference on the listener.
1546439b3deaSKacheong Poon 	 */
1547439b3deaSKacheong Poon 	ASSERT(eager->tcp_listener == NULL);
1548439b3deaSKacheong Poon 	ASSERT(listener->tcp_connp->conn_ref >= 5);
1549439b3deaSKacheong Poon 
1550439b3deaSKacheong Poon 	/*
1551439b3deaSKacheong Poon 	 * The eager is now associated with its own queue. Insert in
1552439b3deaSKacheong Poon 	 * the hash so that the connection can be reused for a future
1553439b3deaSKacheong Poon 	 * T_CONN_RES.
1554439b3deaSKacheong Poon 	 */
1555439b3deaSKacheong Poon 	tcp_acceptor_hash_insert(acceptor_id, eager);
1556439b3deaSKacheong Poon 
1557439b3deaSKacheong Poon 	/*
1558439b3deaSKacheong Poon 	 * We now do the processing of options with T_CONN_RES.
1559439b3deaSKacheong Poon 	 * We delay till now since we wanted to have queue to pass to
1560439b3deaSKacheong Poon 	 * option processing routines that points back to the right
1561439b3deaSKacheong Poon 	 * instance structure which does not happen until after
1562439b3deaSKacheong Poon 	 * tcp_accept_swap().
1563439b3deaSKacheong Poon 	 *
1564439b3deaSKacheong Poon 	 * Note:
1565439b3deaSKacheong Poon 	 * The sanity of the logic here assumes that whatever options
1566439b3deaSKacheong Poon 	 * are appropriate to inherit from listener=>eager are done
1567439b3deaSKacheong Poon 	 * before this point, and whatever were to be overridden (or not)
1568439b3deaSKacheong Poon 	 * in transfer logic from eager=>acceptor in tcp_accept_swap().
1569439b3deaSKacheong Poon 	 * [ Warning: acceptor endpoint can have T_OPTMGMT_REQ done to it
1570439b3deaSKacheong Poon 	 *   before its ACCEPTOR_id comes down in T_CONN_RES ]
1571439b3deaSKacheong Poon 	 * This may not be true at this point in time but can be fixed
1572439b3deaSKacheong Poon 	 * independently. This option processing code starts with
1573439b3deaSKacheong Poon 	 * the instantiated acceptor instance and the final queue at
1574439b3deaSKacheong Poon 	 * this point.
1575439b3deaSKacheong Poon 	 */
1576439b3deaSKacheong Poon 
1577439b3deaSKacheong Poon 	if (tcr->OPT_length != 0) {
1578439b3deaSKacheong Poon 		/* Options to process */
1579439b3deaSKacheong Poon 		int t_error = 0;
1580439b3deaSKacheong Poon 		int sys_error = 0;
1581439b3deaSKacheong Poon 		int do_disconnect = 0;
1582439b3deaSKacheong Poon 
1583439b3deaSKacheong Poon 		if (tcp_conprim_opt_process(eager, mp1,
1584439b3deaSKacheong Poon 		    &do_disconnect, &t_error, &sys_error) < 0) {
1585439b3deaSKacheong Poon 			eager->tcp_accept_error = 1;
1586439b3deaSKacheong Poon 			if (do_disconnect) {
1587439b3deaSKacheong Poon 				/*
1588439b3deaSKacheong Poon 				 * An option failed which does not allow
1589439b3deaSKacheong Poon 				 * connection to be accepted.
1590439b3deaSKacheong Poon 				 *
1591439b3deaSKacheong Poon 				 * We allow T_CONN_RES to succeed and
1592439b3deaSKacheong Poon 				 * put a T_DISCON_IND on the eager queue.
1593439b3deaSKacheong Poon 				 */
1594439b3deaSKacheong Poon 				ASSERT(t_error == 0 && sys_error == 0);
1595439b3deaSKacheong Poon 				eager->tcp_send_discon_ind = 1;
1596439b3deaSKacheong Poon 			} else {
1597439b3deaSKacheong Poon 				ASSERT(t_error != 0);
1598439b3deaSKacheong Poon 				freemsg(ok_mp);
1599439b3deaSKacheong Poon 				/*
1600439b3deaSKacheong Poon 				 * Original mp was either freed or set
1601439b3deaSKacheong Poon 				 * to ok_mp above, so use mp1 instead.
1602439b3deaSKacheong Poon 				 */
1603439b3deaSKacheong Poon 				tcp_err_ack(listener, mp1, t_error, sys_error);
				/*
				 * Jumping to finish skips the freemsg(mp1)
				 * below (mp1 was just handed to
				 * tcp_err_ack), the T_OK_ACK putnext and
				 * the deferred T_CONN_IND hand-off. The
				 * acceptor teardown and the
				 * tcp_accept_finish dispatch still run.
				 */
1604439b3deaSKacheong Poon 				goto finish;
1605439b3deaSKacheong Poon 			}
1606439b3deaSKacheong Poon 		}
1607439b3deaSKacheong Poon 		/*
1608439b3deaSKacheong Poon 		 * Most likely success in setting options (except if
1609439b3deaSKacheong Poon 		 * eager->tcp_send_discon_ind set).
1610439b3deaSKacheong Poon 		 * mp1 option buffer represented by OPT_length/offset
1611439b3deaSKacheong Poon 		 * potentially modified and contains results of setting
1612439b3deaSKacheong Poon 		 * options at this point
1613439b3deaSKacheong Poon 		 */
1614439b3deaSKacheong Poon 	}
1615439b3deaSKacheong Poon 
1616439b3deaSKacheong Poon 	/* We no longer need mp1, since all options processing has passed */
1617439b3deaSKacheong Poon 	freemsg(mp1);
1618439b3deaSKacheong Poon 
	/* The swap is complete, so the T_OK_ACK can now go up the listener. */
1619439b3deaSKacheong Poon 	putnext(listener->tcp_connp->conn_rq, ok_mp);
1620439b3deaSKacheong Poon 
	/*
	 * If the tail of q0 is a deferred connection indication, promote
	 * one eager from q0 to q and send its T_CONN_IND upstream. The
	 * putnext is done only after tcp_eager_lock has been dropped.
	 */
1621439b3deaSKacheong Poon 	mutex_enter(&listener->tcp_eager_lock);
1622439b3deaSKacheong Poon 	if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) {
1623439b3deaSKacheong Poon 		mblk_t	*conn_ind;
1624439b3deaSKacheong Poon 
1625439b3deaSKacheong Poon 		/*
1626439b3deaSKacheong Poon 		 * This path should not be executed if listener and
1627439b3deaSKacheong Poon 		 * acceptor streams are the same.
1628439b3deaSKacheong Poon 		 */
1629439b3deaSKacheong Poon 		ASSERT(listener != acceptor);
1630*dd49f125SAnders Persson 		conn_ind = tcp_get_def_conn_ind(listener);
1631439b3deaSKacheong Poon 		mutex_exit(&listener->tcp_eager_lock);
1632*dd49f125SAnders Persson 		putnext(listener->tcp_connp->conn_rq, conn_ind);
1633439b3deaSKacheong Poon 	} else {
1634439b3deaSKacheong Poon 		mutex_exit(&listener->tcp_eager_lock);
1635439b3deaSKacheong Poon 	}
1636439b3deaSKacheong Poon 
1637439b3deaSKacheong Poon 	/*
1638439b3deaSKacheong Poon 	 * Done with the acceptor - free it
1639439b3deaSKacheong Poon 	 *
1640439b3deaSKacheong Poon 	 * Note: from this point on, no access to listener should be made
1641439b3deaSKacheong Poon 	 * as listener can be equal to acceptor.
1642439b3deaSKacheong Poon 	 */
1643439b3deaSKacheong Poon finish:
1644439b3deaSKacheong Poon 	ASSERT(acceptor->tcp_detached);
1645439b3deaSKacheong Poon 	acceptor->tcp_connp->conn_rq = NULL;
1646439b3deaSKacheong Poon 	ASSERT(!IPCL_IS_NONSTR(acceptor->tcp_connp));
1647439b3deaSKacheong Poon 	acceptor->tcp_connp->conn_wq = NULL;
1648439b3deaSKacheong Poon 	(void) tcp_clean_death(acceptor, 0);
	/* Drop the reference taken when the acceptor was looked up. */
1649439b3deaSKacheong Poon 	CONN_DEC_REF(acceptor->tcp_connp);
1650439b3deaSKacheong Poon 
1651439b3deaSKacheong Poon 	/*
1652439b3deaSKacheong Poon 	 * We pass discon_mp to tcp_accept_finish to get on the right squeue.
1653439b3deaSKacheong Poon 	 *
1654439b3deaSKacheong Poon 	 * It will update the setting for sockfs/stream head and also take
1655439b3deaSKacheong Poon 	 * care of any data that arrived before accept() was called.
1656439b3deaSKacheong Poon 	 * In case we already received a FIN then tcp_accept_finish will send up
1657439b3deaSKacheong Poon 	 * the ordrel. It will also send up a window update if the window
1658439b3deaSKacheong Poon 	 * has opened up.
1659439b3deaSKacheong Poon 	 */
1660439b3deaSKacheong Poon 
1661439b3deaSKacheong Poon 	/*
1662439b3deaSKacheong Poon 	 * XXX: we currently have a problem if XTI application closes the
1663439b3deaSKacheong Poon 	 * acceptor stream in between. This problem exists in on10-gate also
1664439b3deaSKacheong Poon 	 * and is well known but nothing can be done short of major rewrite
1665439b3deaSKacheong Poon 	 * to fix it. Now it is possible to take care of it by assigning TLI/XTI
1666439b3deaSKacheong Poon 	 * eager same squeue as listener (we can distinguish non socket
1667439b3deaSKacheong Poon 	 * listeners at the time of handling a SYN in tcp_input_listener)
1668439b3deaSKacheong Poon 	 * and do most of the work that tcp_accept_finish does here itself
1669439b3deaSKacheong Poon 	 * and then get behind the acceptor squeue to access the acceptor
1670439b3deaSKacheong Poon 	 * queue.
1671439b3deaSKacheong Poon 	 */
1672439b3deaSKacheong Poon 	/*
1673439b3deaSKacheong Poon 	 * We already have a ref on tcp so no need to do one before squeue_enter
1674439b3deaSKacheong Poon 	 */
1675439b3deaSKacheong Poon 	SQUEUE_ENTER_ONE(eager->tcp_connp->conn_sqp, discon_mp,
1676439b3deaSKacheong Poon 	    tcp_accept_finish, eager->tcp_connp, NULL, SQ_FILL,
1677439b3deaSKacheong Poon 	    SQTAG_TCP_ACCEPT_FINISH);
1678439b3deaSKacheong Poon }
1679439b3deaSKacheong Poon 
1680439b3deaSKacheong Poon 
1681439b3deaSKacheong Poon /*
1682439b3deaSKacheong Poon  * This is the STREAMS entry point for T_CONN_RES coming down on
1683439b3deaSKacheong Poon  * Acceptor STREAM when  sockfs listener does accept processing.
1684439b3deaSKacheong Poon  * Read the block comment on top of tcp_input_listener().
1685439b3deaSKacheong Poon  */
1686439b3deaSKacheong Poon void
1687439b3deaSKacheong Poon tcp_tpi_accept(queue_t *q, mblk_t *mp)
1688439b3deaSKacheong Poon {
1689439b3deaSKacheong Poon 	queue_t *rq = RD(q);
1690439b3deaSKacheong Poon 	struct T_conn_res *conn_res;
1691439b3deaSKacheong Poon 	tcp_t *eager;
1692439b3deaSKacheong Poon 	tcp_t *listener;
1693439b3deaSKacheong Poon 	struct T_ok_ack *ok;
1694439b3deaSKacheong Poon 	t_scalar_t PRIM_type;
16953e95bd4aSAnders Persson 	mblk_t *discon_mp;
1696439b3deaSKacheong Poon 	conn_t *econnp;
1697439b3deaSKacheong Poon 	cred_t *cr;
1698439b3deaSKacheong Poon 
1699439b3deaSKacheong Poon 	ASSERT(DB_TYPE(mp) == M_PROTO);
1700439b3deaSKacheong Poon 
1701439b3deaSKacheong Poon 	/*
1702439b3deaSKacheong Poon 	 * All Solaris components should pass a db_credp
1703439b3deaSKacheong Poon 	 * for this TPI message, hence we ASSERT.
1704439b3deaSKacheong Poon 	 * But in case there is some other M_PROTO that looks
1705439b3deaSKacheong Poon 	 * like a TPI message sent by some other kernel
1706439b3deaSKacheong Poon 	 * component, we check and return an error.
1707439b3deaSKacheong Poon 	 */
1708439b3deaSKacheong Poon 	cr = msg_getcred(mp, NULL);
1709439b3deaSKacheong Poon 	ASSERT(cr != NULL);
1710439b3deaSKacheong Poon 	if (cr == NULL) {
1711439b3deaSKacheong Poon 		mp = mi_tpi_err_ack_alloc(mp, TSYSERR, EINVAL);
1712439b3deaSKacheong Poon 		if (mp != NULL)
1713439b3deaSKacheong Poon 			putnext(rq, mp);
1714439b3deaSKacheong Poon 		return;
1715439b3deaSKacheong Poon 	}
1716439b3deaSKacheong Poon 	conn_res = (struct T_conn_res *)mp->b_rptr;
1717439b3deaSKacheong Poon 	ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
1718439b3deaSKacheong Poon 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_res)) {
1719439b3deaSKacheong Poon 		mp = mi_tpi_err_ack_alloc(mp, TPROTO, 0);
1720439b3deaSKacheong Poon 		if (mp != NULL)
1721439b3deaSKacheong Poon 			putnext(rq, mp);
1722439b3deaSKacheong Poon 		return;
1723439b3deaSKacheong Poon 	}
1724439b3deaSKacheong Poon 	switch (conn_res->PRIM_type) {
1725439b3deaSKacheong Poon 	case O_T_CONN_RES:
1726439b3deaSKacheong Poon 	case T_CONN_RES:
1727439b3deaSKacheong Poon 		/*
1728439b3deaSKacheong Poon 		 * We pass up an err ack if allocb fails. This will
1729439b3deaSKacheong Poon 		 * cause sockfs to issue a T_DISCON_REQ which will cause
1730439b3deaSKacheong Poon 		 * tcp_eager_blowoff to be called. sockfs will then call
1731439b3deaSKacheong Poon 		 * rq->q_qinfo->qi_qclose to cleanup the acceptor stream.
1732439b3deaSKacheong Poon 		 * we need to do the allocb up here because we have to
1733439b3deaSKacheong Poon 		 * make sure rq->q_qinfo->qi_qclose still points to the
1734439b3deaSKacheong Poon 		 * correct function (tcp_tpi_close_accept) in case allocb
1735439b3deaSKacheong Poon 		 * fails.
1736439b3deaSKacheong Poon 		 */
1737439b3deaSKacheong Poon 		bcopy(mp->b_rptr + conn_res->OPT_offset,
1738439b3deaSKacheong Poon 		    &eager, conn_res->OPT_length);
1739439b3deaSKacheong Poon 		PRIM_type = conn_res->PRIM_type;
1740439b3deaSKacheong Poon 		mp->b_datap->db_type = M_PCPROTO;
1741439b3deaSKacheong Poon 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ok_ack);
1742439b3deaSKacheong Poon 		ok = (struct T_ok_ack *)mp->b_rptr;
1743439b3deaSKacheong Poon 		ok->PRIM_type = T_OK_ACK;
1744439b3deaSKacheong Poon 		ok->CORRECT_prim = PRIM_type;
1745439b3deaSKacheong Poon 		econnp = eager->tcp_connp;
1746439b3deaSKacheong Poon 		econnp->conn_dev = (dev_t)RD(q)->q_ptr;
1747439b3deaSKacheong Poon 		econnp->conn_minor_arena = (vmem_t *)(WR(q)->q_ptr);
1748439b3deaSKacheong Poon 		econnp->conn_rq = rq;
1749439b3deaSKacheong Poon 		econnp->conn_wq = q;
1750439b3deaSKacheong Poon 		rq->q_ptr = econnp;
1751439b3deaSKacheong Poon 		rq->q_qinfo = &tcp_rinitv4;	/* No open - same as rinitv6 */
1752439b3deaSKacheong Poon 		q->q_ptr = econnp;
1753439b3deaSKacheong Poon 		q->q_qinfo = &tcp_winit;
1754439b3deaSKacheong Poon 		listener = eager->tcp_listener;
1755439b3deaSKacheong Poon 
17563e95bd4aSAnders Persson 		/*
17573e95bd4aSAnders Persson 		 * Pre allocate the discon_ind mblk also. tcp_accept_finish will
17583e95bd4aSAnders Persson 		 * use it if something failed.
17593e95bd4aSAnders Persson 		 */
17603e95bd4aSAnders Persson 		discon_mp = allocb(MAX(sizeof (struct T_discon_ind),
17613e95bd4aSAnders Persson 		    sizeof (struct stroptions)), BPRI_HI);
17623e95bd4aSAnders Persson 
17633e95bd4aSAnders Persson 		if (discon_mp == NULL) {
1764439b3deaSKacheong Poon 			mp = mi_tpi_err_ack_alloc(mp, TPROTO, 0);
1765439b3deaSKacheong Poon 			if (mp != NULL)
1766439b3deaSKacheong Poon 				putnext(rq, mp);
1767439b3deaSKacheong Poon 			return;
1768439b3deaSKacheong Poon 		}
1769439b3deaSKacheong Poon 
17703e95bd4aSAnders Persson 		eager->tcp_issocket = B_TRUE;
17713e95bd4aSAnders Persson 
17723e95bd4aSAnders Persson 		ASSERT(econnp->conn_netstack ==
17733e95bd4aSAnders Persson 		    listener->tcp_connp->conn_netstack);
17743e95bd4aSAnders Persson 		ASSERT(eager->tcp_tcps == listener->tcp_tcps);
17753e95bd4aSAnders Persson 
17763e95bd4aSAnders Persson 		/* Put the ref for IP */
17773e95bd4aSAnders Persson 		CONN_INC_REF(econnp);
17783e95bd4aSAnders Persson 
17793e95bd4aSAnders Persson 		/*
17803e95bd4aSAnders Persson 		 * We should have minimum of 3 references on the conn
17813e95bd4aSAnders Persson 		 * at this point. One each for TCP and IP and one for
17823e95bd4aSAnders Persson 		 * the T_conn_ind that was sent up when the 3-way handshake
17833e95bd4aSAnders Persson 		 * completed. In the normal case we would also have another
17843e95bd4aSAnders Persson 		 * reference (making a total of 4) for the conn being in the
17853e95bd4aSAnders Persson 		 * classifier hash list. However the eager could have received
17863e95bd4aSAnders Persson 		 * an RST subsequently and tcp_closei_local could have removed
17873e95bd4aSAnders Persson 		 * the eager from the classifier hash list, hence we can't
17883e95bd4aSAnders Persson 		 * assert that reference.
17893e95bd4aSAnders Persson 		 */
17903e95bd4aSAnders Persson 		ASSERT(econnp->conn_ref >= 3);
17913e95bd4aSAnders Persson 
17923e95bd4aSAnders Persson 		mutex_enter(&listener->tcp_eager_lock);
17933e95bd4aSAnders Persson 		if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) {
1794*dd49f125SAnders Persson 			mblk_t *conn_ind = tcp_get_def_conn_ind(listener);
17953e95bd4aSAnders Persson 
17963e95bd4aSAnders Persson 			/* Need to get inside the listener perimeter */
17973e95bd4aSAnders Persson 			CONN_INC_REF(listener->tcp_connp);
1798*dd49f125SAnders Persson 			SQUEUE_ENTER_ONE(listener->tcp_connp->conn_sqp,
1799*dd49f125SAnders Persson 			    conn_ind, tcp_send_pending, listener->tcp_connp,
1800*dd49f125SAnders Persson 			    NULL, SQ_FILL, SQTAG_TCP_SEND_PENDING);
18013e95bd4aSAnders Persson 		}
18023e95bd4aSAnders Persson 		tcp_eager_unlink(eager);
18033e95bd4aSAnders Persson 		mutex_exit(&listener->tcp_eager_lock);
18043e95bd4aSAnders Persson 
18053e95bd4aSAnders Persson 		/*
18063e95bd4aSAnders Persson 		 * At this point, the eager is detached from the listener
18073e95bd4aSAnders Persson 		 * but we still have an extra refs on eager (apart from the
18083e95bd4aSAnders Persson 		 * usual tcp references). The ref was placed in tcp_input_data
18093e95bd4aSAnders Persson 		 * before sending the conn_ind in tcp_send_conn_ind.
18103e95bd4aSAnders Persson 		 * The ref will be dropped in tcp_accept_finish().
18113e95bd4aSAnders Persson 		 */
18123e95bd4aSAnders Persson 		SQUEUE_ENTER_ONE(econnp->conn_sqp, discon_mp, tcp_accept_finish,
18133e95bd4aSAnders Persson 		    econnp, NULL, SQ_NODRAIN, SQTAG_TCP_ACCEPT_FINISH_Q0);
18143e95bd4aSAnders Persson 
1815439b3deaSKacheong Poon 		/*
1816439b3deaSKacheong Poon 		 * Send the new local address also up to sockfs. There
1817439b3deaSKacheong Poon 		 * should already be enough space in the mp that came
1818439b3deaSKacheong Poon 		 * down from soaccept().
1819439b3deaSKacheong Poon 		 */
1820439b3deaSKacheong Poon 		if (econnp->conn_family == AF_INET) {
1821439b3deaSKacheong Poon 			sin_t *sin;
1822439b3deaSKacheong Poon 
1823439b3deaSKacheong Poon 			ASSERT((mp->b_datap->db_lim - mp->b_datap->db_base) >=
1824439b3deaSKacheong Poon 			    (sizeof (struct T_ok_ack) + sizeof (sin_t)));
1825439b3deaSKacheong Poon 			sin = (sin_t *)mp->b_wptr;
1826439b3deaSKacheong Poon 			mp->b_wptr += sizeof (sin_t);
1827439b3deaSKacheong Poon 			sin->sin_family = AF_INET;
1828439b3deaSKacheong Poon 			sin->sin_port = econnp->conn_lport;
1829439b3deaSKacheong Poon 			sin->sin_addr.s_addr = econnp->conn_laddr_v4;
1830439b3deaSKacheong Poon 		} else {
1831439b3deaSKacheong Poon 			sin6_t *sin6;
1832439b3deaSKacheong Poon 
1833439b3deaSKacheong Poon 			ASSERT((mp->b_datap->db_lim - mp->b_datap->db_base) >=
1834439b3deaSKacheong Poon 			    sizeof (struct T_ok_ack) + sizeof (sin6_t));
1835439b3deaSKacheong Poon 			sin6 = (sin6_t *)mp->b_wptr;
1836439b3deaSKacheong Poon 			mp->b_wptr += sizeof (sin6_t);
1837439b3deaSKacheong Poon 			sin6->sin6_family = AF_INET6;
1838439b3deaSKacheong Poon 			sin6->sin6_port = econnp->conn_lport;
1839439b3deaSKacheong Poon 			sin6->sin6_addr = econnp->conn_laddr_v6;
1840439b3deaSKacheong Poon 			if (econnp->conn_ipversion == IPV4_VERSION)
1841439b3deaSKacheong Poon 				sin6->sin6_flowinfo = 0;
1842439b3deaSKacheong Poon 			else
1843439b3deaSKacheong Poon 				sin6->sin6_flowinfo = econnp->conn_flowinfo;
1844439b3deaSKacheong Poon 			if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6) &&
1845439b3deaSKacheong Poon 			    (econnp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
1846439b3deaSKacheong Poon 				sin6->sin6_scope_id =
1847439b3deaSKacheong Poon 				    econnp->conn_ixa->ixa_scopeid;
1848439b3deaSKacheong Poon 			} else {
1849439b3deaSKacheong Poon 				sin6->sin6_scope_id = 0;
1850439b3deaSKacheong Poon 			}
1851439b3deaSKacheong Poon 			sin6->__sin6_src_id = 0;
1852439b3deaSKacheong Poon 		}
1853439b3deaSKacheong Poon 
1854439b3deaSKacheong Poon 		putnext(rq, mp);
1855439b3deaSKacheong Poon 		return;
1856439b3deaSKacheong Poon 	default:
1857439b3deaSKacheong Poon 		mp = mi_tpi_err_ack_alloc(mp, TNOTSUPPORT, 0);
1858439b3deaSKacheong Poon 		if (mp != NULL)
1859439b3deaSKacheong Poon 			putnext(rq, mp);
1860439b3deaSKacheong Poon 		return;
1861439b3deaSKacheong Poon 	}
1862439b3deaSKacheong Poon }
1863439b3deaSKacheong Poon 
1864439b3deaSKacheong Poon /*
1865439b3deaSKacheong Poon  * The function called through squeue to get behind listener's perimeter to
1866439b3deaSKacheong Poon  * send a deferred conn_ind.
1867439b3deaSKacheong Poon  */
1868439b3deaSKacheong Poon /* ARGSUSED */
1869439b3deaSKacheong Poon void
1870439b3deaSKacheong Poon tcp_send_pending(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
1871439b3deaSKacheong Poon {
1872439b3deaSKacheong Poon 	conn_t	*lconnp = (conn_t *)arg;
1873439b3deaSKacheong Poon 	tcp_t *listener = lconnp->conn_tcp;
1874439b3deaSKacheong Poon 	struct T_conn_ind *conn_ind;
1875439b3deaSKacheong Poon 	tcp_t *tcp;
1876439b3deaSKacheong Poon 
1877439b3deaSKacheong Poon 	conn_ind = (struct T_conn_ind *)mp->b_rptr;
1878439b3deaSKacheong Poon 	bcopy(mp->b_rptr + conn_ind->OPT_offset, &tcp,
1879439b3deaSKacheong Poon 	    conn_ind->OPT_length);
1880439b3deaSKacheong Poon 
1881439b3deaSKacheong Poon 	if (listener->tcp_state != TCPS_LISTEN) {
1882439b3deaSKacheong Poon 		/*
1883439b3deaSKacheong Poon 		 * If listener has closed, it would have caused a
1884439b3deaSKacheong Poon 		 * a cleanup/blowoff to happen for the eager, so
1885439b3deaSKacheong Poon 		 * we don't need to do anything more.
1886439b3deaSKacheong Poon 		 */
1887439b3deaSKacheong Poon 		freemsg(mp);
1888439b3deaSKacheong Poon 		return;
1889439b3deaSKacheong Poon 	}
1890439b3deaSKacheong Poon 
18913e95bd4aSAnders Persson 	putnext(lconnp->conn_rq, mp);
1892439b3deaSKacheong Poon }
1893439b3deaSKacheong Poon 
1894439b3deaSKacheong Poon /*
1895439b3deaSKacheong Poon  * Sends the T_CONN_IND to the listener. The caller calls this
1896439b3deaSKacheong Poon  * functions via squeue to get inside the listener's perimeter
1897439b3deaSKacheong Poon  * once the 3 way hand shake is done a T_CONN_IND needs to be
1898439b3deaSKacheong Poon  * sent. As an optimization, the caller can call this directly
1899439b3deaSKacheong Poon  * if listener's perimeter is same as eager's.
1900439b3deaSKacheong Poon  */
1901439b3deaSKacheong Poon /* ARGSUSED */
1902439b3deaSKacheong Poon void
1903439b3deaSKacheong Poon tcp_send_conn_ind(void *arg, mblk_t *mp, void *arg2)
1904439b3deaSKacheong Poon {
1905439b3deaSKacheong Poon 	conn_t			*lconnp = (conn_t *)arg;
1906439b3deaSKacheong Poon 	tcp_t			*listener = lconnp->conn_tcp;
1907439b3deaSKacheong Poon 	tcp_t			*tcp;
1908439b3deaSKacheong Poon 	struct T_conn_ind	*conn_ind;
1909439b3deaSKacheong Poon 	ipaddr_t 		*addr_cache;
1910439b3deaSKacheong Poon 	boolean_t		need_send_conn_ind = B_FALSE;
1911439b3deaSKacheong Poon 	tcp_stack_t		*tcps = listener->tcp_tcps;
1912439b3deaSKacheong Poon 
1913439b3deaSKacheong Poon 	/* retrieve the eager */
1914439b3deaSKacheong Poon 	conn_ind = (struct T_conn_ind *)mp->b_rptr;
1915439b3deaSKacheong Poon 	ASSERT(conn_ind->OPT_offset != 0 &&
1916439b3deaSKacheong Poon 	    conn_ind->OPT_length == sizeof (intptr_t));
1917439b3deaSKacheong Poon 	bcopy(mp->b_rptr + conn_ind->OPT_offset, &tcp,
1918439b3deaSKacheong Poon 	    conn_ind->OPT_length);
1919439b3deaSKacheong Poon 
1920439b3deaSKacheong Poon 	/*
1921439b3deaSKacheong Poon 	 * TLI/XTI applications will get confused by
1922439b3deaSKacheong Poon 	 * sending eager as an option since it violates
1923439b3deaSKacheong Poon 	 * the option semantics. So remove the eager as
1924439b3deaSKacheong Poon 	 * option since TLI/XTI app doesn't need it anyway.
1925439b3deaSKacheong Poon 	 */
1926439b3deaSKacheong Poon 	if (!TCP_IS_SOCKET(listener)) {
1927439b3deaSKacheong Poon 		conn_ind->OPT_length = 0;
1928439b3deaSKacheong Poon 		conn_ind->OPT_offset = 0;
1929439b3deaSKacheong Poon 	}
1930439b3deaSKacheong Poon 	if (listener->tcp_state != TCPS_LISTEN) {
1931439b3deaSKacheong Poon 		/*
1932439b3deaSKacheong Poon 		 * If listener has closed, it would have caused a
1933439b3deaSKacheong Poon 		 * a cleanup/blowoff to happen for the eager. We
1934439b3deaSKacheong Poon 		 * just need to return.
1935439b3deaSKacheong Poon 		 */
1936439b3deaSKacheong Poon 		freemsg(mp);
1937439b3deaSKacheong Poon 		return;
1938439b3deaSKacheong Poon 	}
1939439b3deaSKacheong Poon 
1940439b3deaSKacheong Poon 
1941439b3deaSKacheong Poon 	/*
1942439b3deaSKacheong Poon 	 * if the conn_req_q is full defer passing up the
1943439b3deaSKacheong Poon 	 * T_CONN_IND until space is availabe after t_accept()
1944439b3deaSKacheong Poon 	 * processing
1945439b3deaSKacheong Poon 	 */
1946439b3deaSKacheong Poon 	mutex_enter(&listener->tcp_eager_lock);
1947439b3deaSKacheong Poon 
1948439b3deaSKacheong Poon 	/*
1949439b3deaSKacheong Poon 	 * Take the eager out, if it is in the list of droppable eagers
1950439b3deaSKacheong Poon 	 * as we are here because the 3W handshake is over.
1951439b3deaSKacheong Poon 	 */
1952439b3deaSKacheong Poon 	MAKE_UNDROPPABLE(tcp);
1953439b3deaSKacheong Poon 
1954439b3deaSKacheong Poon 	if (listener->tcp_conn_req_cnt_q < listener->tcp_conn_req_max) {
1955439b3deaSKacheong Poon 		tcp_t *tail;
1956439b3deaSKacheong Poon 
1957439b3deaSKacheong Poon 		/*
1958439b3deaSKacheong Poon 		 * The eager already has an extra ref put in tcp_input_data
1959439b3deaSKacheong Poon 		 * so that it stays till accept comes back even though it
1960439b3deaSKacheong Poon 		 * might get into TCPS_CLOSED as a result of a TH_RST etc.
1961439b3deaSKacheong Poon 		 */
1962439b3deaSKacheong Poon 		ASSERT(listener->tcp_conn_req_cnt_q0 > 0);
1963439b3deaSKacheong Poon 		listener->tcp_conn_req_cnt_q0--;
1964439b3deaSKacheong Poon 		listener->tcp_conn_req_cnt_q++;
1965439b3deaSKacheong Poon 
1966439b3deaSKacheong Poon 		/* Move from SYN_RCVD to ESTABLISHED list  */
1967439b3deaSKacheong Poon 		tcp->tcp_eager_next_q0->tcp_eager_prev_q0 =
1968439b3deaSKacheong Poon 		    tcp->tcp_eager_prev_q0;
1969439b3deaSKacheong Poon 		tcp->tcp_eager_prev_q0->tcp_eager_next_q0 =
1970439b3deaSKacheong Poon 		    tcp->tcp_eager_next_q0;
1971439b3deaSKacheong Poon 		tcp->tcp_eager_prev_q0 = NULL;
1972439b3deaSKacheong Poon 		tcp->tcp_eager_next_q0 = NULL;
1973439b3deaSKacheong Poon 
1974439b3deaSKacheong Poon 		/*
1975439b3deaSKacheong Poon 		 * Insert at end of the queue because sockfs
1976439b3deaSKacheong Poon 		 * sends down T_CONN_RES in chronological
1977439b3deaSKacheong Poon 		 * order. Leaving the older conn indications
1978439b3deaSKacheong Poon 		 * at front of the queue helps reducing search
1979439b3deaSKacheong Poon 		 * time.
1980439b3deaSKacheong Poon 		 */
1981439b3deaSKacheong Poon 		tail = listener->tcp_eager_last_q;
1982439b3deaSKacheong Poon 		if (tail != NULL)
1983439b3deaSKacheong Poon 			tail->tcp_eager_next_q = tcp;
1984439b3deaSKacheong Poon 		else
1985439b3deaSKacheong Poon 			listener->tcp_eager_next_q = tcp;
1986439b3deaSKacheong Poon 		listener->tcp_eager_last_q = tcp;
1987439b3deaSKacheong Poon 		tcp->tcp_eager_next_q = NULL;
1988439b3deaSKacheong Poon 		/*
1989439b3deaSKacheong Poon 		 * Delay sending up the T_conn_ind until we are
1990439b3deaSKacheong Poon 		 * done with the eager. Once we have have sent up
1991439b3deaSKacheong Poon 		 * the T_conn_ind, the accept can potentially complete
1992439b3deaSKacheong Poon 		 * any time and release the refhold we have on the eager.
1993439b3deaSKacheong Poon 		 */
1994439b3deaSKacheong Poon 		need_send_conn_ind = B_TRUE;
1995439b3deaSKacheong Poon 	} else {
1996439b3deaSKacheong Poon 		/*
1997439b3deaSKacheong Poon 		 * Defer connection on q0 and set deferred
1998439b3deaSKacheong Poon 		 * connection bit true
1999439b3deaSKacheong Poon 		 */
2000439b3deaSKacheong Poon 		tcp->tcp_conn_def_q0 = B_TRUE;
2001439b3deaSKacheong Poon 
2002439b3deaSKacheong Poon 		/* take tcp out of q0 ... */
2003439b3deaSKacheong Poon 		tcp->tcp_eager_prev_q0->tcp_eager_next_q0 =
2004439b3deaSKacheong Poon 		    tcp->tcp_eager_next_q0;
2005439b3deaSKacheong Poon 		tcp->tcp_eager_next_q0->tcp_eager_prev_q0 =
2006439b3deaSKacheong Poon 		    tcp->tcp_eager_prev_q0;
2007439b3deaSKacheong Poon 
2008439b3deaSKacheong Poon 		/* ... and place it at the end of q0 */
2009439b3deaSKacheong Poon 		tcp->tcp_eager_prev_q0 = listener->tcp_eager_prev_q0;
2010439b3deaSKacheong Poon 		tcp->tcp_eager_next_q0 = listener;
2011439b3deaSKacheong Poon 		listener->tcp_eager_prev_q0->tcp_eager_next_q0 = tcp;
2012439b3deaSKacheong Poon 		listener->tcp_eager_prev_q0 = tcp;
2013439b3deaSKacheong Poon 		tcp->tcp_conn.tcp_eager_conn_ind = mp;
2014439b3deaSKacheong Poon 	}
2015439b3deaSKacheong Poon 
2016439b3deaSKacheong Poon 	/* we have timed out before */
2017439b3deaSKacheong Poon 	if (tcp->tcp_syn_rcvd_timeout != 0) {
2018439b3deaSKacheong Poon 		tcp->tcp_syn_rcvd_timeout = 0;
2019439b3deaSKacheong Poon 		listener->tcp_syn_rcvd_timeout--;
2020439b3deaSKacheong Poon 		if (listener->tcp_syn_defense &&
2021439b3deaSKacheong Poon 		    listener->tcp_syn_rcvd_timeout <=
2022439b3deaSKacheong Poon 		    (tcps->tcps_conn_req_max_q0 >> 5) &&
2023439b3deaSKacheong Poon 		    10*MINUTES < TICK_TO_MSEC(ddi_get_lbolt64() -
2024439b3deaSKacheong Poon 		    listener->tcp_last_rcv_lbolt)) {
2025439b3deaSKacheong Poon 			/*
2026439b3deaSKacheong Poon 			 * Turn off the defense mode if we
2027439b3deaSKacheong Poon 			 * believe the SYN attack is over.
2028439b3deaSKacheong Poon 			 */
2029439b3deaSKacheong Poon 			listener->tcp_syn_defense = B_FALSE;
2030439b3deaSKacheong Poon 			if (listener->tcp_ip_addr_cache) {
2031439b3deaSKacheong Poon 				kmem_free((void *)listener->tcp_ip_addr_cache,
2032439b3deaSKacheong Poon 				    IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t));
2033439b3deaSKacheong Poon 				listener->tcp_ip_addr_cache = NULL;
2034439b3deaSKacheong Poon 			}
2035439b3deaSKacheong Poon 		}
2036439b3deaSKacheong Poon 	}
2037439b3deaSKacheong Poon 	addr_cache = (ipaddr_t *)(listener->tcp_ip_addr_cache);
2038439b3deaSKacheong Poon 	if (addr_cache != NULL) {
2039439b3deaSKacheong Poon 		/*
2040439b3deaSKacheong Poon 		 * We have finished a 3-way handshake with this
2041439b3deaSKacheong Poon 		 * remote host. This proves the IP addr is good.
2042439b3deaSKacheong Poon 		 * Cache it!
2043439b3deaSKacheong Poon 		 */
2044439b3deaSKacheong Poon 		addr_cache[IP_ADDR_CACHE_HASH(tcp->tcp_connp->conn_faddr_v4)] =
2045439b3deaSKacheong Poon 		    tcp->tcp_connp->conn_faddr_v4;
2046439b3deaSKacheong Poon 	}
2047439b3deaSKacheong Poon 	mutex_exit(&listener->tcp_eager_lock);
2048439b3deaSKacheong Poon 	if (need_send_conn_ind)
20493e95bd4aSAnders Persson 		putnext(lconnp->conn_rq, mp);
2050439b3deaSKacheong Poon }
2051