xref: /titanic_41/usr/src/uts/common/inet/sctp/sctp_conn.c (revision 0d6bb4c6728fd20087fe25f4028a3838250e6e9c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #include <sys/kmem.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/stropts.h>
34 #include <sys/strsubr.h>
35 #include <sys/socket.h>
36 #include <sys/tsol/tndb.h>
37 
38 #include <netinet/in.h>
39 #include <netinet/ip6.h>
40 
41 #include <inet/common.h>
42 #include <inet/ip.h>
43 #include <inet/ip6.h>
44 #include <inet/ipclassifier.h>
45 #include <inet/ipsec_impl.h>
46 
47 #include "sctp_impl.h"
48 #include "sctp_addr.h"
49 
50 /*
51  * Common accept code.  Called by sctp_conn_request.
52  * cr_pkt is the INIT / INIT ACK packet.
53  */
54 static int
sctp_accept_comm(sctp_t * listener,sctp_t * acceptor,mblk_t * cr_pkt,uint_t ip_hdr_len,sctp_init_chunk_t * iack)55 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt,
56     uint_t ip_hdr_len, sctp_init_chunk_t *iack)
57 {
58 
59 	sctp_hdr_t		*sctph;
60 	sctp_chunk_hdr_t	*ich;
61 	sctp_init_chunk_t	*init;
62 	int			err;
63 	uint_t			sctp_options;
64 	conn_t			*aconnp;
65 	conn_t			*lconnp;
66 	sctp_stack_t		*sctps = listener->sctp_sctps;
67 
68 	sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len);
69 	ASSERT(OK_32PTR(sctph));
70 
71 	aconnp = acceptor->sctp_connp;
72 	lconnp = listener->sctp_connp;
73 	aconnp->conn_lport = lconnp->conn_lport;
74 	aconnp->conn_fport = sctph->sh_sport;
75 
76 	ich = (sctp_chunk_hdr_t *)(iack + 1);
77 	init = (sctp_init_chunk_t *)(ich + 1);
78 
79 	/* acceptor isn't in any fanouts yet, so don't need to hold locks */
80 	ASSERT(acceptor->sctp_faddrs == NULL);
81 	err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich,
82 	    &sctp_options);
83 	if (err != 0)
84 		return (err);
85 
86 	if ((err = sctp_set_hdraddrs(acceptor)) != 0)
87 		return (err);
88 
89 	if ((err = sctp_build_hdrs(acceptor, KM_NOSLEEP)) != 0)
90 		return (err);
91 
92 	if ((sctp_options & SCTP_PRSCTP_OPTION) &&
93 	    listener->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) {
94 		acceptor->sctp_prsctp_aware = B_TRUE;
95 	} else {
96 		acceptor->sctp_prsctp_aware = B_FALSE;
97 	}
98 
99 	/* Get  initial TSNs */
100 	acceptor->sctp_ltsn = ntohl(iack->sic_inittsn);
101 	acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd =
102 	    acceptor->sctp_ltsn - 1;
103 	acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd;
104 	/* Serial numbers are initialized to the same value as the TSNs */
105 	acceptor->sctp_lcsn = acceptor->sctp_ltsn;
106 
107 	if (!sctp_initialize_params(acceptor, init, iack))
108 		return (ENOMEM);
109 
110 	/*
111 	 * Copy sctp_secret from the listener in case we need to validate
112 	 * a possibly delayed cookie.
113 	 */
114 	bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN);
115 	bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret,
116 	    SCTP_SECRET_LEN);
117 	acceptor->sctp_last_secret_update = ddi_get_lbolt64();
118 
119 	/*
120 	 * After acceptor is inserted in the hash list, it can be found.
121 	 * So we need to lock it here.
122 	 */
123 	RUN_SCTP(acceptor);
124 
125 	sctp_conn_hash_insert(&sctps->sctps_conn_fanout[
126 	    SCTP_CONN_HASH(sctps, aconnp->conn_ports)], acceptor, 0);
127 	sctp_bind_hash_insert(&sctps->sctps_bind_fanout[
128 	    SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0);
129 
130 	SCTP_ASSOC_EST(sctps, acceptor);
131 	return (0);
132 }
133 
134 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */
135 sctp_t *
sctp_conn_request(sctp_t * sctp,mblk_t * mp,uint_t ifindex,uint_t ip_hdr_len,sctp_init_chunk_t * iack,ip_recv_attr_t * ira)136 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len,
137     sctp_init_chunk_t *iack, ip_recv_attr_t *ira)
138 {
139 	sctp_t	*eager;
140 	ip6_t	*ip6h;
141 	int	err;
142 	conn_t	*connp, *econnp;
143 	sctp_stack_t	*sctps;
144 	cred_t		*cr;
145 	pid_t		cpid;
146 	in6_addr_t	faddr, laddr;
147 	ip_xmit_attr_t	*ixa;
148 	sctp_listen_cnt_t *slc = sctp->sctp_listen_cnt;
149 	boolean_t	slc_set = B_FALSE;
150 
151 	/*
152 	 * No need to check for duplicate as this is the listener
153 	 * and we are holding the lock.  This means that no new
154 	 * connection can be created out of it.  And since the
155 	 * fanout already done cannot find a match, it means that
156 	 * there is no duplicate.
157 	 */
158 	ASSERT(OK_32PTR(mp->b_rptr));
159 
160 	connp = sctp->sctp_connp;
161 	sctps = sctp->sctp_sctps;
162 
163 	/*
164 	 * Enforce the limit set on the number of connections per listener.
165 	 * Note that tlc_cnt starts with 1.  So need to add 1 to tlc_max
166 	 * for comparison.
167 	 */
168 	if (slc != NULL) {
169 		int64_t now;
170 
171 		if (atomic_inc_32_nv(&slc->slc_cnt) > slc->slc_max + 1) {
172 			now = ddi_get_lbolt64();
173 			atomic_dec_32(&slc->slc_cnt);
174 			SCTP_KSTAT(sctps, sctp_listen_cnt_drop);
175 			slc->slc_drop++;
176 			if (now - slc->slc_report_time >
177 			    MSEC_TO_TICK(SCTP_SLC_REPORT_INTERVAL)) {
178 				zcmn_err(connp->conn_zoneid, CE_WARN,
179 				    "SCTP listener (port %d) association max "
180 				    "(%u) reached: %u attempts dropped total\n",
181 				    ntohs(connp->conn_lport),
182 				    slc->slc_max, slc->slc_drop);
183 				slc->slc_report_time = now;
184 			}
185 			return (NULL);
186 		}
187 		slc_set = B_TRUE;
188 	}
189 
190 	if ((eager = sctp_create_eager(sctp)) == NULL) {
191 		if (slc_set)
192 			atomic_dec_32(&slc->slc_cnt);
193 		return (NULL);
194 	}
195 	econnp = eager->sctp_connp;
196 
197 	if (connp->conn_policy != NULL) {
198 		/* Inherit the policy from the listener; use actions from ira */
199 		if (!ip_ipsec_policy_inherit(econnp, connp, ira)) {
200 			sctp_close_eager(eager);
201 			SCTPS_BUMP_MIB(sctps, sctpListenDrop);
202 			return (NULL);
203 		}
204 	}
205 
206 	ip6h = (ip6_t *)mp->b_rptr;
207 	if (ira->ira_flags & IXAF_IS_IPV4) {
208 		ipha_t	*ipha;
209 
210 		ipha = (ipha_t *)ip6h;
211 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &laddr);
212 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &faddr);
213 	} else {
214 		laddr = ip6h->ip6_dst;
215 		faddr = ip6h->ip6_src;
216 	}
217 
218 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
219 		/*
220 		 * XXX need to fix the cached policy issue here.
221 		 * We temporarily set the conn_laddr/conn_faddr here so
222 		 * that IPsec can use it for the latched policy
223 		 * selector.  This is obvioursly wrong as SCTP can
224 		 * use different addresses...
225 		 */
226 		econnp->conn_laddr_v6 = laddr;
227 		econnp->conn_faddr_v6 = faddr;
228 		econnp->conn_saddr_v6 = laddr;
229 	}
230 	if (ipsec_conn_cache_policy(econnp,
231 	    (ira->ira_flags & IRAF_IS_IPV4) != 0) != 0) {
232 		sctp_close_eager(eager);
233 		SCTPS_BUMP_MIB(sctps, sctpListenDrop);
234 		return (NULL);
235 	}
236 
237 	/* Save for getpeerucred */
238 	cr = ira->ira_cred;
239 	cpid = ira->ira_cpid;
240 
241 	if (is_system_labeled()) {
242 		ip_xmit_attr_t *ixa = econnp->conn_ixa;
243 
244 		ASSERT(ira->ira_tsl != NULL);
245 
246 		/* Discard any old label */
247 		if (ixa->ixa_free_flags & IXA_FREE_TSL) {
248 			ASSERT(ixa->ixa_tsl != NULL);
249 			label_rele(ixa->ixa_tsl);
250 			ixa->ixa_free_flags &= ~IXA_FREE_TSL;
251 			ixa->ixa_tsl = NULL;
252 		}
253 
254 		if ((connp->conn_mlp_type != mlptSingle ||
255 		    connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
256 		    ira->ira_tsl != NULL) {
257 			/*
258 			 * If this is an MLP connection or a MAC-Exempt
259 			 * connection with an unlabeled node, packets are to be
260 			 * exchanged using the security label of the received
261 			 * Cookie packet instead of the server application's
262 			 * label.
263 			 * tsol_check_dest called from ip_set_destination
264 			 * might later update TSF_UNLABELED by replacing
265 			 * ixa_tsl with a new label.
266 			 */
267 			label_hold(ira->ira_tsl);
268 			ip_xmit_attr_replace_tsl(ixa, ira->ira_tsl);
269 		} else {
270 			ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
271 		}
272 	}
273 
274 	err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack);
275 	if (err != 0) {
276 		sctp_close_eager(eager);
277 		SCTPS_BUMP_MIB(sctps, sctpListenDrop);
278 		return (NULL);
279 	}
280 
281 	ASSERT(eager->sctp_current->sf_ixa != NULL);
282 
283 	ixa = eager->sctp_current->sf_ixa;
284 	if (!(ira->ira_flags & IXAF_IS_IPV4)) {
285 		ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
286 
287 		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
288 		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) {
289 			eager->sctp_linklocal = 1;
290 
291 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
292 			ixa->ixa_scopeid = ifindex;
293 			econnp->conn_incoming_ifindex = ifindex;
294 		}
295 	}
296 
297 	/*
298 	 * On a clustered note send this notification to the clustering
299 	 * subsystem.
300 	 */
301 	if (cl_sctp_connect != NULL) {
302 		uchar_t	*slist;
303 		uchar_t	*flist;
304 		size_t	fsize;
305 		size_t	ssize;
306 
307 		fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs;
308 		ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs;
309 		slist = kmem_alloc(ssize, KM_NOSLEEP);
310 		flist = kmem_alloc(fsize, KM_NOSLEEP);
311 		if (slist == NULL || flist == NULL) {
312 			if (slist != NULL)
313 				kmem_free(slist, ssize);
314 			if (flist != NULL)
315 				kmem_free(flist, fsize);
316 			sctp_close_eager(eager);
317 			SCTPS_BUMP_MIB(sctps, sctpListenDrop);
318 			SCTP_KSTAT(sctps, sctp_cl_connect);
319 			return (NULL);
320 		}
321 		/* The clustering module frees these list */
322 		sctp_get_saddr_list(eager, slist, ssize);
323 		sctp_get_faddr_list(eager, flist, fsize);
324 		(*cl_sctp_connect)(econnp->conn_family, slist,
325 		    eager->sctp_nsaddrs, econnp->conn_lport, flist,
326 		    eager->sctp_nfaddrs, econnp->conn_fport, B_FALSE,
327 		    (cl_sctp_handle_t)eager);
328 	}
329 
330 	/* Connection established, so send up the conn_ind */
331 	if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd,
332 	    (sock_lower_handle_t)eager, NULL, cr, cpid,
333 	    &eager->sctp_upcalls)) == NULL) {
334 		sctp_close_eager(eager);
335 		SCTPS_BUMP_MIB(sctps, sctpListenDrop);
336 		return (NULL);
337 	}
338 	ASSERT(SCTP_IS_DETACHED(eager));
339 	eager->sctp_detached = B_FALSE;
340 	return (eager);
341 }
342 
343 /*
344  * Connect to a peer - this function inserts the sctp in the
345  * bind and conn fanouts, sends the INIT, and replies to the client
346  * with an OK ack.
347  */
348 int
sctp_connect(sctp_t * sctp,const struct sockaddr * dst,uint32_t addrlen,cred_t * cr,pid_t pid)349 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen,
350     cred_t *cr, pid_t pid)
351 {
352 	sin_t		*sin;
353 	sin6_t		*sin6;
354 	in6_addr_t	dstaddr;
355 	in_port_t	dstport;
356 	mblk_t		*initmp;
357 	sctp_tf_t	*tbf;
358 	sctp_t		*lsctp;
359 	char		buf[INET6_ADDRSTRLEN];
360 	int		sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP;
361 	int		err;
362 	sctp_faddr_t	*cur_fp;
363 	sctp_stack_t	*sctps = sctp->sctp_sctps;
364 	conn_t		*connp = sctp->sctp_connp;
365 	uint_t		scope_id = 0;
366 	ip_xmit_attr_t	*ixa;
367 
368 	/*
369 	 * Determine packet type based on type of address passed in
370 	 * the request should contain an IPv4 or IPv6 address.
371 	 * Make sure that address family matches the type of
372 	 * family of the address passed down.
373 	 */
374 	if (addrlen < sizeof (sin_t)) {
375 		return (EINVAL);
376 	}
377 	switch (dst->sa_family) {
378 	case AF_INET:
379 		sin = (sin_t *)dst;
380 
381 		/* Check for attempt to connect to non-unicast */
382 		if (CLASSD(sin->sin_addr.s_addr) ||
383 		    (sin->sin_addr.s_addr == INADDR_BROADCAST)) {
384 			ip0dbg(("sctp_connect: non-unicast\n"));
385 			return (EINVAL);
386 		}
387 		if (connp->conn_ipv6_v6only)
388 			return (EAFNOSUPPORT);
389 
390 		/* convert to v6 mapped */
391 		/* Check for attempt to connect to INADDR_ANY */
392 		if (sin->sin_addr.s_addr == INADDR_ANY)  {
393 			struct in_addr v4_addr;
394 			/*
395 			 * SunOS 4.x and 4.3 BSD allow an application
396 			 * to connect a TCP socket to INADDR_ANY.
397 			 * When they do this, the kernel picks the
398 			 * address of one interface and uses it
399 			 * instead.  The kernel usually ends up
400 			 * picking the address of the loopback
401 			 * interface.  This is an undocumented feature.
402 			 * However, we provide the same thing here
403 			 * in case any TCP apps that use this feature
404 			 * are being ported to SCTP...
405 			 */
406 			v4_addr.s_addr = htonl(INADDR_LOOPBACK);
407 			IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr);
408 		} else {
409 			IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr);
410 		}
411 		dstport = sin->sin_port;
412 		break;
413 	case AF_INET6:
414 		sin6 = (sin6_t *)dst;
415 		/* Check for attempt to connect to non-unicast. */
416 		if ((addrlen < sizeof (sin6_t)) ||
417 		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
418 			ip0dbg(("sctp_connect: non-unicast\n"));
419 			return (EINVAL);
420 		}
421 		if (connp->conn_ipv6_v6only &&
422 		    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
423 			return (EAFNOSUPPORT);
424 		}
425 		/* check for attempt to connect to unspec */
426 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
427 			dstaddr = ipv6_loopback;
428 		} else {
429 			dstaddr = sin6->sin6_addr;
430 			if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) {
431 				sctp->sctp_linklocal = 1;
432 				scope_id = sin6->sin6_scope_id;
433 			}
434 		}
435 		dstport = sin6->sin6_port;
436 		connp->conn_flowinfo = sin6->sin6_flowinfo;
437 		break;
438 	default:
439 		dprint(1, ("sctp_connect: unknown family %d\n",
440 		    dst->sa_family));
441 		return (EAFNOSUPPORT);
442 	}
443 
444 	(void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf));
445 	dprint(1, ("sctp_connect: attempting connect to %s...\n", buf));
446 
447 	RUN_SCTP(sctp);
448 
449 	if (connp->conn_family != dst->sa_family ||
450 	    (connp->conn_state_flags & CONN_CLOSING)) {
451 		WAKE_SCTP(sctp);
452 		return (EINVAL);
453 	}
454 
455 	/* We update our cred/cpid based on the caller of connect */
456 	if (connp->conn_cred != cr) {
457 		crhold(cr);
458 		crfree(connp->conn_cred);
459 		connp->conn_cred = cr;
460 	}
461 	connp->conn_cpid = pid;
462 
463 	/* Cache things in conn_ixa without any refhold */
464 	ixa = connp->conn_ixa;
465 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
466 	ixa->ixa_cred = cr;
467 	ixa->ixa_cpid = pid;
468 	if (is_system_labeled()) {
469 		/* We need to restart with a label based on the cred */
470 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
471 	}
472 
473 	switch (sctp->sctp_state) {
474 	case SCTPS_IDLE: {
475 		struct sockaddr_storage	ss;
476 
477 		/*
478 		 * We support a quick connect capability here, allowing
479 		 * clients to transition directly from IDLE to COOKIE_WAIT.
480 		 * sctp_bindi will pick an unused port, insert the connection
481 		 * in the bind hash and transition to BOUND state. SCTP
482 		 * picks and uses what it considers the optimal local address
483 		 * set (just like specifiying INADDR_ANY to bind()).
484 		 */
485 		dprint(1, ("sctp_connect: idle, attempting bind...\n"));
486 		ASSERT(sctp->sctp_nsaddrs == 0);
487 
488 		bzero(&ss, sizeof (ss));
489 		ss.ss_family = connp->conn_family;
490 		WAKE_SCTP(sctp);
491 		if ((err = sctp_bind(sctp, (struct sockaddr *)&ss,
492 		    sizeof (ss))) != 0) {
493 			return (err);
494 		}
495 		RUN_SCTP(sctp);
496 		/* FALLTHRU */
497 	}
498 
499 	case SCTPS_BOUND:
500 		ASSERT(sctp->sctp_nsaddrs > 0);
501 
502 		/* do the connect */
503 		/* XXX check for attempt to connect to self */
504 		connp->conn_fport = dstport;
505 
506 		/*
507 		 * Don't allow this connection to completely duplicate
508 		 * an existing connection.
509 		 *
510 		 * Ensure that the duplicate check and insertion is atomic.
511 		 */
512 		sctp_conn_hash_remove(sctp);
513 		tbf = &sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps,
514 		    connp->conn_ports)];
515 		mutex_enter(&tbf->tf_lock);
516 		lsctp = sctp_lookup(sctp, &dstaddr, tbf, &connp->conn_ports,
517 		    SCTPS_COOKIE_WAIT);
518 		if (lsctp != NULL) {
519 			/* found a duplicate connection */
520 			mutex_exit(&tbf->tf_lock);
521 			SCTP_REFRELE(lsctp);
522 			WAKE_SCTP(sctp);
523 			return (EADDRINUSE);
524 		}
525 
526 		/*
527 		 * OK; set up the peer addr (this may grow after we get
528 		 * the INIT ACK from the peer with additional addresses).
529 		 */
530 		if ((err = sctp_add_faddr(sctp, &dstaddr, sleep,
531 		    B_FALSE)) != 0) {
532 			mutex_exit(&tbf->tf_lock);
533 			WAKE_SCTP(sctp);
534 			return (err);
535 		}
536 		cur_fp = sctp->sctp_faddrs;
537 		ASSERT(cur_fp->sf_ixa != NULL);
538 
539 		/* No valid src addr, return. */
540 		if (cur_fp->sf_state == SCTP_FADDRS_UNREACH) {
541 			mutex_exit(&tbf->tf_lock);
542 			WAKE_SCTP(sctp);
543 			return (EADDRNOTAVAIL);
544 		}
545 
546 		sctp->sctp_primary = cur_fp;
547 		sctp->sctp_current = cur_fp;
548 		sctp->sctp_mss = cur_fp->sf_pmss;
549 		sctp_conn_hash_insert(tbf, sctp, 1);
550 		mutex_exit(&tbf->tf_lock);
551 
552 		ixa = cur_fp->sf_ixa;
553 		ASSERT(ixa->ixa_cred != NULL);
554 
555 		if (scope_id != 0) {
556 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
557 			ixa->ixa_scopeid = scope_id;
558 		} else {
559 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
560 		}
561 
562 		/* initialize composite headers */
563 		if ((err = sctp_set_hdraddrs(sctp)) != 0) {
564 			sctp_conn_hash_remove(sctp);
565 			WAKE_SCTP(sctp);
566 			return (err);
567 		}
568 
569 		if ((err = sctp_build_hdrs(sctp, KM_SLEEP)) != 0) {
570 			sctp_conn_hash_remove(sctp);
571 			WAKE_SCTP(sctp);
572 			return (err);
573 		}
574 
575 		/*
576 		 * Turn off the don't fragment bit on the (only) faddr,
577 		 * so that if one of the messages exchanged during the
578 		 * initialization sequence exceeds the path mtu, it
579 		 * at least has a chance to get there. SCTP does no
580 		 * fragmentation of initialization messages.  The DF bit
581 		 * will be turned on again in sctp_send_cookie_echo()
582 		 * (but the cookie echo will still be sent with the df bit
583 		 * off).
584 		 */
585 		cur_fp->sf_df = B_FALSE;
586 
587 		/* Mark this address as alive */
588 		cur_fp->sf_state = SCTP_FADDRS_ALIVE;
589 
590 		/* Send the INIT to the peer */
591 		SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->sf_rto);
592 		sctp->sctp_state = SCTPS_COOKIE_WAIT;
593 		/*
594 		 * sctp_init_mp() could result in modifying the source
595 		 * address list, so take the hash lock.
596 		 */
597 		mutex_enter(&tbf->tf_lock);
598 		initmp = sctp_init_mp(sctp, cur_fp);
599 		if (initmp == NULL) {
600 			mutex_exit(&tbf->tf_lock);
601 			/*
602 			 * It may happen that all the source addresses
603 			 * (loopback/link local) are removed.  In that case,
604 			 * faile the connect.
605 			 */
606 			if (sctp->sctp_nsaddrs == 0) {
607 				sctp_conn_hash_remove(sctp);
608 				SCTP_FADDR_TIMER_STOP(cur_fp);
609 				WAKE_SCTP(sctp);
610 				return (EADDRNOTAVAIL);
611 			}
612 
613 			/* Otherwise, let the retransmission timer retry */
614 			WAKE_SCTP(sctp);
615 			goto notify_ulp;
616 		}
617 		mutex_exit(&tbf->tf_lock);
618 
619 		/*
620 		 * On a clustered note send this notification to the clustering
621 		 * subsystem.
622 		 */
623 		if (cl_sctp_connect != NULL) {
624 			uchar_t		*slist;
625 			uchar_t		*flist;
626 			size_t		ssize;
627 			size_t		fsize;
628 
629 			fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
630 			ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
631 			slist = kmem_alloc(ssize, KM_SLEEP);
632 			flist = kmem_alloc(fsize, KM_SLEEP);
633 			/* The clustering module frees the lists */
634 			sctp_get_saddr_list(sctp, slist, ssize);
635 			sctp_get_faddr_list(sctp, flist, fsize);
636 			(*cl_sctp_connect)(connp->conn_family, slist,
637 			    sctp->sctp_nsaddrs, connp->conn_lport,
638 			    flist, sctp->sctp_nfaddrs, connp->conn_fport,
639 			    B_TRUE, (cl_sctp_handle_t)sctp);
640 		}
641 		ASSERT(ixa->ixa_cred != NULL);
642 		ASSERT(ixa->ixa_ire != NULL);
643 
644 		(void) conn_ip_output(initmp, ixa);
645 		BUMP_LOCAL(sctp->sctp_opkts);
646 		WAKE_SCTP(sctp);
647 
648 notify_ulp:
649 		sctp_set_ulp_prop(sctp);
650 
651 		return (0);
652 	default:
653 		ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state));
654 		WAKE_SCTP(sctp);
655 		return (EINVAL);
656 	}
657 }
658