xref: /titanic_51/usr/src/uts/common/inet/sctp/sctp_conn.c (revision b27516f55237249607f754e6e42e865f12456675)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #include <sys/kmem.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/stropts.h>
34 #include <sys/strsubr.h>
35 #include <sys/socket.h>
36 #include <sys/tsol/tndb.h>
37 
38 #include <netinet/in.h>
39 #include <netinet/ip6.h>
40 
41 #include <inet/common.h>
42 #include <inet/ip.h>
43 #include <inet/ip6.h>
44 #include <inet/ipclassifier.h>
45 #include <inet/ipsec_impl.h>
46 
47 #include "sctp_impl.h"
48 #include "sctp_addr.h"
49 
50 /*
51  * Common accept code.  Called by sctp_conn_request.
52  * cr_pkt is the INIT / INIT ACK packet.
53  */
54 static int
55 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt,
56     uint_t ip_hdr_len, sctp_init_chunk_t *iack)
57 {
58 
59 	sctp_hdr_t		*sctph;
60 	sctp_chunk_hdr_t	*ich;
61 	sctp_init_chunk_t	*init;
62 	int			err;
63 	uint_t			sctp_options;
64 	conn_t			*aconnp;
65 	conn_t			*lconnp;
66 	sctp_stack_t	*sctps = listener->sctp_sctps;
67 
68 	sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len);
69 	ASSERT(OK_32PTR(sctph));
70 
71 	aconnp = acceptor->sctp_connp;
72 	lconnp = listener->sctp_connp;
73 	aconnp->conn_lport = lconnp->conn_lport;
74 	aconnp->conn_fport = sctph->sh_sport;
75 
76 	ich = (sctp_chunk_hdr_t *)(iack + 1);
77 	init = (sctp_init_chunk_t *)(ich + 1);
78 
79 	/* acceptor isn't in any fanouts yet, so don't need to hold locks */
80 	ASSERT(acceptor->sctp_faddrs == NULL);
81 	err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich,
82 	    &sctp_options);
83 	if (err != 0)
84 		return (err);
85 
86 	if ((err = sctp_set_hdraddrs(acceptor)) != 0)
87 		return (err);
88 
89 	if ((err = sctp_build_hdrs(acceptor, KM_NOSLEEP)) != 0)
90 		return (err);
91 
92 	if ((sctp_options & SCTP_PRSCTP_OPTION) &&
93 	    listener->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) {
94 		acceptor->sctp_prsctp_aware = B_TRUE;
95 	} else {
96 		acceptor->sctp_prsctp_aware = B_FALSE;
97 	}
98 
99 	/* Get  initial TSNs */
100 	acceptor->sctp_ltsn = ntohl(iack->sic_inittsn);
101 	acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd =
102 	    acceptor->sctp_ltsn - 1;
103 	acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd;
104 	/* Serial numbers are initialized to the same value as the TSNs */
105 	acceptor->sctp_lcsn = acceptor->sctp_ltsn;
106 
107 	if (!sctp_initialize_params(acceptor, init, iack))
108 		return (ENOMEM);
109 
110 	/*
111 	 * Copy sctp_secret from the listener in case we need to validate
112 	 * a possibly delayed cookie.
113 	 */
114 	bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN);
115 	bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret,
116 	    SCTP_SECRET_LEN);
117 	acceptor->sctp_last_secret_update = ddi_get_lbolt64();
118 
119 	/*
120 	 * After acceptor is inserted in the hash list, it can be found.
121 	 * So we need to lock it here.
122 	 */
123 	RUN_SCTP(acceptor);
124 
125 	sctp_conn_hash_insert(&sctps->sctps_conn_fanout[
126 	    SCTP_CONN_HASH(sctps, aconnp->conn_ports)], acceptor, 0);
127 	sctp_bind_hash_insert(&sctps->sctps_bind_fanout[
128 	    SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0);
129 
130 	/*
131 	 * No need to check for multicast destination since ip will only pass
132 	 * up multicasts to those that have expressed interest
133 	 * TODO: what about rejecting broadcasts?
134 	 * Also check that source is not a multicast or broadcast address.
135 	 */
136 	/* XXXSCTP */
137 	acceptor->sctp_state = SCTPS_ESTABLISHED;
138 	acceptor->sctp_assoc_start_time = (uint32_t)ddi_get_lbolt();
139 	/*
140 	 * listener->sctp_rwnd should be the default window size or a
141 	 * window size changed via SO_RCVBUF option.
142 	 */
143 	acceptor->sctp_rwnd = listener->sctp_rwnd;
144 	acceptor->sctp_irwnd = acceptor->sctp_rwnd;
145 	acceptor->sctp_pd_point = acceptor->sctp_rwnd;
146 	acceptor->sctp_upcalls = listener->sctp_upcalls;
147 
148 	return (0);
149 }
150 
151 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */
152 sctp_t *
153 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len,
154     sctp_init_chunk_t *iack, ip_recv_attr_t *ira)
155 {
156 	sctp_t	*eager;
157 	ip6_t	*ip6h;
158 	int	err;
159 	conn_t	*connp, *econnp;
160 	sctp_stack_t	*sctps;
161 	struct sock_proto_props sopp;
162 	cred_t		*cr;
163 	pid_t		cpid;
164 	in6_addr_t	faddr, laddr;
165 	ip_xmit_attr_t	*ixa;
166 
167 	/*
168 	 * No need to check for duplicate as this is the listener
169 	 * and we are holding the lock.  This means that no new
170 	 * connection can be created out of it.  And since the
171 	 * fanout already done cannot find a match, it means that
172 	 * there is no duplicate.
173 	 */
174 	ASSERT(OK_32PTR(mp->b_rptr));
175 
176 	if ((eager = sctp_create_eager(sctp)) == NULL) {
177 		return (NULL);
178 	}
179 
180 	connp = sctp->sctp_connp;
181 	sctps = sctp->sctp_sctps;
182 	econnp = eager->sctp_connp;
183 
184 	if (connp->conn_policy != NULL) {
185 		/* Inherit the policy from the listener; use actions from ira */
186 		if (!ip_ipsec_policy_inherit(econnp, connp, ira)) {
187 			sctp_close_eager(eager);
188 			BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
189 			return (NULL);
190 		}
191 	}
192 
193 	ip6h = (ip6_t *)mp->b_rptr;
194 	if (ira->ira_flags & IXAF_IS_IPV4) {
195 		ipha_t	*ipha;
196 
197 		ipha = (ipha_t *)ip6h;
198 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &laddr);
199 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &faddr);
200 	} else {
201 		laddr = ip6h->ip6_dst;
202 		faddr = ip6h->ip6_src;
203 	}
204 
205 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
206 		/*
207 		 * XXX need to fix the cached policy issue here.
208 		 * We temporarily set the conn_laddr/conn_faddr here so
209 		 * that IPsec can use it for the latched policy
210 		 * selector.  This is obvioursly wrong as SCTP can
211 		 * use different addresses...
212 		 */
213 		econnp->conn_laddr_v6 = laddr;
214 		econnp->conn_faddr_v6 = faddr;
215 		econnp->conn_saddr_v6 = laddr;
216 	}
217 	if (ipsec_conn_cache_policy(econnp,
218 	    (ira->ira_flags & IRAF_IS_IPV4) != 0) != 0) {
219 		sctp_close_eager(eager);
220 		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
221 		return (NULL);
222 	}
223 
224 	/* Save for getpeerucred */
225 	cr = ira->ira_cred;
226 	cpid = ira->ira_cpid;
227 
228 	if (is_system_labeled()) {
229 		ip_xmit_attr_t *ixa = econnp->conn_ixa;
230 
231 		ASSERT(ira->ira_tsl != NULL);
232 
233 		/* Discard any old label */
234 		if (ixa->ixa_free_flags & IXA_FREE_TSL) {
235 			ASSERT(ixa->ixa_tsl != NULL);
236 			label_rele(ixa->ixa_tsl);
237 			ixa->ixa_free_flags &= ~IXA_FREE_TSL;
238 			ixa->ixa_tsl = NULL;
239 		}
240 
241 		if ((connp->conn_mlp_type != mlptSingle ||
242 		    connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
243 		    ira->ira_tsl != NULL) {
244 			/*
245 			 * If this is an MLP connection or a MAC-Exempt
246 			 * connection with an unlabeled node, packets are to be
247 			 * exchanged using the security label of the received
248 			 * Cookie packet instead of the server application's
249 			 * label.
250 			 * tsol_check_dest called from ip_set_destination
251 			 * might later update TSF_UNLABELED by replacing
252 			 * ixa_tsl with a new label.
253 			 */
254 			label_hold(ira->ira_tsl);
255 			ip_xmit_attr_replace_tsl(ixa, ira->ira_tsl);
256 		} else {
257 			ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
258 		}
259 	}
260 
261 	err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack);
262 	if (err != 0) {
263 		sctp_close_eager(eager);
264 		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
265 		return (NULL);
266 	}
267 
268 	ASSERT(eager->sctp_current->ixa != NULL);
269 
270 	ixa = eager->sctp_current->ixa;
271 	if (!(ira->ira_flags & IXAF_IS_IPV4)) {
272 		ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
273 
274 		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
275 		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) {
276 			eager->sctp_linklocal = 1;
277 
278 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
279 			ixa->ixa_scopeid = ifindex;
280 			econnp->conn_incoming_ifindex = ifindex;
281 		}
282 	}
283 
284 	/*
285 	 * On a clustered note send this notification to the clustering
286 	 * subsystem.
287 	 */
288 	if (cl_sctp_connect != NULL) {
289 		uchar_t	*slist;
290 		uchar_t	*flist;
291 		size_t	fsize;
292 		size_t	ssize;
293 
294 		fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs;
295 		ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs;
296 		slist = kmem_alloc(ssize, KM_NOSLEEP);
297 		flist = kmem_alloc(fsize, KM_NOSLEEP);
298 		if (slist == NULL || flist == NULL) {
299 			if (slist != NULL)
300 				kmem_free(slist, ssize);
301 			if (flist != NULL)
302 				kmem_free(flist, fsize);
303 			sctp_close_eager(eager);
304 			BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
305 			SCTP_KSTAT(sctps, sctp_cl_connect);
306 			return (NULL);
307 		}
308 		/* The clustering module frees these list */
309 		sctp_get_saddr_list(eager, slist, ssize);
310 		sctp_get_faddr_list(eager, flist, fsize);
311 		(*cl_sctp_connect)(econnp->conn_family, slist,
312 		    eager->sctp_nsaddrs, econnp->conn_lport, flist,
313 		    eager->sctp_nfaddrs, econnp->conn_fport, B_FALSE,
314 		    (cl_sctp_handle_t)eager);
315 	}
316 
317 	/* Connection established, so send up the conn_ind */
318 	if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd,
319 	    (sock_lower_handle_t)eager, NULL, cr, cpid,
320 	    &eager->sctp_upcalls)) == NULL) {
321 		sctp_close_eager(eager);
322 		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
323 		return (NULL);
324 	}
325 	ASSERT(SCTP_IS_DETACHED(eager));
326 	eager->sctp_detached = B_FALSE;
327 	bzero(&sopp, sizeof (sopp));
328 	sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF;
329 	sopp.sopp_maxblk = strmsgsz;
330 	if (econnp->conn_family == AF_INET) {
331 		sopp.sopp_wroff = sctps->sctps_wroff_xtra +
332 		    sizeof (sctp_data_hdr_t) + sctp->sctp_hdr_len;
333 	} else {
334 		sopp.sopp_wroff = sctps->sctps_wroff_xtra +
335 		    sizeof (sctp_data_hdr_t) + sctp->sctp_hdr6_len;
336 	}
337 	eager->sctp_ulp_prop(eager->sctp_ulpd, &sopp);
338 	return (eager);
339 }
340 
341 /*
342  * Connect to a peer - this function inserts the sctp in the
343  * bind and conn fanouts, sends the INIT, and replies to the client
344  * with an OK ack.
345  */
346 int
347 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen,
348     cred_t *cr, pid_t pid)
349 {
350 	sin_t		*sin;
351 	sin6_t		*sin6;
352 	in6_addr_t	dstaddr;
353 	in_port_t	dstport;
354 	mblk_t		*initmp;
355 	sctp_tf_t	*tbf;
356 	sctp_t		*lsctp;
357 	char		buf[INET6_ADDRSTRLEN];
358 	int		sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP;
359 	int		err;
360 	sctp_faddr_t	*cur_fp;
361 	sctp_stack_t	*sctps = sctp->sctp_sctps;
362 	conn_t		*connp = sctp->sctp_connp;
363 	uint_t		scope_id = 0;
364 	ip_xmit_attr_t	*ixa;
365 
366 	/*
367 	 * Determine packet type based on type of address passed in
368 	 * the request should contain an IPv4 or IPv6 address.
369 	 * Make sure that address family matches the type of
370 	 * family of the address passed down.
371 	 */
372 	if (addrlen < sizeof (sin_t)) {
373 		return (EINVAL);
374 	}
375 	switch (dst->sa_family) {
376 	case AF_INET:
377 		sin = (sin_t *)dst;
378 
379 		/* Check for attempt to connect to non-unicast */
380 		if (CLASSD(sin->sin_addr.s_addr) ||
381 		    (sin->sin_addr.s_addr == INADDR_BROADCAST)) {
382 			ip0dbg(("sctp_connect: non-unicast\n"));
383 			return (EINVAL);
384 		}
385 		if (connp->conn_ipv6_v6only)
386 			return (EAFNOSUPPORT);
387 
388 		/* convert to v6 mapped */
389 		/* Check for attempt to connect to INADDR_ANY */
390 		if (sin->sin_addr.s_addr == INADDR_ANY)  {
391 			struct in_addr v4_addr;
392 			/*
393 			 * SunOS 4.x and 4.3 BSD allow an application
394 			 * to connect a TCP socket to INADDR_ANY.
395 			 * When they do this, the kernel picks the
396 			 * address of one interface and uses it
397 			 * instead.  The kernel usually ends up
398 			 * picking the address of the loopback
399 			 * interface.  This is an undocumented feature.
400 			 * However, we provide the same thing here
401 			 * in case any TCP apps that use this feature
402 			 * are being ported to SCTP...
403 			 */
404 			v4_addr.s_addr = htonl(INADDR_LOOPBACK);
405 			IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr);
406 		} else {
407 			IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr);
408 		}
409 		dstport = sin->sin_port;
410 		break;
411 	case AF_INET6:
412 		sin6 = (sin6_t *)dst;
413 		/* Check for attempt to connect to non-unicast. */
414 		if ((addrlen < sizeof (sin6_t)) ||
415 		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
416 			ip0dbg(("sctp_connect: non-unicast\n"));
417 			return (EINVAL);
418 		}
419 		if (connp->conn_ipv6_v6only &&
420 		    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
421 			return (EAFNOSUPPORT);
422 		}
423 		/* check for attempt to connect to unspec */
424 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
425 			dstaddr = ipv6_loopback;
426 		} else {
427 			dstaddr = sin6->sin6_addr;
428 			if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) {
429 				sctp->sctp_linklocal = 1;
430 				scope_id = sin6->sin6_scope_id;
431 			}
432 		}
433 		dstport = sin6->sin6_port;
434 		connp->conn_flowinfo = sin6->sin6_flowinfo;
435 		break;
436 	default:
437 		dprint(1, ("sctp_connect: unknown family %d\n",
438 		    dst->sa_family));
439 		return (EAFNOSUPPORT);
440 	}
441 
442 	(void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf));
443 	dprint(1, ("sctp_connect: attempting connect to %s...\n", buf));
444 
445 	RUN_SCTP(sctp);
446 
447 	if (connp->conn_family != dst->sa_family ||
448 	    (connp->conn_state_flags & CONN_CLOSING)) {
449 		WAKE_SCTP(sctp);
450 		return (EINVAL);
451 	}
452 
453 	/* We update our cred/cpid based on the caller of connect */
454 	if (connp->conn_cred != cr) {
455 		crhold(cr);
456 		crfree(connp->conn_cred);
457 		connp->conn_cred = cr;
458 	}
459 	connp->conn_cpid = pid;
460 
461 	/* Cache things in conn_ixa without any refhold */
462 	ixa = connp->conn_ixa;
463 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
464 	ixa->ixa_cred = cr;
465 	ixa->ixa_cpid = pid;
466 	if (is_system_labeled()) {
467 		/* We need to restart with a label based on the cred */
468 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
469 	}
470 
471 	switch (sctp->sctp_state) {
472 	case SCTPS_IDLE: {
473 		struct sockaddr_storage	ss;
474 
475 		/*
476 		 * We support a quick connect capability here, allowing
477 		 * clients to transition directly from IDLE to COOKIE_WAIT.
478 		 * sctp_bindi will pick an unused port, insert the connection
479 		 * in the bind hash and transition to BOUND state. SCTP
480 		 * picks and uses what it considers the optimal local address
481 		 * set (just like specifiying INADDR_ANY to bind()).
482 		 */
483 		dprint(1, ("sctp_connect: idle, attempting bind...\n"));
484 		ASSERT(sctp->sctp_nsaddrs == 0);
485 
486 		bzero(&ss, sizeof (ss));
487 		ss.ss_family = connp->conn_family;
488 		WAKE_SCTP(sctp);
489 		if ((err = sctp_bind(sctp, (struct sockaddr *)&ss,
490 		    sizeof (ss))) != 0) {
491 			return (err);
492 		}
493 		RUN_SCTP(sctp);
494 		/* FALLTHRU */
495 	}
496 
497 	case SCTPS_BOUND:
498 		ASSERT(sctp->sctp_nsaddrs > 0);
499 
500 		/* do the connect */
501 		/* XXX check for attempt to connect to self */
502 		connp->conn_fport = dstport;
503 
504 		/*
505 		 * Don't allow this connection to completely duplicate
506 		 * an existing connection.
507 		 *
508 		 * Ensure that the duplicate check and insertion is atomic.
509 		 */
510 		sctp_conn_hash_remove(sctp);
511 		tbf = &sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps,
512 		    connp->conn_ports)];
513 		mutex_enter(&tbf->tf_lock);
514 		lsctp = sctp_lookup(sctp, &dstaddr, tbf, &connp->conn_ports,
515 		    SCTPS_COOKIE_WAIT);
516 		if (lsctp != NULL) {
517 			/* found a duplicate connection */
518 			mutex_exit(&tbf->tf_lock);
519 			SCTP_REFRELE(lsctp);
520 			WAKE_SCTP(sctp);
521 			return (EADDRINUSE);
522 		}
523 
524 		/*
525 		 * OK; set up the peer addr (this may grow after we get
526 		 * the INIT ACK from the peer with additional addresses).
527 		 */
528 		if ((err = sctp_add_faddr(sctp, &dstaddr, sleep,
529 		    B_FALSE)) != 0) {
530 			mutex_exit(&tbf->tf_lock);
531 			WAKE_SCTP(sctp);
532 			return (err);
533 		}
534 		cur_fp = sctp->sctp_faddrs;
535 		ASSERT(cur_fp->ixa != NULL);
536 
537 		/* No valid src addr, return. */
538 		if (cur_fp->state == SCTP_FADDRS_UNREACH) {
539 			mutex_exit(&tbf->tf_lock);
540 			WAKE_SCTP(sctp);
541 			return (EADDRNOTAVAIL);
542 		}
543 
544 		sctp->sctp_primary = cur_fp;
545 		sctp->sctp_current = cur_fp;
546 		sctp->sctp_mss = cur_fp->sfa_pmss;
547 		sctp_conn_hash_insert(tbf, sctp, 1);
548 		mutex_exit(&tbf->tf_lock);
549 
550 		ixa = cur_fp->ixa;
551 		ASSERT(ixa->ixa_cred != NULL);
552 
553 		if (scope_id != 0) {
554 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
555 			ixa->ixa_scopeid = scope_id;
556 		} else {
557 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
558 		}
559 
560 		/* initialize composite headers */
561 		if ((err = sctp_set_hdraddrs(sctp)) != 0) {
562 			sctp_conn_hash_remove(sctp);
563 			WAKE_SCTP(sctp);
564 			return (err);
565 		}
566 
567 		if ((err = sctp_build_hdrs(sctp, KM_SLEEP)) != 0) {
568 			sctp_conn_hash_remove(sctp);
569 			WAKE_SCTP(sctp);
570 			return (err);
571 		}
572 
573 		/*
574 		 * Turn off the don't fragment bit on the (only) faddr,
575 		 * so that if one of the messages exchanged during the
576 		 * initialization sequence exceeds the path mtu, it
577 		 * at least has a chance to get there. SCTP does no
578 		 * fragmentation of initialization messages.  The DF bit
579 		 * will be turned on again in sctp_send_cookie_echo()
580 		 * (but the cookie echo will still be sent with the df bit
581 		 * off).
582 		 */
583 		cur_fp->df = B_FALSE;
584 
585 		/* Mark this address as alive */
586 		cur_fp->state = SCTP_FADDRS_ALIVE;
587 
588 		/* Send the INIT to the peer */
589 		SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto);
590 		sctp->sctp_state = SCTPS_COOKIE_WAIT;
591 		/*
592 		 * sctp_init_mp() could result in modifying the source
593 		 * address list, so take the hash lock.
594 		 */
595 		mutex_enter(&tbf->tf_lock);
596 		initmp = sctp_init_mp(sctp, cur_fp);
597 		if (initmp == NULL) {
598 			mutex_exit(&tbf->tf_lock);
599 			/*
600 			 * It may happen that all the source addresses
601 			 * (loopback/link local) are removed.  In that case,
602 			 * faile the connect.
603 			 */
604 			if (sctp->sctp_nsaddrs == 0) {
605 				sctp_conn_hash_remove(sctp);
606 				SCTP_FADDR_TIMER_STOP(cur_fp);
607 				WAKE_SCTP(sctp);
608 				return (EADDRNOTAVAIL);
609 			}
610 
611 			/* Otherwise, let the retransmission timer retry */
612 			WAKE_SCTP(sctp);
613 			goto notify_ulp;
614 		}
615 		mutex_exit(&tbf->tf_lock);
616 
617 		/*
618 		 * On a clustered note send this notification to the clustering
619 		 * subsystem.
620 		 */
621 		if (cl_sctp_connect != NULL) {
622 			uchar_t		*slist;
623 			uchar_t		*flist;
624 			size_t		ssize;
625 			size_t		fsize;
626 
627 			fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
628 			ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
629 			slist = kmem_alloc(ssize, KM_SLEEP);
630 			flist = kmem_alloc(fsize, KM_SLEEP);
631 			/* The clustering module frees the lists */
632 			sctp_get_saddr_list(sctp, slist, ssize);
633 			sctp_get_faddr_list(sctp, flist, fsize);
634 			(*cl_sctp_connect)(connp->conn_family, slist,
635 			    sctp->sctp_nsaddrs, connp->conn_lport,
636 			    flist, sctp->sctp_nfaddrs, connp->conn_fport,
637 			    B_TRUE, (cl_sctp_handle_t)sctp);
638 		}
639 		ASSERT(ixa->ixa_cred != NULL);
640 		ASSERT(ixa->ixa_ire != NULL);
641 
642 		(void) conn_ip_output(initmp, ixa);
643 		BUMP_LOCAL(sctp->sctp_opkts);
644 		WAKE_SCTP(sctp);
645 
646 notify_ulp:
647 		sctp_set_ulp_prop(sctp);
648 
649 		return (0);
650 	default:
651 		ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state));
652 		WAKE_SCTP(sctp);
653 		return (EINVAL);
654 	}
655 }
656