xref: /illumos-gate/usr/src/uts/common/inet/sctp/sctp_common.c (revision c2aa8c918a0c67f7fd93724a31efac84968fc12c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/kmem.h>
36 #include <sys/socket.h>
37 #include <sys/random.h>
38 #include <sys/tsol/tndb.h>
39 #include <sys/tsol/tnet.h>
40 
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43 #include <netinet/sctp.h>
44 
45 #include <inet/common.h>
46 #include <inet/ip.h>
47 #include <inet/ip6.h>
48 #include <inet/ip_ire.h>
49 #include <inet/mib2.h>
50 #include <inet/nd.h>
51 #include <inet/optcom.h>
52 #include <inet/sctp_ip.h>
53 #include <inet/ipclassifier.h>
54 
55 #include "sctp_impl.h"
56 #include "sctp_addr.h"
57 #include "sctp_asconf.h"
58 
/*
 * kmem cache used for peer address (sctp_faddr_t) structures; the
 * functions in this file allocate from it with kmem_cache_alloc() and
 * return entries with kmem_cache_free().
 */
static struct kmem_cache *sctp_kmem_faddr_cache;
/*
 * Fills in a freshly allocated sctp_faddr_t; called from sctp_add_faddr()
 * with the owning sctp_t, the new faddr, the peer address and the timer
 * mblk allocated for it.
 */
static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *, mblk_t *);
61 
62 /* Set the source address.  Refer to comments in sctp_get_ire(). */
63 void
64 sctp_set_saddr(sctp_t *sctp, sctp_faddr_t *fp)
65 {
66 	boolean_t v6 = !fp->isv4;
67 
68 	if (sctp->sctp_bound_to_all) {
69 		V6_SET_ZERO(fp->saddr);
70 	} else {
71 		fp->saddr = sctp_get_valid_addr(sctp, v6);
72 		if (!v6 && IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) ||
73 		    v6 && IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
74 			fp->state = SCTP_FADDRS_UNREACH;
75 			/* Disable heartbeat. */
76 			fp->hb_expiry = 0;
77 			fp->hb_pending = B_FALSE;
78 			fp->strikes = 0;
79 		}
80 	}
81 }
82 
/*
 * Call this function to update the cached IRE of a peer addr fp.
 *
 * Any previously cached IRE is released first.  A new cache IRE is then
 * looked up for fp->faddr; from it this function derives the source
 * address (fp->saddr), initial RTT/RTO values (only if fp has none yet)
 * and the path MSS (fp->sfa_pmss).  When no IRE is found, or the IRE's
 * source address is not usable by this association, the source address
 * is chosen by sctp_set_saddr() instead, which may mark fp as
 * SCTP_FADDRS_UNREACH; in that case the function returns without caching
 * an IRE.  If fp is the current peer address, the composite header is
 * refreshed through sctp_set_faddr_current().
 */
void
sctp_get_ire(sctp_t *sctp, sctp_faddr_t *fp)
{
	ire_t		*ire;
	ipaddr_t	addr4;
	in6_addr_t	laddr;
	sctp_saddr_ipif_t *sp;
	int		hdrlen;
	ts_label_t	*tsl;
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	ip_stack_t	*ipst = sctps->sctps_netstack->netstack_ip;

	/* Remove the previous cache IRE */
	if ((ire = fp->ire) != NULL) {
		IRE_REFRELE_NOTR(ire);
		fp->ire = NULL;
	}

	/*
	 * If this addr is not reachable, mark it as unconfirmed for now, the
	 * state will be changed back to unreachable later in this function
	 * if it is still the case.
	 */
	if (fp->state == SCTP_FADDRS_UNREACH) {
		fp->state = SCTP_FADDRS_UNCONFIRMED;
	}

	tsl = crgetlabel(CONN_CRED(sctp->sctp_connp));

	/*
	 * Look up the cache IRE for the peer and remember the IRE's source
	 * address (as a v4-mapped address in the IPv4 case) in laddr.
	 */
	if (fp->isv4) {
		IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4);
		ire = ire_cache_lookup(addr4, sctp->sctp_zoneid, tsl, ipst);
		if (ire != NULL)
			IN6_IPADDR_TO_V4MAPPED(ire->ire_src_addr, &laddr);
	} else {
		ire = ire_cache_lookup_v6(&fp->faddr, sctp->sctp_zoneid, tsl,
		    ipst);
		if (ire != NULL)
			laddr = ire->ire_src_addr_v6;
	}

	if (ire == NULL) {
		dprint(3, ("ire2faddr: no ire for %x:%x:%x:%x\n",
		    SCTP_PRINTADDR(fp->faddr)));
		/*
		 * It is tempting to just leave the src addr
		 * unspecified and let IP figure it out, but we
		 * *cannot* do this, since IP may choose a src addr
		 * that is not part of this association... unless
		 * this sctp has bound to all addrs.  So if the ire
		 * lookup fails, try to find one in our src addr
		 * list, unless the sctp has bound to all addrs, in
		 * which case we change the src addr to unspec.
		 *
		 * Note that if this is a v6 endpoint but it does
		 * not have any v4 address at this point (e.g. may
		 * have been  deleted), sctp_get_valid_addr() will
		 * return mapped INADDR_ANY.  In this case, this
		 * address should be marked not reachable so that
		 * it won't be used to send data.
		 */
		sctp_set_saddr(sctp, fp);
		if (fp->state == SCTP_FADDRS_UNREACH)
			return;
		/* No IRE to cache; skip the RTT and MTU processing below. */
		goto check_current;
	}

	dprint(2, ("ire2faddr: got ire for %x:%x:%x:%x, ",
	    SCTP_PRINTADDR(fp->faddr)));
	if (fp->isv4) {
		dprint(2, ("src = %x\n", ire->ire_src_addr));
	} else {
		dprint(2, ("src=%x:%x:%x:%x\n",
		    SCTP_PRINTADDR(ire->ire_src_addr_v6)));
	}

	/* Make sure the laddr is part of this association */
	if ((sp = sctp_saddr_lookup(sctp, &ire->ire_ipif->ipif_v6lcl_addr,
	    0)) != NULL && !sp->saddr_ipif_dontsrc) {
		if (sp->saddr_ipif_unconfirmed == 1)
			sp->saddr_ipif_unconfirmed = 0;
		fp->saddr = laddr;
	} else {
		dprint(2, ("ire2faddr: src addr is not part of assc\n"));

		/*
		 * Set the src to the first saddr and hope for the best.
		 * Note that we will still do the ire caching below.
		 * Otherwise, whenever we send a packet, we need to do
		 * the ire lookup again and still may not get the correct
		 * source address.  Note that this case should very seldomly
		 * happen.  One scenario this can happen is an app
		 * explicitly bind() to an address.  But that address is
		 * not the preferred source address to send to the peer.
		 */
		sctp_set_saddr(sctp, fp);
		if (fp->state == SCTP_FADDRS_UNREACH) {
			/* Drop the reference taken by the lookup above. */
			IRE_REFRELE(ire);
			return;
		}
	}

	/*
	 * Note that ire_cache_lookup_*() returns an ire with the tracing
	 * bits enabled.  This requires the thread holding the ire also
	 * do the IRE_REFRELE().  Thus we need to do IRE_REFHOLD_NOTR()
	 * and then IRE_REFRELE() the ire here to make the tracing bits
	 * work.
	 */
	IRE_REFHOLD_NOTR(ire);
	IRE_REFRELE(ire);

	/* Cache the IRE */
	fp->ire = ire;
	if (fp->ire->ire_type == IRE_LOOPBACK && !sctp->sctp_loopback)
		sctp->sctp_loopback = 1;

	/*
	 * Pull out RTO information for this faddr and use it if we don't
	 * have any yet.
	 */
	if (fp->srtt == -1 && ire->ire_uinfo.iulp_rtt != 0) {
		/* The cached value is in ms. */
		fp->srtt = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt);
		fp->rttvar = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt_sd);
		fp->rto = 3 * fp->srtt;

		/* Bound the RTO by configured min and max values */
		if (fp->rto < sctp->sctp_rto_min) {
			fp->rto = sctp->sctp_rto_min;
		}
		if (fp->rto > sctp->sctp_rto_max) {
			fp->rto = sctp->sctp_rto_max;
		}
	}

	/*
	 * Record the MTU for this faddr. If the MTU for this faddr has
	 * changed, check if the assc MTU will also change.
	 */
	if (fp->isv4) {
		hdrlen = sctp->sctp_hdr_len;
	} else {
		hdrlen = sctp->sctp_hdr6_len;
	}
	if ((fp->sfa_pmss + hdrlen) != ire->ire_max_frag) {
		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
		fp->sfa_pmss = (ire->ire_max_frag - hdrlen) & ~(SCTP_ALIGN - 1);
		/* Re-seed cwnd when it is below two of the new path MSS. */
		if (fp->cwnd < (fp->sfa_pmss * 2)) {
			SET_CWND(fp, fp->sfa_pmss,
			    sctps->sctps_slow_start_initial);
		}
	}

check_current:
	/*
	 * If fp is the address currently in use, push the (possibly new)
	 * source address and MSS into the association's header template.
	 */
	if (fp == sctp->sctp_current)
		sctp_set_faddr_current(sctp, fp);
}
244 
245 void
246 sctp_update_ire(sctp_t *sctp)
247 {
248 	ire_t		*ire;
249 	sctp_faddr_t	*fp;
250 	sctp_stack_t	*sctps = sctp->sctp_sctps;
251 
252 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
253 		if ((ire = fp->ire) == NULL)
254 			continue;
255 		mutex_enter(&ire->ire_lock);
256 
257 		/*
258 		 * If the cached IRE is going away, there is no point to
259 		 * update it.
260 		 */
261 		if (ire->ire_marks & IRE_MARK_CONDEMNED) {
262 			mutex_exit(&ire->ire_lock);
263 			IRE_REFRELE_NOTR(ire);
264 			fp->ire = NULL;
265 			continue;
266 		}
267 
268 		/*
269 		 * Only record the PMTU for this faddr if we actually have
270 		 * done discovery. This prevents initialized default from
271 		 * clobbering any real info that IP may have.
272 		 */
273 		if (fp->pmtu_discovered) {
274 			if (fp->isv4) {
275 				ire->ire_max_frag = fp->sfa_pmss +
276 				    sctp->sctp_hdr_len;
277 			} else {
278 				ire->ire_max_frag = fp->sfa_pmss +
279 				    sctp->sctp_hdr6_len;
280 			}
281 		}
282 
283 		if (sctps->sctps_rtt_updates != 0 &&
284 		    fp->rtt_updates >= sctps->sctps_rtt_updates) {
285 			/*
286 			 * If there is no old cached values, initialize them
287 			 * conservatively.  Set them to be (1.5 * new value).
288 			 * This code copied from ip_ire_advise().  The cached
289 			 * value is in ms.
290 			 */
291 			if (ire->ire_uinfo.iulp_rtt != 0) {
292 				ire->ire_uinfo.iulp_rtt =
293 				    (ire->ire_uinfo.iulp_rtt +
294 				    TICK_TO_MSEC(fp->srtt)) >> 1;
295 			} else {
296 				ire->ire_uinfo.iulp_rtt =
297 				    TICK_TO_MSEC(fp->srtt + (fp->srtt >> 1));
298 			}
299 			if (ire->ire_uinfo.iulp_rtt_sd != 0) {
300 				ire->ire_uinfo.iulp_rtt_sd =
301 				    (ire->ire_uinfo.iulp_rtt_sd +
302 				    TICK_TO_MSEC(fp->rttvar)) >> 1;
303 			} else {
304 				ire->ire_uinfo.iulp_rtt_sd =
305 				    TICK_TO_MSEC(fp->rttvar +
306 				    (fp->rttvar >> 1));
307 			}
308 			fp->rtt_updates = 0;
309 		}
310 		mutex_exit(&ire->ire_lock);
311 	}
312 }
313 
314 /*
315  * The sender must set the total length in the IP header.
316  * If sendto == NULL, the current will be used.
317  */
318 mblk_t *
319 sctp_make_mp(sctp_t *sctp, sctp_faddr_t *sendto, int trailer)
320 {
321 	mblk_t *mp;
322 	size_t ipsctplen;
323 	int isv4;
324 	sctp_faddr_t *fp;
325 	sctp_stack_t *sctps = sctp->sctp_sctps;
326 
327 	ASSERT(sctp->sctp_current != NULL || sendto != NULL);
328 	if (sendto == NULL) {
329 		fp = sctp->sctp_current;
330 	} else {
331 		fp = sendto;
332 	}
333 	isv4 = fp->isv4;
334 
335 	/* Try to look for another IRE again. */
336 	if (fp->ire == NULL)
337 		sctp_get_ire(sctp, fp);
338 
339 	/* There is no suitable source address to use, return. */
340 	if (fp->state == SCTP_FADDRS_UNREACH)
341 		return (NULL);
342 
343 	if (isv4) {
344 		ipsctplen = sctp->sctp_hdr_len;
345 	} else {
346 		ipsctplen = sctp->sctp_hdr6_len;
347 	}
348 
349 	mp = allocb_cred(ipsctplen + sctps->sctps_wroff_xtra + trailer,
350 	    CONN_CRED(sctp->sctp_connp));
351 	if (mp == NULL) {
352 		ip1dbg(("sctp_make_mp: error making mp..\n"));
353 		return (NULL);
354 	}
355 	mp->b_rptr += sctps->sctps_wroff_xtra;
356 	mp->b_wptr = mp->b_rptr + ipsctplen;
357 
358 	ASSERT(OK_32PTR(mp->b_wptr));
359 
360 	if (isv4) {
361 		ipha_t *iph = (ipha_t *)mp->b_rptr;
362 
363 		bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen);
364 		if (fp != sctp->sctp_current) {
365 			/* fiddle with the dst addr */
366 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
367 			/* fix up src addr */
368 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
369 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
370 				    iph->ipha_src);
371 			} else if (sctp->sctp_bound_to_all) {
372 				iph->ipha_src = INADDR_ANY;
373 			}
374 		}
375 		/* set or clear the don't fragment bit */
376 		if (fp->df) {
377 			iph->ipha_fragment_offset_and_flags = htons(IPH_DF);
378 		} else {
379 			iph->ipha_fragment_offset_and_flags = 0;
380 		}
381 	} else {
382 		bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen);
383 		if (fp != sctp->sctp_current) {
384 			/* fiddle with the dst addr */
385 			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
386 			/* fix up src addr */
387 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
388 				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
389 			} else if (sctp->sctp_bound_to_all) {
390 				bzero(&((ip6_t *)(mp->b_rptr))->ip6_src,
391 				    sizeof (in6_addr_t));
392 			}
393 		}
394 	}
395 	ASSERT(sctp->sctp_connp != NULL);
396 
397 	/*
398 	 * IP will not free this IRE if it is condemned.  SCTP needs to
399 	 * free it.
400 	 */
401 	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
402 		IRE_REFRELE_NOTR(fp->ire);
403 		fp->ire = NULL;
404 	}
405 	/* Stash the conn and ire ptr info. for IP */
406 	SCTP_STASH_IPINFO(mp, fp->ire);
407 
408 	return (mp);
409 }
410 
411 /*
412  * Notify upper layers about preferred write offset, write size.
413  */
414 void
415 sctp_set_ulp_prop(sctp_t *sctp)
416 {
417 	int hdrlen;
418 	sctp_stack_t *sctps = sctp->sctp_sctps;
419 
420 	if (sctp->sctp_current->isv4) {
421 		hdrlen = sctp->sctp_hdr_len;
422 	} else {
423 		hdrlen = sctp->sctp_hdr6_len;
424 	}
425 	ASSERT(sctp->sctp_ulpd);
426 
427 	ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss);
428 	sctp->sctp_ulp_prop(sctp->sctp_ulpd,
429 	    sctps->sctps_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t),
430 	    sctp->sctp_mss - sizeof (sctp_data_hdr_t));
431 }
432 
433 void
434 sctp_set_iplen(sctp_t *sctp, mblk_t *mp)
435 {
436 	uint16_t	sum = 0;
437 	ipha_t		*iph;
438 	ip6_t		*ip6h;
439 	mblk_t		*pmp = mp;
440 	boolean_t	isv4;
441 
442 	isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION);
443 	for (; pmp; pmp = pmp->b_cont)
444 		sum += pmp->b_wptr - pmp->b_rptr;
445 
446 	if (isv4) {
447 		iph = (ipha_t *)mp->b_rptr;
448 		iph->ipha_length = htons(sum);
449 	} else {
450 		ip6h = (ip6_t *)mp->b_rptr;
451 		/*
452 		 * If an ip6i_t is present, the real IPv6 header
453 		 * immediately follows.
454 		 */
455 		if (ip6h->ip6_nxt == IPPROTO_RAW)
456 			ip6h = (ip6_t *)&ip6h[1];
457 		ip6h->ip6_plen = htons(sum - ((char *)&sctp->sctp_ip6h[1] -
458 		    sctp->sctp_iphc6));
459 	}
460 }
461 
462 int
463 sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2)
464 {
465 	int na1 = 0;
466 	int overlap = 0;
467 	int equal = 1;
468 	int onematch;
469 	sctp_faddr_t *fp1, *fp2;
470 
471 	for (fp1 = a1; fp1; fp1 = fp1->next) {
472 		onematch = 0;
473 		for (fp2 = a2; fp2; fp2 = fp2->next) {
474 			if (IN6_ARE_ADDR_EQUAL(&fp1->faddr, &fp2->faddr)) {
475 				overlap++;
476 				onematch = 1;
477 				break;
478 			}
479 			if (!onematch) {
480 				equal = 0;
481 			}
482 		}
483 		na1++;
484 	}
485 
486 	if (equal) {
487 		return (SCTP_ADDR_EQUAL);
488 	}
489 	if (overlap == na1) {
490 		return (SCTP_ADDR_SUBSET);
491 	}
492 	if (overlap) {
493 		return (SCTP_ADDR_OVERLAP);
494 	}
495 	return (SCTP_ADDR_DISJOINT);
496 }
497 
/*
 * Create a peer address (faddr) entry for addr and link it into the
 * association's faddr list.
 *
 * Returns 0 on success, ENOMEM if the faddr structure or its
 * retransmission timer cannot be allocated, or EACCES if the system is
 * labeled and the peer address fails the label checks below.  The sleep
 * argument is passed to the allocators as their allocation flag.  The
 * boolean parameter first decides whether the newly created faddr
 * structure should be added at the beginning of the list or at the end.
 *
 * Note: caller must hold conn fanout lock.
 */
int
sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep, boolean_t first)
{
	sctp_faddr_t	*faddr;
	mblk_t		*timer_mp;

	if (is_system_labeled()) {
		ts_label_t *tsl;
		tsol_tpc_t *rhtp;
		int retv;

		tsl = crgetlabel(CONN_CRED(sctp->sctp_connp));
		ASSERT(tsl != NULL);

		/* find_tpc automatically does the right thing with IPv4 */
		rhtp = find_tpc(addr, IPV6_VERSION, B_FALSE);
		if (rhtp == NULL)
			return (EACCES);

		/* Access is denied unless one of the checks below passes. */
		retv = EACCES;
		if (tsl->tsl_doi == rhtp->tpc_tp.tp_doi) {
			switch (rhtp->tpc_tp.host_type) {
			case UNLABELED:
				/*
				 * Can talk to unlabeled hosts if any of the
				 * following are true:
				 *   1. zone's label matches the remote host's
				 *	default label,
				 *   2. mac_exempt is on and the zone dominates
				 *	the remote host's label, or
				 *   3. mac_exempt is on and the socket is from
				 *	the global zone.
				 */
				if (blequal(&rhtp->tpc_tp.tp_def_label,
				    &tsl->tsl_label) ||
				    (sctp->sctp_mac_exempt &&
				    (sctp->sctp_zoneid == GLOBAL_ZONEID ||
				    bldominates(&tsl->tsl_label,
				    &rhtp->tpc_tp.tp_def_label))))
					retv = 0;
				break;
			case SUN_CIPSO:
				/*
				 * Allowed when our label is within the
				 * remote host's CIPSO range or label set.
				 */
				if (_blinrange(&tsl->tsl_label,
				    &rhtp->tpc_tp.tp_sl_range_cipso) ||
				    blinlset(&tsl->tsl_label,
				    rhtp->tpc_tp.tp_sl_set_cipso))
					retv = 0;
				break;
			}
		}
		TPC_RELE(rhtp);
		if (retv != 0)
			return (retv);
	}

	if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL)
		return (ENOMEM);
	timer_mp = sctp_timer_alloc((sctp), sctp_rexmit_timer, sleep);
	if (timer_mp == NULL) {
		kmem_cache_free(sctp_kmem_faddr_cache, faddr);
		return (ENOMEM);
	}
	/* Point the timer's control block back at the faddr it serves. */
	((sctpt_t *)(timer_mp->b_rptr))->sctpt_faddr = faddr;

	sctp_init_faddr(sctp, faddr, addr, timer_mp);
	ASSERT(faddr->next == NULL);

	if (sctp->sctp_faddrs == NULL) {
		ASSERT(sctp->sctp_lastfaddr == NULL);
		/* only element on list; first and last are same */
		sctp->sctp_faddrs = sctp->sctp_lastfaddr = faddr;
	} else if (first) {
		/* Insert at the head; the tail pointer is unaffected. */
		ASSERT(sctp->sctp_lastfaddr != NULL);
		faddr->next = sctp->sctp_faddrs;
		sctp->sctp_faddrs = faddr;
	} else {
		/* Append after the current tail. */
		sctp->sctp_lastfaddr->next = faddr;
		sctp->sctp_lastfaddr = faddr;
	}
	sctp->sctp_nfaddrs++;

	return (0);
}
589 
590 sctp_faddr_t *
591 sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr)
592 {
593 	sctp_faddr_t *fp;
594 
595 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
596 		if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr))
597 			break;
598 	}
599 
600 	return (fp);
601 }
602 
603 sctp_faddr_t *
604 sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr)
605 {
606 	for (; fp; fp = fp->next) {
607 		if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) {
608 			break;
609 		}
610 	}
611 
612 	return (fp);
613 }
614 
615 /*
616  * To change the currently used peer address to the specified one.
617  */
618 void
619 sctp_set_faddr_current(sctp_t *sctp, sctp_faddr_t *fp)
620 {
621 	/* Now setup the composite header. */
622 	if (fp->isv4) {
623 		IN6_V4MAPPED_TO_IPADDR(&fp->faddr,
624 		    sctp->sctp_ipha->ipha_dst);
625 		IN6_V4MAPPED_TO_IPADDR(&fp->saddr, sctp->sctp_ipha->ipha_src);
626 		/* update don't fragment bit */
627 		if (fp->df) {
628 			sctp->sctp_ipha->ipha_fragment_offset_and_flags =
629 			    htons(IPH_DF);
630 		} else {
631 			sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0;
632 		}
633 	} else {
634 		sctp->sctp_ip6h->ip6_dst = fp->faddr;
635 		sctp->sctp_ip6h->ip6_src = fp->saddr;
636 	}
637 
638 	sctp->sctp_current = fp;
639 	sctp->sctp_mss = fp->sfa_pmss;
640 
641 	/* Update the uppper layer for the change. */
642 	if (!SCTP_IS_DETACHED(sctp))
643 		sctp_set_ulp_prop(sctp);
644 }
645 
646 void
647 sctp_redo_faddr_srcs(sctp_t *sctp)
648 {
649 	sctp_faddr_t *fp;
650 
651 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
652 		sctp_get_ire(sctp, fp);
653 	}
654 }
655 
656 void
657 sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
658 {
659 	int64_t now = lbolt64;
660 
661 	fp->strikes = 0;
662 	sctp->sctp_strikes = 0;
663 	fp->lastactive = now;
664 	fp->hb_expiry = now + SET_HB_INTVL(fp);
665 	fp->hb_pending = B_FALSE;
666 	if (fp->state != SCTP_FADDRS_ALIVE) {
667 		fp->state = SCTP_FADDRS_ALIVE;
668 		sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0);
669 
670 		/*
671 		 * If this is the primary, switch back to it now.  And
672 		 * we probably want to reset the source addr used to reach
673 		 * it.
674 		 */
675 		if (fp == sctp->sctp_primary) {
676 			sctp_set_faddr_current(sctp, fp);
677 			sctp_get_ire(sctp, fp);
678 			return;
679 		}
680 	}
681 	if (fp->ire == NULL) {
682 		/* Should have a full IRE now */
683 		sctp_get_ire(sctp, fp);
684 	}
685 }
686 
687 int
688 sctp_is_a_faddr_clean(sctp_t *sctp)
689 {
690 	sctp_faddr_t *fp;
691 
692 	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
693 		if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) {
694 			return (1);
695 		}
696 	}
697 
698 	return (0);
699 }
700 
701 /*
702  * Returns 0 if there is at leave one other active faddr, -1 if there
703  * are none. If there are none left, faddr_dead() will start killing the
704  * association.
705  * If the downed faddr was the current faddr, a new current faddr
706  * will be chosen.
707  */
708 int
709 sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate)
710 {
711 	sctp_faddr_t *ofp;
712 	sctp_stack_t *sctps = sctp->sctp_sctps;
713 
714 	if (fp->state == SCTP_FADDRS_ALIVE) {
715 		sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_UNREACHABLE, 0);
716 	}
717 	fp->state = newstate;
718 
719 	dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n",
720 	    SCTP_PRINTADDR(fp->faddr), newstate));
721 
722 	if (fp == sctp->sctp_current) {
723 		/* Current faddr down; need to switch it */
724 		sctp->sctp_current = NULL;
725 	}
726 
727 	/* Find next alive faddr */
728 	ofp = fp;
729 	for (fp = fp->next; fp != NULL; fp = fp->next) {
730 		if (fp->state == SCTP_FADDRS_ALIVE) {
731 			break;
732 		}
733 	}
734 
735 	if (fp == NULL) {
736 		/* Continue from beginning of list */
737 		for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->next) {
738 			if (fp->state == SCTP_FADDRS_ALIVE) {
739 				break;
740 			}
741 		}
742 	}
743 
744 	/*
745 	 * Find a new fp, so if the current faddr is dead, use the new fp
746 	 * as the current one.
747 	 */
748 	if (fp != ofp) {
749 		if (sctp->sctp_current == NULL) {
750 			dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n",
751 			    SCTP_PRINTADDR(fp->faddr)));
752 			/*
753 			 * Note that we don't need to reset the source addr
754 			 * of the new fp.
755 			 */
756 			sctp_set_faddr_current(sctp, fp);
757 		}
758 		return (0);
759 	}
760 
761 
762 	/* All faddrs are down; kill the association */
763 	dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n"));
764 	BUMP_MIB(&sctps->sctps_mib, sctpAborted);
765 	sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ?
766 	    SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL);
767 	sctp_clean_death(sctp, sctp->sctp_client_errno ?
768 	    sctp->sctp_client_errno : ETIMEDOUT);
769 
770 	return (-1);
771 }
772 
773 sctp_faddr_t *
774 sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp)
775 {
776 	sctp_faddr_t *nfp = NULL;
777 
778 	if (ofp == NULL) {
779 		ofp = sctp->sctp_current;
780 	}
781 
782 	/* Find the next live one */
783 	for (nfp = ofp->next; nfp != NULL; nfp = nfp->next) {
784 		if (nfp->state == SCTP_FADDRS_ALIVE) {
785 			break;
786 		}
787 	}
788 
789 	if (nfp == NULL) {
790 		/* Continue from beginning of list */
791 		for (nfp = sctp->sctp_faddrs; nfp != ofp; nfp = nfp->next) {
792 			if (nfp->state == SCTP_FADDRS_ALIVE) {
793 				break;
794 			}
795 		}
796 	}
797 
798 	/*
799 	 * nfp could only be NULL if all faddrs are down, and when
800 	 * this happens, faddr_dead() should have killed the
801 	 * association. Hence this assertion...
802 	 */
803 	ASSERT(nfp != NULL);
804 	return (nfp);
805 }
806 
807 void
808 sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp)
809 {
810 	sctp_faddr_t *fpp;
811 
812 	if (!sctp->sctp_faddrs) {
813 		return;
814 	}
815 
816 	if (fp->timer_mp != NULL) {
817 		sctp_timer_free(fp->timer_mp);
818 		fp->timer_mp = NULL;
819 		fp->timer_running = 0;
820 	}
821 	if (fp->rc_timer_mp != NULL) {
822 		sctp_timer_free(fp->rc_timer_mp);
823 		fp->rc_timer_mp = NULL;
824 		fp->rc_timer_running = 0;
825 	}
826 	if (fp->ire != NULL) {
827 		IRE_REFRELE_NOTR(fp->ire);
828 		fp->ire = NULL;
829 	}
830 
831 	if (fp == sctp->sctp_faddrs) {
832 		goto gotit;
833 	}
834 
835 	for (fpp = sctp->sctp_faddrs; fpp->next != fp; fpp = fpp->next)
836 		;
837 
838 gotit:
839 	ASSERT(sctp->sctp_conn_tfp != NULL);
840 	mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
841 	if (fp == sctp->sctp_faddrs) {
842 		sctp->sctp_faddrs = fp->next;
843 	} else {
844 		fpp->next = fp->next;
845 	}
846 	mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
847 	/* XXX faddr2ire? */
848 	kmem_cache_free(sctp_kmem_faddr_cache, fp);
849 	sctp->sctp_nfaddrs--;
850 }
851 
852 void
853 sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock)
854 {
855 	sctp_faddr_t *fp, *fpn;
856 
857 	if (sctp->sctp_faddrs == NULL) {
858 		ASSERT(sctp->sctp_lastfaddr == NULL);
859 		return;
860 	}
861 
862 	ASSERT(sctp->sctp_lastfaddr != NULL);
863 	sctp->sctp_lastfaddr = NULL;
864 	sctp->sctp_current = NULL;
865 	sctp->sctp_primary = NULL;
866 
867 	sctp_free_faddr_timers(sctp);
868 
869 	if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
870 		/* in conn fanout; need to hold lock */
871 		mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
872 	}
873 
874 	for (fp = sctp->sctp_faddrs; fp; fp = fpn) {
875 		fpn = fp->next;
876 		if (fp->ire != NULL)
877 			IRE_REFRELE_NOTR(fp->ire);
878 		kmem_cache_free(sctp_kmem_faddr_cache, fp);
879 		sctp->sctp_nfaddrs--;
880 	}
881 
882 	sctp->sctp_faddrs = NULL;
883 	ASSERT(sctp->sctp_nfaddrs == 0);
884 	if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
885 		mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
886 	}
887 
888 }
889 
890 void
891 sctp_zap_addrs(sctp_t *sctp)
892 {
893 	sctp_zap_faddrs(sctp, 0);
894 	sctp_free_saddrs(sctp);
895 }
896 
897 /*
898  * Initialize the IPv4 header. Loses any record of any IP options.
899  */
900 int
901 sctp_header_init_ipv4(sctp_t *sctp, int sleep)
902 {
903 	sctp_hdr_t	*sctph;
904 	sctp_stack_t	*sctps = sctp->sctp_sctps;
905 
906 	/*
907 	 * This is a simple initialization. If there's
908 	 * already a template, it should never be too small,
909 	 * so reuse it.  Otherwise, allocate space for the new one.
910 	 */
911 	if (sctp->sctp_iphc != NULL) {
912 		ASSERT(sctp->sctp_iphc_len >= SCTP_MAX_COMBINED_HEADER_LENGTH);
913 		bzero(sctp->sctp_iphc, sctp->sctp_iphc_len);
914 	} else {
915 		sctp->sctp_iphc_len = SCTP_MAX_COMBINED_HEADER_LENGTH;
916 		sctp->sctp_iphc = kmem_zalloc(sctp->sctp_iphc_len, sleep);
917 		if (sctp->sctp_iphc == NULL) {
918 			sctp->sctp_iphc_len = 0;
919 			return (ENOMEM);
920 		}
921 	}
922 
923 	sctp->sctp_ipha = (ipha_t *)sctp->sctp_iphc;
924 
925 	sctp->sctp_hdr_len = sizeof (ipha_t) + sizeof (sctp_hdr_t);
926 	sctp->sctp_ip_hdr_len = sizeof (ipha_t);
927 	sctp->sctp_ipha->ipha_length = htons(sizeof (ipha_t) +
928 	    sizeof (sctp_hdr_t));
929 	sctp->sctp_ipha->ipha_version_and_hdr_length =
930 	    (IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS;
931 
932 	/*
933 	 * These two fields should be zero, and are already set above.
934 	 *
935 	 * sctp->sctp_ipha->ipha_ident,
936 	 * sctp->sctp_ipha->ipha_fragment_offset_and_flags.
937 	 */
938 
939 	sctp->sctp_ipha->ipha_ttl = sctps->sctps_ipv4_ttl;
940 	sctp->sctp_ipha->ipha_protocol = IPPROTO_SCTP;
941 
942 	sctph = (sctp_hdr_t *)(sctp->sctp_iphc + sizeof (ipha_t));
943 	sctp->sctp_sctph = sctph;
944 
945 	return (0);
946 }
947 
/*
 * Update sctp_sticky_hdrs based on sctp_sticky_ipp.
 * The headers include ip6i_t (if needed), ip6_t, any sticky extension
 * headers, and the maximum size sctp header (to avoid reallocation
 * on the fly for additional sctp options).
 *
 * The existing SCTP common header and the IPv6 source and destination
 * addresses are carried over into the rebuilt template.
 *
 * Returns 0 on success, or ENOMEM if a larger template buffer is needed
 * and cannot be allocated (the allocation does not sleep).
 */
int
sctp_build_hdrs(sctp_t *sctp)
{
	char		*hdrs;
	uint_t		hdrs_len;
	ip6i_t		*ip6i;
	char		buf[SCTP_MAX_HDR_LENGTH];
	ip6_pkt_t	*ipp = &sctp->sctp_sticky_ipp;
	in6_addr_t	src;
	in6_addr_t	dst;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/*
	 * save the existing sctp header and source/dest IP addresses
	 */
	bcopy(sctp->sctp_sctph6, buf, sizeof (sctp_hdr_t));
	src = sctp->sctp_ip6h->ip6_src;
	dst = sctp->sctp_ip6h->ip6_dst;
	hdrs_len = ip_total_hdrs_len_v6(ipp) + SCTP_MAX_HDR_LENGTH;
	ASSERT(hdrs_len != 0);
	if (hdrs_len > sctp->sctp_iphc6_len) {
		/* Need to reallocate */
		hdrs = kmem_zalloc(hdrs_len, KM_NOSLEEP);
		if (hdrs == NULL)
			return (ENOMEM);

		/* The old contents were saved above; drop the old buffer. */
		if (sctp->sctp_iphc6_len != 0)
			kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
		sctp->sctp_iphc6 = hdrs;
		sctp->sctp_iphc6_len = hdrs_len;
	}
	ip_build_hdrs_v6((uchar_t *)sctp->sctp_iphc6,
	    hdrs_len - SCTP_MAX_HDR_LENGTH, ipp, IPPROTO_SCTP);

	/* Set header fields not in ipp */
	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
		/* The ip6_t follows the leading ip6i_t. */
		ip6i = (ip6i_t *)sctp->sctp_iphc6;
		sctp->sctp_ip6h = (ip6_t *)&ip6i[1];
	} else {
		sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6;
	}
	/*
	 * sctp->sctp_ip_hdr6_len covers everything in front of the sctp
	 * header, so it will include ip6i_t if there is one.
	 */
	sctp->sctp_ip_hdr6_len = hdrs_len - SCTP_MAX_HDR_LENGTH;
	sctp->sctp_sctph6 = (sctp_hdr_t *)(sctp->sctp_iphc6 +
	    sctp->sctp_ip_hdr6_len);
	sctp->sctp_hdr6_len = sctp->sctp_ip_hdr6_len + sizeof (sctp_hdr_t);

	/* Restore the sctp header saved at the top of the function. */
	bcopy(buf, sctp->sctp_sctph6, sizeof (sctp_hdr_t));

	sctp->sctp_ip6h->ip6_src = src;
	sctp->sctp_ip6h->ip6_dst = dst;
	/*
	 * If the hoplimit was not set by ip_build_hdrs_v6(), we need to
	 * set it to the default value for SCTP.
	 */
	if (!(ipp->ipp_fields & IPPF_UNICAST_HOPS))
		sctp->sctp_ip6h->ip6_hops = sctps->sctps_ipv6_hoplimit;
	/*
	 * If we're setting extension headers after a connection
	 * has been established, and if we have a routing header
	 * among the extension headers, call ip_massage_options_v6 to
	 * manipulate the routing header/ip6_dst set the checksum
	 * difference in the sctp header template.
	 * (This happens in sctp_connect_ipv6 if the routing header
	 * is set prior to the connect.)
	 */

	if ((sctp->sctp_state >= SCTPS_COOKIE_WAIT) &&
	    (sctp->sctp_sticky_ipp.ipp_fields & IPPF_RTHDR)) {
		ip6_rthdr_t *rth;

		rth = ip_find_rthdr_v6(sctp->sctp_ip6h,
		    (uint8_t *)sctp->sctp_sctph6);
		if (rth != NULL) {
			(void) ip_massage_options_v6(sctp->sctp_ip6h, rth,
			    sctps->sctps_netstack);
		}
	}
	return (0);
}
1037 
1038 /*
1039  * Initialize the IPv6 header. Loses any record of any IPv6 extension headers.
1040  */
1041 int
1042 sctp_header_init_ipv6(sctp_t *sctp, int sleep)
1043 {
1044 	sctp_hdr_t	*sctph;
1045 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1046 
1047 	/*
1048 	 * This is a simple initialization. If there's
1049 	 * already a template, it should never be too small,
1050 	 * so reuse it. Otherwise, allocate space for the new one.
1051 	 * Ensure that there is enough space to "downgrade" the sctp_t
1052 	 * to an IPv4 sctp_t. This requires having space for a full load
1053 	 * of IPv4 options
1054 	 */
1055 	if (sctp->sctp_iphc6 != NULL) {
1056 		ASSERT(sctp->sctp_iphc6_len >=
1057 		    SCTP_MAX_COMBINED_HEADER_LENGTH);
1058 		bzero(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
1059 	} else {
1060 		sctp->sctp_iphc6_len = SCTP_MAX_COMBINED_HEADER_LENGTH;
1061 		sctp->sctp_iphc6 = kmem_zalloc(sctp->sctp_iphc_len, sleep);
1062 		if (sctp->sctp_iphc6 == NULL) {
1063 			sctp->sctp_iphc6_len = 0;
1064 			return (ENOMEM);
1065 		}
1066 	}
1067 	sctp->sctp_hdr6_len = IPV6_HDR_LEN + sizeof (sctp_hdr_t);
1068 	sctp->sctp_ip_hdr6_len = IPV6_HDR_LEN;
1069 	sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6;
1070 
1071 	/* Initialize the header template */
1072 
1073 	sctp->sctp_ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1074 	sctp->sctp_ip6h->ip6_plen = ntohs(sizeof (sctp_hdr_t));
1075 	sctp->sctp_ip6h->ip6_nxt = IPPROTO_SCTP;
1076 	sctp->sctp_ip6h->ip6_hops = sctps->sctps_ipv6_hoplimit;
1077 
1078 	sctph = (sctp_hdr_t *)(sctp->sctp_iphc6 + IPV6_HDR_LEN);
1079 	sctp->sctp_sctph6 = sctph;
1080 
1081 	return (0);
1082 }
1083 
1084 static int
1085 sctp_v4_label(sctp_t *sctp)
1086 {
1087 	uchar_t optbuf[IP_MAX_OPT_LENGTH];
1088 	const cred_t *cr = CONN_CRED(sctp->sctp_connp);
1089 	int added;
1090 
1091 	if (tsol_compute_label(cr, sctp->sctp_ipha->ipha_dst, optbuf,
1092 	    sctp->sctp_mac_exempt,
1093 	    sctp->sctp_sctps->sctps_netstack->netstack_ip) != 0)
1094 		return (EACCES);
1095 
1096 	added = tsol_remove_secopt(sctp->sctp_ipha, sctp->sctp_hdr_len);
1097 	if (added == -1)
1098 		return (EACCES);
1099 	sctp->sctp_hdr_len += added;
1100 	sctp->sctp_sctph = (sctp_hdr_t *)((uchar_t *)sctp->sctp_sctph + added);
1101 	sctp->sctp_ip_hdr_len += added;
1102 	if ((sctp->sctp_v4label_len = optbuf[IPOPT_OLEN]) != 0) {
1103 		sctp->sctp_v4label_len = (sctp->sctp_v4label_len + 3) & ~3;
1104 		added = tsol_prepend_option(optbuf, sctp->sctp_ipha,
1105 		    sctp->sctp_hdr_len);
1106 		if (added == -1)
1107 			return (EACCES);
1108 		sctp->sctp_hdr_len += added;
1109 		sctp->sctp_sctph = (sctp_hdr_t *)((uchar_t *)sctp->sctp_sctph +
1110 		    added);
1111 		sctp->sctp_ip_hdr_len += added;
1112 	}
1113 	return (0);
1114 }
1115 
1116 static int
1117 sctp_v6_label(sctp_t *sctp)
1118 {
1119 	uchar_t optbuf[TSOL_MAX_IPV6_OPTION];
1120 	const cred_t *cr = CONN_CRED(sctp->sctp_connp);
1121 
1122 	if (tsol_compute_label_v6(cr, &sctp->sctp_ip6h->ip6_dst, optbuf,
1123 	    sctp->sctp_mac_exempt,
1124 	    sctp->sctp_sctps->sctps_netstack->netstack_ip) != 0)
1125 		return (EACCES);
1126 	if (tsol_update_sticky(&sctp->sctp_sticky_ipp, &sctp->sctp_v6label_len,
1127 	    optbuf) != 0)
1128 		return (EACCES);
1129 	if (sctp_build_hdrs(sctp) != 0)
1130 		return (EACCES);
1131 	return (0);
1132 }
1133 
1134 /*
1135  * XXX implement more sophisticated logic
1136  */
1137 int
1138 sctp_set_hdraddrs(sctp_t *sctp)
1139 {
1140 	sctp_faddr_t *fp;
1141 	int gotv4 = 0;
1142 	int gotv6 = 0;
1143 
1144 	ASSERT(sctp->sctp_faddrs != NULL);
1145 	ASSERT(sctp->sctp_nsaddrs > 0);
1146 
1147 	/* Set up using the primary first */
1148 	if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->faddr)) {
1149 		IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->faddr,
1150 		    sctp->sctp_ipha->ipha_dst);
1151 		/* saddr may be unspec; make_mp() will handle this */
1152 		IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->saddr,
1153 		    sctp->sctp_ipha->ipha_src);
1154 		if (!is_system_labeled() || sctp_v4_label(sctp) == 0) {
1155 			gotv4 = 1;
1156 			if (sctp->sctp_ipversion == IPV4_VERSION) {
1157 				goto copyports;
1158 			}
1159 		}
1160 	} else {
1161 		sctp->sctp_ip6h->ip6_dst = sctp->sctp_primary->faddr;
1162 		/* saddr may be unspec; make_mp() will handle this */
1163 		sctp->sctp_ip6h->ip6_src = sctp->sctp_primary->saddr;
1164 		if (!is_system_labeled() || sctp_v6_label(sctp) == 0)
1165 			gotv6 = 1;
1166 	}
1167 
1168 	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
1169 		if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
1170 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr,
1171 			    sctp->sctp_ipha->ipha_dst);
1172 			/* copy in the faddr_t's saddr */
1173 			IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
1174 			    sctp->sctp_ipha->ipha_src);
1175 			if (!is_system_labeled() || sctp_v4_label(sctp) == 0) {
1176 				gotv4 = 1;
1177 				if (sctp->sctp_ipversion == IPV4_VERSION ||
1178 				    gotv6) {
1179 					break;
1180 				}
1181 			}
1182 		} else if (!gotv6 && !IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
1183 			sctp->sctp_ip6h->ip6_dst = fp->faddr;
1184 			/* copy in the faddr_t's saddr */
1185 			sctp->sctp_ip6h->ip6_src = fp->saddr;
1186 			if (!is_system_labeled() || sctp_v6_label(sctp) == 0) {
1187 				gotv6 = 1;
1188 				if (gotv4)
1189 					break;
1190 			}
1191 		}
1192 	}
1193 
1194 copyports:
1195 	if (!gotv4 && !gotv6)
1196 		return (EACCES);
1197 
1198 	/* copy in the ports for good measure */
1199 	sctp->sctp_sctph->sh_sport = sctp->sctp_lport;
1200 	sctp->sctp_sctph->sh_dport = sctp->sctp_fport;
1201 
1202 	sctp->sctp_sctph6->sh_sport = sctp->sctp_lport;
1203 	sctp->sctp_sctph6->sh_dport = sctp->sctp_fport;
1204 	return (0);
1205 }
1206 
1207 void
1208 sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp)
1209 {
1210 	mblk_t *mp;
1211 	sctp_parm_hdr_t *ph;
1212 	size_t len;
1213 	int pad;
1214 
1215 	len = sizeof (*ph) + ntohs(uph->sph_len);
1216 	if ((pad = len % 4) != 0) {
1217 		pad = 4 - pad;
1218 		len += pad;
1219 	}
1220 	mp = allocb(len, BPRI_MED);
1221 	if (mp == NULL) {
1222 		return;
1223 	}
1224 
1225 	ph = (sctp_parm_hdr_t *)(mp->b_rptr);
1226 	ph->sph_type = htons(PARM_UNRECOGNIZED);
1227 	ph->sph_len = htons(len - pad);
1228 
1229 	/* copy in the unrecognized parameter */
1230 	bcopy(uph, ph + 1, ntohs(uph->sph_len));
1231 
1232 	mp->b_wptr = mp->b_rptr + len;
1233 	if (*errmp != NULL) {
1234 		linkb(*errmp, mp);
1235 	} else {
1236 		*errmp = mp;
1237 	}
1238 }
1239 
1240 /*
1241  * o Bounds checking
1242  * o Updates remaining
1243  * o Checks alignment
1244  */
1245 sctp_parm_hdr_t *
1246 sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining)
1247 {
1248 	int pad;
1249 	uint16_t len;
1250 
1251 	len = ntohs(current->sph_len);
1252 	*remaining -= len;
1253 	if (*remaining < sizeof (*current) || len < sizeof (*current)) {
1254 		return (NULL);
1255 	}
1256 	if ((pad = len & (SCTP_ALIGN - 1)) != 0) {
1257 		pad = SCTP_ALIGN - pad;
1258 		*remaining -= pad;
1259 	}
1260 	/*LINTED pointer cast may result in improper alignment*/
1261 	current = (sctp_parm_hdr_t *)((char *)current + len + pad);
1262 	return (current);
1263 }
1264 
1265 /*
1266  * Sets the address parameters given in the INIT chunk into sctp's
1267  * faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are
1268  * no address parameters in the INIT chunk, a single faddr is created
1269  * from the ip hdr at the beginning of pkt.
1270  * If there already are existing addresses hanging from sctp, merge
1271  * them in, if the old info contains addresses which are not present
1272  * in this new info, get rid of them, and clean the pointers if there's
1273  * messages which have this as their target address.
1274  *
1275  * We also re-adjust the source address list here since the list may
1276  * contain more than what is actually part of the association. If
1277  * we get here from sctp_send_cookie_echo(), we are on the active
1278  * side and psctp will be NULL and ich will be the INIT-ACK chunk.
1279  * If we get here from sctp_accept_comm(), ich will be the INIT chunk
1280  * and psctp will the listening endpoint.
1281  *
1282  * INIT processing: When processing the INIT we inherit the src address
1283  * list from the listener. For a loopback or linklocal association, we
1284  * delete the list and just take the address from the IP header (since
1285  * that's how we created the INIT-ACK). Additionally, for loopback we
1286  * ignore the address params in the INIT. For determining which address
1287  * types were sent in the INIT-ACK we follow the same logic as in
1288  * creating the INIT-ACK. We delete addresses of the type that are not
1289  * supported by the peer.
1290  *
1291  * INIT-ACK processing: When processing the INIT-ACK since we had not
1292  * included addr params for loopback or linklocal addresses when creating
1293  * the INIT, we just use the address from the IP header. Further, for
1294  * loopback we ignore the addr param list. We mark addresses of the
1295  * type not supported by the peer as unconfirmed.
1296  *
1297  * In case of INIT processing we look for supported address types in the
1298  * supported address param, if present. In both cases the address type in
1299  * the IP header is supported as well as types for addresses in the param
1300  * list, if any.
1301  *
1302  * Once we have the supported address types sctp_check_saddr() runs through
1303  * the source address list and deletes or marks as unconfirmed address of
1304  * types not supported by the peer.
1305  *
1306  * Returns 0 on success, sys errno on failure
1307  */
1308 int
1309 sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt,
1310     sctp_chunk_hdr_t *ich, uint_t *sctp_options)
1311 {
1312 	sctp_init_chunk_t	*init;
1313 	ipha_t			*iph;
1314 	ip6_t			*ip6h;
1315 	in6_addr_t		hdrsaddr[1];
1316 	in6_addr_t		hdrdaddr[1];
1317 	sctp_parm_hdr_t		*ph;
1318 	ssize_t			remaining;
1319 	int			isv4;
1320 	int			err;
1321 	sctp_faddr_t		*fp;
1322 	int			supp_af = 0;
1323 	boolean_t		check_saddr = B_TRUE;
1324 	in6_addr_t		curaddr;
1325 	sctp_stack_t		*sctps = sctp->sctp_sctps;
1326 
1327 	if (sctp_options != NULL)
1328 		*sctp_options = 0;
1329 
1330 	/* extract the address from the IP header */
1331 	isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
1332 	if (isv4) {
1333 		iph = (ipha_t *)pkt->b_rptr;
1334 		IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr);
1335 		IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr);
1336 		supp_af |= PARM_SUPP_V4;
1337 	} else {
1338 		ip6h = (ip6_t *)pkt->b_rptr;
1339 		hdrsaddr[0] = ip6h->ip6_src;
1340 		hdrdaddr[0] = ip6h->ip6_dst;
1341 		supp_af |= PARM_SUPP_V6;
1342 	}
1343 
1344 	/*
1345 	 * Unfortunately, we can't delay this because adding an faddr
1346 	 * looks for the presence of the source address (from the ire
1347 	 * for the faddr) in the source address list. We could have
1348 	 * delayed this if, say, this was a loopback/linklocal connection.
1349 	 * Now, we just end up nuking this list and taking the addr from
1350 	 * the IP header for loopback/linklocal.
1351 	 */
1352 	if (psctp != NULL && psctp->sctp_nsaddrs > 0) {
1353 		ASSERT(sctp->sctp_nsaddrs == 0);
1354 
1355 		err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP);
1356 		if (err != 0)
1357 			return (err);
1358 	}
1359 	/*
1360 	 * We will add the faddr before parsing the address list as this
1361 	 * might be a loopback connection and we would not have to
1362 	 * go through the list.
1363 	 *
1364 	 * Make sure the header's addr is in the list
1365 	 */
1366 	fp = sctp_lookup_faddr(sctp, hdrsaddr);
1367 	if (fp == NULL) {
1368 		/* not included; add it now */
1369 		err = sctp_add_faddr(sctp, hdrsaddr, KM_NOSLEEP, B_TRUE);
1370 		if (err != 0)
1371 			return (err);
1372 
1373 		/* sctp_faddrs will be the hdr addr */
1374 		fp = sctp->sctp_faddrs;
1375 	}
1376 	/* make the header addr the primary */
1377 
1378 	if (cl_sctp_assoc_change != NULL && psctp == NULL)
1379 		curaddr = sctp->sctp_current->faddr;
1380 
1381 	sctp->sctp_primary = fp;
1382 	sctp->sctp_current = fp;
1383 	sctp->sctp_mss = fp->sfa_pmss;
1384 
1385 	/* For loopback connections & linklocal get address from the header */
1386 	if (sctp->sctp_loopback || sctp->sctp_linklocal) {
1387 		if (sctp->sctp_nsaddrs != 0)
1388 			sctp_free_saddrs(sctp);
1389 		if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0)
1390 			return (err);
1391 		/* For loopback ignore address list */
1392 		if (sctp->sctp_loopback)
1393 			return (0);
1394 		check_saddr = B_FALSE;
1395 	}
1396 
1397 	/* Walk the params in the INIT [ACK], pulling out addr params */
1398 	remaining = ntohs(ich->sch_len) - sizeof (*ich) -
1399 	    sizeof (sctp_init_chunk_t);
1400 	if (remaining < sizeof (*ph)) {
1401 		if (check_saddr) {
1402 			sctp_check_saddr(sctp, supp_af, psctp == NULL ?
1403 			    B_FALSE : B_TRUE);
1404 		}
1405 		ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL);
1406 		return (0);
1407 	}
1408 
1409 	init = (sctp_init_chunk_t *)(ich + 1);
1410 	ph = (sctp_parm_hdr_t *)(init + 1);
1411 
1412 	/* params will have already been byteordered when validating */
1413 	while (ph != NULL) {
1414 		if (ph->sph_type == htons(PARM_SUPP_ADDRS)) {
1415 			int		plen;
1416 			uint16_t	*p;
1417 			uint16_t	addrtype;
1418 
1419 			ASSERT(psctp != NULL);
1420 			plen = ntohs(ph->sph_len);
1421 			p = (uint16_t *)(ph + 1);
1422 			while (plen > 0) {
1423 				addrtype = ntohs(*p);
1424 				switch (addrtype) {
1425 					case PARM_ADDR6:
1426 						supp_af |= PARM_SUPP_V6;
1427 						break;
1428 					case PARM_ADDR4:
1429 						supp_af |= PARM_SUPP_V4;
1430 						break;
1431 					default:
1432 						break;
1433 				}
1434 				p++;
1435 				plen -= sizeof (*p);
1436 			}
1437 		} else if (ph->sph_type == htons(PARM_ADDR4)) {
1438 			if (remaining >= PARM_ADDR4_LEN) {
1439 				in6_addr_t addr;
1440 				ipaddr_t ta;
1441 
1442 				supp_af |= PARM_SUPP_V4;
1443 				/*
1444 				 * Screen out broad/multicasts & loopback.
1445 				 * If the endpoint only accepts v6 address,
1446 				 * go to the next one.
1447 				 */
1448 				bcopy(ph + 1, &ta, sizeof (ta));
1449 				if (ta == 0 ||
1450 				    ta == INADDR_BROADCAST ||
1451 				    ta == htonl(INADDR_LOOPBACK) ||
1452 				    IN_MULTICAST(ta) ||
1453 				    sctp->sctp_connp->conn_ipv6_v6only) {
1454 					goto next;
1455 				}
1456 				/*
1457 				 * XXX also need to check for subnet
1458 				 * broadcasts. This should probably
1459 				 * wait until we have full access
1460 				 * to the ILL tables.
1461 				 */
1462 
1463 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
1464 				    (ph + 1), &addr);
1465 				/* Check for duplicate. */
1466 				if (sctp_lookup_faddr(sctp, &addr) != NULL)
1467 					goto next;
1468 
1469 				/* OK, add it to the faddr set */
1470 				err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP,
1471 				    B_FALSE);
1472 				if (err != 0)
1473 					return (err);
1474 			}
1475 		} else if (ph->sph_type == htons(PARM_ADDR6) &&
1476 		    sctp->sctp_family == AF_INET6) {
1477 			/* An v4 socket should not take v6 addresses. */
1478 			if (remaining >= PARM_ADDR6_LEN) {
1479 				in6_addr_t *addr6;
1480 
1481 				supp_af |= PARM_SUPP_V6;
1482 				addr6 = (in6_addr_t *)(ph + 1);
1483 				/*
1484 				 * Screen out link locals, mcast, loopback
1485 				 * and bogus v6 address.
1486 				 */
1487 				if (IN6_IS_ADDR_LINKLOCAL(addr6) ||
1488 				    IN6_IS_ADDR_MULTICAST(addr6) ||
1489 				    IN6_IS_ADDR_LOOPBACK(addr6) ||
1490 				    IN6_IS_ADDR_V4MAPPED(addr6)) {
1491 					goto next;
1492 				}
1493 				/* Check for duplicate. */
1494 				if (sctp_lookup_faddr(sctp, addr6) != NULL)
1495 					goto next;
1496 
1497 				err = sctp_add_faddr(sctp,
1498 				    (in6_addr_t *)(ph + 1), KM_NOSLEEP,
1499 				    B_FALSE);
1500 				if (err != 0)
1501 					return (err);
1502 			}
1503 		} else if (ph->sph_type == htons(PARM_FORWARD_TSN)) {
1504 			if (sctp_options != NULL)
1505 				*sctp_options |= SCTP_PRSCTP_OPTION;
1506 		} /* else; skip */
1507 
1508 next:
1509 		ph = sctp_next_parm(ph, &remaining);
1510 	}
1511 	if (check_saddr) {
1512 		sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE :
1513 		    B_TRUE);
1514 	}
1515 	ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL);
1516 	/*
1517 	 * We have the right address list now, update clustering's
1518 	 * knowledge because when we sent the INIT we had just added
1519 	 * the address the INIT was sent to.
1520 	 */
1521 	if (psctp == NULL && cl_sctp_assoc_change != NULL) {
1522 		uchar_t	*alist;
1523 		size_t	asize;
1524 		uchar_t	*dlist;
1525 		size_t	dsize;
1526 
1527 		asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
1528 		alist = kmem_alloc(asize, KM_NOSLEEP);
1529 		if (alist == NULL) {
1530 			SCTP_KSTAT(sctps, sctp_cl_assoc_change);
1531 			return (ENOMEM);
1532 		}
1533 		/*
1534 		 * Just include the address the INIT was sent to in the
1535 		 * delete list and send the entire faddr list. We could
1536 		 * do it differently (i.e include all the addresses in the
1537 		 * add list even if it contains the original address OR
1538 		 * remove the original address from the add list etc.), but
1539 		 * this seems reasonable enough.
1540 		 */
1541 		dsize = sizeof (in6_addr_t);
1542 		dlist = kmem_alloc(dsize, KM_NOSLEEP);
1543 		if (dlist == NULL) {
1544 			kmem_free(alist, asize);
1545 			SCTP_KSTAT(sctps, sctp_cl_assoc_change);
1546 			return (ENOMEM);
1547 		}
1548 		bcopy(&curaddr, dlist, sizeof (curaddr));
1549 		sctp_get_faddr_list(sctp, alist, asize);
1550 		(*cl_sctp_assoc_change)(sctp->sctp_family, alist, asize,
1551 		    sctp->sctp_nfaddrs, dlist, dsize, 1, SCTP_CL_PADDR,
1552 		    (cl_sctp_handle_t)sctp);
1553 		/* alist and dlist will be freed by the clustering module */
1554 	}
1555 	return (0);
1556 }
1557 
1558 /*
1559  * Returns 0 if the check failed and the restart should be refused,
1560  * 1 if the check succeeded.
1561  */
1562 int
1563 sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports,
1564     int sleep, sctp_stack_t *sctps)
1565 {
1566 	sctp_faddr_t *fp, *fpa, *fphead = NULL;
1567 	sctp_parm_hdr_t *ph;
1568 	ssize_t remaining;
1569 	int isv4;
1570 	ipha_t *iph;
1571 	ip6_t *ip6h;
1572 	in6_addr_t hdraddr[1];
1573 	int retval = 0;
1574 	sctp_tf_t *tf;
1575 	sctp_t *sctp;
1576 	int compres;
1577 	sctp_init_chunk_t *init;
1578 	int nadded = 0;
1579 
1580 	/* extract the address from the IP header */
1581 	isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
1582 	if (isv4) {
1583 		iph = (ipha_t *)pkt->b_rptr;
1584 		IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr);
1585 	} else {
1586 		ip6h = (ip6_t *)pkt->b_rptr;
1587 		hdraddr[0] = ip6h->ip6_src;
1588 	}
1589 
1590 	/* Walk the params in the INIT [ACK], pulling out addr params */
1591 	remaining = ntohs(ich->sch_len) - sizeof (*ich) -
1592 	    sizeof (sctp_init_chunk_t);
1593 	if (remaining < sizeof (*ph)) {
1594 		/* no parameters; restart OK */
1595 		return (1);
1596 	}
1597 	init = (sctp_init_chunk_t *)(ich + 1);
1598 	ph = (sctp_parm_hdr_t *)(init + 1);
1599 
1600 	while (ph != NULL) {
1601 		/* params will have already been byteordered when validating */
1602 		if (ph->sph_type == htons(PARM_ADDR4)) {
1603 			if (remaining >= PARM_ADDR4_LEN) {
1604 				in6_addr_t addr;
1605 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
1606 				    (ph + 1), &addr);
1607 				fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
1608 				    sleep);
1609 				if (!fpa) {
1610 					goto done;
1611 				}
1612 				bzero(fpa, sizeof (*fpa));
1613 				fpa->faddr = addr;
1614 				fpa->next = NULL;
1615 			}
1616 		} else if (ph->sph_type == htons(PARM_ADDR6)) {
1617 			if (remaining >= PARM_ADDR6_LEN) {
1618 				fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
1619 				    sleep);
1620 				if (!fpa) {
1621 					goto done;
1622 				}
1623 				bzero(fpa, sizeof (*fpa));
1624 				bcopy(ph + 1, &fpa->faddr,
1625 				    sizeof (fpa->faddr));
1626 				fpa->next = NULL;
1627 			}
1628 		} else {
1629 			/* else not addr param; skip */
1630 			fpa = NULL;
1631 		}
1632 		/* link in the new addr, if it was an addr param */
1633 		if (fpa) {
1634 			if (!fphead) {
1635 				fphead = fpa;
1636 				fp = fphead;
1637 			} else {
1638 				fp->next = fpa;
1639 				fp = fpa;
1640 			}
1641 		}
1642 
1643 		ph = sctp_next_parm(ph, &remaining);
1644 	}
1645 
1646 	if (fphead == NULL) {
1647 		/* no addr parameters; restart OK */
1648 		return (1);
1649 	}
1650 
1651 	/*
1652 	 * got at least one; make sure the header's addr is
1653 	 * in the list
1654 	 */
1655 	fp = sctp_lookup_faddr_nosctp(fphead, hdraddr);
1656 	if (!fp) {
1657 		/* not included; add it now */
1658 		fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep);
1659 		if (!fp) {
1660 			goto done;
1661 		}
1662 		bzero(fp, sizeof (*fp));
1663 		fp->faddr = *hdraddr;
1664 		fp->next = fphead;
1665 		fphead = fp;
1666 	}
1667 
1668 	/*
1669 	 * Now, we can finally do the check: For each sctp instance
1670 	 * on the hash line for ports, compare its faddr set against
1671 	 * the new one. If the new one is a strict subset of any
1672 	 * existing sctp's faddrs, the restart is OK. However, if there
1673 	 * is an overlap, this could be an attack, so return failure.
1674 	 * If all sctp's faddrs are disjoint, this is a legitimate new
1675 	 * association.
1676 	 */
1677 	tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
1678 	mutex_enter(&tf->tf_lock);
1679 
1680 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
1681 		if (ports != sctp->sctp_ports) {
1682 			continue;
1683 		}
1684 		compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs);
1685 		if (compres <= SCTP_ADDR_SUBSET) {
1686 			retval = 1;
1687 			mutex_exit(&tf->tf_lock);
1688 			goto done;
1689 		}
1690 		if (compres == SCTP_ADDR_OVERLAP) {
1691 			dprint(1,
1692 			    ("new assoc from %x:%x:%x:%x overlaps with %p\n",
1693 			    SCTP_PRINTADDR(*hdraddr), (void *)sctp));
1694 			/*
1695 			 * While we still hold the lock, we need to
1696 			 * figure out which addresses have been
1697 			 * added so we can include them in the abort
1698 			 * we will send back. Since these faddrs will
1699 			 * never be used, we overload the rto field
1700 			 * here, setting it to 0 if the address was
1701 			 * not added, 1 if it was added.
1702 			 */
1703 			for (fp = fphead; fp; fp = fp->next) {
1704 				if (sctp_lookup_faddr(sctp, &fp->faddr)) {
1705 					fp->rto = 0;
1706 				} else {
1707 					fp->rto = 1;
1708 					nadded++;
1709 				}
1710 			}
1711 			mutex_exit(&tf->tf_lock);
1712 			goto done;
1713 		}
1714 	}
1715 	mutex_exit(&tf->tf_lock);
1716 
1717 	/* All faddrs are disjoint; legit new association */
1718 	retval = 1;
1719 
1720 done:
1721 	/* If are attempted adds, send back an abort listing the addrs */
1722 	if (nadded > 0) {
1723 		void *dtail;
1724 		size_t dlen;
1725 
1726 		dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP);
1727 		if (dtail == NULL) {
1728 			goto cleanup;
1729 		}
1730 
1731 		ph = dtail;
1732 		dlen = 0;
1733 		for (fp = fphead; fp; fp = fp->next) {
1734 			if (fp->rto == 0) {
1735 				continue;
1736 			}
1737 			if (IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
1738 				ipaddr_t addr4;
1739 
1740 				ph->sph_type = htons(PARM_ADDR4);
1741 				ph->sph_len = htons(PARM_ADDR4_LEN);
1742 				IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4);
1743 				ph++;
1744 				bcopy(&addr4, ph, sizeof (addr4));
1745 				ph = (sctp_parm_hdr_t *)
1746 				    ((char *)ph + sizeof (addr4));
1747 				dlen += PARM_ADDR4_LEN;
1748 			} else {
1749 				ph->sph_type = htons(PARM_ADDR6);
1750 				ph->sph_len = htons(PARM_ADDR6_LEN);
1751 				ph++;
1752 				bcopy(&fp->faddr, ph, sizeof (fp->faddr));
1753 				ph = (sctp_parm_hdr_t *)
1754 				    ((char *)ph + sizeof (fp->faddr));
1755 				dlen += PARM_ADDR6_LEN;
1756 			}
1757 		}
1758 
1759 		/* Send off the abort */
1760 		sctp_send_abort(sctp, sctp_init2vtag(ich),
1761 		    SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE);
1762 
1763 		kmem_free(dtail, PARM_ADDR6_LEN * nadded);
1764 	}
1765 
1766 cleanup:
1767 	/* Clean up */
1768 	if (fphead) {
1769 		sctp_faddr_t *fpn;
1770 		for (fp = fphead; fp; fp = fpn) {
1771 			fpn = fp->next;
1772 			kmem_cache_free(sctp_kmem_faddr_cache, fp);
1773 		}
1774 	}
1775 
1776 	return (retval);
1777 }
1778 
1779 /*
1780  * Reset any state related to transmitted chunks.
1781  */
1782 void
1783 sctp_congest_reset(sctp_t *sctp)
1784 {
1785 	sctp_faddr_t	*fp;
1786 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1787 	mblk_t		*mp;
1788 
1789 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
1790 		fp->ssthresh = sctps->sctps_initial_mtu;
1791 		SET_CWND(fp, fp->sfa_pmss, sctps->sctps_slow_start_initial);
1792 		fp->suna = 0;
1793 		fp->pba = 0;
1794 	}
1795 	/*
1796 	 * Clean up the transmit list as well since we have reset accounting
1797 	 * on all the fps. Send event upstream, if required.
1798 	 */
1799 	while ((mp = sctp->sctp_xmit_head) != NULL) {
1800 		sctp->sctp_xmit_head = mp->b_next;
1801 		mp->b_next = NULL;
1802 		if (sctp->sctp_xmit_head != NULL)
1803 			sctp->sctp_xmit_head->b_prev = NULL;
1804 		sctp_sendfail_event(sctp, mp, 0, B_TRUE);
1805 	}
1806 	sctp->sctp_xmit_head = NULL;
1807 	sctp->sctp_xmit_tail = NULL;
1808 	sctp->sctp_xmit_unacked = NULL;
1809 
1810 	sctp->sctp_unacked = 0;
1811 	/*
1812 	 * Any control message as well. We will clean-up this list as well.
1813 	 * This contains any pending ASCONF request that we have queued/sent.
1814 	 * If we do get an ACK we will just drop it. However, given that
1815 	 * we are restarting chances are we aren't going to get any.
1816 	 */
1817 	if (sctp->sctp_cxmit_list != NULL)
1818 		sctp_asconf_free_cxmit(sctp, NULL);
1819 	sctp->sctp_cxmit_list = NULL;
1820 	sctp->sctp_cchunk_pend = 0;
1821 
1822 	sctp->sctp_rexmitting = B_FALSE;
1823 	sctp->sctp_rxt_nxttsn = 0;
1824 	sctp->sctp_rxt_maxtsn = 0;
1825 
1826 	sctp->sctp_zero_win_probe = B_FALSE;
1827 }
1828 
1829 static void
1830 sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr,
1831     mblk_t *timer_mp)
1832 {
1833 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1834 
1835 	bcopy(addr, &fp->faddr, sizeof (*addr));
1836 	if (IN6_IS_ADDR_V4MAPPED(addr)) {
1837 		fp->isv4 = 1;
1838 		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
1839 		fp->sfa_pmss =
1840 		    (sctps->sctps_initial_mtu - sctp->sctp_hdr_len) &
1841 		    ~(SCTP_ALIGN - 1);
1842 	} else {
1843 		fp->isv4 = 0;
1844 		fp->sfa_pmss =
1845 		    (sctps->sctps_initial_mtu - sctp->sctp_hdr6_len) &
1846 		    ~(SCTP_ALIGN - 1);
1847 	}
1848 	fp->cwnd = sctps->sctps_slow_start_initial * fp->sfa_pmss;
1849 	fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max);
1850 	fp->srtt = -1;
1851 	fp->rtt_updates = 0;
1852 	fp->strikes = 0;
1853 	fp->max_retr = sctp->sctp_pp_max_rxt;
1854 	/* Mark it as not confirmed. */
1855 	fp->state = SCTP_FADDRS_UNCONFIRMED;
1856 	fp->hb_interval = sctp->sctp_hb_interval;
1857 	fp->ssthresh = sctps->sctps_initial_ssthresh;
1858 	fp->suna = 0;
1859 	fp->pba = 0;
1860 	fp->acked = 0;
1861 	fp->lastactive = lbolt64;
1862 	fp->timer_mp = timer_mp;
1863 	fp->hb_pending = B_FALSE;
1864 	fp->timer_running = 0;
1865 	fp->df = 1;
1866 	fp->pmtu_discovered = 0;
1867 	fp->rc_timer_mp = NULL;
1868 	fp->rc_timer_running = 0;
1869 	fp->next = NULL;
1870 	fp->ire = NULL;
1871 	fp->T3expire = 0;
1872 	(void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret,
1873 	    sizeof (fp->hb_secret));
1874 	fp->hb_expiry = lbolt64;
1875 	fp->rxt_unacked = 0;
1876 
1877 	sctp_get_ire(sctp, fp);
1878 }
1879 
1880 /*ARGSUSED*/
1881 static void
1882 faddr_destructor(void *buf, void *cdrarg)
1883 {
1884 	sctp_faddr_t *fp = buf;
1885 
1886 	ASSERT(fp->timer_mp == NULL);
1887 	ASSERT(fp->timer_running == 0);
1888 
1889 	ASSERT(fp->rc_timer_mp == NULL);
1890 	ASSERT(fp->rc_timer_running == 0);
1891 }
1892 
1893 void
1894 sctp_faddr_init(void)
1895 {
1896 	sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache",
1897 	    sizeof (sctp_faddr_t), 0, NULL, faddr_destructor,
1898 	    NULL, NULL, NULL, 0);
1899 }
1900 
1901 void
1902 sctp_faddr_fini(void)
1903 {
1904 	kmem_cache_destroy(sctp_kmem_faddr_cache);
1905 }
1906