xref: /titanic_50/usr/src/uts/common/inet/sctp/sctp_common.c (revision bdfc6d18da790deeec2e0eb09c625902defe2498)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/kmem.h>
35 #include <sys/socket.h>
36 #include <sys/random.h>
37 
38 #include <netinet/in.h>
39 #include <netinet/ip6.h>
40 #include <netinet/sctp.h>
41 
42 #include <inet/common.h>
43 #include <inet/ip.h>
44 #include <inet/ip6.h>
45 #include <inet/ip_ire.h>
46 #include <inet/mi.h>
47 #include <inet/mib2.h>
48 #include <inet/nd.h>
49 #include <inet/optcom.h>
50 #include <inet/sctp_ip.h>
51 #include <inet/ipclassifier.h>
52 #include "sctp_impl.h"
53 #include "sctp_addr.h"
54 
/* Cache used in this file to allocate and free sctp_faddr_t entries. */
55 static struct kmem_cache *sctp_kmem_faddr_cache;
/* Forward declaration; called on each newly allocated faddr below. */
56 static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *);
57 
58 /* Set the source address.  Refer to comments in sctp_ire2faddr(). */
59 static void
60 set_saddr(sctp_t *sctp, sctp_faddr_t *fp, boolean_t v6)
61 {
62 	if (sctp->sctp_bound_to_all) {
63 		V6_SET_ZERO(fp->saddr);
64 	} else {
65 		fp->saddr = sctp_get_valid_addr(sctp, v6);
66 		if (!v6 && IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) ||
67 		    v6 && IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
68 			fp->state = SCTP_FADDRS_UNREACH;
69 			/* Disable heartbeat. */
70 			fp->hb_expiry = 0;
71 			fp->hb_pending = B_FALSE;
72 			fp->strikes = 0;
73 		}
74 	}
75 }
76 
77 /*
78  * Call this function to update the cached IRE of a peer addr fp.
79  */
80 void
81 sctp_ire2faddr(sctp_t *sctp, sctp_faddr_t *fp)
82 {
83 	ire_t *ire;
84 	ipaddr_t addr4;
85 	in6_addr_t laddr;
86 	sctp_saddr_ipif_t *sp;
87 	uint_t	ipif_seqid;
88 	int hdrlen;
89 
90 	/* Remove the previous cache IRE */
91 	if ((ire = fp->ire) != NULL) {
92 		IRE_REFRELE_NOTR(ire);
93 		fp->ire = NULL;
94 	}
95 
96 	/*
97 	 * If this addr is not reachable, mark it as unconfirmed for now, the
98 	 * state will be changed back to unreachable later in this function
99 	 * if it is still the case.
100 	 */
101 	if (fp->state == SCTP_FADDRS_UNREACH) {
102 		fp->state = SCTP_FADDRS_UNCONFIRMED;
103 	}
104 
105 	if (fp->isv4) {
106 		IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4);
107 
108 		ire = ire_cache_lookup(addr4, sctp->sctp_zoneid);
109 		if (ire == NULL) {
110 			dprint(3, ("ire2faddr: no ire for %x:%x:%x:%x\n",
111 			    SCTP_PRINTADDR(fp->faddr)));
112 			/*
113 			 * It is tempting to just leave the src addr
114 			 * unspecified and let IP figure it out, but we
115 			 * *cannot* do this, since IP may choose a src addr
116 			 * that is not part of this association... unless
117 			 * this sctp has bound to all addrs.  So if the ire
118 			 * lookup fails, try to find one in our src addr
119 			 * list, unless the sctp has bound to all addrs, in
120 			 * which case we change the src addr to unspec.
121 			 *
122 			 * Note that if this is a v6 endpoint but it does
123 			 * not have any v4 address at this point (e.g. may
124 			 * have been  deleted), sctp_get_valid_addr() will
125 			 * return mapped INADDR_ANY.  In this case, this
126 			 * address should be marked not reachable so that
127 			 * it won't be used to send data.
128 			 */
129 			set_saddr(sctp, fp, B_FALSE);
130 			goto set_current;
131 		}
132 		ipif_seqid = ire->ire_ipif->ipif_seqid;
133 		dprint(2, ("ire2faddr: got ire for %x:%x:%x:%x, ",
134 			SCTP_PRINTADDR(fp->faddr)));
135 		dprint(2, ("src = %x\n", ire->ire_src_addr));
136 		IN6_IPADDR_TO_V4MAPPED(ire->ire_src_addr, &laddr);
137 
138 		/* make sure the laddr is part of this association */
139 		if ((sp = sctp_ipif_lookup(sctp, ipif_seqid)) !=
140 		    NULL && !sp->saddr_ipif_dontsrc) {
141 			fp->saddr = laddr;
142 		} else {
143 			ip2dbg(("ire2faddr: src addr is not part of assc\n"));
144 			set_saddr(sctp, fp, B_FALSE);
145 		}
146 	} else {
147 		ire = ire_cache_lookup_v6(&fp->faddr, sctp->sctp_zoneid);
148 		if (ire == NULL) {
149 			dprint(3, ("ire2faddr: no ire for %x:%x:%x:%x\n",
150 			    SCTP_PRINTADDR(fp->faddr)));
151 			set_saddr(sctp, fp, B_TRUE);
152 			goto set_current;
153 		}
154 		ipif_seqid = ire->ire_ipif->ipif_seqid;
155 		dprint(2, ("ire2faddr: got ire for %x:%x:%x:%x, ",
156 		    SCTP_PRINTADDR(fp->faddr)));
157 		dprint(2, ("src=%x:%x:%x:%x\n",
158 		    SCTP_PRINTADDR(ire->ire_src_addr_v6)));
159 		laddr = ire->ire_src_addr_v6;
160 
161 		/* make sure the laddr is part of this association */
162 
163 		if ((sp = sctp_ipif_lookup(sctp, ipif_seqid)) !=
164 		    NULL && !sp->saddr_ipif_dontsrc) {
165 			fp->saddr = laddr;
166 		} else {
167 			dprint(2, ("ire2faddr: src addr is not part "
168 				"of assc\n"));
169 			set_saddr(sctp, fp, B_TRUE);
170 		}
171 	}
172 
173 	/* Cache the IRE */
174 	IRE_REFHOLD_NOTR(ire);
175 	fp->ire = ire;
176 	if (fp->ire->ire_type == IRE_LOOPBACK && !sctp->sctp_loopback)
177 		sctp->sctp_loopback = 1;
178 	IRE_REFRELE(ire);
179 
180 	/*
181 	 * Pull out RTO information for this faddr and use it if we don't
182 	 * have any yet.
183 	 */
184 	if (fp->srtt == -1 && ire->ire_uinfo.iulp_rtt != 0) {
185 		/* The cached value is in ms. */
186 		fp->srtt = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt);
187 		fp->rttvar = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt_sd);
188 		fp->rto = 3 * fp->srtt;
189 
190 		/* Bound the RTO by configured min and max values */
191 		if (fp->rto < sctp->sctp_rto_min) {
192 			fp->rto = sctp->sctp_rto_min;
193 		}
194 		if (fp->rto > sctp->sctp_rto_max) {
195 			fp->rto = sctp->sctp_rto_max;
196 		}
197 	}
198 
199 	/*
200 	 * Record the MTU for this faddr. If the MTU for this faddr has
201 	 * changed, check if the assc MTU will also change.
202 	 */
203 	if (fp->isv4) {
204 		hdrlen = sctp->sctp_hdr_len;
205 	} else {
206 		hdrlen = sctp->sctp_hdr6_len;
207 	}
208 	if ((fp->sfa_pmss + hdrlen) != ire->ire_max_frag) {
209 		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
210 		fp->sfa_pmss = (ire->ire_max_frag - hdrlen) & ~(SCTP_ALIGN - 1);
211 		if (fp->cwnd < (fp->sfa_pmss * 2)) {
212 			fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial;
213 		}
214 	}
215 
216 set_current:
217 	if (fp == sctp->sctp_current) {
218 		sctp_faddr2hdraddr(fp, sctp);
219 		sctp->sctp_mss = fp->sfa_pmss;
220 		if (!SCTP_IS_DETACHED(sctp)) {
221 			sctp_set_ulp_prop(sctp);
222 		}
223 	}
224 }
225 
226 /*ARGSUSED*/
227 void
228 sctp_faddr2ire(sctp_t *sctp, sctp_faddr_t *fp)
229 {
230 	ire_t *ire;
231 
232 	if ((ire = fp->ire) == NULL) {
233 		return;
234 	}
235 
236 	mutex_enter(&ire->ire_lock);
237 
238 	/* If the cached IRE is going sway, there is no point to update it. */
239 	if (ire->ire_marks & IRE_MARK_CONDEMNED) {
240 		mutex_exit(&ire->ire_lock);
241 		IRE_REFRELE_NOTR(ire);
242 		fp->ire = NULL;
243 		return;
244 	}
245 
246 	/*
247 	 * Only record the PMTU for this faddr if we actually have
248 	 * done discovery. This prevents initialized default from
249 	 * clobbering any real info that IP may have.
250 	 */
251 	if (fp->pmtu_discovered) {
252 		if (fp->isv4) {
253 			ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr_len;
254 		} else {
255 			ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr6_len;
256 		}
257 	}
258 
259 	if (sctp_rtt_updates != 0 && fp->rtt_updates >= sctp_rtt_updates) {
260 		/*
261 		 * If there is no old cached values, initialize them
262 		 * conservatively.  Set them to be (1.5 * new value).
263 		 * This code copied from ip_ire_advise().  The cached
264 		 * value is in ms.
265 		 */
266 		if (ire->ire_uinfo.iulp_rtt != 0) {
267 			ire->ire_uinfo.iulp_rtt = (ire->ire_uinfo.iulp_rtt +
268 			    TICK_TO_MSEC(fp->srtt)) >> 1;
269 		} else {
270 			ire->ire_uinfo.iulp_rtt = TICK_TO_MSEC(fp->srtt +
271 			    (fp->srtt >> 1));
272 		}
273 		if (ire->ire_uinfo.iulp_rtt_sd != 0) {
274 			ire->ire_uinfo.iulp_rtt_sd =
275 			    (ire->ire_uinfo.iulp_rtt_sd +
276 			    TICK_TO_MSEC(fp->rttvar)) >> 1;
277 		} else {
278 			ire->ire_uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->rttvar +
279 			    (fp->rttvar >> 1));
280 		}
281 		fp->rtt_updates = 0;
282 	}
283 
284 	mutex_exit(&ire->ire_lock);
285 }
286 
287 /*
288  * The sender must set the total length in the IP header.
289  * If sendto == NULL, the current will be used.
290  */
291 mblk_t *
292 sctp_make_mp(sctp_t *sctp, sctp_faddr_t *sendto, int trailer)
293 {
294 	mblk_t *mp;
295 	size_t ipsctplen;
296 	int isv4;
297 	sctp_faddr_t *fp;
298 
299 	ASSERT(sctp->sctp_current != NULL || sendto != NULL);
300 	if (sendto == NULL) {
301 		fp = sctp->sctp_current;
302 	} else {
303 		fp = sendto;
304 	}
305 	isv4 = fp->isv4;
306 
307 	/* Try to look for another IRE again. */
308 	if (fp->ire == NULL)
309 		sctp_ire2faddr(sctp, fp);
310 
311 	/* There is no suitable source address to use, return. */
312 	if (fp->state == SCTP_FADDRS_UNREACH)
313 		return (NULL);
314 
315 	if (isv4) {
316 		ipsctplen = sctp->sctp_hdr_len;
317 	} else {
318 		ipsctplen = sctp->sctp_hdr6_len;
319 	}
320 
321 	mp = allocb(ipsctplen + sctp_wroff_xtra + trailer, BPRI_MED);
322 	if (mp == NULL) {
323 		ip1dbg(("sctp_make_mp: error makign mp..\n"));
324 		return (NULL);
325 	}
326 	mp->b_rptr += sctp_wroff_xtra;
327 	mp->b_wptr = mp->b_rptr + ipsctplen;
328 
329 	ASSERT(OK_32PTR(mp->b_wptr));
330 
331 	if (isv4) {
332 		ipha_t *iph = (ipha_t *)mp->b_rptr;
333 
334 		bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen);
335 		if (fp != sctp->sctp_current) {
336 			/* fiddle with the dst addr */
337 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
338 			/* fix up src addr */
339 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
340 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
341 				    iph->ipha_src);
342 			} else if (sctp->sctp_bound_to_all) {
343 				iph->ipha_src = INADDR_ANY;
344 			}
345 		}
346 		/* set or clear the don't fragment bit */
347 		if (fp->df) {
348 			iph->ipha_fragment_offset_and_flags = htons(IPH_DF);
349 		} else {
350 			iph->ipha_fragment_offset_and_flags = 0;
351 		}
352 	} else {
353 		bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen);
354 		if (fp != sctp->sctp_current) {
355 			/* fiddle with the dst addr */
356 			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
357 			/* fix up src addr */
358 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
359 				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
360 			} else if (sctp->sctp_bound_to_all) {
361 				bzero(&((ip6_t *)(mp->b_rptr))->ip6_src,
362 				    sizeof (in6_addr_t));
363 			}
364 		}
365 	}
366 	ASSERT(sctp->sctp_connp != NULL);
367 
368 	/*
369 	 * IP will not free this IRE if it is condemned.  SCTP needs to
370 	 * free it.
371 	 */
372 	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
373 		IRE_REFRELE_NOTR(fp->ire);
374 		fp->ire = NULL;
375 	}
376 	/* Stash the conn and ire ptr info. for IP */
377 	SCTP_STASH_IPINFO(mp, fp->ire);
378 
379 	return (mp);
380 }
381 
382 /*
383  * Notify upper layers about preferred write offset, write size.
384  */
385 void
386 sctp_set_ulp_prop(sctp_t *sctp)
387 {
388 	int hdrlen;
389 
390 	if (sctp->sctp_current->isv4) {
391 		hdrlen = sctp->sctp_hdr_len;
392 	} else {
393 		hdrlen = sctp->sctp_hdr6_len;
394 	}
395 	ASSERT(sctp->sctp_ulpd);
396 
397 	ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss);
398 	sctp->sctp_ulp_prop(sctp->sctp_ulpd,
399 	    sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t),
400 	    sctp->sctp_mss - sizeof (sctp_data_hdr_t));
401 }
402 
403 void
404 sctp_set_iplen(sctp_t *sctp, mblk_t *mp)
405 {
406 	uint16_t	sum = 0;
407 	ipha_t		*iph;
408 	ip6_t		*ip6h;
409 	mblk_t		*pmp = mp;
410 	boolean_t	isv4;
411 
412 	isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION);
413 	for (; pmp; pmp = pmp->b_cont)
414 		sum += pmp->b_wptr - pmp->b_rptr;
415 
416 	if (isv4) {
417 		iph = (ipha_t *)mp->b_rptr;
418 		iph->ipha_length = htons(sum);
419 	} else {
420 		ip6h = (ip6_t *)mp->b_rptr;
421 		ip6h->ip6_plen = htons(sum - ((char *)&sctp->sctp_ip6h[1] -
422 		    sctp->sctp_iphc6));
423 	}
424 }
425 
426 int
427 sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2)
428 {
429 	int na1 = 0;
430 	int overlap = 0;
431 	int equal = 1;
432 	int onematch;
433 	sctp_faddr_t *fp1, *fp2;
434 
435 	for (fp1 = a1; fp1; fp1 = fp1->next) {
436 		onematch = 0;
437 		for (fp2 = a2; fp2; fp2 = fp2->next) {
438 			if (IN6_ARE_ADDR_EQUAL(&fp1->faddr, &fp2->faddr)) {
439 				overlap++;
440 				onematch = 1;
441 				break;
442 			}
443 			if (!onematch) {
444 				equal = 0;
445 			}
446 		}
447 		na1++;
448 	}
449 
450 	if (equal) {
451 		return (SCTP_ADDR_EQUAL);
452 	}
453 	if (overlap == na1) {
454 		return (SCTP_ADDR_SUBSET);
455 	}
456 	if (overlap) {
457 		return (SCTP_ADDR_OVERLAP);
458 	}
459 	return (SCTP_ADDR_DISJOINT);
460 }
461 
462 /*
463  * Returns 0 on success, -1 on memory allocation failure. If sleep
464  * is true, should never fail.
465  * Caller must hold conn fanout lock.
466  */
467 int
468 sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep)
469 {
470 	sctp_faddr_t *faddr;
471 
472 	dprint(4, ("add_faddr: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr),
473 	    sleep));
474 
475 	if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) {
476 		return (-1);
477 	}
478 
479 	sctp_init_faddr(sctp, faddr, addr);
480 	ASSERT(faddr->next == NULL);
481 
482 	/* tack it on to the end */
483 	if (sctp->sctp_lastfaddr != NULL) {
484 		sctp->sctp_lastfaddr->next = faddr;
485 	} else {
486 		/* list is empty */
487 		ASSERT(sctp->sctp_faddrs == NULL);
488 		sctp->sctp_faddrs = faddr;
489 	}
490 	sctp->sctp_lastfaddr = faddr;
491 
492 	return (0);
493 }
494 
495 /*
496  * Caller must hold conn fanout lock.
497  */
498 int
499 sctp_add_faddr_first(sctp_t *sctp, in6_addr_t *addr, int sleep)
500 {
501 	sctp_faddr_t *faddr;
502 
503 	dprint(4, ("add_faddr_first: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr),
504 	    sleep));
505 
506 	if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) {
507 		return (-1);
508 	}
509 	sctp_init_faddr(sctp, faddr, addr);
510 	ASSERT(faddr->next == NULL);
511 
512 	/* Put it at the beginning of the list */
513 	if (sctp->sctp_faddrs != NULL) {
514 		faddr->next = sctp->sctp_faddrs;
515 	} else {
516 		sctp->sctp_lastfaddr = faddr;
517 	}
518 	sctp->sctp_faddrs = faddr;
519 
520 	return (0);
521 }
522 
523 sctp_faddr_t *
524 sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr)
525 {
526 	sctp_faddr_t *fp;
527 
528 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
529 		if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr))
530 			break;
531 	}
532 
533 	return (fp);
534 }
535 
536 sctp_faddr_t *
537 sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr)
538 {
539 	for (; fp; fp = fp->next) {
540 		if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) {
541 			break;
542 		}
543 	}
544 
545 	return (fp);
546 }
547 
548 void
549 sctp_faddr2hdraddr(sctp_faddr_t *fp, sctp_t *sctp)
550 {
551 	if (fp->isv4) {
552 		IN6_V4MAPPED_TO_IPADDR(&fp->faddr,
553 		    sctp->sctp_ipha->ipha_dst);
554 		/* Must not allow unspec src addr if not bound to all */
555 		if (IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) &&
556 		    !sctp->sctp_bound_to_all) {
557 			/*
558 			 * set the src to the first v4 saddr and hope
559 			 * for the best
560 			 */
561 			fp->saddr = sctp_get_valid_addr(sctp, B_FALSE);
562 		}
563 		IN6_V4MAPPED_TO_IPADDR(&fp->saddr, sctp->sctp_ipha->ipha_src);
564 		/* update don't fragment bit */
565 		if (fp->df) {
566 			sctp->sctp_ipha->ipha_fragment_offset_and_flags =
567 			    htons(IPH_DF);
568 		} else {
569 			sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0;
570 		}
571 	} else {
572 		sctp->sctp_ip6h->ip6_dst = fp->faddr;
573 		/* Must not allow unspec src addr if not bound to all */
574 		if (IN6_IS_ADDR_UNSPECIFIED(&fp->saddr) &&
575 		    !sctp->sctp_bound_to_all) {
576 			/*
577 			 * set the src to the first v6 saddr and hope
578 			 * for the best
579 			 */
580 			fp->saddr = sctp_get_valid_addr(sctp, B_TRUE);
581 		}
582 		sctp->sctp_ip6h->ip6_src = fp->saddr;
583 	}
584 }
585 
586 void
587 sctp_redo_faddr_srcs(sctp_t *sctp)
588 {
589 	sctp_faddr_t *fp;
590 
591 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
592 		sctp_ire2faddr(sctp, fp);
593 	}
594 
595 	sctp_faddr2hdraddr(sctp->sctp_current, sctp);
596 }
597 
598 void
599 sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
600 {
601 	int64_t now = lbolt64;
602 
603 	fp->strikes = 0;
604 	sctp->sctp_strikes = 0;
605 	fp->lastactive = now;
606 	fp->hb_expiry = now + SET_HB_INTVL(fp);
607 	fp->hb_pending = B_FALSE;
608 	if (fp->state != SCTP_FADDRS_ALIVE) {
609 		fp->state = SCTP_FADDRS_ALIVE;
610 		sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0);
611 
612 		/* If this is the primary, switch back to it now */
613 		if (fp == sctp->sctp_primary) {
614 			sctp->sctp_current = fp;
615 			sctp->sctp_mss = fp->sfa_pmss;
616 			/* Reset the addrs in the composite header */
617 			sctp_faddr2hdraddr(fp, sctp);
618 			if (!SCTP_IS_DETACHED(sctp)) {
619 				sctp_set_ulp_prop(sctp);
620 			}
621 		}
622 	}
623 	if (fp->ire == NULL) {
624 		/* Should have a full IRE now */
625 		sctp_ire2faddr(sctp, fp);
626 	}
627 }
628 
629 int
630 sctp_is_a_faddr_clean(sctp_t *sctp)
631 {
632 	sctp_faddr_t *fp;
633 
634 	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
635 		if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) {
636 			return (1);
637 		}
638 	}
639 
640 	return (0);
641 }
642 
643 /*
644  * Returns 0 if there is at leave one other active faddr, -1 if there
645  * are none. If there are none left, faddr_dead() will start killing the
646  * association.
647  * If the downed faddr was the current faddr, a new current faddr
648  * will be chosen.
649  */
650 int
651 sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate)
652 {
653 	sctp_faddr_t *ofp;
654 
655 	if (fp->state == SCTP_FADDRS_ALIVE) {
656 		sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_UNREACHABLE, 0);
657 	}
658 	fp->state = newstate;
659 
660 	dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n",
661 	    SCTP_PRINTADDR(fp->faddr), newstate));
662 
663 	if (fp == sctp->sctp_current) {
664 		/* Current faddr down; need to switch it */
665 		sctp->sctp_current = NULL;
666 	}
667 
668 	/* Find next alive faddr */
669 	ofp = fp;
670 	for (fp = fp->next; fp; fp = fp->next) {
671 		if (fp->state == SCTP_FADDRS_ALIVE) {
672 			break;
673 		}
674 	}
675 
676 	if (fp == NULL) {
677 		/* Continue from beginning of list */
678 		for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->next) {
679 			if (fp->state == SCTP_FADDRS_ALIVE) {
680 				break;
681 			}
682 		}
683 	}
684 
685 	if (fp != ofp) {
686 		if (sctp->sctp_current == NULL) {
687 			dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n",
688 			    SCTP_PRINTADDR(fp->faddr)));
689 			sctp->sctp_current = fp;
690 			sctp->sctp_mss = fp->sfa_pmss;
691 
692 			/* Reset the addrs in the composite header */
693 			sctp_faddr2hdraddr(fp, sctp);
694 
695 			if (!SCTP_IS_DETACHED(sctp)) {
696 				sctp_set_ulp_prop(sctp);
697 			}
698 		}
699 		return (0);
700 	}
701 
702 
703 	/* All faddrs are down; kill the association */
704 	dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n"));
705 	BUMP_MIB(&sctp_mib, sctpAborted);
706 	sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ?
707 	    SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL);
708 	sctp_clean_death(sctp, sctp->sctp_client_errno ?
709 	    sctp->sctp_client_errno : ETIMEDOUT);
710 
711 	return (-1);
712 }
713 
714 sctp_faddr_t *
715 sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp)
716 {
717 	sctp_faddr_t *nfp = NULL;
718 
719 	if (ofp == NULL) {
720 		ofp = sctp->sctp_current;
721 	}
722 
723 	/* Find the next live one */
724 	for (nfp = ofp->next; nfp != NULL; nfp = nfp->next) {
725 		if (nfp->state == SCTP_FADDRS_ALIVE) {
726 			break;
727 		}
728 	}
729 
730 	if (nfp == NULL) {
731 		/* Continue from beginning of list */
732 		for (nfp = sctp->sctp_faddrs; nfp != ofp; nfp = nfp->next) {
733 			if (nfp->state == SCTP_FADDRS_ALIVE) {
734 				break;
735 			}
736 		}
737 	}
738 
739 	/*
740 	 * nfp could only be NULL if all faddrs are down, and when
741 	 * this happens, faddr_dead() should have killed the
742 	 * association. Hence this assertion...
743 	 */
744 	ASSERT(nfp != NULL);
745 	return (nfp);
746 }
747 
748 void
749 sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp)
750 {
751 	sctp_faddr_t *fpp;
752 
753 	if (!sctp->sctp_faddrs) {
754 		return;
755 	}
756 
757 	if (fp->timer_mp != NULL) {
758 		sctp_timer_free(fp->timer_mp);
759 		fp->timer_mp = NULL;
760 		fp->timer_running = 0;
761 	}
762 	if (fp->rc_timer_mp != NULL) {
763 		sctp_timer_free(fp->rc_timer_mp);
764 		fp->rc_timer_mp = NULL;
765 		fp->rc_timer_running = 0;
766 	}
767 	if (fp->ire != NULL) {
768 		IRE_REFRELE_NOTR(fp->ire);
769 		fp->ire = NULL;
770 	}
771 
772 	if (fp == sctp->sctp_faddrs) {
773 		goto gotit;
774 	}
775 
776 	for (fpp = sctp->sctp_faddrs; fpp->next != fp; fpp = fpp->next)
777 		;
778 
779 gotit:
780 	ASSERT(sctp->sctp_conn_tfp != NULL);
781 	mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
782 	if (fp == sctp->sctp_faddrs) {
783 		sctp->sctp_faddrs = fp->next;
784 	} else {
785 		fpp->next = fp->next;
786 	}
787 	mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
788 	/* XXX faddr2ire? */
789 	kmem_cache_free(sctp_kmem_faddr_cache, fp);
790 }
791 
792 void
793 sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock)
794 {
795 	sctp_faddr_t *fp, *fpn;
796 
797 	if (sctp->sctp_faddrs == NULL) {
798 		ASSERT(sctp->sctp_lastfaddr == NULL);
799 		return;
800 	}
801 
802 	ASSERT(sctp->sctp_lastfaddr != NULL);
803 	sctp->sctp_lastfaddr = NULL;
804 	sctp->sctp_current = NULL;
805 	sctp->sctp_primary = NULL;
806 
807 	sctp_free_faddr_timers(sctp);
808 
809 	if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
810 		/* in conn fanout; need to hold lock */
811 		mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
812 	}
813 
814 	for (fp = sctp->sctp_faddrs; fp; fp = fpn) {
815 		fpn = fp->next;
816 		if (fp->ire != NULL)
817 			IRE_REFRELE_NOTR(fp->ire);
818 		kmem_cache_free(sctp_kmem_faddr_cache, fp);
819 	}
820 
821 	sctp->sctp_faddrs = NULL;
822 
823 	if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
824 		mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
825 	}
826 
827 }
828 
829 void
830 sctp_zap_addrs(sctp_t *sctp)
831 {
832 	sctp_zap_faddrs(sctp, 0);
833 	sctp_free_saddrs(sctp);
834 }
835 
836 /*
837  * Initialize the IPv4 header. Loses any record of any IP options.
838  */
839 int
840 sctp_header_init_ipv4(sctp_t *sctp, int sleep)
841 {
842 	sctp_hdr_t	*sctph;
843 
844 	/*
845 	 * This is a simple initialization. If there's
846 	 * already a template, it should never be too small,
847 	 * so reuse it.  Otherwise, allocate space for the new one.
848 	 */
849 	if (sctp->sctp_iphc != NULL) {
850 		ASSERT(sctp->sctp_iphc_len >= SCTP_MAX_COMBINED_HEADER_LENGTH);
851 		bzero(sctp->sctp_iphc, sctp->sctp_iphc_len);
852 	} else {
853 		sctp->sctp_iphc_len = SCTP_MAX_COMBINED_HEADER_LENGTH;
854 		sctp->sctp_iphc = kmem_zalloc(sctp->sctp_iphc_len, sleep);
855 		if (sctp->sctp_iphc == NULL) {
856 			sctp->sctp_iphc_len = 0;
857 			return (ENOMEM);
858 		}
859 	}
860 
861 	sctp->sctp_ipha = (ipha_t *)sctp->sctp_iphc;
862 
863 	sctp->sctp_hdr_len = sizeof (ipha_t) + sizeof (sctp_hdr_t);
864 	sctp->sctp_ip_hdr_len = sizeof (ipha_t);
865 	sctp->sctp_ipha->ipha_length = htons(sizeof (ipha_t) +
866 	    sizeof (sctp_hdr_t));
867 	sctp->sctp_ipha->ipha_version_and_hdr_length
868 		= (IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS;
869 
870 	/*
871 	 * These two fields should be zero, and are already set above.
872 	 *
873 	 * sctp->sctp_ipha->ipha_ident,
874 	 * sctp->sctp_ipha->ipha_fragment_offset_and_flags.
875 	 */
876 
877 	sctp->sctp_ipha->ipha_ttl = sctp_ipv4_ttl;
878 	sctp->sctp_ipha->ipha_protocol = IPPROTO_SCTP;
879 
880 	sctph = (sctp_hdr_t *)(sctp->sctp_iphc + sizeof (ipha_t));
881 	sctp->sctp_sctph = sctph;
882 
883 	return (0);
884 }
885 
886 /*
887  * Update sctp_sticky_hdrs based on sctp_sticky_ipp.
888  * The headers include ip6i_t (if needed), ip6_t, any sticky extension
889  * headers, and the maximum size sctp header (to avoid reallocation
890  * on the fly for additional sctp options).
891  * Returns failure if can't allocate memory.
892  */
893 int
894 sctp_build_hdrs(sctp_t *sctp)
895 {
896 	char		*hdrs;
897 	uint_t		hdrs_len;
898 	ip6i_t		*ip6i;
899 	char		buf[SCTP_MAX_HDR_LENGTH];
900 	ip6_pkt_t	*ipp = &sctp->sctp_sticky_ipp;
901 	in6_addr_t	src;
902 	in6_addr_t	dst;
903 	uint8_t		hoplimit;
904 	/*
905 	 * save the existing sctp header and source/dest IP addresses
906 	 */
907 	bcopy(sctp->sctp_sctph6, buf, sizeof (sctp_hdr_t));
908 	src = sctp->sctp_ip6h->ip6_src;
909 	dst = sctp->sctp_ip6h->ip6_dst;
910 	hoplimit = sctp->sctp_ip6h->ip6_hops;
911 	hdrs_len = ip_total_hdrs_len_v6(ipp) + SCTP_MAX_HDR_LENGTH;
912 	ASSERT(hdrs_len != 0);
913 	if (hdrs_len > sctp->sctp_iphc6_len) {
914 		/* Need to reallocate */
915 		hdrs = kmem_zalloc(hdrs_len, KM_NOSLEEP);
916 		if (hdrs == NULL)
917 			return (ENOMEM);
918 
919 		if (sctp->sctp_iphc6_len != 0)
920 			kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
921 		sctp->sctp_iphc6 = hdrs;
922 		sctp->sctp_iphc6_len = hdrs_len;
923 	}
924 	ip_build_hdrs_v6((uchar_t *)sctp->sctp_iphc6,
925 	    hdrs_len - SCTP_MAX_HDR_LENGTH, ipp, IPPROTO_SCTP);
926 
927 	/* Set header fields not in ipp */
928 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
929 		ip6i = (ip6i_t *)sctp->sctp_iphc6;
930 		sctp->sctp_ip6h = (ip6_t *)&ip6i[1];
931 	} else {
932 		sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6;
933 	}
934 	/*
935 	 * sctp->sctp_ip_hdr_len will include ip6i_t if there is one.
936 	 */
937 	sctp->sctp_ip_hdr6_len = hdrs_len - SCTP_MAX_HDR_LENGTH;
938 	sctp->sctp_sctph6 = (sctp_hdr_t *)(sctp->sctp_iphc6 +
939 	    sctp->sctp_ip_hdr6_len);
940 	sctp->sctp_hdr6_len = sctp->sctp_ip_hdr6_len + sizeof (sctp_hdr_t);
941 
942 	bcopy(buf, sctp->sctp_sctph6, sizeof (sctp_hdr_t));
943 
944 	sctp->sctp_ip6h->ip6_src = src;
945 	sctp->sctp_ip6h->ip6_dst = dst;
946 	/*
947 	 * If IPV6_HOPLIMIT was set in ipp, use that value.
948 	 * For sticky options, if it does not exist use
949 	 * the default/saved value (which was set in ip_build_hdrs_v6())
950 	 * All this as per RFC 2922.
951 	 */
952 	if (!(ipp->ipp_fields & IPPF_HOPLIMIT))
953 		sctp->sctp_ip6h->ip6_hops = hoplimit;
954 	/*
955 	 * Set the IPv6 header payload length.
956 	 * If there's an ip6i_t included, don't count it in the length.
957 	 */
958 	sctp->sctp_ip6h->ip6_plen = sctp->sctp_hdr6_len - IPV6_HDR_LEN;
959 	if (ipp->ipp_fields & IPPF_HAS_IP6I)
960 		sctp->sctp_ip6h->ip6_plen -= sizeof (ip6i_t);
961 	/*
962 	 * If we're setting extension headers after a connection
963 	 * has been established, and if we have a routing header
964 	 * among the extension headers, call ip_massage_options_v6 to
965 	 * manipulate the routing header/ip6_dst set the checksum
966 	 * difference in the sctp header template.
967 	 * (This happens in sctp_connect_ipv6 if the routing header
968 	 * is set prior to the connect.)
969 	 */
970 
971 	if ((sctp->sctp_state >= SCTPS_COOKIE_WAIT) &&
972 	    (sctp->sctp_sticky_ipp.ipp_fields & IPPF_RTHDR)) {
973 		ip6_rthdr_t *rth;
974 
975 		rth = ip_find_rthdr_v6(sctp->sctp_ip6h,
976 		    (uint8_t *)sctp->sctp_sctph6);
977 		if (rth != NULL)
978 			(void) ip_massage_options_v6(sctp->sctp_ip6h, rth);
979 	}
980 	return (0);
981 }
982 
983 /*
984  * Initialize the IPv6 header. Loses any record of any IPv6 extension headers.
985  */
986 int
987 sctp_header_init_ipv6(sctp_t *sctp, int sleep)
988 {
989 	sctp_hdr_t	*sctph;
990 
991 	/*
992 	 * This is a simple initialization. If there's
993 	 * already a template, it should never be too small,
994 	 * so reuse it. Otherwise, allocate space for the new one.
995 	 * Ensure that there is enough space to "downgrade" the sctp_t
996 	 * to an IPv4 sctp_t. This requires having space for a full load
997 	 * of IPv4 options
998 	 */
999 	if (sctp->sctp_iphc6 != NULL) {
1000 		ASSERT(sctp->sctp_iphc6_len >=
1001 		    SCTP_MAX_COMBINED_HEADER_LENGTH);
1002 		bzero(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
1003 	} else {
1004 		sctp->sctp_iphc6_len = SCTP_MAX_COMBINED_HEADER_LENGTH;
1005 		sctp->sctp_iphc6 = kmem_zalloc(sctp->sctp_iphc_len, sleep);
1006 		if (sctp->sctp_iphc6 == NULL) {
1007 			sctp->sctp_iphc6_len = 0;
1008 			return (ENOMEM);
1009 		}
1010 	}
1011 	sctp->sctp_hdr6_len = IPV6_HDR_LEN + sizeof (sctp_hdr_t);
1012 	sctp->sctp_ip_hdr6_len = IPV6_HDR_LEN;
1013 	sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6;
1014 
1015 	/* Initialize the header template */
1016 
1017 	sctp->sctp_ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1018 	sctp->sctp_ip6h->ip6_plen = ntohs(sizeof (sctp_hdr_t));
1019 	sctp->sctp_ip6h->ip6_nxt = IPPROTO_SCTP;
1020 	sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit;
1021 
1022 	sctph = (sctp_hdr_t *)(sctp->sctp_iphc6 + IPV6_HDR_LEN);
1023 	sctp->sctp_sctph6 = sctph;
1024 
1025 	return (0);
1026 }
1027 
1028 /*
1029  * XXX implement more sophisticated logic
1030  */
1031 void
1032 sctp_set_hdraddrs(sctp_t *sctp)
1033 {
1034 	sctp_faddr_t *fp;
1035 	int gotv4 = 0;
1036 	int gotv6 = 0;
1037 
1038 	ASSERT(sctp->sctp_faddrs != NULL);
1039 	ASSERT(sctp->sctp_nsaddrs > 0);
1040 
1041 	/* Set up using the primary first */
1042 	if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->faddr)) {
1043 		IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->faddr,
1044 		    sctp->sctp_ipha->ipha_dst);
1045 		/* saddr may be unspec; make_mp() will handle this */
1046 		IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->saddr,
1047 		    sctp->sctp_ipha->ipha_src);
1048 		gotv4 = 1;
1049 		if (sctp->sctp_ipversion == IPV4_VERSION) {
1050 			goto copyports;
1051 		}
1052 	} else {
1053 		sctp->sctp_ip6h->ip6_dst = sctp->sctp_primary->faddr;
1054 		/* saddr may be unspec; make_mp() will handle this */
1055 		sctp->sctp_ip6h->ip6_src = sctp->sctp_primary->saddr;
1056 		gotv6 = 1;
1057 	}
1058 
1059 	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
1060 		if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
1061 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr,
1062 			    sctp->sctp_ipha->ipha_dst);
1063 			/* copy in the faddr_t's saddr */
1064 			IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
1065 			    sctp->sctp_ipha->ipha_src);
1066 			gotv4 = 1;
1067 			if (sctp->sctp_ipversion == IPV4_VERSION || gotv6) {
1068 				break;
1069 			}
1070 		} else if (!gotv6) {
1071 			sctp->sctp_ip6h->ip6_dst = fp->faddr;
1072 			/* copy in the faddr_t's saddr */
1073 			sctp->sctp_ip6h->ip6_src = fp->saddr;
1074 			gotv6 = 1;
1075 			if (gotv4) {
1076 				break;
1077 			}
1078 		}
1079 	}
1080 
1081 copyports:
1082 	/* copy in the ports for good measure */
1083 	sctp->sctp_sctph->sh_sport = sctp->sctp_lport;
1084 	sctp->sctp_sctph->sh_dport = sctp->sctp_fport;
1085 
1086 	sctp->sctp_sctph6->sh_sport = sctp->sctp_lport;
1087 	sctp->sctp_sctph6->sh_dport = sctp->sctp_fport;
1088 }
1089 
1090 void
1091 sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp)
1092 {
1093 	mblk_t *mp;
1094 	sctp_parm_hdr_t *ph;
1095 	size_t len;
1096 	int pad;
1097 
1098 	len = sizeof (*ph) + ntohs(uph->sph_len);
1099 	if ((pad = len % 4) != 0) {
1100 		pad = 4 - pad;
1101 		len += pad;
1102 	}
1103 	mp = allocb(len, BPRI_MED);
1104 	if (mp == NULL) {
1105 		return;
1106 	}
1107 
1108 	ph = (sctp_parm_hdr_t *)(mp->b_rptr);
1109 	ph->sph_type = htons(PARM_UNRECOGNIZED);
1110 	ph->sph_len = htons(len - pad);
1111 
1112 	/* copy in the unrecognized parameter */
1113 	bcopy(uph, ph + 1, ntohs(uph->sph_len));
1114 
1115 	mp->b_wptr = mp->b_rptr + len;
1116 	if (*errmp != NULL) {
1117 		linkb(*errmp, mp);
1118 	} else {
1119 		*errmp = mp;
1120 	}
1121 }
1122 
1123 /*
1124  * o Bounds checking
1125  * o Updates remaining
1126  * o Checks alignment
1127  */
1128 sctp_parm_hdr_t *
1129 sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining)
1130 {
1131 	int pad;
1132 	uint16_t len;
1133 
1134 	len = ntohs(current->sph_len);
1135 	*remaining -= len;
1136 	if (*remaining < sizeof (*current) || len < sizeof (*current)) {
1137 		return (NULL);
1138 	}
1139 	if ((pad = len & (SCTP_ALIGN - 1)) != 0) {
1140 		pad = SCTP_ALIGN - pad;
1141 		*remaining -= pad;
1142 	}
1143 	/*LINTED pointer cast may result in improper alignment*/
1144 	current = (sctp_parm_hdr_t *)((char *)current + len + pad);
1145 	return (current);
1146 }
1147 
1148 /*
1149  * Sets the address parameters given in the INIT chunk into sctp's
1150  * faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are
1151  * no address parameters in the INIT chunk, a single faddr is created
1152  * from the ip hdr at the beginning of pkt.
1153  * If there already are existing addresses hanging from sctp, merge
1154  * them in, if the old info contains addresses which are not present
1155  * in this new info, get rid of them, and clean the pointers if there's
1156  * messages which have this as their target address.
1157  *
1158  * Returns 0 on success, sys errno on failure
1159  */
1160 int
1161 sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt,
1162     sctp_chunk_hdr_t *ich, uint_t *sctp_options)
1163 {
1164 	sctp_init_chunk_t	*init;
1165 	ipha_t			*iph;
1166 	ip6_t			*ip6h;
1167 	in6_addr_t		hdraddr[1];
1168 	sctp_parm_hdr_t		*ph;
1169 	ssize_t			remaining;
1170 	int			isv4;
1171 	int			err;
1172 	sctp_faddr_t		*fp;
1173 
1174 	if (sctp_options != NULL)
1175 		*sctp_options = 0;
1176 
1177 	/* inherit laddrs, if given */
1178 	if (psctp != NULL && psctp->sctp_nsaddrs > 0) {
1179 		ASSERT(sctp->sctp_nsaddrs == 0);
1180 
1181 		err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP);
1182 		if (err != 0)
1183 			return (err);
1184 	}
1185 
1186 	/* extract the address from the IP header */
1187 	isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
1188 	if (isv4) {
1189 		iph = (ipha_t *)pkt->b_rptr;
1190 		IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr);
1191 	} else {
1192 		ip6h = (ip6_t *)pkt->b_rptr;
1193 		hdraddr[0] = ip6h->ip6_src;
1194 	}
1195 
1196 	/* For loopback connections ignore address list */
1197 	if (sctp->sctp_loopback)
1198 		goto get_from_iphdr;
1199 
1200 	/* Walk the params in the INIT [ACK], pulling out addr params */
1201 	remaining = ntohs(ich->sch_len) - sizeof (*ich) -
1202 	    sizeof (sctp_init_chunk_t);
1203 	if (remaining < sizeof (*ph)) {
1204 		/* no parameters */
1205 		goto get_from_iphdr;
1206 	}
1207 	init = (sctp_init_chunk_t *)(ich + 1);
1208 	ph = (sctp_parm_hdr_t *)(init + 1);
1209 
1210 	while (ph != NULL) {
1211 		/* params will have already been byteordered when validating */
1212 		if (ph->sph_type == htons(PARM_ADDR4)) {
1213 			if (remaining >= PARM_ADDR4_LEN) {
1214 				in6_addr_t addr;
1215 				ipaddr_t ta;
1216 
1217 				/*
1218 				 * Screen out broad/multicasts & loopback.
1219 				 * If the endpoint only accepts v6 address,
1220 				 * go to the next one.
1221 				 */
1222 				bcopy(ph + 1, &ta, sizeof (ta));
1223 				if (ta == 0 ||
1224 				    ta == INADDR_BROADCAST ||
1225 				    ta == htonl(INADDR_LOOPBACK) ||
1226 				    IN_MULTICAST(ta) ||
1227 				    sctp->sctp_connp->conn_ipv6_v6only) {
1228 					goto next;
1229 				}
1230 				/*
1231 				 * XXX also need to check for subnet
1232 				 * broadcasts. This should probably
1233 				 * wait until we have full access
1234 				 * to the ILL tables.
1235 				 */
1236 
1237 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
1238 				    (ph + 1), &addr);
1239 				/* Check for duplicate. */
1240 				if (sctp_lookup_faddr(sctp, &addr) != NULL)
1241 					goto next;
1242 
1243 				/* OK, add it to the faddr set */
1244 				if (sctp_add_faddr(sctp, &addr,
1245 					KM_NOSLEEP) != 0) {
1246 					return (ENOMEM);
1247 				}
1248 			}
1249 		} else if (ph->sph_type == htons(PARM_ADDR6) &&
1250 		    sctp->sctp_family == AF_INET6) {
1251 			/* An v4 socket should not take v6 addresses. */
1252 			if (remaining >= PARM_ADDR6_LEN) {
1253 				in6_addr_t *addr6;
1254 
1255 				addr6 = (in6_addr_t *)(ph + 1);
1256 				/*
1257 				 * Screen out link locals, mcast, loopback
1258 				 * and bogus v6 address.
1259 				 */
1260 				if (IN6_IS_ADDR_LINKLOCAL(addr6) ||
1261 				    IN6_IS_ADDR_MULTICAST(addr6) ||
1262 				    IN6_IS_ADDR_LOOPBACK(addr6) ||
1263 				    IN6_IS_ADDR_V4MAPPED(addr6)) {
1264 					goto next;
1265 				}
1266 				/* Check for duplicate. */
1267 				if (sctp_lookup_faddr(sctp, addr6) != NULL)
1268 					goto next;
1269 
1270 				if (sctp_add_faddr(sctp,
1271 				    (in6_addr_t *)(ph + 1), KM_NOSLEEP) != 0) {
1272 					return (ENOMEM);
1273 				}
1274 			}
1275 		} else if (ph->sph_type == htons(PARM_FORWARD_TSN)) {
1276 			if (sctp_options != NULL)
1277 				*sctp_options |= SCTP_PRSCTP_OPTION;
1278 		} /* else; skip */
1279 
1280 next:
1281 		ph = sctp_next_parm(ph, &remaining);
1282 	}
1283 
1284 get_from_iphdr:
1285 	/* Make sure the header's addr is in the list */
1286 	fp = sctp_lookup_faddr(sctp, hdraddr);
1287 	if (fp == NULL) {
1288 		/* not included; add it now */
1289 		if (sctp_add_faddr_first(sctp, hdraddr, KM_NOSLEEP) == -1)
1290 			return (ENOMEM);
1291 
1292 		/* sctp_faddrs will be the hdr addr */
1293 		fp = sctp->sctp_faddrs;
1294 	}
1295 	/* make the header addr the primary */
1296 	sctp->sctp_primary = fp;
1297 	sctp->sctp_current = fp;
1298 	sctp->sctp_mss = fp->sfa_pmss;
1299 
1300 	return (0);
1301 }
1302 
1303 /*
1304  * Returns 0 if the check failed and the restart should be refused,
1305  * 1 if the check succeeded.
1306  */
1307 int
1308 sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports,
1309     int sleep)
1310 {
1311 	sctp_faddr_t *fp, *fpa, *fphead = NULL;
1312 	sctp_parm_hdr_t *ph;
1313 	ssize_t remaining;
1314 	int isv4;
1315 	ipha_t *iph;
1316 	ip6_t *ip6h;
1317 	in6_addr_t hdraddr[1];
1318 	int retval = 0;
1319 	sctp_tf_t *tf;
1320 	sctp_t *sctp;
1321 	int compres;
1322 	sctp_init_chunk_t *init;
1323 	int nadded = 0;
1324 
1325 	/* extract the address from the IP header */
1326 	isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
1327 	if (isv4) {
1328 		iph = (ipha_t *)pkt->b_rptr;
1329 		IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr);
1330 	} else {
1331 		ip6h = (ip6_t *)pkt->b_rptr;
1332 		hdraddr[0] = ip6h->ip6_src;
1333 	}
1334 
1335 	/* Walk the params in the INIT [ACK], pulling out addr params */
1336 	remaining = ntohs(ich->sch_len) - sizeof (*ich) -
1337 	    sizeof (sctp_init_chunk_t);
1338 	if (remaining < sizeof (*ph)) {
1339 		/* no parameters; restart OK */
1340 		return (1);
1341 	}
1342 	init = (sctp_init_chunk_t *)(ich + 1);
1343 	ph = (sctp_parm_hdr_t *)(init + 1);
1344 
1345 	while (ph != NULL) {
1346 		/* params will have already been byteordered when validating */
1347 		if (ph->sph_type == htons(PARM_ADDR4)) {
1348 			if (remaining >= PARM_ADDR4_LEN) {
1349 				in6_addr_t addr;
1350 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
1351 				    (ph + 1), &addr);
1352 				fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
1353 				    sleep);
1354 				if (!fpa) {
1355 					goto done;
1356 				}
1357 				bzero(fpa, sizeof (*fpa));
1358 				fpa->faddr = addr;
1359 				fpa->next = NULL;
1360 			}
1361 		} else if (ph->sph_type == htons(PARM_ADDR6)) {
1362 			if (remaining >= PARM_ADDR6_LEN) {
1363 				fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
1364 				    sleep);
1365 				if (!fpa) {
1366 					goto done;
1367 				}
1368 				bzero(fpa, sizeof (*fpa));
1369 				bcopy(ph + 1, &fpa->faddr,
1370 				    sizeof (fpa->faddr));
1371 				fpa->next = NULL;
1372 			}
1373 		} else {
1374 			/* else not addr param; skip */
1375 			fpa = NULL;
1376 		}
1377 		/* link in the new addr, if it was an addr param */
1378 		if (fpa) {
1379 			if (!fphead) {
1380 				fphead = fpa;
1381 				fp = fphead;
1382 			} else {
1383 				fp->next = fpa;
1384 				fp = fpa;
1385 			}
1386 		}
1387 
1388 		ph = sctp_next_parm(ph, &remaining);
1389 	}
1390 
1391 	if (fphead == NULL) {
1392 		/* no addr parameters; restart OK */
1393 		return (1);
1394 	}
1395 
1396 	/*
1397 	 * got at least one; make sure the header's addr is
1398 	 * in the list
1399 	 */
1400 	fp = sctp_lookup_faddr_nosctp(fphead, hdraddr);
1401 	if (!fp) {
1402 		/* not included; add it now */
1403 		fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep);
1404 		if (!fp) {
1405 			goto done;
1406 		}
1407 		bzero(fp, sizeof (*fp));
1408 		fp->faddr = *hdraddr;
1409 		fp->next = fphead;
1410 		fphead = fp;
1411 	}
1412 
1413 	/*
1414 	 * Now, we can finally do the check: For each sctp instance
1415 	 * on the hash line for ports, compare its faddr set against
1416 	 * the new one. If the new one is a strict subset of any
1417 	 * existing sctp's faddrs, the restart is OK. However, if there
1418 	 * is an overlap, this could be an attack, so return failure.
1419 	 * If all sctp's faddrs are disjoint, this is a legitimate new
1420 	 * association.
1421 	 */
1422 	tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]);
1423 	mutex_enter(&tf->tf_lock);
1424 
1425 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
1426 		if (ports != sctp->sctp_ports) {
1427 			continue;
1428 		}
1429 		compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs);
1430 		if (compres <= SCTP_ADDR_SUBSET) {
1431 			retval = 1;
1432 			mutex_exit(&tf->tf_lock);
1433 			goto done;
1434 		}
1435 		if (compres == SCTP_ADDR_OVERLAP) {
1436 			dprint(1,
1437 			    ("new assoc from %x:%x:%x:%x overlaps with %p\n",
1438 			    SCTP_PRINTADDR(*hdraddr), sctp));
1439 			/*
1440 			 * While we still hold the lock, we need to
1441 			 * figure out which addresses have been
1442 			 * added so we can include them in the abort
1443 			 * we will send back. Since these faddrs will
1444 			 * never be used, we overload the rto field
1445 			 * here, setting it to 0 if the address was
1446 			 * not added, 1 if it was added.
1447 			 */
1448 			for (fp = fphead; fp; fp = fp->next) {
1449 				if (sctp_lookup_faddr(sctp, &fp->faddr)) {
1450 					fp->rto = 0;
1451 				} else {
1452 					fp->rto = 1;
1453 					nadded++;
1454 				}
1455 			}
1456 			mutex_exit(&tf->tf_lock);
1457 			goto done;
1458 		}
1459 	}
1460 	mutex_exit(&tf->tf_lock);
1461 
1462 	/* All faddrs are disjoint; legit new association */
1463 	retval = 1;
1464 
1465 done:
1466 	/* If are attempted adds, send back an abort listing the addrs */
1467 	if (nadded > 0) {
1468 		void *dtail;
1469 		size_t dlen;
1470 
1471 		dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP);
1472 		if (dtail == NULL) {
1473 			goto cleanup;
1474 		}
1475 
1476 		ph = dtail;
1477 		dlen = 0;
1478 		for (fp = fphead; fp; fp = fp->next) {
1479 			if (fp->rto == 0) {
1480 				continue;
1481 			}
1482 			if (IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
1483 				ipaddr_t addr4;
1484 
1485 				ph->sph_type = htons(PARM_ADDR4);
1486 				ph->sph_len = htons(PARM_ADDR4_LEN);
1487 				IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4);
1488 				ph++;
1489 				bcopy(&addr4, ph, sizeof (addr4));
1490 				ph = (sctp_parm_hdr_t *)
1491 				    ((char *)ph + sizeof (addr4));
1492 				dlen += PARM_ADDR4_LEN;
1493 			} else {
1494 				ph->sph_type = htons(PARM_ADDR6);
1495 				ph->sph_len = htons(PARM_ADDR6_LEN);
1496 				ph++;
1497 				bcopy(&fp->faddr, ph, sizeof (fp->faddr));
1498 				ph = (sctp_parm_hdr_t *)
1499 				    ((char *)ph + sizeof (fp->faddr));
1500 				dlen += PARM_ADDR6_LEN;
1501 			}
1502 		}
1503 
1504 		/* Send off the abort */
1505 		sctp_send_abort(sctp, sctp_init2vtag(ich),
1506 		    SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE);
1507 
1508 		kmem_free(dtail, PARM_ADDR6_LEN * nadded);
1509 	}
1510 
1511 cleanup:
1512 	/* Clean up */
1513 	if (fphead) {
1514 		sctp_faddr_t *fpn;
1515 		for (fp = fphead; fp; fp = fpn) {
1516 			fpn = fp->next;
1517 			kmem_cache_free(sctp_kmem_faddr_cache, fp);
1518 		}
1519 	}
1520 
1521 	return (retval);
1522 }
1523 
1524 void
1525 sctp_congest_reset(sctp_t *sctp)
1526 {
1527 	sctp_faddr_t *fp;
1528 
1529 	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
1530 		fp->ssthresh = sctp_initial_mtu;
1531 		fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial;
1532 		fp->suna = 0;
1533 		fp->pba = 0;
1534 	}
1535 }
1536 
1537 /*
1538  * Return zero if the buffers are identical in length and content.
1539  * This is used for comparing extension header buffers.
1540  * Note that an extension header would be declared different
1541  * even if all that changed was the next header value in that header i.e.
1542  * what really changed is the next extension header.
1543  */
1544 boolean_t
1545 sctp_cmpbuf(void *a, uint_t alen, boolean_t b_valid, void *b, uint_t blen)
1546 {
1547 	if (!b_valid)
1548 		blen = 0;
1549 
1550 	if (alen != blen)
1551 		return (B_TRUE);
1552 	if (alen == 0)
1553 		return (B_FALSE);	/* Both zero length */
1554 	return (bcmp(a, b, alen));
1555 }
1556 
1557 /*
1558  * Preallocate memory for sctp_savebuf(). Returns B_TRUE if ok.
1559  * Return B_FALSE if memory allocation fails - don't change any state!
1560  */
1561 boolean_t
1562 sctp_allocbuf(void **dstp, uint_t *dstlenp, boolean_t src_valid,
1563     void *src, uint_t srclen)
1564 {
1565 	void *dst;
1566 
1567 	if (!src_valid)
1568 		srclen = 0;
1569 
1570 	ASSERT(*dstlenp == 0);
1571 	if (src != NULL && srclen != 0) {
1572 		dst = mi_zalloc(srclen);
1573 		if (dst == NULL)
1574 			return (B_FALSE);
1575 	} else {
1576 		dst = NULL;
1577 	}
1578 	if (*dstp != NULL) {
1579 		mi_free(*dstp);
1580 		*dstp = NULL;
1581 		*dstlenp = 0;
1582 	}
1583 	*dstp = dst;
1584 	if (dst != NULL)
1585 		*dstlenp = srclen;
1586 	else
1587 		*dstlenp = 0;
1588 	return (B_TRUE);
1589 }
1590 
1591 /*
1592  * Replace what is in *dst, *dstlen with the source.
1593  * Assumes sctp_allocbuf has already been called.
1594  */
1595 void
1596 sctp_savebuf(void **dstp, uint_t *dstlenp, boolean_t src_valid,
1597     void *src, uint_t srclen)
1598 {
1599 	if (!src_valid)
1600 		srclen = 0;
1601 
1602 	ASSERT(*dstlenp == srclen);
1603 	if (src != NULL && srclen != 0) {
1604 		bcopy(src, *dstp, srclen);
1605 	}
1606 }
1607 
1608 static void
1609 sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr)
1610 {
1611 	bcopy(addr, &fp->faddr, sizeof (*addr));
1612 	if (IN6_IS_ADDR_V4MAPPED(addr)) {
1613 		fp->isv4 = 1;
1614 		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
1615 		fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr_len) &
1616 			~(SCTP_ALIGN - 1);
1617 	} else {
1618 		fp->isv4 = 0;
1619 		fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr6_len) &
1620 			~(SCTP_ALIGN - 1);
1621 	}
1622 	fp->cwnd = sctp_slow_start_initial * fp->sfa_pmss;
1623 	fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max);
1624 	fp->srtt = -1;
1625 	fp->rtt_updates = 0;
1626 	fp->strikes = 0;
1627 	fp->max_retr = sctp->sctp_pp_max_rxt;
1628 	/* Mark it as not confirmed. */
1629 	fp->state = SCTP_FADDRS_UNCONFIRMED;
1630 	fp->hb_interval = sctp->sctp_hb_interval;
1631 	fp->ssthresh = sctp_initial_ssthresh;
1632 	fp->suna = 0;
1633 	fp->pba = 0;
1634 	fp->acked = 0;
1635 	fp->lastactive = lbolt64;
1636 	fp->timer_mp = NULL;
1637 	fp->hb_pending = B_FALSE;
1638 	fp->timer_running = 0;
1639 	fp->df = 1;
1640 	fp->pmtu_discovered = 0;
1641 	fp->rc_timer_mp = NULL;
1642 	fp->rc_timer_running = 0;
1643 	fp->next = NULL;
1644 	fp->ire = NULL;
1645 	fp->T3expire = 0;
1646 	(void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret,
1647 	    sizeof (fp->hb_secret));
1648 	fp->hb_expiry = lbolt64;
1649 
1650 	sctp_ire2faddr(sctp, fp);
1651 }
1652 
1653 /*ARGSUSED*/
1654 static void
1655 faddr_destructor(void *buf, void *cdrarg)
1656 {
1657 	sctp_faddr_t *fp = buf;
1658 
1659 	ASSERT(fp->timer_mp == NULL);
1660 	ASSERT(fp->timer_running == 0);
1661 
1662 	ASSERT(fp->rc_timer_mp == NULL);
1663 	ASSERT(fp->rc_timer_running == 0);
1664 }
1665 
1666 void
1667 sctp_faddr_init()
1668 {
1669 	sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache",
1670 	    sizeof (sctp_faddr_t), 0, NULL, faddr_destructor,
1671 	    NULL, NULL, NULL, 0);
1672 }
1673 
1674 void
1675 sctp_faddr_fini()
1676 {
1677 	kmem_cache_destroy(sctp_kmem_faddr_cache);
1678 }
1679