xref: /titanic_50/usr/src/uts/common/inet/sctp/sctp_common.c (revision 544f04c0a127b3f42f33facf10edfc0e0d896bc3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/kmem.h>
35 #include <sys/socket.h>
36 #include <sys/random.h>
37 
38 #include <netinet/in.h>
39 #include <netinet/ip6.h>
40 #include <netinet/sctp.h>
41 
42 #include <inet/common.h>
43 #include <inet/ip.h>
44 #include <inet/ip6.h>
45 #include <inet/ip_ire.h>
46 #include <inet/mi.h>
47 #include <inet/mib2.h>
48 #include <inet/nd.h>
49 #include <inet/optcom.h>
50 #include <inet/sctp_ip.h>
51 #include <inet/ipclassifier.h>
52 #include "sctp_impl.h"
53 #include "sctp_addr.h"
54 
/*
 * kmem cache used by this file to allocate and free peer address
 * (sctp_faddr_t) structures.
 */
static struct kmem_cache *sctp_kmem_faddr_cache;
/* Initializes a freshly allocated sctp_faddr_t for the given address. */
static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *);
57 
58 /* Set the source address.  Refer to comments in sctp_ire2faddr(). */
59 static void
60 set_saddr(sctp_t *sctp, sctp_faddr_t *fp, boolean_t v6)
61 {
62 	if (sctp->sctp_bound_to_all) {
63 		V6_SET_ZERO(fp->saddr);
64 	} else {
65 		fp->saddr = sctp_get_valid_addr(sctp, v6);
66 		if (!v6 && IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) ||
67 		    v6 && IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
68 			fp->state = SCTP_FADDRS_UNREACH;
69 			/* Disable heartbeat. */
70 			fp->hb_expiry = 0;
71 			fp->hb_pending = B_FALSE;
72 			fp->strikes = 0;
73 		}
74 	}
75 }
76 
/*
 * Call this function to update the cached IRE of a peer addr fp.
 *
 * The previously cached IRE (if any) is released and a new one is
 * looked up for fp->faddr in the zone of this sctp.  From the new IRE
 * this function derives:
 *   - the source address to use with fp (only if the IRE's ipif is part
 *     of this association and may be used as a source; otherwise
 *     set_saddr() picks one),
 *   - initial RTO values, when fp has none yet (srtt == -1),
 *   - the path MSS (sfa_pmss) and, if needed, the congestion window.
 * If no IRE is found, only the source address is chosen and fp->ire
 * stays NULL.  In all cases, if fp is the current peer address, the
 * header template, the association MSS and (for attached endpoints) the
 * ULP properties are refreshed.
 */
void
sctp_ire2faddr(sctp_t *sctp, sctp_faddr_t *fp)
{
	ire_t *ire;
	ipaddr_t addr4;
	in6_addr_t laddr;
	sctp_saddr_ipif_t *sp;
	uint_t	ipif_seqid;
	int hdrlen;

	/* Remove the previous cache IRE */
	if ((ire = fp->ire) != NULL) {
		IRE_REFRELE_NOTR(ire);
		fp->ire = NULL;
	}

	/*
	 * If this addr is not reachable, mark it as unconfirmed for now, the
	 * state will be changed back to unreachable later in this function
	 * if it is still the case.
	 */
	if (fp->state == SCTP_FADDRS_UNREACH) {
		fp->state = SCTP_FADDRS_UNCONFIRMED;
	}

	if (fp->isv4) {
		IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4);

		ire = ire_cache_lookup(addr4, sctp->sctp_zoneid);
		if (ire == NULL) {
			dprint(3, ("ire2faddr: no ire for %x:%x:%x:%x\n",
			    SCTP_PRINTADDR(fp->faddr)));
			/*
			 * It is tempting to just leave the src addr
			 * unspecified and let IP figure it out, but we
			 * *cannot* do this, since IP may choose a src addr
			 * that is not part of this association... unless
			 * this sctp has bound to all addrs.  So if the ire
			 * lookup fails, try to find one in our src addr
			 * list, unless the sctp has bound to all addrs, in
			 * which case we change the src addr to unspec.
			 *
			 * Note that if this is a v6 endpoint but it does
			 * not have any v4 address at this point (e.g. may
			 * have been  deleted), sctp_get_valid_addr() will
			 * return mapped INADDR_ANY.  In this case, this
			 * address should be marked not reachable so that
			 * it won't be used to send data.
			 */
			set_saddr(sctp, fp, B_FALSE);
			goto set_current;
		}
		ipif_seqid = ire->ire_ipif->ipif_seqid;
		dprint(2, ("ire2faddr: got ire for %x:%x:%x:%x, ",
			SCTP_PRINTADDR(fp->faddr)));
		dprint(2, ("src = %x\n", ire->ire_src_addr));
		IN6_IPADDR_TO_V4MAPPED(ire->ire_src_addr, &laddr);

		/* make sure the laddr is part of this association */
		if ((sp = sctp_ipif_lookup(sctp, ipif_seqid)) !=
		    NULL && !sp->saddr_ipif_dontsrc) {
			/* The address is evidently usable; confirm it. */
			if (sp->saddr_ipif_unconfirmed == 1)
				sp->saddr_ipif_unconfirmed = 0;
			fp->saddr = laddr;
		} else {
			ip2dbg(("ire2faddr: src addr is not part of assc\n"));
			set_saddr(sctp, fp, B_FALSE);
		}
	} else {
		/* IPv6 peer: same procedure as above with the v6 lookup. */
		ire = ire_cache_lookup_v6(&fp->faddr, sctp->sctp_zoneid);
		if (ire == NULL) {
			dprint(3, ("ire2faddr: no ire for %x:%x:%x:%x\n",
			    SCTP_PRINTADDR(fp->faddr)));
			set_saddr(sctp, fp, B_TRUE);
			goto set_current;
		}
		ipif_seqid = ire->ire_ipif->ipif_seqid;
		dprint(2, ("ire2faddr: got ire for %x:%x:%x:%x, ",
		    SCTP_PRINTADDR(fp->faddr)));
		dprint(2, ("src=%x:%x:%x:%x\n",
		    SCTP_PRINTADDR(ire->ire_src_addr_v6)));
		laddr = ire->ire_src_addr_v6;

		/* make sure the laddr is part of this association */

		if ((sp = sctp_ipif_lookup(sctp, ipif_seqid)) !=
		    NULL && !sp->saddr_ipif_dontsrc) {
			/* The address is evidently usable; confirm it. */
			if (sp->saddr_ipif_unconfirmed == 1)
				sp->saddr_ipif_unconfirmed = 0;
			fp->saddr = laddr;
		} else {
			dprint(2, ("ire2faddr: src addr is not part "
				"of assc\n"));
			set_saddr(sctp, fp, B_TRUE);
		}
	}

	/*
	 * Cache the IRE.  fp keeps its own (NOTR) hold; the IRE_REFRELE()
	 * below presumably drops the reference returned by the cache
	 * lookup -- NOTE(review): confirm against ire_cache_lookup().
	 * ire is still used afterwards, relying on the hold kept in fp->ire.
	 */
	IRE_REFHOLD_NOTR(ire);
	fp->ire = ire;
	if (fp->ire->ire_type == IRE_LOOPBACK && !sctp->sctp_loopback)
		sctp->sctp_loopback = 1;
	IRE_REFRELE(ire);

	/*
	 * Pull out RTO information for this faddr and use it if we don't
	 * have any yet.
	 */
	if (fp->srtt == -1 && ire->ire_uinfo.iulp_rtt != 0) {
		/* The cached value is in ms. */
		fp->srtt = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt);
		fp->rttvar = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt_sd);
		fp->rto = 3 * fp->srtt;

		/* Bound the RTO by configured min and max values */
		if (fp->rto < sctp->sctp_rto_min) {
			fp->rto = sctp->sctp_rto_min;
		}
		if (fp->rto > sctp->sctp_rto_max) {
			fp->rto = sctp->sctp_rto_max;
		}
	}

	/*
	 * Record the MTU for this faddr. If the MTU for this faddr has
	 * changed, check if the assc MTU will also change.
	 */
	if (fp->isv4) {
		hdrlen = sctp->sctp_hdr_len;
	} else {
		hdrlen = sctp->sctp_hdr6_len;
	}
	if ((fp->sfa_pmss + hdrlen) != ire->ire_max_frag) {
		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
		fp->sfa_pmss = (ire->ire_max_frag - hdrlen) & ~(SCTP_ALIGN - 1);
		/* Reset cwnd if it is below two segments of the new size. */
		if (fp->cwnd < (fp->sfa_pmss * 2)) {
			fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial;
		}
	}

set_current:
	/* Propagate the new values if this is the address in use. */
	if (fp == sctp->sctp_current) {
		sctp_faddr2hdraddr(fp, sctp);
		sctp->sctp_mss = fp->sfa_pmss;
		if (!SCTP_IS_DETACHED(sctp)) {
			sctp_set_ulp_prop(sctp);
		}
	}
}
229 
230 /*ARGSUSED*/
231 void
232 sctp_faddr2ire(sctp_t *sctp, sctp_faddr_t *fp)
233 {
234 	ire_t *ire;
235 
236 	if ((ire = fp->ire) == NULL) {
237 		return;
238 	}
239 
240 	mutex_enter(&ire->ire_lock);
241 
242 	/* If the cached IRE is going sway, there is no point to update it. */
243 	if (ire->ire_marks & IRE_MARK_CONDEMNED) {
244 		mutex_exit(&ire->ire_lock);
245 		IRE_REFRELE_NOTR(ire);
246 		fp->ire = NULL;
247 		return;
248 	}
249 
250 	/*
251 	 * Only record the PMTU for this faddr if we actually have
252 	 * done discovery. This prevents initialized default from
253 	 * clobbering any real info that IP may have.
254 	 */
255 	if (fp->pmtu_discovered) {
256 		if (fp->isv4) {
257 			ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr_len;
258 		} else {
259 			ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr6_len;
260 		}
261 	}
262 
263 	if (sctp_rtt_updates != 0 && fp->rtt_updates >= sctp_rtt_updates) {
264 		/*
265 		 * If there is no old cached values, initialize them
266 		 * conservatively.  Set them to be (1.5 * new value).
267 		 * This code copied from ip_ire_advise().  The cached
268 		 * value is in ms.
269 		 */
270 		if (ire->ire_uinfo.iulp_rtt != 0) {
271 			ire->ire_uinfo.iulp_rtt = (ire->ire_uinfo.iulp_rtt +
272 			    TICK_TO_MSEC(fp->srtt)) >> 1;
273 		} else {
274 			ire->ire_uinfo.iulp_rtt = TICK_TO_MSEC(fp->srtt +
275 			    (fp->srtt >> 1));
276 		}
277 		if (ire->ire_uinfo.iulp_rtt_sd != 0) {
278 			ire->ire_uinfo.iulp_rtt_sd =
279 			    (ire->ire_uinfo.iulp_rtt_sd +
280 			    TICK_TO_MSEC(fp->rttvar)) >> 1;
281 		} else {
282 			ire->ire_uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->rttvar +
283 			    (fp->rttvar >> 1));
284 		}
285 		fp->rtt_updates = 0;
286 	}
287 
288 	mutex_exit(&ire->ire_lock);
289 }
290 
291 /*
292  * The sender must set the total length in the IP header.
293  * If sendto == NULL, the current will be used.
294  */
295 mblk_t *
296 sctp_make_mp(sctp_t *sctp, sctp_faddr_t *sendto, int trailer)
297 {
298 	mblk_t *mp;
299 	size_t ipsctplen;
300 	int isv4;
301 	sctp_faddr_t *fp;
302 
303 	ASSERT(sctp->sctp_current != NULL || sendto != NULL);
304 	if (sendto == NULL) {
305 		fp = sctp->sctp_current;
306 	} else {
307 		fp = sendto;
308 	}
309 	isv4 = fp->isv4;
310 
311 	/* Try to look for another IRE again. */
312 	if (fp->ire == NULL)
313 		sctp_ire2faddr(sctp, fp);
314 
315 	/* There is no suitable source address to use, return. */
316 	if (fp->state == SCTP_FADDRS_UNREACH)
317 		return (NULL);
318 
319 	if (isv4) {
320 		ipsctplen = sctp->sctp_hdr_len;
321 	} else {
322 		ipsctplen = sctp->sctp_hdr6_len;
323 	}
324 
325 	mp = allocb(ipsctplen + sctp_wroff_xtra + trailer, BPRI_MED);
326 	if (mp == NULL) {
327 		ip1dbg(("sctp_make_mp: error makign mp..\n"));
328 		return (NULL);
329 	}
330 	mp->b_rptr += sctp_wroff_xtra;
331 	mp->b_wptr = mp->b_rptr + ipsctplen;
332 
333 	ASSERT(OK_32PTR(mp->b_wptr));
334 
335 	if (isv4) {
336 		ipha_t *iph = (ipha_t *)mp->b_rptr;
337 
338 		bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen);
339 		if (fp != sctp->sctp_current) {
340 			/* fiddle with the dst addr */
341 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
342 			/* fix up src addr */
343 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
344 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
345 				    iph->ipha_src);
346 			} else if (sctp->sctp_bound_to_all) {
347 				iph->ipha_src = INADDR_ANY;
348 			}
349 		}
350 		/* set or clear the don't fragment bit */
351 		if (fp->df) {
352 			iph->ipha_fragment_offset_and_flags = htons(IPH_DF);
353 		} else {
354 			iph->ipha_fragment_offset_and_flags = 0;
355 		}
356 	} else {
357 		bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen);
358 		if (fp != sctp->sctp_current) {
359 			/* fiddle with the dst addr */
360 			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
361 			/* fix up src addr */
362 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
363 				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
364 			} else if (sctp->sctp_bound_to_all) {
365 				bzero(&((ip6_t *)(mp->b_rptr))->ip6_src,
366 				    sizeof (in6_addr_t));
367 			}
368 		}
369 	}
370 	ASSERT(sctp->sctp_connp != NULL);
371 
372 	/*
373 	 * IP will not free this IRE if it is condemned.  SCTP needs to
374 	 * free it.
375 	 */
376 	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
377 		IRE_REFRELE_NOTR(fp->ire);
378 		fp->ire = NULL;
379 	}
380 	/* Stash the conn and ire ptr info. for IP */
381 	SCTP_STASH_IPINFO(mp, fp->ire);
382 
383 	return (mp);
384 }
385 
386 /*
387  * Notify upper layers about preferred write offset, write size.
388  */
389 void
390 sctp_set_ulp_prop(sctp_t *sctp)
391 {
392 	int hdrlen;
393 
394 	if (sctp->sctp_current->isv4) {
395 		hdrlen = sctp->sctp_hdr_len;
396 	} else {
397 		hdrlen = sctp->sctp_hdr6_len;
398 	}
399 	ASSERT(sctp->sctp_ulpd);
400 
401 	ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss);
402 	sctp->sctp_ulp_prop(sctp->sctp_ulpd,
403 	    sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t),
404 	    sctp->sctp_mss - sizeof (sctp_data_hdr_t));
405 }
406 
407 void
408 sctp_set_iplen(sctp_t *sctp, mblk_t *mp)
409 {
410 	uint16_t	sum = 0;
411 	ipha_t		*iph;
412 	ip6_t		*ip6h;
413 	mblk_t		*pmp = mp;
414 	boolean_t	isv4;
415 
416 	isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION);
417 	for (; pmp; pmp = pmp->b_cont)
418 		sum += pmp->b_wptr - pmp->b_rptr;
419 
420 	if (isv4) {
421 		iph = (ipha_t *)mp->b_rptr;
422 		iph->ipha_length = htons(sum);
423 	} else {
424 		ip6h = (ip6_t *)mp->b_rptr;
425 		ip6h->ip6_plen = htons(sum - ((char *)&sctp->sctp_ip6h[1] -
426 		    sctp->sctp_iphc6));
427 	}
428 }
429 
430 int
431 sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2)
432 {
433 	int na1 = 0;
434 	int overlap = 0;
435 	int equal = 1;
436 	int onematch;
437 	sctp_faddr_t *fp1, *fp2;
438 
439 	for (fp1 = a1; fp1; fp1 = fp1->next) {
440 		onematch = 0;
441 		for (fp2 = a2; fp2; fp2 = fp2->next) {
442 			if (IN6_ARE_ADDR_EQUAL(&fp1->faddr, &fp2->faddr)) {
443 				overlap++;
444 				onematch = 1;
445 				break;
446 			}
447 			if (!onematch) {
448 				equal = 0;
449 			}
450 		}
451 		na1++;
452 	}
453 
454 	if (equal) {
455 		return (SCTP_ADDR_EQUAL);
456 	}
457 	if (overlap == na1) {
458 		return (SCTP_ADDR_SUBSET);
459 	}
460 	if (overlap) {
461 		return (SCTP_ADDR_OVERLAP);
462 	}
463 	return (SCTP_ADDR_DISJOINT);
464 }
465 
466 /*
467  * Returns 0 on success, -1 on memory allocation failure. If sleep
468  * is true, should never fail.
469  * Caller must hold conn fanout lock.
470  */
471 int
472 sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep)
473 {
474 	sctp_faddr_t *faddr;
475 
476 	dprint(4, ("add_faddr: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr),
477 	    sleep));
478 
479 	if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) {
480 		return (-1);
481 	}
482 
483 	sctp_init_faddr(sctp, faddr, addr);
484 	ASSERT(faddr->next == NULL);
485 
486 	/* tack it on to the end */
487 	if (sctp->sctp_lastfaddr != NULL) {
488 		sctp->sctp_lastfaddr->next = faddr;
489 	} else {
490 		/* list is empty */
491 		ASSERT(sctp->sctp_faddrs == NULL);
492 		sctp->sctp_faddrs = faddr;
493 	}
494 	sctp->sctp_lastfaddr = faddr;
495 
496 	return (0);
497 }
498 
499 /*
500  * Caller must hold conn fanout lock.
501  */
502 int
503 sctp_add_faddr_first(sctp_t *sctp, in6_addr_t *addr, int sleep)
504 {
505 	sctp_faddr_t *faddr;
506 
507 	dprint(4, ("add_faddr_first: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr),
508 	    sleep));
509 
510 	if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) {
511 		return (-1);
512 	}
513 	sctp_init_faddr(sctp, faddr, addr);
514 	ASSERT(faddr->next == NULL);
515 
516 	/* Put it at the beginning of the list */
517 	if (sctp->sctp_faddrs != NULL) {
518 		faddr->next = sctp->sctp_faddrs;
519 	} else {
520 		sctp->sctp_lastfaddr = faddr;
521 	}
522 	sctp->sctp_faddrs = faddr;
523 
524 	return (0);
525 }
526 
527 sctp_faddr_t *
528 sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr)
529 {
530 	sctp_faddr_t *fp;
531 
532 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
533 		if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr))
534 			break;
535 	}
536 
537 	return (fp);
538 }
539 
540 sctp_faddr_t *
541 sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr)
542 {
543 	for (; fp; fp = fp->next) {
544 		if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) {
545 			break;
546 		}
547 	}
548 
549 	return (fp);
550 }
551 
552 void
553 sctp_faddr2hdraddr(sctp_faddr_t *fp, sctp_t *sctp)
554 {
555 	if (fp->isv4) {
556 		IN6_V4MAPPED_TO_IPADDR(&fp->faddr,
557 		    sctp->sctp_ipha->ipha_dst);
558 		/* Must not allow unspec src addr if not bound to all */
559 		if (IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) &&
560 		    !sctp->sctp_bound_to_all) {
561 			/*
562 			 * set the src to the first v4 saddr and hope
563 			 * for the best
564 			 */
565 			fp->saddr = sctp_get_valid_addr(sctp, B_FALSE);
566 		}
567 		IN6_V4MAPPED_TO_IPADDR(&fp->saddr, sctp->sctp_ipha->ipha_src);
568 		/* update don't fragment bit */
569 		if (fp->df) {
570 			sctp->sctp_ipha->ipha_fragment_offset_and_flags =
571 			    htons(IPH_DF);
572 		} else {
573 			sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0;
574 		}
575 	} else {
576 		sctp->sctp_ip6h->ip6_dst = fp->faddr;
577 		/* Must not allow unspec src addr if not bound to all */
578 		if (IN6_IS_ADDR_UNSPECIFIED(&fp->saddr) &&
579 		    !sctp->sctp_bound_to_all) {
580 			/*
581 			 * set the src to the first v6 saddr and hope
582 			 * for the best
583 			 */
584 			fp->saddr = sctp_get_valid_addr(sctp, B_TRUE);
585 		}
586 		sctp->sctp_ip6h->ip6_src = fp->saddr;
587 	}
588 }
589 
590 void
591 sctp_redo_faddr_srcs(sctp_t *sctp)
592 {
593 	sctp_faddr_t *fp;
594 
595 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
596 		sctp_ire2faddr(sctp, fp);
597 	}
598 
599 	sctp_faddr2hdraddr(sctp->sctp_current, sctp);
600 }
601 
602 void
603 sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
604 {
605 	int64_t now = lbolt64;
606 
607 	fp->strikes = 0;
608 	sctp->sctp_strikes = 0;
609 	fp->lastactive = now;
610 	fp->hb_expiry = now + SET_HB_INTVL(fp);
611 	fp->hb_pending = B_FALSE;
612 	if (fp->state != SCTP_FADDRS_ALIVE) {
613 		fp->state = SCTP_FADDRS_ALIVE;
614 		sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0);
615 
616 		/* If this is the primary, switch back to it now */
617 		if (fp == sctp->sctp_primary) {
618 			sctp->sctp_current = fp;
619 			sctp->sctp_mss = fp->sfa_pmss;
620 			/* Reset the addrs in the composite header */
621 			sctp_faddr2hdraddr(fp, sctp);
622 			if (!SCTP_IS_DETACHED(sctp)) {
623 				sctp_set_ulp_prop(sctp);
624 			}
625 		}
626 	}
627 	if (fp->ire == NULL) {
628 		/* Should have a full IRE now */
629 		sctp_ire2faddr(sctp, fp);
630 	}
631 }
632 
633 int
634 sctp_is_a_faddr_clean(sctp_t *sctp)
635 {
636 	sctp_faddr_t *fp;
637 
638 	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
639 		if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) {
640 			return (1);
641 		}
642 	}
643 
644 	return (0);
645 }
646 
647 /*
648  * Returns 0 if there is at leave one other active faddr, -1 if there
649  * are none. If there are none left, faddr_dead() will start killing the
650  * association.
651  * If the downed faddr was the current faddr, a new current faddr
652  * will be chosen.
653  */
654 int
655 sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate)
656 {
657 	sctp_faddr_t *ofp;
658 
659 	if (fp->state == SCTP_FADDRS_ALIVE) {
660 		sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_UNREACHABLE, 0);
661 	}
662 	fp->state = newstate;
663 
664 	dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n",
665 	    SCTP_PRINTADDR(fp->faddr), newstate));
666 
667 	if (fp == sctp->sctp_current) {
668 		/* Current faddr down; need to switch it */
669 		sctp->sctp_current = NULL;
670 	}
671 
672 	/* Find next alive faddr */
673 	ofp = fp;
674 	for (fp = fp->next; fp; fp = fp->next) {
675 		if (fp->state == SCTP_FADDRS_ALIVE) {
676 			break;
677 		}
678 	}
679 
680 	if (fp == NULL) {
681 		/* Continue from beginning of list */
682 		for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->next) {
683 			if (fp->state == SCTP_FADDRS_ALIVE) {
684 				break;
685 			}
686 		}
687 	}
688 
689 	if (fp != ofp) {
690 		if (sctp->sctp_current == NULL) {
691 			dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n",
692 			    SCTP_PRINTADDR(fp->faddr)));
693 			sctp->sctp_current = fp;
694 			sctp->sctp_mss = fp->sfa_pmss;
695 
696 			/* Reset the addrs in the composite header */
697 			sctp_faddr2hdraddr(fp, sctp);
698 
699 			if (!SCTP_IS_DETACHED(sctp)) {
700 				sctp_set_ulp_prop(sctp);
701 			}
702 		}
703 		return (0);
704 	}
705 
706 
707 	/* All faddrs are down; kill the association */
708 	dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n"));
709 	BUMP_MIB(&sctp_mib, sctpAborted);
710 	sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ?
711 	    SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL);
712 	sctp_clean_death(sctp, sctp->sctp_client_errno ?
713 	    sctp->sctp_client_errno : ETIMEDOUT);
714 
715 	return (-1);
716 }
717 
718 sctp_faddr_t *
719 sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp)
720 {
721 	sctp_faddr_t *nfp = NULL;
722 
723 	if (ofp == NULL) {
724 		ofp = sctp->sctp_current;
725 	}
726 
727 	/* Find the next live one */
728 	for (nfp = ofp->next; nfp != NULL; nfp = nfp->next) {
729 		if (nfp->state == SCTP_FADDRS_ALIVE) {
730 			break;
731 		}
732 	}
733 
734 	if (nfp == NULL) {
735 		/* Continue from beginning of list */
736 		for (nfp = sctp->sctp_faddrs; nfp != ofp; nfp = nfp->next) {
737 			if (nfp->state == SCTP_FADDRS_ALIVE) {
738 				break;
739 			}
740 		}
741 	}
742 
743 	/*
744 	 * nfp could only be NULL if all faddrs are down, and when
745 	 * this happens, faddr_dead() should have killed the
746 	 * association. Hence this assertion...
747 	 */
748 	ASSERT(nfp != NULL);
749 	return (nfp);
750 }
751 
752 void
753 sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp)
754 {
755 	sctp_faddr_t *fpp;
756 
757 	if (!sctp->sctp_faddrs) {
758 		return;
759 	}
760 
761 	if (fp->timer_mp != NULL) {
762 		sctp_timer_free(fp->timer_mp);
763 		fp->timer_mp = NULL;
764 		fp->timer_running = 0;
765 	}
766 	if (fp->rc_timer_mp != NULL) {
767 		sctp_timer_free(fp->rc_timer_mp);
768 		fp->rc_timer_mp = NULL;
769 		fp->rc_timer_running = 0;
770 	}
771 	if (fp->ire != NULL) {
772 		IRE_REFRELE_NOTR(fp->ire);
773 		fp->ire = NULL;
774 	}
775 
776 	if (fp == sctp->sctp_faddrs) {
777 		goto gotit;
778 	}
779 
780 	for (fpp = sctp->sctp_faddrs; fpp->next != fp; fpp = fpp->next)
781 		;
782 
783 gotit:
784 	ASSERT(sctp->sctp_conn_tfp != NULL);
785 	mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
786 	if (fp == sctp->sctp_faddrs) {
787 		sctp->sctp_faddrs = fp->next;
788 	} else {
789 		fpp->next = fp->next;
790 	}
791 	mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
792 	/* XXX faddr2ire? */
793 	kmem_cache_free(sctp_kmem_faddr_cache, fp);
794 }
795 
796 void
797 sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock)
798 {
799 	sctp_faddr_t *fp, *fpn;
800 
801 	if (sctp->sctp_faddrs == NULL) {
802 		ASSERT(sctp->sctp_lastfaddr == NULL);
803 		return;
804 	}
805 
806 	ASSERT(sctp->sctp_lastfaddr != NULL);
807 	sctp->sctp_lastfaddr = NULL;
808 	sctp->sctp_current = NULL;
809 	sctp->sctp_primary = NULL;
810 
811 	sctp_free_faddr_timers(sctp);
812 
813 	if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
814 		/* in conn fanout; need to hold lock */
815 		mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
816 	}
817 
818 	for (fp = sctp->sctp_faddrs; fp; fp = fpn) {
819 		fpn = fp->next;
820 		if (fp->ire != NULL)
821 			IRE_REFRELE_NOTR(fp->ire);
822 		kmem_cache_free(sctp_kmem_faddr_cache, fp);
823 	}
824 
825 	sctp->sctp_faddrs = NULL;
826 
827 	if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
828 		mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
829 	}
830 
831 }
832 
833 void
834 sctp_zap_addrs(sctp_t *sctp)
835 {
836 	sctp_zap_faddrs(sctp, 0);
837 	sctp_free_saddrs(sctp);
838 }
839 
840 /*
841  * Initialize the IPv4 header. Loses any record of any IP options.
842  */
843 int
844 sctp_header_init_ipv4(sctp_t *sctp, int sleep)
845 {
846 	sctp_hdr_t	*sctph;
847 
848 	/*
849 	 * This is a simple initialization. If there's
850 	 * already a template, it should never be too small,
851 	 * so reuse it.  Otherwise, allocate space for the new one.
852 	 */
853 	if (sctp->sctp_iphc != NULL) {
854 		ASSERT(sctp->sctp_iphc_len >= SCTP_MAX_COMBINED_HEADER_LENGTH);
855 		bzero(sctp->sctp_iphc, sctp->sctp_iphc_len);
856 	} else {
857 		sctp->sctp_iphc_len = SCTP_MAX_COMBINED_HEADER_LENGTH;
858 		sctp->sctp_iphc = kmem_zalloc(sctp->sctp_iphc_len, sleep);
859 		if (sctp->sctp_iphc == NULL) {
860 			sctp->sctp_iphc_len = 0;
861 			return (ENOMEM);
862 		}
863 	}
864 
865 	sctp->sctp_ipha = (ipha_t *)sctp->sctp_iphc;
866 
867 	sctp->sctp_hdr_len = sizeof (ipha_t) + sizeof (sctp_hdr_t);
868 	sctp->sctp_ip_hdr_len = sizeof (ipha_t);
869 	sctp->sctp_ipha->ipha_length = htons(sizeof (ipha_t) +
870 	    sizeof (sctp_hdr_t));
871 	sctp->sctp_ipha->ipha_version_and_hdr_length
872 		= (IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS;
873 
874 	/*
875 	 * These two fields should be zero, and are already set above.
876 	 *
877 	 * sctp->sctp_ipha->ipha_ident,
878 	 * sctp->sctp_ipha->ipha_fragment_offset_and_flags.
879 	 */
880 
881 	sctp->sctp_ipha->ipha_ttl = sctp_ipv4_ttl;
882 	sctp->sctp_ipha->ipha_protocol = IPPROTO_SCTP;
883 
884 	sctph = (sctp_hdr_t *)(sctp->sctp_iphc + sizeof (ipha_t));
885 	sctp->sctp_sctph = sctph;
886 
887 	return (0);
888 }
889 
890 /*
891  * Update sctp_sticky_hdrs based on sctp_sticky_ipp.
892  * The headers include ip6i_t (if needed), ip6_t, any sticky extension
893  * headers, and the maximum size sctp header (to avoid reallocation
894  * on the fly for additional sctp options).
895  * Returns failure if can't allocate memory.
896  */
897 int
898 sctp_build_hdrs(sctp_t *sctp)
899 {
900 	char		*hdrs;
901 	uint_t		hdrs_len;
902 	ip6i_t		*ip6i;
903 	char		buf[SCTP_MAX_HDR_LENGTH];
904 	ip6_pkt_t	*ipp = &sctp->sctp_sticky_ipp;
905 	in6_addr_t	src;
906 	in6_addr_t	dst;
907 	uint8_t		hoplimit;
908 	/*
909 	 * save the existing sctp header and source/dest IP addresses
910 	 */
911 	bcopy(sctp->sctp_sctph6, buf, sizeof (sctp_hdr_t));
912 	src = sctp->sctp_ip6h->ip6_src;
913 	dst = sctp->sctp_ip6h->ip6_dst;
914 	hoplimit = sctp->sctp_ip6h->ip6_hops;
915 	hdrs_len = ip_total_hdrs_len_v6(ipp) + SCTP_MAX_HDR_LENGTH;
916 	ASSERT(hdrs_len != 0);
917 	if (hdrs_len > sctp->sctp_iphc6_len) {
918 		/* Need to reallocate */
919 		hdrs = kmem_zalloc(hdrs_len, KM_NOSLEEP);
920 		if (hdrs == NULL)
921 			return (ENOMEM);
922 
923 		if (sctp->sctp_iphc6_len != 0)
924 			kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
925 		sctp->sctp_iphc6 = hdrs;
926 		sctp->sctp_iphc6_len = hdrs_len;
927 	}
928 	ip_build_hdrs_v6((uchar_t *)sctp->sctp_iphc6,
929 	    hdrs_len - SCTP_MAX_HDR_LENGTH, ipp, IPPROTO_SCTP);
930 
931 	/* Set header fields not in ipp */
932 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
933 		ip6i = (ip6i_t *)sctp->sctp_iphc6;
934 		sctp->sctp_ip6h = (ip6_t *)&ip6i[1];
935 	} else {
936 		sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6;
937 	}
938 	/*
939 	 * sctp->sctp_ip_hdr_len will include ip6i_t if there is one.
940 	 */
941 	sctp->sctp_ip_hdr6_len = hdrs_len - SCTP_MAX_HDR_LENGTH;
942 	sctp->sctp_sctph6 = (sctp_hdr_t *)(sctp->sctp_iphc6 +
943 	    sctp->sctp_ip_hdr6_len);
944 	sctp->sctp_hdr6_len = sctp->sctp_ip_hdr6_len + sizeof (sctp_hdr_t);
945 
946 	bcopy(buf, sctp->sctp_sctph6, sizeof (sctp_hdr_t));
947 
948 	sctp->sctp_ip6h->ip6_src = src;
949 	sctp->sctp_ip6h->ip6_dst = dst;
950 	/*
951 	 * If IPV6_HOPLIMIT was set in ipp, use that value.
952 	 * For sticky options, if it does not exist use
953 	 * the default/saved value (which was set in ip_build_hdrs_v6())
954 	 * All this as per RFC 2922.
955 	 */
956 	if (!(ipp->ipp_fields & IPPF_HOPLIMIT))
957 		sctp->sctp_ip6h->ip6_hops = hoplimit;
958 	/*
959 	 * Set the IPv6 header payload length.
960 	 * If there's an ip6i_t included, don't count it in the length.
961 	 */
962 	sctp->sctp_ip6h->ip6_plen = sctp->sctp_hdr6_len - IPV6_HDR_LEN;
963 	if (ipp->ipp_fields & IPPF_HAS_IP6I)
964 		sctp->sctp_ip6h->ip6_plen -= sizeof (ip6i_t);
965 	/*
966 	 * If we're setting extension headers after a connection
967 	 * has been established, and if we have a routing header
968 	 * among the extension headers, call ip_massage_options_v6 to
969 	 * manipulate the routing header/ip6_dst set the checksum
970 	 * difference in the sctp header template.
971 	 * (This happens in sctp_connect_ipv6 if the routing header
972 	 * is set prior to the connect.)
973 	 */
974 
975 	if ((sctp->sctp_state >= SCTPS_COOKIE_WAIT) &&
976 	    (sctp->sctp_sticky_ipp.ipp_fields & IPPF_RTHDR)) {
977 		ip6_rthdr_t *rth;
978 
979 		rth = ip_find_rthdr_v6(sctp->sctp_ip6h,
980 		    (uint8_t *)sctp->sctp_sctph6);
981 		if (rth != NULL)
982 			(void) ip_massage_options_v6(sctp->sctp_ip6h, rth);
983 	}
984 	return (0);
985 }
986 
987 /*
988  * Initialize the IPv6 header. Loses any record of any IPv6 extension headers.
989  */
990 int
991 sctp_header_init_ipv6(sctp_t *sctp, int sleep)
992 {
993 	sctp_hdr_t	*sctph;
994 
995 	/*
996 	 * This is a simple initialization. If there's
997 	 * already a template, it should never be too small,
998 	 * so reuse it. Otherwise, allocate space for the new one.
999 	 * Ensure that there is enough space to "downgrade" the sctp_t
1000 	 * to an IPv4 sctp_t. This requires having space for a full load
1001 	 * of IPv4 options
1002 	 */
1003 	if (sctp->sctp_iphc6 != NULL) {
1004 		ASSERT(sctp->sctp_iphc6_len >=
1005 		    SCTP_MAX_COMBINED_HEADER_LENGTH);
1006 		bzero(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
1007 	} else {
1008 		sctp->sctp_iphc6_len = SCTP_MAX_COMBINED_HEADER_LENGTH;
1009 		sctp->sctp_iphc6 = kmem_zalloc(sctp->sctp_iphc_len, sleep);
1010 		if (sctp->sctp_iphc6 == NULL) {
1011 			sctp->sctp_iphc6_len = 0;
1012 			return (ENOMEM);
1013 		}
1014 	}
1015 	sctp->sctp_hdr6_len = IPV6_HDR_LEN + sizeof (sctp_hdr_t);
1016 	sctp->sctp_ip_hdr6_len = IPV6_HDR_LEN;
1017 	sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6;
1018 
1019 	/* Initialize the header template */
1020 
1021 	sctp->sctp_ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1022 	sctp->sctp_ip6h->ip6_plen = ntohs(sizeof (sctp_hdr_t));
1023 	sctp->sctp_ip6h->ip6_nxt = IPPROTO_SCTP;
1024 	sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit;
1025 
1026 	sctph = (sctp_hdr_t *)(sctp->sctp_iphc6 + IPV6_HDR_LEN);
1027 	sctp->sctp_sctph6 = sctph;
1028 
1029 	return (0);
1030 }
1031 
1032 /*
1033  * XXX implement more sophisticated logic
1034  */
1035 void
1036 sctp_set_hdraddrs(sctp_t *sctp)
1037 {
1038 	sctp_faddr_t *fp;
1039 	int gotv4 = 0;
1040 	int gotv6 = 0;
1041 
1042 	ASSERT(sctp->sctp_faddrs != NULL);
1043 	ASSERT(sctp->sctp_nsaddrs > 0);
1044 
1045 	/* Set up using the primary first */
1046 	if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->faddr)) {
1047 		IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->faddr,
1048 		    sctp->sctp_ipha->ipha_dst);
1049 		/* saddr may be unspec; make_mp() will handle this */
1050 		IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->saddr,
1051 		    sctp->sctp_ipha->ipha_src);
1052 		gotv4 = 1;
1053 		if (sctp->sctp_ipversion == IPV4_VERSION) {
1054 			goto copyports;
1055 		}
1056 	} else {
1057 		sctp->sctp_ip6h->ip6_dst = sctp->sctp_primary->faddr;
1058 		/* saddr may be unspec; make_mp() will handle this */
1059 		sctp->sctp_ip6h->ip6_src = sctp->sctp_primary->saddr;
1060 		gotv6 = 1;
1061 	}
1062 
1063 	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
1064 		if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
1065 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr,
1066 			    sctp->sctp_ipha->ipha_dst);
1067 			/* copy in the faddr_t's saddr */
1068 			IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
1069 			    sctp->sctp_ipha->ipha_src);
1070 			gotv4 = 1;
1071 			if (sctp->sctp_ipversion == IPV4_VERSION || gotv6) {
1072 				break;
1073 			}
1074 		} else if (!gotv6) {
1075 			sctp->sctp_ip6h->ip6_dst = fp->faddr;
1076 			/* copy in the faddr_t's saddr */
1077 			sctp->sctp_ip6h->ip6_src = fp->saddr;
1078 			gotv6 = 1;
1079 			if (gotv4) {
1080 				break;
1081 			}
1082 		}
1083 	}
1084 
1085 copyports:
1086 	/* copy in the ports for good measure */
1087 	sctp->sctp_sctph->sh_sport = sctp->sctp_lport;
1088 	sctp->sctp_sctph->sh_dport = sctp->sctp_fport;
1089 
1090 	sctp->sctp_sctph6->sh_sport = sctp->sctp_lport;
1091 	sctp->sctp_sctph6->sh_dport = sctp->sctp_fport;
1092 }
1093 
1094 void
1095 sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp)
1096 {
1097 	mblk_t *mp;
1098 	sctp_parm_hdr_t *ph;
1099 	size_t len;
1100 	int pad;
1101 
1102 	len = sizeof (*ph) + ntohs(uph->sph_len);
1103 	if ((pad = len % 4) != 0) {
1104 		pad = 4 - pad;
1105 		len += pad;
1106 	}
1107 	mp = allocb(len, BPRI_MED);
1108 	if (mp == NULL) {
1109 		return;
1110 	}
1111 
1112 	ph = (sctp_parm_hdr_t *)(mp->b_rptr);
1113 	ph->sph_type = htons(PARM_UNRECOGNIZED);
1114 	ph->sph_len = htons(len - pad);
1115 
1116 	/* copy in the unrecognized parameter */
1117 	bcopy(uph, ph + 1, ntohs(uph->sph_len));
1118 
1119 	mp->b_wptr = mp->b_rptr + len;
1120 	if (*errmp != NULL) {
1121 		linkb(*errmp, mp);
1122 	} else {
1123 		*errmp = mp;
1124 	}
1125 }
1126 
1127 /*
1128  * o Bounds checking
1129  * o Updates remaining
1130  * o Checks alignment
1131  */
1132 sctp_parm_hdr_t *
1133 sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining)
1134 {
1135 	int pad;
1136 	uint16_t len;
1137 
1138 	len = ntohs(current->sph_len);
1139 	*remaining -= len;
1140 	if (*remaining < sizeof (*current) || len < sizeof (*current)) {
1141 		return (NULL);
1142 	}
1143 	if ((pad = len & (SCTP_ALIGN - 1)) != 0) {
1144 		pad = SCTP_ALIGN - pad;
1145 		*remaining -= pad;
1146 	}
1147 	/*LINTED pointer cast may result in improper alignment*/
1148 	current = (sctp_parm_hdr_t *)((char *)current + len + pad);
1149 	return (current);
1150 }
1151 
1152 /*
1153  * Sets the address parameters given in the INIT chunk into sctp's
1154  * faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are
1155  * no address parameters in the INIT chunk, a single faddr is created
1156  * from the ip hdr at the beginning of pkt.
1157  * If there already are existing addresses hanging from sctp, merge
1158  * them in, if the old info contains addresses which are not present
1159  * in this new info, get rid of them, and clean the pointers if there's
1160  * messages which have this as their target address.
1161  *
1162  * We also re-adjust the source address list here since the list may
1163  * contain more than what is actually part of the association. If
1164  * we get here from sctp_send_cookie_echo(), we are on the active
1165  * side and psctp will be NULL and ich will be the INIT-ACK chunk.
1166  * If we get here from sctp_accept_comm(), ich will be the INIT chunk
1167  * and psctp will the listening endpoint.
1168  *
1169  * INIT processing: When processing the INIT we inherit the src address
1170  * list from the listener. For a loopback or linklocal association, we
1171  * delete the list and just take the address from the IP header (since
1172  * that's how we created the INIT-ACK). Additionally, for loopback we
1173  * ignore the address params in the INIT. For determining which address
1174  * types were sent in the INIT-ACK we follow the same logic as in
1175  * creating the INIT-ACK. We delete addresses of the type that are not
1176  * supported by the peer.
1177  *
1178  * INIT-ACK processing: When processing the INIT-ACK since we had not
1179  * included addr params for loopback or linklocal addresses when creating
1180  * the INIT, we just use the address from the IP header. Further, for
1181  * loopback we ignore the addr param list. We mark addresses of the
1182  * type not supported by the peer as unconfirmed.
1183  *
1184  * In case of INIT processing we look for supported address types in the
1185  * supported address param, if present. In both cases the address type in
1186  * the IP header is supported as well as types for addresses in the param
1187  * list, if any.
1188  *
1189  * Once we have the supported address types sctp_check_saddr() runs through
1190  * the source address list and deletes or marks as unconfirmed address of
1191  * types not supported by the peer.
1192  *
1193  * Returns 0 on success, sys errno on failure
1194  */
1195 int
1196 sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt,
1197     sctp_chunk_hdr_t *ich, uint_t *sctp_options)
1198 {
1199 	sctp_init_chunk_t	*init;
1200 	ipha_t			*iph;
1201 	ip6_t			*ip6h;
1202 	in6_addr_t		hdrsaddr[1];
1203 	in6_addr_t		hdrdaddr[1];
1204 	sctp_parm_hdr_t		*ph;
1205 	ssize_t			remaining;
1206 	int			isv4;
1207 	int			err;
1208 	sctp_faddr_t		*fp;
1209 	int			supp_af = 0;
1210 	boolean_t		check_saddr = B_TRUE;
1211 
1212 	if (sctp_options != NULL)
1213 		*sctp_options = 0;
1214 
1215 	/* extract the address from the IP header */
1216 	isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
1217 	if (isv4) {
1218 		iph = (ipha_t *)pkt->b_rptr;
1219 		IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr);
1220 		IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr);
1221 		supp_af |= PARM_SUPP_V4;
1222 	} else {
1223 		ip6h = (ip6_t *)pkt->b_rptr;
1224 		hdrsaddr[0] = ip6h->ip6_src;
1225 		hdrdaddr[0] = ip6h->ip6_dst;
1226 		supp_af |= PARM_SUPP_V6;
1227 	}
1228 
1229 	/*
1230 	 * Unfortunately, we can't delay this because adding an faddr
1231 	 * looks for the presence of the source address (from the ire
1232 	 * for the faddr) in the source address list. We could have
1233 	 * delayed this if, say, this was a loopback/linklocal connection.
1234 	 * Now, we just end up nuking this list and taking the addr from
1235 	 * the IP header for loopback/linklocal.
1236 	 */
1237 	if (psctp != NULL && psctp->sctp_nsaddrs > 0) {
1238 		ASSERT(sctp->sctp_nsaddrs == 0);
1239 
1240 		err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP);
1241 		if (err != 0)
1242 			return (err);
1243 	}
1244 	/*
1245 	 * We will add the faddr before parsing the address list as this
1246 	 * might be a loopback connection and we would not have to
1247 	 * go through the list.
1248 	 *
1249 	 * Make sure the header's addr is in the list
1250 	 */
1251 	fp = sctp_lookup_faddr(sctp, hdrsaddr);
1252 	if (fp == NULL) {
1253 		/* not included; add it now */
1254 		if (sctp_add_faddr_first(sctp, hdrsaddr, KM_NOSLEEP) == -1)
1255 			return (ENOMEM);
1256 
1257 		/* sctp_faddrs will be the hdr addr */
1258 		fp = sctp->sctp_faddrs;
1259 	}
1260 	/* make the header addr the primary */
1261 	sctp->sctp_primary = fp;
1262 	sctp->sctp_current = fp;
1263 	sctp->sctp_mss = fp->sfa_pmss;
1264 
1265 	/* For loopback connections & linklocal get address from the header */
1266 	if (sctp->sctp_loopback || sctp->sctp_linklocal) {
1267 		if (sctp->sctp_nsaddrs != 0)
1268 			sctp_free_saddrs(sctp);
1269 		if ((err = sctp_saddr_add_addr(sctp, hdrdaddr)) != 0)
1270 			return (err);
1271 		/* For loopback ignore address list */
1272 		if (sctp->sctp_loopback)
1273 			return (0);
1274 		check_saddr = B_FALSE;
1275 	}
1276 
1277 	/* Walk the params in the INIT [ACK], pulling out addr params */
1278 	remaining = ntohs(ich->sch_len) - sizeof (*ich) -
1279 	    sizeof (sctp_init_chunk_t);
1280 	if (remaining < sizeof (*ph)) {
1281 		if (check_saddr) {
1282 			sctp_check_saddr(sctp, supp_af, psctp == NULL ?
1283 			    B_FALSE : B_TRUE);
1284 		}
1285 		ASSERT(sctp_saddr_lookup(sctp, hdrdaddr) != NULL);
1286 		return (0);
1287 	}
1288 
1289 	init = (sctp_init_chunk_t *)(ich + 1);
1290 	ph = (sctp_parm_hdr_t *)(init + 1);
1291 
1292 	/* params will have already been byteordered when validating */
1293 	while (ph != NULL) {
1294 		if (ph->sph_type == htons(PARM_SUPP_ADDRS)) {
1295 			int		plen;
1296 			uint16_t	*p;
1297 			uint16_t	addrtype;
1298 
1299 			ASSERT(psctp != NULL);
1300 			plen = ntohs(ph->sph_len);
1301 			p = (uint16_t *)(ph + 1);
1302 			while (plen > 0) {
1303 				addrtype = ntohs(*p);
1304 				switch (addrtype) {
1305 					case PARM_ADDR6:
1306 						supp_af |= PARM_SUPP_V6;
1307 						break;
1308 					case PARM_ADDR4:
1309 						supp_af |= PARM_SUPP_V4;
1310 						break;
1311 					default:
1312 						break;
1313 				}
1314 				p++;
1315 				plen -= sizeof (*p);
1316 			}
1317 		} else if (ph->sph_type == htons(PARM_ADDR4)) {
1318 			if (remaining >= PARM_ADDR4_LEN) {
1319 				in6_addr_t addr;
1320 				ipaddr_t ta;
1321 
1322 				supp_af |= PARM_SUPP_V4;
1323 				/*
1324 				 * Screen out broad/multicasts & loopback.
1325 				 * If the endpoint only accepts v6 address,
1326 				 * go to the next one.
1327 				 */
1328 				bcopy(ph + 1, &ta, sizeof (ta));
1329 				if (ta == 0 ||
1330 				    ta == INADDR_BROADCAST ||
1331 				    ta == htonl(INADDR_LOOPBACK) ||
1332 				    IN_MULTICAST(ta) ||
1333 				    sctp->sctp_connp->conn_ipv6_v6only) {
1334 					goto next;
1335 				}
1336 				/*
1337 				 * XXX also need to check for subnet
1338 				 * broadcasts. This should probably
1339 				 * wait until we have full access
1340 				 * to the ILL tables.
1341 				 */
1342 
1343 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
1344 				    (ph + 1), &addr);
1345 				/* Check for duplicate. */
1346 				if (sctp_lookup_faddr(sctp, &addr) != NULL)
1347 					goto next;
1348 
1349 				/* OK, add it to the faddr set */
1350 				if (sctp_add_faddr(sctp, &addr,
1351 					KM_NOSLEEP) != 0) {
1352 					return (ENOMEM);
1353 				}
1354 			}
1355 		} else if (ph->sph_type == htons(PARM_ADDR6) &&
1356 		    sctp->sctp_family == AF_INET6) {
1357 			/* An v4 socket should not take v6 addresses. */
1358 			if (remaining >= PARM_ADDR6_LEN) {
1359 				in6_addr_t *addr6;
1360 
1361 				supp_af |= PARM_SUPP_V6;
1362 				addr6 = (in6_addr_t *)(ph + 1);
1363 				/*
1364 				 * Screen out link locals, mcast, loopback
1365 				 * and bogus v6 address.
1366 				 */
1367 				if (IN6_IS_ADDR_LINKLOCAL(addr6) ||
1368 				    IN6_IS_ADDR_MULTICAST(addr6) ||
1369 				    IN6_IS_ADDR_LOOPBACK(addr6) ||
1370 				    IN6_IS_ADDR_V4MAPPED(addr6)) {
1371 					goto next;
1372 				}
1373 				/* Check for duplicate. */
1374 				if (sctp_lookup_faddr(sctp, addr6) != NULL)
1375 					goto next;
1376 
1377 				if (sctp_add_faddr(sctp,
1378 				    (in6_addr_t *)(ph + 1), KM_NOSLEEP) != 0) {
1379 					return (ENOMEM);
1380 				}
1381 			}
1382 		} else if (ph->sph_type == htons(PARM_FORWARD_TSN)) {
1383 			if (sctp_options != NULL)
1384 				*sctp_options |= SCTP_PRSCTP_OPTION;
1385 		} /* else; skip */
1386 
1387 next:
1388 		ph = sctp_next_parm(ph, &remaining);
1389 	}
1390 	if (check_saddr) {
1391 		sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE :
1392 		    B_TRUE);
1393 	}
1394 	ASSERT(sctp_saddr_lookup(sctp, hdrdaddr) != NULL);
1395 	return (0);
1396 }
1397 
1398 /*
1399  * Returns 0 if the check failed and the restart should be refused,
1400  * 1 if the check succeeded.
1401  */
1402 int
1403 sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports,
1404     int sleep)
1405 {
1406 	sctp_faddr_t *fp, *fpa, *fphead = NULL;
1407 	sctp_parm_hdr_t *ph;
1408 	ssize_t remaining;
1409 	int isv4;
1410 	ipha_t *iph;
1411 	ip6_t *ip6h;
1412 	in6_addr_t hdraddr[1];
1413 	int retval = 0;
1414 	sctp_tf_t *tf;
1415 	sctp_t *sctp;
1416 	int compres;
1417 	sctp_init_chunk_t *init;
1418 	int nadded = 0;
1419 
1420 	/* extract the address from the IP header */
1421 	isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
1422 	if (isv4) {
1423 		iph = (ipha_t *)pkt->b_rptr;
1424 		IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr);
1425 	} else {
1426 		ip6h = (ip6_t *)pkt->b_rptr;
1427 		hdraddr[0] = ip6h->ip6_src;
1428 	}
1429 
1430 	/* Walk the params in the INIT [ACK], pulling out addr params */
1431 	remaining = ntohs(ich->sch_len) - sizeof (*ich) -
1432 	    sizeof (sctp_init_chunk_t);
1433 	if (remaining < sizeof (*ph)) {
1434 		/* no parameters; restart OK */
1435 		return (1);
1436 	}
1437 	init = (sctp_init_chunk_t *)(ich + 1);
1438 	ph = (sctp_parm_hdr_t *)(init + 1);
1439 
1440 	while (ph != NULL) {
1441 		/* params will have already been byteordered when validating */
1442 		if (ph->sph_type == htons(PARM_ADDR4)) {
1443 			if (remaining >= PARM_ADDR4_LEN) {
1444 				in6_addr_t addr;
1445 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
1446 				    (ph + 1), &addr);
1447 				fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
1448 				    sleep);
1449 				if (!fpa) {
1450 					goto done;
1451 				}
1452 				bzero(fpa, sizeof (*fpa));
1453 				fpa->faddr = addr;
1454 				fpa->next = NULL;
1455 			}
1456 		} else if (ph->sph_type == htons(PARM_ADDR6)) {
1457 			if (remaining >= PARM_ADDR6_LEN) {
1458 				fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
1459 				    sleep);
1460 				if (!fpa) {
1461 					goto done;
1462 				}
1463 				bzero(fpa, sizeof (*fpa));
1464 				bcopy(ph + 1, &fpa->faddr,
1465 				    sizeof (fpa->faddr));
1466 				fpa->next = NULL;
1467 			}
1468 		} else {
1469 			/* else not addr param; skip */
1470 			fpa = NULL;
1471 		}
1472 		/* link in the new addr, if it was an addr param */
1473 		if (fpa) {
1474 			if (!fphead) {
1475 				fphead = fpa;
1476 				fp = fphead;
1477 			} else {
1478 				fp->next = fpa;
1479 				fp = fpa;
1480 			}
1481 		}
1482 
1483 		ph = sctp_next_parm(ph, &remaining);
1484 	}
1485 
1486 	if (fphead == NULL) {
1487 		/* no addr parameters; restart OK */
1488 		return (1);
1489 	}
1490 
1491 	/*
1492 	 * got at least one; make sure the header's addr is
1493 	 * in the list
1494 	 */
1495 	fp = sctp_lookup_faddr_nosctp(fphead, hdraddr);
1496 	if (!fp) {
1497 		/* not included; add it now */
1498 		fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep);
1499 		if (!fp) {
1500 			goto done;
1501 		}
1502 		bzero(fp, sizeof (*fp));
1503 		fp->faddr = *hdraddr;
1504 		fp->next = fphead;
1505 		fphead = fp;
1506 	}
1507 
1508 	/*
1509 	 * Now, we can finally do the check: For each sctp instance
1510 	 * on the hash line for ports, compare its faddr set against
1511 	 * the new one. If the new one is a strict subset of any
1512 	 * existing sctp's faddrs, the restart is OK. However, if there
1513 	 * is an overlap, this could be an attack, so return failure.
1514 	 * If all sctp's faddrs are disjoint, this is a legitimate new
1515 	 * association.
1516 	 */
1517 	tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]);
1518 	mutex_enter(&tf->tf_lock);
1519 
1520 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
1521 		if (ports != sctp->sctp_ports) {
1522 			continue;
1523 		}
1524 		compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs);
1525 		if (compres <= SCTP_ADDR_SUBSET) {
1526 			retval = 1;
1527 			mutex_exit(&tf->tf_lock);
1528 			goto done;
1529 		}
1530 		if (compres == SCTP_ADDR_OVERLAP) {
1531 			dprint(1,
1532 			    ("new assoc from %x:%x:%x:%x overlaps with %p\n",
1533 			    SCTP_PRINTADDR(*hdraddr), sctp));
1534 			/*
1535 			 * While we still hold the lock, we need to
1536 			 * figure out which addresses have been
1537 			 * added so we can include them in the abort
1538 			 * we will send back. Since these faddrs will
1539 			 * never be used, we overload the rto field
1540 			 * here, setting it to 0 if the address was
1541 			 * not added, 1 if it was added.
1542 			 */
1543 			for (fp = fphead; fp; fp = fp->next) {
1544 				if (sctp_lookup_faddr(sctp, &fp->faddr)) {
1545 					fp->rto = 0;
1546 				} else {
1547 					fp->rto = 1;
1548 					nadded++;
1549 				}
1550 			}
1551 			mutex_exit(&tf->tf_lock);
1552 			goto done;
1553 		}
1554 	}
1555 	mutex_exit(&tf->tf_lock);
1556 
1557 	/* All faddrs are disjoint; legit new association */
1558 	retval = 1;
1559 
1560 done:
1561 	/* If are attempted adds, send back an abort listing the addrs */
1562 	if (nadded > 0) {
1563 		void *dtail;
1564 		size_t dlen;
1565 
1566 		dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP);
1567 		if (dtail == NULL) {
1568 			goto cleanup;
1569 		}
1570 
1571 		ph = dtail;
1572 		dlen = 0;
1573 		for (fp = fphead; fp; fp = fp->next) {
1574 			if (fp->rto == 0) {
1575 				continue;
1576 			}
1577 			if (IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
1578 				ipaddr_t addr4;
1579 
1580 				ph->sph_type = htons(PARM_ADDR4);
1581 				ph->sph_len = htons(PARM_ADDR4_LEN);
1582 				IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4);
1583 				ph++;
1584 				bcopy(&addr4, ph, sizeof (addr4));
1585 				ph = (sctp_parm_hdr_t *)
1586 				    ((char *)ph + sizeof (addr4));
1587 				dlen += PARM_ADDR4_LEN;
1588 			} else {
1589 				ph->sph_type = htons(PARM_ADDR6);
1590 				ph->sph_len = htons(PARM_ADDR6_LEN);
1591 				ph++;
1592 				bcopy(&fp->faddr, ph, sizeof (fp->faddr));
1593 				ph = (sctp_parm_hdr_t *)
1594 				    ((char *)ph + sizeof (fp->faddr));
1595 				dlen += PARM_ADDR6_LEN;
1596 			}
1597 		}
1598 
1599 		/* Send off the abort */
1600 		sctp_send_abort(sctp, sctp_init2vtag(ich),
1601 		    SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE);
1602 
1603 		kmem_free(dtail, PARM_ADDR6_LEN * nadded);
1604 	}
1605 
1606 cleanup:
1607 	/* Clean up */
1608 	if (fphead) {
1609 		sctp_faddr_t *fpn;
1610 		for (fp = fphead; fp; fp = fpn) {
1611 			fpn = fp->next;
1612 			kmem_cache_free(sctp_kmem_faddr_cache, fp);
1613 		}
1614 	}
1615 
1616 	return (retval);
1617 }
1618 
1619 void
1620 sctp_congest_reset(sctp_t *sctp)
1621 {
1622 	sctp_faddr_t *fp;
1623 
1624 	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
1625 		fp->ssthresh = sctp_initial_mtu;
1626 		fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial;
1627 		fp->suna = 0;
1628 		fp->pba = 0;
1629 	}
1630 }
1631 
1632 /*
1633  * Return zero if the buffers are identical in length and content.
1634  * This is used for comparing extension header buffers.
1635  * Note that an extension header would be declared different
1636  * even if all that changed was the next header value in that header i.e.
1637  * what really changed is the next extension header.
1638  */
1639 boolean_t
1640 sctp_cmpbuf(void *a, uint_t alen, boolean_t b_valid, void *b, uint_t blen)
1641 {
1642 	if (!b_valid)
1643 		blen = 0;
1644 
1645 	if (alen != blen)
1646 		return (B_TRUE);
1647 	if (alen == 0)
1648 		return (B_FALSE);	/* Both zero length */
1649 	return (bcmp(a, b, alen));
1650 }
1651 
1652 /*
1653  * Preallocate memory for sctp_savebuf(). Returns B_TRUE if ok.
1654  * Return B_FALSE if memory allocation fails - don't change any state!
1655  */
1656 boolean_t
1657 sctp_allocbuf(void **dstp, uint_t *dstlenp, boolean_t src_valid,
1658     void *src, uint_t srclen)
1659 {
1660 	void *dst;
1661 
1662 	if (!src_valid)
1663 		srclen = 0;
1664 
1665 	ASSERT(*dstlenp == 0);
1666 	if (src != NULL && srclen != 0) {
1667 		dst = mi_zalloc(srclen);
1668 		if (dst == NULL)
1669 			return (B_FALSE);
1670 	} else {
1671 		dst = NULL;
1672 	}
1673 	if (*dstp != NULL) {
1674 		mi_free(*dstp);
1675 		*dstp = NULL;
1676 		*dstlenp = 0;
1677 	}
1678 	*dstp = dst;
1679 	if (dst != NULL)
1680 		*dstlenp = srclen;
1681 	else
1682 		*dstlenp = 0;
1683 	return (B_TRUE);
1684 }
1685 
1686 /*
1687  * Replace what is in *dst, *dstlen with the source.
1688  * Assumes sctp_allocbuf has already been called.
1689  */
1690 void
1691 sctp_savebuf(void **dstp, uint_t *dstlenp, boolean_t src_valid,
1692     void *src, uint_t srclen)
1693 {
1694 	if (!src_valid)
1695 		srclen = 0;
1696 
1697 	ASSERT(*dstlenp == srclen);
1698 	if (src != NULL && srclen != 0) {
1699 		bcopy(src, *dstp, srclen);
1700 	}
1701 }
1702 
1703 static void
1704 sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr)
1705 {
1706 	bcopy(addr, &fp->faddr, sizeof (*addr));
1707 	if (IN6_IS_ADDR_V4MAPPED(addr)) {
1708 		fp->isv4 = 1;
1709 		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
1710 		fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr_len) &
1711 			~(SCTP_ALIGN - 1);
1712 	} else {
1713 		fp->isv4 = 0;
1714 		fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr6_len) &
1715 			~(SCTP_ALIGN - 1);
1716 	}
1717 	fp->cwnd = sctp_slow_start_initial * fp->sfa_pmss;
1718 	fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max);
1719 	fp->srtt = -1;
1720 	fp->rtt_updates = 0;
1721 	fp->strikes = 0;
1722 	fp->max_retr = sctp->sctp_pp_max_rxt;
1723 	/* Mark it as not confirmed. */
1724 	fp->state = SCTP_FADDRS_UNCONFIRMED;
1725 	fp->hb_interval = sctp->sctp_hb_interval;
1726 	fp->ssthresh = sctp_initial_ssthresh;
1727 	fp->suna = 0;
1728 	fp->pba = 0;
1729 	fp->acked = 0;
1730 	fp->lastactive = lbolt64;
1731 	fp->timer_mp = NULL;
1732 	fp->hb_pending = B_FALSE;
1733 	fp->timer_running = 0;
1734 	fp->df = 1;
1735 	fp->pmtu_discovered = 0;
1736 	fp->rc_timer_mp = NULL;
1737 	fp->rc_timer_running = 0;
1738 	fp->next = NULL;
1739 	fp->ire = NULL;
1740 	fp->T3expire = 0;
1741 	(void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret,
1742 	    sizeof (fp->hb_secret));
1743 	fp->hb_expiry = lbolt64;
1744 
1745 	sctp_ire2faddr(sctp, fp);
1746 }
1747 
1748 /*ARGSUSED*/
1749 static void
1750 faddr_destructor(void *buf, void *cdrarg)
1751 {
1752 	sctp_faddr_t *fp = buf;
1753 
1754 	ASSERT(fp->timer_mp == NULL);
1755 	ASSERT(fp->timer_running == 0);
1756 
1757 	ASSERT(fp->rc_timer_mp == NULL);
1758 	ASSERT(fp->rc_timer_running == 0);
1759 }
1760 
1761 void
1762 sctp_faddr_init()
1763 {
1764 	sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache",
1765 	    sizeof (sctp_faddr_t), 0, NULL, faddr_destructor,
1766 	    NULL, NULL, NULL, 0);
1767 }
1768 
1769 void
1770 sctp_faddr_fini()
1771 {
1772 	kmem_cache_destroy(sctp_kmem_faddr_cache);
1773 }
1774