/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/kmem.h>
#include <sys/socket.h>
#include <sys/random.h>

#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/sctp.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/mi.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/sctp_ip.h>
#include <inet/ipclassifier.h>
#include "sctp_impl.h"
#include "sctp_addr.h"

static struct kmem_cache *sctp_kmem_faddr_cache;
static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *);

/* Set the source address.  Refer to comments in sctp_ire2faddr(). */
static void
set_saddr(sctp_t *sctp, sctp_faddr_t *fp, boolean_t v6)
{
	if (sctp->sctp_bound_to_all) {
		V6_SET_ZERO(fp->saddr);
	} else {
		fp->saddr = sctp_get_valid_addr(sctp, v6);
		if ((!v6 && IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) ||
		    (v6 && IN6_IS_ADDR_UNSPECIFIED(&fp->saddr))) {
			fp->state = SCTP_FADDRS_UNREACH;
			/* Disable heartbeat. */
			fp->hb_expiry = 0;
			fp->hb_pending = B_FALSE;
			fp->strikes = 0;
		}
	}
}

/*
 * Call this function to update the cached IRE of a peer addr fp.
 */
void
sctp_ire2faddr(sctp_t *sctp, sctp_faddr_t *fp)
{
	ire_t *ire;
	ipaddr_t addr4;
	in6_addr_t laddr;
	sctp_saddr_ipif_t *sp;
	uint_t ipif_seqid;
	int hdrlen;

	/* Remove the previous cache IRE */
	if ((ire = fp->ire) != NULL) {
		IRE_REFRELE_NOTR(ire);
		fp->ire = NULL;
	}

	/*
	 * If this addr is not reachable, mark it as unconfirmed for now, the
	 * state will be changed back to unreachable later in this function
	 * if it is still the case.
	 */
	if (fp->state == SCTP_FADDRS_UNREACH) {
		fp->state = SCTP_FADDRS_UNCONFIRMED;
	}

	if (fp->isv4) {
		IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4);

		ire = ire_cache_lookup(addr4, sctp->sctp_zoneid);
		if (ire == NULL) {
			dprint(3, ("ire2faddr: no ire for %x:%x:%x:%x\n",
			    SCTP_PRINTADDR(fp->faddr)));
			/*
			 * It is tempting to just leave the src addr
			 * unspecified and let IP figure it out, but we
			 * *cannot* do this, since IP may choose a src addr
			 * that is not part of this association... unless
			 * this sctp has bound to all addrs.  So if the ire
			 * lookup fails, try to find one in our src addr
			 * list, unless the sctp has bound to all addrs, in
			 * which case we change the src addr to unspec.
			 *
			 * Note that if this is a v6 endpoint but it does
			 * not have any v4 address at this point (e.g. may
			 * have been deleted), sctp_get_valid_addr() will
			 * return mapped INADDR_ANY.  In this case, this
			 * address should be marked not reachable so that
			 * it won't be used to send data.
			 */
			set_saddr(sctp, fp, B_FALSE);
			goto set_current;
		}
		ipif_seqid = ire->ire_ipif->ipif_seqid;
		dprint(2, ("ire2faddr: got ire for %x:%x:%x:%x, ",
		    SCTP_PRINTADDR(fp->faddr)));
		dprint(2, ("src = %x\n", ire->ire_src_addr));
		IN6_IPADDR_TO_V4MAPPED(ire->ire_src_addr, &laddr);

		/* make sure the laddr is part of this association */
		if ((sp = sctp_ipif_lookup(sctp, ipif_seqid)) !=
		    NULL && !sp->saddr_ipif_dontsrc) {
			if (sp->saddr_ipif_unconfirmed == 1)
				sp->saddr_ipif_unconfirmed = 0;
			fp->saddr = laddr;
		} else {
			ip2dbg(("ire2faddr: src addr is not part of assc\n"));
			set_saddr(sctp, fp, B_FALSE);
		}
	} else {
		ire = ire_cache_lookup_v6(&fp->faddr, sctp->sctp_zoneid);
		if (ire == NULL) {
			dprint(3, ("ire2faddr: no ire for %x:%x:%x:%x\n",
			    SCTP_PRINTADDR(fp->faddr)));
			set_saddr(sctp, fp, B_TRUE);
			goto set_current;
		}
		ipif_seqid = ire->ire_ipif->ipif_seqid;
		dprint(2, ("ire2faddr: got ire for %x:%x:%x:%x, ",
		    SCTP_PRINTADDR(fp->faddr)));
		dprint(2, ("src=%x:%x:%x:%x\n",
		    SCTP_PRINTADDR(ire->ire_src_addr_v6)));
		laddr = ire->ire_src_addr_v6;

		/* make sure the laddr is part of this association */

		if ((sp = sctp_ipif_lookup(sctp, ipif_seqid)) !=
		    NULL && !sp->saddr_ipif_dontsrc) {
			if (sp->saddr_ipif_unconfirmed == 1)
				sp->saddr_ipif_unconfirmed = 0;
			fp->saddr = laddr;
		} else {
			dprint(2, ("ire2faddr: src addr is not part "
			    "of assc\n"));
			set_saddr(sctp, fp, B_TRUE);
		}
	}

	/* Cache the IRE */
	IRE_REFHOLD_NOTR(ire);
	fp->ire = ire;
	if (fp->ire->ire_type == IRE_LOOPBACK && !sctp->sctp_loopback)
		sctp->sctp_loopback = 1;
	IRE_REFRELE(ire);

	/*
	 * Pull out RTO information for this faddr and use it if we don't
	 * have any yet.
	 */
	if (fp->srtt == -1 && ire->ire_uinfo.iulp_rtt != 0) {
		/* The cached value is in ms. */
		fp->srtt = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt);
		fp->rttvar = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt_sd);
		fp->rto = 3 * fp->srtt;

		/* Bound the RTO by configured min and max values */
		if (fp->rto < sctp->sctp_rto_min) {
			fp->rto = sctp->sctp_rto_min;
		}
		if (fp->rto > sctp->sctp_rto_max) {
			fp->rto = sctp->sctp_rto_max;
		}
	}

	/*
	 * Record the MTU for this faddr.  If the MTU for this faddr has
	 * changed, check if the assc MTU will also change.
	 */
	if (fp->isv4) {
		hdrlen = sctp->sctp_hdr_len;
	} else {
		hdrlen = sctp->sctp_hdr6_len;
	}
	if ((fp->sfa_pmss + hdrlen) != ire->ire_max_frag) {
		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
		fp->sfa_pmss = (ire->ire_max_frag - hdrlen) & ~(SCTP_ALIGN - 1);
		if (fp->cwnd < (fp->sfa_pmss * 2)) {
			fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial;
		}
	}

set_current:
	if (fp == sctp->sctp_current) {
		sctp_faddr2hdraddr(fp, sctp);
		sctp->sctp_mss = fp->sfa_pmss;
		if (!SCTP_IS_DETACHED(sctp)) {
			sctp_set_ulp_prop(sctp);
		}
	}
}

/*ARGSUSED*/
void
sctp_faddr2ire(sctp_t *sctp, sctp_faddr_t *fp)
{
	ire_t *ire;

	if ((ire = fp->ire) == NULL) {
		return;
	}

	mutex_enter(&ire->ire_lock);

	/* If the cached IRE is going away, there is no point updating it. */
	if (ire->ire_marks & IRE_MARK_CONDEMNED) {
		mutex_exit(&ire->ire_lock);
		IRE_REFRELE_NOTR(ire);
		fp->ire = NULL;
		return;
	}

	/*
	 * Only record the PMTU for this faddr if we actually have
	 * done discovery.  This prevents the initialized default from
	 * clobbering any real info that IP may have.
	 */
	if (fp->pmtu_discovered) {
		if (fp->isv4) {
			ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr_len;
		} else {
			ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr6_len;
		}
	}

	if (sctp_rtt_updates != 0 && fp->rtt_updates >= sctp_rtt_updates) {
		/*
		 * If there are no old cached values, initialize them
		 * conservatively.  Set them to be (1.5 * new value).
		 * This code is copied from ip_ire_advise().  The cached
		 * value is in ms.
		 */
		if (ire->ire_uinfo.iulp_rtt != 0) {
			ire->ire_uinfo.iulp_rtt = (ire->ire_uinfo.iulp_rtt +
			    TICK_TO_MSEC(fp->srtt)) >> 1;
		} else {
			ire->ire_uinfo.iulp_rtt = TICK_TO_MSEC(fp->srtt +
			    (fp->srtt >> 1));
		}
		if (ire->ire_uinfo.iulp_rtt_sd != 0) {
			ire->ire_uinfo.iulp_rtt_sd =
			    (ire->ire_uinfo.iulp_rtt_sd +
			    TICK_TO_MSEC(fp->rttvar)) >> 1;
		} else {
			ire->ire_uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->rttvar +
			    (fp->rttvar >> 1));
		}
		fp->rtt_updates = 0;
	}

	mutex_exit(&ire->ire_lock);
}

/*
 * The sender must set the total length in the IP header.
 * If sendto == NULL, the current faddr will be used.
 */
mblk_t *
sctp_make_mp(sctp_t *sctp, sctp_faddr_t *sendto, int trailer)
{
	mblk_t *mp;
	size_t ipsctplen;
	int isv4;
	sctp_faddr_t *fp;

	ASSERT(sctp->sctp_current != NULL || sendto != NULL);
	if (sendto == NULL) {
		fp = sctp->sctp_current;
	} else {
		fp = sendto;
	}
	isv4 = fp->isv4;

	/* Try to look for another IRE again. */
	if (fp->ire == NULL)
		sctp_ire2faddr(sctp, fp);

	/* There is no suitable source address to use, return. */
	if (fp->state == SCTP_FADDRS_UNREACH)
		return (NULL);

	if (isv4) {
		ipsctplen = sctp->sctp_hdr_len;
	} else {
		ipsctplen = sctp->sctp_hdr6_len;
	}

	mp = allocb(ipsctplen + sctp_wroff_xtra + trailer, BPRI_MED);
	if (mp == NULL) {
		ip1dbg(("sctp_make_mp: error making mp..\n"));
		return (NULL);
	}
	mp->b_rptr += sctp_wroff_xtra;
	mp->b_wptr = mp->b_rptr + ipsctplen;

	ASSERT(OK_32PTR(mp->b_wptr));

	if (isv4) {
		ipha_t *iph = (ipha_t *)mp->b_rptr;

		bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen);
		if (fp != sctp->sctp_current) {
			/* fiddle with the dst addr */
			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
			/* fix up src addr */
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
				    iph->ipha_src);
			} else if (sctp->sctp_bound_to_all) {
				iph->ipha_src = INADDR_ANY;
			}
		}
		/* set or clear the don't fragment bit */
		if (fp->df) {
			iph->ipha_fragment_offset_and_flags = htons(IPH_DF);
		} else {
			iph->ipha_fragment_offset_and_flags = 0;
		}
	} else {
		bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen);
		if (fp != sctp->sctp_current) {
			/* fiddle with the dst addr */
			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
			/* fix up src addr */
			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
			} else if (sctp->sctp_bound_to_all) {
				bzero(&((ip6_t *)(mp->b_rptr))->ip6_src,
				    sizeof (in6_addr_t));
			}
		}
	}
	ASSERT(sctp->sctp_connp != NULL);

	/*
	 * IP will not free this IRE if it is condemned.  SCTP needs to
	 * free it.
	 */
	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
		IRE_REFRELE_NOTR(fp->ire);
		fp->ire = NULL;
	}
	/* Stash the conn and ire ptr info. for IP */
	SCTP_STASH_IPINFO(mp, fp->ire);

	return (mp);
}

/*
 * Notify upper layers about preferred write offset, write size.
 */
void
sctp_set_ulp_prop(sctp_t *sctp)
{
	int hdrlen;

	if (sctp->sctp_current->isv4) {
		hdrlen = sctp->sctp_hdr_len;
	} else {
		hdrlen = sctp->sctp_hdr6_len;
	}
	ASSERT(sctp->sctp_ulpd);

	ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss);
	sctp->sctp_ulp_prop(sctp->sctp_ulpd,
	    sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t),
	    sctp->sctp_mss - sizeof (sctp_data_hdr_t));
}

void
sctp_set_iplen(sctp_t *sctp, mblk_t *mp)
{
	uint16_t sum = 0;
	ipha_t *iph;
	ip6_t *ip6h;
	mblk_t *pmp = mp;
	boolean_t isv4;

	isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION);
	for (; pmp; pmp = pmp->b_cont)
		sum += pmp->b_wptr - pmp->b_rptr;

	if (isv4) {
		iph = (ipha_t *)mp->b_rptr;
		iph->ipha_length = htons(sum);
	} else {
		ip6h = (ip6_t *)mp->b_rptr;
		/*
		 * If an ip6i_t is present, the real IPv6 header
		 * immediately follows.
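		 * (An ip6i_t is tagged with a next-header value of
		 * IPPROTO_RAW, which is what the check below tests for.)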
		 */
		if (ip6h->ip6_nxt == IPPROTO_RAW)
			ip6h = (ip6_t *)&ip6h[1];
		ip6h->ip6_plen = htons(sum - ((char *)&sctp->sctp_ip6h[1] -
		    sctp->sctp_iphc6));
	}
}

int
sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2)
{
	int na1 = 0;
	int overlap = 0;
	int equal = 1;
	int onematch;
	sctp_faddr_t *fp1, *fp2;

	for (fp1 = a1; fp1; fp1 = fp1->next) {
		onematch = 0;
		for (fp2 = a2; fp2; fp2 = fp2->next) {
			if (IN6_ARE_ADDR_EQUAL(&fp1->faddr, &fp2->faddr)) {
				overlap++;
				onematch = 1;
				break;
			}
			if (!onematch) {
				equal = 0;
			}
		}
		na1++;
	}

	if (equal) {
		return (SCTP_ADDR_EQUAL);
	}
	if (overlap == na1) {
		return (SCTP_ADDR_SUBSET);
	}
	if (overlap) {
		return (SCTP_ADDR_OVERLAP);
	}
	return (SCTP_ADDR_DISJOINT);
}

/*
 * Returns 0 on success, -1 on memory allocation failure.  If sleep
 * is KM_SLEEP, this should never fail.
 * Caller must hold conn fanout lock.
 */
int
sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep)
{
	sctp_faddr_t *faddr;

	dprint(4, ("add_faddr: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr),
	    sleep));

	if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) {
		return (-1);
	}

	sctp_init_faddr(sctp, faddr, addr);
	ASSERT(faddr->next == NULL);

	/* tack it on to the end */
	if (sctp->sctp_lastfaddr != NULL) {
		sctp->sctp_lastfaddr->next = faddr;
	} else {
		/* list is empty */
		ASSERT(sctp->sctp_faddrs == NULL);
		sctp->sctp_faddrs = faddr;
	}
	sctp->sctp_lastfaddr = faddr;
	sctp->sctp_nfaddrs++;

	return (0);
}

/*
 * Caller must hold conn fanout lock.
 */
int
sctp_add_faddr_first(sctp_t *sctp, in6_addr_t *addr, int sleep)
{
	sctp_faddr_t *faddr;

	dprint(4, ("add_faddr_first: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr),
	    sleep));

	if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) {
		return (-1);
	}
	sctp_init_faddr(sctp, faddr, addr);
	ASSERT(faddr->next == NULL);

	/* Put it at the beginning of the list */
	if (sctp->sctp_faddrs != NULL) {
		faddr->next = sctp->sctp_faddrs;
	} else {
		sctp->sctp_lastfaddr = faddr;
	}
	sctp->sctp_faddrs = faddr;
	sctp->sctp_nfaddrs++;

	return (0);
}

sctp_faddr_t *
sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr)
{
	sctp_faddr_t *fp;

	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
		if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr))
			break;
	}

	return (fp);
}

sctp_faddr_t *
sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr)
{
	for (; fp; fp = fp->next) {
		if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) {
			break;
		}
	}

	return (fp);
}

void
sctp_faddr2hdraddr(sctp_faddr_t *fp, sctp_t *sctp)
{
	if (fp->isv4) {
		IN6_V4MAPPED_TO_IPADDR(&fp->faddr,
		    sctp->sctp_ipha->ipha_dst);
		/* Must not allow unspec src addr if not bound to all */
		if (IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) &&
		    !sctp->sctp_bound_to_all) {
			/*
			 * set the src to the first v4 saddr and hope
			 * for the best
			 */
			fp->saddr = sctp_get_valid_addr(sctp, B_FALSE);
		}
		IN6_V4MAPPED_TO_IPADDR(&fp->saddr, sctp->sctp_ipha->ipha_src);
		/* update don't fragment bit */
		if (fp->df) {
			sctp->sctp_ipha->ipha_fragment_offset_and_flags =
			    htons(IPH_DF);
		} else {
			sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0;
		}
	} else {
		sctp->sctp_ip6h->ip6_dst = fp->faddr;
		/* Must not allow unspec src addr if not bound to all */
		if (IN6_IS_ADDR_UNSPECIFIED(&fp->saddr) &&
		    !sctp->sctp_bound_to_all) {
			/*
			 * set the src to the first v6 saddr and hope
			 * for the best
			 */
			fp->saddr = sctp_get_valid_addr(sctp, B_TRUE);
		}
		sctp->sctp_ip6h->ip6_src = fp->saddr;
	}
}

void
sctp_redo_faddr_srcs(sctp_t *sctp)
{
	sctp_faddr_t *fp;

	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
		sctp_ire2faddr(sctp, fp);
	}

	sctp_faddr2hdraddr(sctp->sctp_current, sctp);
}

void
sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
{
	int64_t now = lbolt64;

	fp->strikes = 0;
	sctp->sctp_strikes = 0;
	fp->lastactive = now;
	fp->hb_expiry = now + SET_HB_INTVL(fp);
	fp->hb_pending = B_FALSE;
	if (fp->state != SCTP_FADDRS_ALIVE) {
		fp->state = SCTP_FADDRS_ALIVE;
		sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0);

		/* If this is the primary, switch back to it now */
		if (fp == sctp->sctp_primary) {
			sctp->sctp_current = fp;
			sctp->sctp_mss = fp->sfa_pmss;
			/* Reset the addrs in the composite header */
			sctp_faddr2hdraddr(fp, sctp);
			if (!SCTP_IS_DETACHED(sctp)) {
				sctp_set_ulp_prop(sctp);
			}
		}
	}
	if (fp->ire == NULL) {
		/* Should have a full IRE now */
		sctp_ire2faddr(sctp, fp);
	}
}

int
sctp_is_a_faddr_clean(sctp_t *sctp)
{
	sctp_faddr_t *fp;

	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
		if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) {
			return (1);
		}
	}

	return (0);
}

/*
 * Returns 0 if there is at least one other active faddr, -1 if there
 * are none.  If there are none left, faddr_dead() will start killing the
 * association.
 * If the downed faddr was the current faddr, a new current faddr
 * will be chosen.
 */
int
sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate)
{
	sctp_faddr_t *ofp;

	if (fp->state == SCTP_FADDRS_ALIVE) {
		sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_UNREACHABLE, 0);
	}
	fp->state = newstate;

	dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n",
	    SCTP_PRINTADDR(fp->faddr), newstate));

	if (fp == sctp->sctp_current) {
		/* Current faddr down; need to switch it */
		sctp->sctp_current = NULL;
	}

	/* Find next alive faddr */
	ofp = fp;
	for (fp = fp->next; fp; fp = fp->next) {
		if (fp->state == SCTP_FADDRS_ALIVE) {
			break;
		}
	}

	if (fp == NULL) {
		/* Continue from beginning of list */
		for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->next) {
			if (fp->state == SCTP_FADDRS_ALIVE) {
				break;
			}
		}
	}

	if (fp != ofp) {
		if (sctp->sctp_current == NULL) {
			dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n",
			    SCTP_PRINTADDR(fp->faddr)));
			sctp->sctp_current = fp;
			sctp->sctp_mss = fp->sfa_pmss;

			/* Reset the addrs in the composite header */
			sctp_faddr2hdraddr(fp, sctp);

			if (!SCTP_IS_DETACHED(sctp)) {
				sctp_set_ulp_prop(sctp);
			}
		}
		return (0);
	}

	/* All faddrs are down; kill the association */
	dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n"));
	BUMP_MIB(&sctp_mib, sctpAborted);
	sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ?
	    SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL);
	sctp_clean_death(sctp, sctp->sctp_client_errno ?
	    sctp->sctp_client_errno : ETIMEDOUT);

	return (-1);
}
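
/*
 * Returns the next peer address in SCTP_FADDRS_ALIVE state after ofp
 * (or after the current faddr if ofp is NULL), wrapping around to the
 * head of the faddr list if necessary.
 */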
sctp_faddr_t *
sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp)
{
	sctp_faddr_t *nfp = NULL;

	if (ofp == NULL) {
		ofp = sctp->sctp_current;
	}

	/* Find the next live one */
	for (nfp = ofp->next; nfp != NULL; nfp = nfp->next) {
		if (nfp->state == SCTP_FADDRS_ALIVE) {
			break;
		}
	}

	if (nfp == NULL) {
		/* Continue from beginning of list */
		for (nfp = sctp->sctp_faddrs; nfp != ofp; nfp = nfp->next) {
			if (nfp->state == SCTP_FADDRS_ALIVE) {
				break;
			}
		}
	}

	/*
	 * nfp could only be NULL if all faddrs are down, and when
	 * this happens, faddr_dead() should have killed the
	 * association.  Hence this assertion...
	 */
	ASSERT(nfp != NULL);
	return (nfp);
}

void
sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp)
{
	sctp_faddr_t *fpp;

	if (!sctp->sctp_faddrs) {
		return;
	}

	if (fp->timer_mp != NULL) {
		sctp_timer_free(fp->timer_mp);
		fp->timer_mp = NULL;
		fp->timer_running = 0;
	}
	if (fp->rc_timer_mp != NULL) {
		sctp_timer_free(fp->rc_timer_mp);
		fp->rc_timer_mp = NULL;
		fp->rc_timer_running = 0;
	}
	if (fp->ire != NULL) {
		IRE_REFRELE_NOTR(fp->ire);
		fp->ire = NULL;
	}

	if (fp == sctp->sctp_faddrs) {
		goto gotit;
	}

	for (fpp = sctp->sctp_faddrs; fpp->next != fp; fpp = fpp->next)
		;

gotit:
	ASSERT(sctp->sctp_conn_tfp != NULL);
	mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
	if (fp == sctp->sctp_faddrs) {
		sctp->sctp_faddrs = fp->next;
	} else {
		fpp->next = fp->next;
	}
	mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
	/* XXX faddr2ire? */
	kmem_cache_free(sctp_kmem_faddr_cache, fp);
	sctp->sctp_nfaddrs--;
}

void
sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock)
{
	sctp_faddr_t *fp, *fpn;

	if (sctp->sctp_faddrs == NULL) {
		ASSERT(sctp->sctp_lastfaddr == NULL);
		return;
	}

	ASSERT(sctp->sctp_lastfaddr != NULL);
	sctp->sctp_lastfaddr = NULL;
	sctp->sctp_current = NULL;
	sctp->sctp_primary = NULL;

	sctp_free_faddr_timers(sctp);

	if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
		/* in conn fanout; need to hold lock */
		mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
	}

	for (fp = sctp->sctp_faddrs; fp; fp = fpn) {
		fpn = fp->next;
		if (fp->ire != NULL)
			IRE_REFRELE_NOTR(fp->ire);
		kmem_cache_free(sctp_kmem_faddr_cache, fp);
		sctp->sctp_nfaddrs--;
	}

	sctp->sctp_faddrs = NULL;
	ASSERT(sctp->sctp_nfaddrs == 0);
	if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
		mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
	}
}

void
sctp_zap_addrs(sctp_t *sctp)
{
	sctp_zap_faddrs(sctp, 0);
	sctp_free_saddrs(sctp);
}

/*
 * Initialize the IPv4 header.  Loses any record of any IP options.
 */
int
sctp_header_init_ipv4(sctp_t *sctp, int sleep)
{
	sctp_hdr_t *sctph;

	/*
	 * This is a simple initialization.  If there's
	 * already a template, it should never be too small,
	 * so reuse it.  Otherwise, allocate space for the new one.
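	 * (The template is allocated at SCTP_MAX_COMBINED_HEADER_LENGTH
	 * bytes, which is why an existing one can simply be reused.)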
	 */
	if (sctp->sctp_iphc != NULL) {
		ASSERT(sctp->sctp_iphc_len >= SCTP_MAX_COMBINED_HEADER_LENGTH);
		bzero(sctp->sctp_iphc, sctp->sctp_iphc_len);
	} else {
		sctp->sctp_iphc_len = SCTP_MAX_COMBINED_HEADER_LENGTH;
		sctp->sctp_iphc = kmem_zalloc(sctp->sctp_iphc_len, sleep);
		if (sctp->sctp_iphc == NULL) {
			sctp->sctp_iphc_len = 0;
			return (ENOMEM);
		}
	}

	sctp->sctp_ipha = (ipha_t *)sctp->sctp_iphc;

	sctp->sctp_hdr_len = sizeof (ipha_t) + sizeof (sctp_hdr_t);
	sctp->sctp_ip_hdr_len = sizeof (ipha_t);
	sctp->sctp_ipha->ipha_length = htons(sizeof (ipha_t) +
	    sizeof (sctp_hdr_t));
	sctp->sctp_ipha->ipha_version_and_hdr_length
	    = (IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS;

	/*
	 * These two fields should be zero, and are already set above.
	 *
	 * sctp->sctp_ipha->ipha_ident,
	 * sctp->sctp_ipha->ipha_fragment_offset_and_flags.
	 */

	sctp->sctp_ipha->ipha_ttl = sctp_ipv4_ttl;
	sctp->sctp_ipha->ipha_protocol = IPPROTO_SCTP;

	sctph = (sctp_hdr_t *)(sctp->sctp_iphc + sizeof (ipha_t));
	sctp->sctp_sctph = sctph;

	return (0);
}

/*
 * Update sctp_sticky_hdrs based on sctp_sticky_ipp.
 * The headers include ip6i_t (if needed), ip6_t, any sticky extension
 * headers, and the maximum size sctp header (to avoid reallocation
 * on the fly for additional sctp options).
 * Returns failure if it can't allocate memory.
 */
int
sctp_build_hdrs(sctp_t *sctp)
{
	char *hdrs;
	uint_t hdrs_len;
	ip6i_t *ip6i;
	char buf[SCTP_MAX_HDR_LENGTH];
	ip6_pkt_t *ipp = &sctp->sctp_sticky_ipp;
	in6_addr_t src;
	in6_addr_t dst;

	/*
	 * save the existing sctp header and source/dest IP addresses
	 */
	bcopy(sctp->sctp_sctph6, buf, sizeof (sctp_hdr_t));
	src = sctp->sctp_ip6h->ip6_src;
	dst = sctp->sctp_ip6h->ip6_dst;
	hdrs_len = ip_total_hdrs_len_v6(ipp) + SCTP_MAX_HDR_LENGTH;
	ASSERT(hdrs_len != 0);
	if (hdrs_len > sctp->sctp_iphc6_len) {
		/* Need to reallocate */
		hdrs = kmem_zalloc(hdrs_len, KM_NOSLEEP);
		if (hdrs == NULL)
			return (ENOMEM);

		if (sctp->sctp_iphc6_len != 0)
			kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
		sctp->sctp_iphc6 = hdrs;
		sctp->sctp_iphc6_len = hdrs_len;
	}
	ip_build_hdrs_v6((uchar_t *)sctp->sctp_iphc6,
	    hdrs_len - SCTP_MAX_HDR_LENGTH, ipp, IPPROTO_SCTP);

	/* Set header fields not in ipp */
	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
		ip6i = (ip6i_t *)sctp->sctp_iphc6;
		sctp->sctp_ip6h = (ip6_t *)&ip6i[1];
	} else {
		sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6;
	}
	/*
	 * sctp->sctp_ip_hdr6_len will include ip6i_t if there is one.
	 */
	sctp->sctp_ip_hdr6_len = hdrs_len - SCTP_MAX_HDR_LENGTH;
	sctp->sctp_sctph6 = (sctp_hdr_t *)(sctp->sctp_iphc6 +
	    sctp->sctp_ip_hdr6_len);
	sctp->sctp_hdr6_len = sctp->sctp_ip_hdr6_len + sizeof (sctp_hdr_t);

	bcopy(buf, sctp->sctp_sctph6, sizeof (sctp_hdr_t));

	sctp->sctp_ip6h->ip6_src = src;
	sctp->sctp_ip6h->ip6_dst = dst;
	/*
	 * If the hoplimit was not set by ip_build_hdrs_v6(), we need to
	 * set it to the default value for SCTP.
	 */
	if (!(ipp->ipp_fields & IPPF_UNICAST_HOPS))
		sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit;
	/*
	 * If we're setting extension headers after a connection
	 * has been established, and if we have a routing header
	 * among the extension headers, call ip_massage_options_v6 to
	 * manipulate the routing header/ip6_dst and set the checksum
	 * difference in the sctp header template.
	 * (This happens in sctp_connect_ipv6 if the routing header
	 * is set prior to the connect.)
	 */

	if ((sctp->sctp_state >= SCTPS_COOKIE_WAIT) &&
	    (sctp->sctp_sticky_ipp.ipp_fields & IPPF_RTHDR)) {
		ip6_rthdr_t *rth;

		rth = ip_find_rthdr_v6(sctp->sctp_ip6h,
		    (uint8_t *)sctp->sctp_sctph6);
		if (rth != NULL)
			(void) ip_massage_options_v6(sctp->sctp_ip6h, rth);
	}
	return (0);
}

/*
 * Initialize the IPv6 header.  Loses any record of any IPv6 extension headers.
 */
int
sctp_header_init_ipv6(sctp_t *sctp, int sleep)
{
	sctp_hdr_t *sctph;

	/*
	 * This is a simple initialization.  If there's
	 * already a template, it should never be too small,
	 * so reuse it.  Otherwise, allocate space for the new one.
	 * Ensure that there is enough space to "downgrade" the sctp_t
	 * to an IPv4 sctp_t.  This requires having space for a full load
	 * of IPv4 options.
	 */
	if (sctp->sctp_iphc6 != NULL) {
		ASSERT(sctp->sctp_iphc6_len >=
		    SCTP_MAX_COMBINED_HEADER_LENGTH);
		bzero(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
	} else {
		sctp->sctp_iphc6_len = SCTP_MAX_COMBINED_HEADER_LENGTH;
		sctp->sctp_iphc6 = kmem_zalloc(sctp->sctp_iphc6_len, sleep);
		if (sctp->sctp_iphc6 == NULL) {
			sctp->sctp_iphc6_len = 0;
			return (ENOMEM);
		}
	}
	sctp->sctp_hdr6_len = IPV6_HDR_LEN + sizeof (sctp_hdr_t);
	sctp->sctp_ip_hdr6_len = IPV6_HDR_LEN;
	sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6;

	/* Initialize the header template */

	sctp->sctp_ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	sctp->sctp_ip6h->ip6_plen = ntohs(sizeof (sctp_hdr_t));
	sctp->sctp_ip6h->ip6_nxt = IPPROTO_SCTP;
	sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit;

	sctph = (sctp_hdr_t *)(sctp->sctp_iphc6 + IPV6_HDR_LEN);
	sctp->sctp_sctph6 = sctph;

	return (0);
}

/*
 * XXX implement more sophisticated logic
 */
void
sctp_set_hdraddrs(sctp_t *sctp)
{
	sctp_faddr_t *fp;
	int gotv4 = 0;
	int gotv6 = 0;

	ASSERT(sctp->sctp_faddrs != NULL);
	ASSERT(sctp->sctp_nsaddrs > 0);

	/* Set up using the primary first */
	if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->faddr)) {
		IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->faddr,
		    sctp->sctp_ipha->ipha_dst);
		/* saddr may be unspec; make_mp() will handle this */
		IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->saddr,
		    sctp->sctp_ipha->ipha_src);
		gotv4 = 1;
		if (sctp->sctp_ipversion == IPV4_VERSION) {
			goto copyports;
		}
	} else {
		sctp->sctp_ip6h->ip6_dst = sctp->sctp_primary->faddr;
		/* saddr may be unspec; make_mp() will handle this */
		sctp->sctp_ip6h->ip6_src = sctp->sctp_primary->saddr;
		gotv6 = 1;
	}

	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
		if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
			IN6_V4MAPPED_TO_IPADDR(&fp->faddr,
			    sctp->sctp_ipha->ipha_dst);
			/* copy in the faddr_t's saddr */
			IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
			    sctp->sctp_ipha->ipha_src);
			gotv4 = 1;
			if (sctp->sctp_ipversion == IPV4_VERSION || gotv6) {
				break;
			}
		} else if (!gotv6) {
			sctp->sctp_ip6h->ip6_dst = fp->faddr;
			/* copy in the faddr_t's saddr */
			sctp->sctp_ip6h->ip6_src = fp->saddr;
			gotv6 = 1;
			if (gotv4) {
				break;
			}
		}
	}

copyports:
	/* copy in the ports for good measure */
	sctp->sctp_sctph->sh_sport = sctp->sctp_lport;
	sctp->sctp_sctph->sh_dport = sctp->sctp_fport;

	sctp->sctp_sctph6->sh_sport = sctp->sctp_lport;
	sctp->sctp_sctph6->sh_dport = sctp->sctp_fport;
}

void
sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp)
{
	mblk_t *mp;
	sctp_parm_hdr_t *ph;
	size_t len;
	int pad;

	len = sizeof (*ph) + ntohs(uph->sph_len);
	if ((pad = len % 4) != 0) {
		pad = 4 - pad;
		len += pad;
	}
	mp = allocb(len, BPRI_MED);
	if (mp == NULL) {
		return;
	}

	ph = (sctp_parm_hdr_t *)(mp->b_rptr);
	ph->sph_type = htons(PARM_UNRECOGNIZED);
	ph->sph_len = htons(len - pad);

	/* copy in the unrecognized parameter */
	bcopy(uph, ph + 1, ntohs(uph->sph_len));

	mp->b_wptr = mp->b_rptr + len;
	if (*errmp != NULL) {
		linkb(*errmp, mp);
	} else {
		*errmp = mp;
	}
}

/*
 * o Bounds checking
 * o Updates remaining
 * o Checks alignment
 */
sctp_parm_hdr_t *
sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining)
{
	int pad;
	uint16_t len;

	len = ntohs(current->sph_len);
	*remaining -= len;
	if (*remaining < sizeof (*current) || len < sizeof (*current)) {
		return (NULL);
	}
	if ((pad = len & (SCTP_ALIGN - 1)) != 0) {
		pad = SCTP_ALIGN - pad;
		*remaining -= pad;
	}
	/*LINTED pointer cast may result in improper alignment*/
	current = (sctp_parm_hdr_t *)((char *)current + len + pad);
	return (current);
}

/*
 * Sets the address parameters given in the INIT chunk into sctp's
 * faddrs; if psctp is non-NULL, copies psctp's saddrs.  If there are
 * no address parameters in the INIT chunk, a single faddr is created
 * from the ip hdr at the beginning of pkt.
 * If there already are existing addresses hanging from sctp, merge
 * them in; if the old info contains addresses which are not present
 * in this new info, get rid of them, and clean the pointers if there
 * are messages which have this as their target address.
 *
 * We also re-adjust the source address list here since the list may
 * contain more than what is actually part of the association.  If
 * we get here from sctp_send_cookie_echo(), we are on the active
 * side and psctp will be NULL and ich will be the INIT-ACK chunk.
 * If we get here from sctp_accept_comm(), ich will be the INIT chunk
 * and psctp will be the listening endpoint.
 *
 * INIT processing: When processing the INIT we inherit the src address
 * list from the listener.  For a loopback or linklocal association, we
 * delete the list and just take the address from the IP header (since
 * that's how we created the INIT-ACK).  Additionally, for loopback we
 * ignore the address params in the INIT.  For determining which address
 * types were sent in the INIT-ACK we follow the same logic as in
 * creating the INIT-ACK.  We delete addresses of the type that are not
 * supported by the peer.
 *
 * INIT-ACK processing: When processing the INIT-ACK, since we had not
 * included addr params for loopback or linklocal addresses when creating
 * the INIT, we just use the address from the IP header.  Further, for
 * loopback we ignore the addr param list.  We mark addresses of the
 * type not supported by the peer as unconfirmed.
 *
 * In case of INIT processing we look for supported address types in the
 * supported address param, if present.  In both cases the address type in
 * the IP header is considered supported, as are the types of any addresses
 * in the param list.
 *
 * Once we have the supported address types, sctp_check_saddr() runs through
 * the source address list and deletes, or marks as unconfirmed, addresses
 * of types not supported by the peer.
 *
 * Returns 0 on success, sys errno on failure.
 */
int
sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt,
    sctp_chunk_hdr_t *ich, uint_t *sctp_options)
{
	sctp_init_chunk_t *init;
	ipha_t *iph;
	ip6_t *ip6h;
	in6_addr_t hdrsaddr[1];
	in6_addr_t hdrdaddr[1];
	sctp_parm_hdr_t *ph;
	ssize_t remaining;
	int isv4;
	int err;
	sctp_faddr_t *fp;
	int supp_af = 0;
	boolean_t check_saddr = B_TRUE;
	in6_addr_t curaddr;

	if (sctp_options != NULL)
		*sctp_options = 0;

	/* extract the address from the IP header */
	isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
	if (isv4) {
		iph = (ipha_t *)pkt->b_rptr;
		IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr);
		IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr);
		supp_af |= PARM_SUPP_V4;
	} else {
		ip6h = (ip6_t *)pkt->b_rptr;
		hdrsaddr[0] = ip6h->ip6_src;
		hdrdaddr[0] = ip6h->ip6_dst;
		supp_af |= PARM_SUPP_V6;
	}

	/*
	 * Unfortunately, we can't delay this because adding an faddr
	 * looks for the presence of the source address (from the ire
	 * for the faddr) in the source address list.  We could have
	 * delayed this if, say, this was a loopback/linklocal connection.
	 * Now, we just end up nuking this list and taking the addr from
	 * the IP header for loopback/linklocal.
	 */
	if (psctp != NULL && psctp->sctp_nsaddrs > 0) {
		ASSERT(sctp->sctp_nsaddrs == 0);

		err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP);
		if (err != 0)
			return (err);
	}
	/*
	 * We will add the faddr before parsing the address list as this
	 * might be a loopback connection and we would not have to
	 * go through the list.
	 *
	 * Make sure the header's addr is in the list
	 */
	fp = sctp_lookup_faddr(sctp, hdrsaddr);
	if (fp == NULL) {
		/* not included; add it now */
		if (sctp_add_faddr_first(sctp, hdrsaddr, KM_NOSLEEP) == -1)
			return (ENOMEM);

		/* sctp_faddrs will be the hdr addr */
		fp = sctp->sctp_faddrs;
	}
	/* make the header addr the primary */

	if (cl_sctp_assoc_change != NULL && psctp == NULL)
		curaddr = sctp->sctp_current->faddr;

	sctp->sctp_primary = fp;
	sctp->sctp_current = fp;
	sctp->sctp_mss = fp->sfa_pmss;

	/* For loopback connections & linklocal get address from the header */
	if (sctp->sctp_loopback || sctp->sctp_linklocal) {
		if (sctp->sctp_nsaddrs != 0)
			sctp_free_saddrs(sctp);
		if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0)
			return (err);
		/* For loopback ignore address list */
		if (sctp->sctp_loopback)
			return (0);
		check_saddr = B_FALSE;
	}

	/* Walk the params in the INIT [ACK], pulling out addr params */
	remaining = ntohs(ich->sch_len) - sizeof (*ich) -
	    sizeof (sctp_init_chunk_t);
	if (remaining < sizeof (*ph)) {
		if (check_saddr) {
			sctp_check_saddr(sctp, supp_af, psctp == NULL ?
			    B_FALSE : B_TRUE);
		}
		ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL);
		return (0);
	}

	init = (sctp_init_chunk_t *)(ich + 1);
	ph = (sctp_parm_hdr_t *)(init + 1);

	/* params will have already been byteordered when validating */
	while (ph != NULL) {
		if (ph->sph_type == htons(PARM_SUPP_ADDRS)) {
			int plen;
			uint16_t *p;
			uint16_t addrtype;

			ASSERT(psctp != NULL);
			plen = ntohs(ph->sph_len);
			p = (uint16_t *)(ph + 1);
			while (plen > 0) {
				addrtype = ntohs(*p);
				switch (addrtype) {
				case PARM_ADDR6:
					supp_af |= PARM_SUPP_V6;
					break;
				case PARM_ADDR4:
					supp_af |= PARM_SUPP_V4;
					break;
				default:
					break;
				}
				p++;
				plen -= sizeof (*p);
			}
		} else if (ph->sph_type == htons(PARM_ADDR4)) {
			if (remaining >= PARM_ADDR4_LEN) {
				in6_addr_t addr;
				ipaddr_t ta;

				supp_af |= PARM_SUPP_V4;
				/*
				 * Screen out broad/multicasts & loopback.
				 * If the endpoint only accepts v6 addresses,
				 * go to the next one.
				 */
				bcopy(ph + 1, &ta, sizeof (ta));
				if (ta == 0 ||
				    ta == INADDR_BROADCAST ||
				    ta == htonl(INADDR_LOOPBACK) ||
				    IN_MULTICAST(ta) ||
				    sctp->sctp_connp->conn_ipv6_v6only) {
					goto next;
				}
				/*
				 * XXX also need to check for subnet
				 * broadcasts.  This should probably
				 * wait until we have full access
				 * to the ILL tables.
				 */

				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
				    (ph + 1), &addr);
				/* Check for duplicate. */
				if (sctp_lookup_faddr(sctp, &addr) != NULL)
					goto next;

				/* OK, add it to the faddr set */
				if (sctp_add_faddr(sctp, &addr,
				    KM_NOSLEEP) != 0) {
					return (ENOMEM);
				}
			}
		} else if (ph->sph_type == htons(PARM_ADDR6) &&
		    sctp->sctp_family == AF_INET6) {
			/* A v4 socket should not take v6 addresses. */
			if (remaining >= PARM_ADDR6_LEN) {
				in6_addr_t *addr6;

				supp_af |= PARM_SUPP_V6;
				addr6 = (in6_addr_t *)(ph + 1);
				/*
				 * Screen out link locals, mcast, loopback
				 * and bogus v6 addresses.
				 */
				if (IN6_IS_ADDR_LINKLOCAL(addr6) ||
				    IN6_IS_ADDR_MULTICAST(addr6) ||
				    IN6_IS_ADDR_LOOPBACK(addr6) ||
				    IN6_IS_ADDR_V4MAPPED(addr6)) {
					goto next;
				}
				/* Check for duplicate. */
				if (sctp_lookup_faddr(sctp, addr6) != NULL)
					goto next;

				if (sctp_add_faddr(sctp,
				    (in6_addr_t *)(ph + 1), KM_NOSLEEP) != 0) {
					return (ENOMEM);
				}
			}
		} else if (ph->sph_type == htons(PARM_FORWARD_TSN)) {
			if (sctp_options != NULL)
				*sctp_options |= SCTP_PRSCTP_OPTION;
		} /* else; skip */

next:
		ph = sctp_next_parm(ph, &remaining);
	}
	if (check_saddr) {
		sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE :
		    B_TRUE);
	}
	ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL);
	/*
	 * We have the right address list now, update clustering's
	 * knowledge because when we sent the INIT we had just added
	 * the address the INIT was sent to.
	 */
	if (psctp == NULL && cl_sctp_assoc_change != NULL) {
		uchar_t *alist;
		size_t asize;
		uchar_t *dlist;
		size_t dsize;

		asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
		alist = kmem_alloc(asize, KM_NOSLEEP);
		if (alist == NULL)
			return (ENOMEM);
		/*
		 * Just include the address the INIT was sent to in the
		 * delete list and send the entire faddr list.  We could
		 * do it differently (i.e. include all the addresses in the
		 * add list even if it contains the original address OR
		 * remove the original address from the add list etc.), but
		 * this seems reasonable enough.
		 */
		dsize = sizeof (in6_addr_t);
		dlist = kmem_alloc(dsize, KM_NOSLEEP);
		if (dlist == NULL) {
			kmem_free(alist, asize);
			return (ENOMEM);
		}
		bcopy(&curaddr, dlist, sizeof (curaddr));
		sctp_get_faddr_list(sctp, alist, asize);
		(*cl_sctp_assoc_change)(sctp->sctp_family, alist, asize,
		    sctp->sctp_nfaddrs, dlist, dsize, 1, SCTP_CL_PADDR,
		    (cl_sctp_handle_t)sctp);
		/* alist and dlist will be freed by the clustering module */
	}
	return (0);
}

/*
 * Returns 0 if the check failed and the restart should be refused,
 * 1 if the check succeeded.
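 *
 * The INIT's address parameters are collected into a temporary faddr
 * list (with the IP header's source address added if it was not
 * listed), which is then compared against the faddr set of every
 * existing association on the same port pair.  An equal or subset
 * match means a legitimate restart; an overlap is treated as a
 * possible attack and is refused with an ABORT that lists the
 * addresses which would have been added.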
 */
int
sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports,
    int sleep)
{
	sctp_faddr_t *fp, *fpa, *fphead = NULL;
	sctp_parm_hdr_t *ph;
	ssize_t remaining;
	int isv4;
	ipha_t *iph;
	ip6_t *ip6h;
	in6_addr_t hdraddr[1];
	int retval = 0;
	sctp_tf_t *tf;
	sctp_t *sctp;
	int compres;
	sctp_init_chunk_t *init;
	int nadded = 0;

	/* extract the address from the IP header */
	isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
	if (isv4) {
		iph = (ipha_t *)pkt->b_rptr;
		IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr);
	} else {
		ip6h = (ip6_t *)pkt->b_rptr;
		hdraddr[0] = ip6h->ip6_src;
	}

	/* Walk the params in the INIT [ACK], pulling out addr params */
	remaining = ntohs(ich->sch_len) - sizeof (*ich) -
	    sizeof (sctp_init_chunk_t);
	if (remaining < sizeof (*ph)) {
		/* no parameters; restart OK */
		return (1);
	}
	init = (sctp_init_chunk_t *)(ich + 1);
	ph = (sctp_parm_hdr_t *)(init + 1);

	while (ph != NULL) {
		/* params will have already been byteordered when validating */
		if (ph->sph_type == htons(PARM_ADDR4)) {
			if (remaining >= PARM_ADDR4_LEN) {
				in6_addr_t addr;
				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
				    (ph + 1), &addr);
				fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
				    sleep);
				if (!fpa) {
					goto done;
				}
				bzero(fpa, sizeof (*fpa));
				fpa->faddr = addr;
				fpa->next = NULL;
			}
		} else if (ph->sph_type == htons(PARM_ADDR6)) {
			if (remaining >= PARM_ADDR6_LEN) {
				fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
				    sleep);
				if (!fpa) {
					goto done;
				}
				bzero(fpa, sizeof (*fpa));
				bcopy(ph + 1, &fpa->faddr,
				    sizeof (fpa->faddr));
				fpa->next = NULL;
			}
		} else {
			/* else not addr param; skip */
			fpa = NULL;
		}
		/* link in the new addr, if it was an addr param */
		if (fpa) {
			if (!fphead) {
				fphead = fpa;
				fp = fphead;
			} else {
				fp->next = fpa;
				fp = fpa;
			}
		}

		ph = sctp_next_parm(ph, &remaining);
	}

	if (fphead == NULL) {
		/* no addr parameters; restart OK */
		return (1);
	}

	/*
	 * got at least one; make sure the header's addr is
	 * in the list
	 */
	fp = sctp_lookup_faddr_nosctp(fphead, hdraddr);
	if (!fp) {
		/* not included; add it now */
		fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep);
		if (!fp) {
			goto done;
		}
		bzero(fp, sizeof (*fp));
		fp->faddr = *hdraddr;
		fp->next = fphead;
		fphead = fp;
	}

	/*
	 * Now, we can finally do the check: For each sctp instance
	 * on the hash line for ports, compare its faddr set against
	 * the new one.  If the new one is a strict subset of any
	 * existing sctp's faddrs, the restart is OK.  However, if there
	 * is an overlap, this could be an attack, so return failure.
	 * If all sctp's faddrs are disjoint, this is a legitimate new
	 * association.
	 */
	tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]);
	mutex_enter(&tf->tf_lock);

	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
		if (ports != sctp->sctp_ports) {
			continue;
		}
		compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs);
		if (compres <= SCTP_ADDR_SUBSET) {
			retval = 1;
			mutex_exit(&tf->tf_lock);
			goto done;
		}
		if (compres == SCTP_ADDR_OVERLAP) {
			dprint(1,
			    ("new assoc from %x:%x:%x:%x overlaps with %p\n",
			    SCTP_PRINTADDR(*hdraddr), sctp));
			/*
			 * While we still hold the lock, we need to
			 * figure out which addresses have been
			 * added so we can include them in the abort
			 * we will send back.  Since these faddrs will
			 * never be used, we overload the rto field
			 * here, setting it to 0 if the address was
			 * not added, 1 if it was added.
			 */
			for (fp = fphead; fp; fp = fp->next) {
				if (sctp_lookup_faddr(sctp, &fp->faddr)) {
					fp->rto = 0;
				} else {
					fp->rto = 1;
					nadded++;
				}
			}
			mutex_exit(&tf->tf_lock);
			goto done;
		}
	}
	mutex_exit(&tf->tf_lock);

	/* All faddrs are disjoint; legit new association */
	retval = 1;

done:
	/* If there are attempted adds, send back an abort listing the addrs */
	if (nadded > 0) {
		void *dtail;
		size_t dlen;

		dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP);
		if (dtail == NULL) {
			goto cleanup;
		}

		ph = dtail;
		dlen = 0;
		for (fp = fphead; fp; fp = fp->next) {
			if (fp->rto == 0) {
				continue;
			}
			if (IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
				ipaddr_t addr4;

				ph->sph_type = htons(PARM_ADDR4);
				ph->sph_len = htons(PARM_ADDR4_LEN);
				IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4);
				ph++;
				bcopy(&addr4, ph, sizeof (addr4));
				ph = (sctp_parm_hdr_t *)
				    ((char *)ph + sizeof (addr4));
				dlen += PARM_ADDR4_LEN;
			} else {
				ph->sph_type = htons(PARM_ADDR6);
				ph->sph_len = htons(PARM_ADDR6_LEN);
				ph++;
				bcopy(&fp->faddr, ph, sizeof (fp->faddr));
				ph = (sctp_parm_hdr_t *)
				    ((char *)ph + sizeof (fp->faddr));
				dlen += PARM_ADDR6_LEN;
			}
		}

		/* Send off the abort */
		sctp_send_abort(sctp, sctp_init2vtag(ich),
		    SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE);

		kmem_free(dtail, PARM_ADDR6_LEN * nadded);
	}

cleanup:
	/* Clean up */
	if (fphead) {
		sctp_faddr_t *fpn;
		for (fp = fphead; fp; fp = fpn) {
			fpn = fp->next;
			kmem_cache_free(sctp_kmem_faddr_cache, fp);
		}
	}

	return (retval);
}

void
sctp_congest_reset(sctp_t *sctp)
{
	sctp_faddr_t *fp;

	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
		fp->ssthresh = sctp_initial_mtu;
		fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial;
		fp->suna = 0;
		fp->pba = 0;
	}
}

/*
 * Return zero if the buffers are identical in length and content.
 * This is used for comparing extension header buffers.
 * Note that an extension header would be declared different
 * even if all that changed was the next header value in that header, i.e.
 * what really changed is the next extension header.
 */
boolean_t
sctp_cmpbuf(void *a, uint_t alen, boolean_t b_valid, void *b, uint_t blen)
{
	if (!b_valid)
		blen = 0;

	if (alen != blen)
		return (B_TRUE);
	if (alen == 0)
		return (B_FALSE);	/* Both zero length */
	return (bcmp(a, b, alen));
}

/*
 * Preallocate memory for sctp_savebuf().  Returns B_TRUE if ok.
 * Return B_FALSE if memory allocation fails - don't change any state!
 */
boolean_t
sctp_allocbuf(void **dstp, uint_t *dstlenp, boolean_t src_valid,
    void *src, uint_t srclen)
{
	void *dst;

	if (!src_valid)
		srclen = 0;

	ASSERT(*dstlenp == 0);
	if (src != NULL && srclen != 0) {
		dst = mi_zalloc(srclen);
		if (dst == NULL)
			return (B_FALSE);
	} else {
		dst = NULL;
	}
	if (*dstp != NULL) {
		mi_free(*dstp);
		*dstp = NULL;
		*dstlenp = 0;
	}
	*dstp = dst;
	if (dst != NULL)
		*dstlenp = srclen;
	else
		*dstlenp = 0;
	return (B_TRUE);
}

/*
 * Replace what is in *dst, *dstlen with the source.
 * Assumes sctp_allocbuf has already been called.
 */
void
sctp_savebuf(void **dstp, uint_t *dstlenp, boolean_t src_valid,
    void *src, uint_t srclen)
{
	if (!src_valid)
		srclen = 0;

	ASSERT(*dstlenp == srclen);
	if (src != NULL && srclen != 0) {
		bcopy(src, *dstp, srclen);
	}
}

static void
sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr)
{
	bcopy(addr, &fp->faddr, sizeof (*addr));
	if (IN6_IS_ADDR_V4MAPPED(addr)) {
		fp->isv4 = 1;
		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
		fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr_len) &
		    ~(SCTP_ALIGN - 1);
	} else {
		fp->isv4 = 0;
		fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr6_len) &
		    ~(SCTP_ALIGN - 1);
	}
	fp->cwnd = sctp_slow_start_initial * fp->sfa_pmss;
	fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max);
	fp->srtt = -1;
	fp->rtt_updates = 0;
	fp->strikes = 0;
	fp->max_retr = sctp->sctp_pp_max_rxt;
	/* Mark it as not confirmed. */
	fp->state = SCTP_FADDRS_UNCONFIRMED;
	fp->hb_interval = sctp->sctp_hb_interval;
	fp->ssthresh = sctp_initial_ssthresh;
	fp->suna = 0;
	fp->pba = 0;
	fp->acked = 0;
	fp->lastactive = lbolt64;
	fp->timer_mp = NULL;
	fp->hb_pending = B_FALSE;
	fp->timer_running = 0;
	fp->df = 1;
	fp->pmtu_discovered = 0;
	fp->rc_timer_mp = NULL;
	fp->rc_timer_running = 0;
	fp->next = NULL;
	fp->ire = NULL;
	fp->T3expire = 0;
	(void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret,
	    sizeof (fp->hb_secret));
	fp->hb_expiry = lbolt64;

	sctp_ire2faddr(sctp, fp);
}

/*ARGSUSED*/
static void
faddr_destructor(void *buf, void *cdrarg)
{
	sctp_faddr_t *fp = buf;

	ASSERT(fp->timer_mp == NULL);
	ASSERT(fp->timer_running == 0);

	ASSERT(fp->rc_timer_mp == NULL);
	ASSERT(fp->rc_timer_running == 0);
}

void
sctp_faddr_init()
{
	sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache",
	    sizeof (sctp_faddr_t), 0, NULL, faddr_destructor,
	    NULL, NULL, NULL, 0);
}

void
sctp_faddr_fini()
{
	kmem_cache_destroy(sctp_kmem_faddr_cache);
}