1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/systm.h> 29 #include <sys/stream.h> 30 #include <sys/strsubr.h> 31 #include <sys/ddi.h> 32 #include <sys/sunddi.h> 33 #include <sys/kmem.h> 34 #include <sys/socket.h> 35 #include <sys/random.h> 36 #include <sys/tsol/tndb.h> 37 #include <sys/tsol/tnet.h> 38 39 #include <netinet/in.h> 40 #include <netinet/ip6.h> 41 #include <netinet/sctp.h> 42 43 #include <inet/common.h> 44 #include <inet/ip.h> 45 #include <inet/ip6.h> 46 #include <inet/ip_ire.h> 47 #include <inet/ip_if.h> 48 #include <inet/ip_ndp.h> 49 #include <inet/mib2.h> 50 #include <inet/nd.h> 51 #include <inet/optcom.h> 52 #include <inet/sctp_ip.h> 53 #include <inet/ipclassifier.h> 54 55 #include "sctp_impl.h" 56 #include "sctp_addr.h" 57 #include "sctp_asconf.h" 58 59 static struct kmem_cache *sctp_kmem_faddr_cache; 60 static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *, mblk_t *); 61 62 /* Set the source address. Refer to comments in sctp_get_dest(). 
*/
/*
 * sctp_set_saddr() asks sctp_get_valid_addr() for a source address of the
 * same family as fp (v6 when fp is not v4) and stores it in fp->saddr.
 * When no address could be set, fp is marked SCTP_FADDRS_UNREACH.
 */
void
sctp_set_saddr(sctp_t *sctp, sctp_faddr_t *fp)
{
	boolean_t v6 = !fp->isv4;
	boolean_t addr_set;

	fp->saddr = sctp_get_valid_addr(sctp, v6, &addr_set);
	/*
	 * If there is no source address available, mark this peer address
	 * as unreachable for now.  When the heartbeat timer fires, it will
	 * call sctp_get_dest() to re-check if there is any source address
	 * available.
	 */
	if (!addr_set)
		fp->state = SCTP_FADDRS_UNREACH;
}

/*
 * Call this function to get information about a peer addr fp.
 *
 * Uses ip_attr_connect to avoid explicit use of ire and source address
 * selection.
 *
 * On return fp->saddr holds the source address to use for fp, unless fp
 * ended up in state SCTP_FADDRS_UNREACH.  When ip_attr_connect() succeeds
 * the cached RTT and path MTU from the returned iulp_t are folded into fp
 * as well.  If fp is the current peer address the composite header is
 * refreshed through sctp_set_faddr_current().
 */
void
sctp_get_dest(sctp_t *sctp, sctp_faddr_t *fp)
{
	in6_addr_t	laddr;
	in6_addr_t	nexthop;
	sctp_saddr_ipif_t *sp;
	int		hdrlen;
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	conn_t		*connp = sctp->sctp_connp;
	iulp_t		uinfo;
	uint_t		pmtu;
	int		error;
	uint32_t	flags = IPDF_VERIFY_DST | IPDF_IPSEC |
	    IPDF_SELECT_SRC | IPDF_UNIQUE_DCE;

	/*
	 * Tell sctp_make_mp it needs to call us again should we not
	 * complete and set the saddr.
	 */
	fp->saddr = ipv6_all_zeros;

	/*
	 * If this addr is not reachable, mark it as unconfirmed for now, the
	 * state will be changed back to unreachable later in this function
	 * if it is still the case.
	 */
	if (fp->state == SCTP_FADDRS_UNREACH) {
		fp->state = SCTP_FADDRS_UNCONFIRMED;
	}

	/*
	 * Socket is connected - enable PMTU discovery.
	 */
	if (!sctps->sctps_ignore_path_mtu)
		fp->ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;

	ip_attr_nexthop(&connp->conn_xmit_ipp, fp->ixa, &fp->faddr,
	    &nexthop);

	/*
	 * laddr starts out as the unspecified address stored above.  It is
	 * handed to ip_attr_connect() twice; the code below treats the value
	 * left in laddr as the source address IP selected (presumably
	 * because of IPDF_SELECT_SRC -- confirm against ip_attr_connect()).
	 */
	laddr = fp->saddr;
	error = ip_attr_connect(connp, fp->ixa, &laddr, &fp->faddr, &nexthop,
	    connp->conn_fport, &laddr, &uinfo, flags);

	if (error != 0) {
		dprint(3, ("sctp_get_dest: no ire for %x:%x:%x:%x\n",
		    SCTP_PRINTADDR(fp->faddr)));
		/*
		 * It is tempting to just leave the src addr
		 * unspecified and let IP figure it out, but we
		 * *cannot* do this, since IP may choose a src addr
		 * that is not part of this association... unless
		 * this sctp has bound to all addrs.  So if the dest
		 * lookup fails, try to find one in our src addr
		 * list, unless the sctp has bound to all addrs, in
		 * which case we change the src addr to unspec.
		 *
		 * Note that if this is a v6 endpoint but it does
		 * not have any v4 address at this point (e.g. may
		 * have been deleted), sctp_get_valid_addr() will
		 * return mapped INADDR_ANY.  In this case, this
		 * address should be marked not reachable so that
		 * it won't be used to send data.
		 */
		sctp_set_saddr(sctp, fp);
		if (fp->state == SCTP_FADDRS_UNREACH)
			return;
		/* uinfo is not used on this path; skip the RTO/MTU updates. */
		goto check_current;
	}
	ASSERT(fp->ixa->ixa_ire != NULL);
	ASSERT(!(fp->ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)));

	/* The loopback flag is only ever turned on here, never cleared. */
	if (!sctp->sctp_loopback)
		sctp->sctp_loopback = uinfo.iulp_loopback;

	/* Make sure the laddr is part of this association */
	if ((sp = sctp_saddr_lookup(sctp, &laddr, 0)) != NULL &&
	    !sp->saddr_ipif_dontsrc) {
		if (sp->saddr_ipif_unconfirmed == 1)
			sp->saddr_ipif_unconfirmed = 0;
		/* We did IPsec policy lookup for laddr already */
		fp->saddr = laddr;
	} else {
		dprint(2, ("sctp_get_dest: src addr is not part of assoc "
		    "%x:%x:%x:%x\n", SCTP_PRINTADDR(laddr)));

		/*
		 * Set the src to the first saddr and hope for the best.
		 * Note that this case should very seldom
		 * happen.  One scenario this can happen is an app
		 * explicitly bind() to an address.  But that address is
		 * not the preferred source address to send to the peer.
		 */
		sctp_set_saddr(sctp, fp);
		if (fp->state == SCTP_FADDRS_UNREACH) {
			return;
		}
	}

	/*
	 * Pull out RTO information for this faddr and use it if we don't
	 * have any yet.
	 */
	if (fp->srtt == -1 && uinfo.iulp_rtt != 0) {
		/* The cached value is in ms. */
		fp->srtt = MSEC_TO_TICK(uinfo.iulp_rtt);
		fp->rttvar = MSEC_TO_TICK(uinfo.iulp_rtt_sd);
		fp->rto = 3 * fp->srtt;

		/* Bound the RTO by configured min and max values */
		if (fp->rto < sctp->sctp_rto_min) {
			fp->rto = sctp->sctp_rto_min;
		}
		if (fp->rto > sctp->sctp_rto_max) {
			fp->rto = sctp->sctp_rto_max;
		}
		SCTP_MAX_RTO(sctp, fp);
	}
	pmtu = uinfo.iulp_mtu;

	/*
	 * Record the MTU for this faddr. If the MTU for this faddr has
	 * changed, check if the assoc MTU will also change.
	 */
	if (fp->isv4) {
		hdrlen = sctp->sctp_hdr_len;
	} else {
		hdrlen = sctp->sctp_hdr6_len;
	}
	if ((fp->sfa_pmss + hdrlen) != pmtu) {
		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
		fp->sfa_pmss = (pmtu - hdrlen) & ~(SCTP_ALIGN - 1);
		if (fp->cwnd < (fp->sfa_pmss * 2)) {
			SET_CWND(fp, fp->sfa_pmss,
			    sctps->sctps_slow_start_initial);
		}
	}

check_current:
	if (fp == sctp->sctp_current)
		sctp_set_faddr_current(sctp, fp);
}

/*
 * Walk every peer address of the association and hand the path information
 * SCTP has learned to IP through dce_update_uinfo():
 *  - the path MTU, only when fp->pmtu_discovered is set;
 *  - the smoothed RTT and its variance, only when sctps_rtt_updates is
 *    non-zero and fp has accumulated at least that many RTT updates (the
 *    per-faddr counter is then reset).
 * Link-scope destinations need an interface index; those without a cached
 * nce are skipped.
 */
void
sctp_update_dce(sctp_t *sctp)
{
	sctp_faddr_t	*fp;
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	iulp_t		uinfo;
	ip_stack_t	*ipst = sctps->sctps_netstack->netstack_ip;
	uint_t		ifindex;

	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
		/* Fields left at zero are those we have nothing to report for. */
		bzero(&uinfo, sizeof (uinfo));
		/*
		 * Only record the PMTU for this faddr if we actually have
		 * done discovery. This prevents initialized default from
		 * clobbering any real info that IP may have.
		 */
		if (fp->pmtu_discovered) {
			if (fp->isv4) {
				uinfo.iulp_mtu = fp->sfa_pmss +
				    sctp->sctp_hdr_len;
			} else {
				uinfo.iulp_mtu = fp->sfa_pmss +
				    sctp->sctp_hdr6_len;
			}
		}
		if (sctps->sctps_rtt_updates != 0 &&
		    fp->rtt_updates >= sctps->sctps_rtt_updates) {
			/*
			 * dce_update_uinfo() merges these values with the
			 * old values.
			 */
			uinfo.iulp_rtt = TICK_TO_MSEC(fp->srtt);
			uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->rttvar);
			fp->rtt_updates = 0;
		}
		ifindex = 0;
		if (IN6_IS_ADDR_LINKSCOPE(&fp->faddr)) {
			/*
			 * If we are going to create a DCE we'd better have
			 * an ifindex
			 */
			if (fp->ixa->ixa_nce != NULL) {
				ifindex = fp->ixa->ixa_nce->nce_common->
				    ncec_ill->ill_phyint->phyint_ifindex;
			} else {
				continue;
			}
		}

		(void) dce_update_uinfo(&fp->faddr, ifindex, &uinfo, ipst);
	}
}

/*
 * Allocate an mblk for a packet to peer address fp and copy the IPv4 or
 * IPv6 header template (sctp_iphc / sctp_iphc6) into it.  The mblk has
 * sctps_wroff_xtra bytes of headroom in front of the headers and room for
 * "trailer" more bytes after them; b_wptr is left just past the headers.
 *
 * If fp has no source address yet, or its cached ire is a reject/blackhole
 * route, sctp_get_dest() is called first.  Returns NULL when fp is
 * unreachable or when allocb() fails.
 *
 * The sender must later set the total length in the IP header.
 */
mblk_t *
sctp_make_mp(sctp_t *sctp, sctp_faddr_t *fp, int trailer)
{
	mblk_t *mp;
	size_t ipsctplen;
	int isv4;
	sctp_stack_t *sctps = sctp->sctp_sctps;
	boolean_t src_changed = B_FALSE;

	ASSERT(fp != NULL);
	isv4 = fp->isv4;

	if (SCTP_IS_ADDR_UNSPEC(isv4, fp->saddr) ||
	    (fp->ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
		/* Need to pick a source */
		sctp_get_dest(sctp, fp);
		/*
		 * Although we still may not get an IRE, the source address
		 * may be changed in sctp_get_dest().  Set src_changed to
		 * true so that the source address is copied again.
		 */
		src_changed = B_TRUE;
	}

	/* There is no suitable source address to use, return. */
	if (fp->state == SCTP_FADDRS_UNREACH)
		return (NULL);

	ASSERT(fp->ixa->ixa_ire != NULL);
	ASSERT(!SCTP_IS_ADDR_UNSPEC(isv4, fp->saddr));

	if (isv4) {
		ipsctplen = sctp->sctp_hdr_len;
	} else {
		ipsctplen = sctp->sctp_hdr6_len;
	}

	mp = allocb(ipsctplen + sctps->sctps_wroff_xtra + trailer, BPRI_MED);
	if (mp == NULL) {
		ip1dbg(("sctp_make_mp: error making mp..\n"));
		return (NULL);
	}
	mp->b_rptr += sctps->sctps_wroff_xtra;
	mp->b_wptr = mp->b_rptr + ipsctplen;

	ASSERT(OK_32PTR(mp->b_wptr));

	/*
	 * The template carries the addresses of the current peer; they only
	 * need patching for another peer or after the source was re-picked.
	 */
	if (isv4) {
		ipha_t *iph = (ipha_t *)mp->b_rptr;

		bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen);
		if (fp != sctp->sctp_current || src_changed) {
			/* Fix the source and destination addresses. */
			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
			IN6_V4MAPPED_TO_IPADDR(&fp->saddr, iph->ipha_src);
		}
		/* set or clear the don't fragment bit */
		if (fp->df) {
			iph->ipha_fragment_offset_and_flags = htons(IPH_DF);
		} else {
			iph->ipha_fragment_offset_and_flags = 0;
		}
	} else {
		bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen);
		if (fp != sctp->sctp_current || src_changed) {
			/* Fix the source and destination addresses. */
			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
			((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
		}
	}
	ASSERT(sctp->sctp_connp != NULL);
	return (mp);
}

/*
 * Notify upper layers about preferred write offset, write size.
360 */ 361 void 362 sctp_set_ulp_prop(sctp_t *sctp) 363 { 364 int hdrlen; 365 struct sock_proto_props sopp; 366 367 sctp_stack_t *sctps = sctp->sctp_sctps; 368 369 if (sctp->sctp_current->isv4) { 370 hdrlen = sctp->sctp_hdr_len; 371 } else { 372 hdrlen = sctp->sctp_hdr6_len; 373 } 374 ASSERT(sctp->sctp_ulpd); 375 376 sctp->sctp_connp->conn_wroff = sctps->sctps_wroff_xtra + hdrlen + 377 sizeof (sctp_data_hdr_t); 378 379 ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss); 380 bzero(&sopp, sizeof (sopp)); 381 sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF; 382 sopp.sopp_wroff = sctp->sctp_connp->conn_wroff; 383 sopp.sopp_maxblk = sctp->sctp_mss - sizeof (sctp_data_hdr_t); 384 sctp->sctp_ulp_prop(sctp->sctp_ulpd, &sopp); 385 } 386 387 /* 388 * Set the lengths in the packet and the transmit attributes. 389 */ 390 void 391 sctp_set_iplen(sctp_t *sctp, mblk_t *mp, ip_xmit_attr_t *ixa) 392 { 393 uint16_t sum = 0; 394 ipha_t *iph; 395 ip6_t *ip6h; 396 mblk_t *pmp = mp; 397 boolean_t isv4; 398 399 isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION); 400 for (; pmp; pmp = pmp->b_cont) 401 sum += pmp->b_wptr - pmp->b_rptr; 402 403 ixa->ixa_pktlen = sum; 404 if (isv4) { 405 iph = (ipha_t *)mp->b_rptr; 406 iph->ipha_length = htons(sum); 407 ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr_len; 408 } else { 409 ip6h = (ip6_t *)mp->b_rptr; 410 ip6h->ip6_plen = htons(sum - IPV6_HDR_LEN); 411 ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr6_len; 412 } 413 } 414 415 int 416 sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2) 417 { 418 int na1 = 0; 419 int overlap = 0; 420 int equal = 1; 421 int onematch; 422 sctp_faddr_t *fp1, *fp2; 423 424 for (fp1 = a1; fp1; fp1 = fp1->next) { 425 onematch = 0; 426 for (fp2 = a2; fp2; fp2 = fp2->next) { 427 if (IN6_ARE_ADDR_EQUAL(&fp1->faddr, &fp2->faddr)) { 428 overlap++; 429 onematch = 1; 430 break; 431 } 432 if (!onematch) { 433 equal = 0; 434 } 435 } 436 na1++; 437 } 438 439 if (equal) { 440 return (SCTP_ADDR_EQUAL); 441 } 442 if (overlap == 
na1) { 443 return (SCTP_ADDR_SUBSET); 444 } 445 if (overlap) { 446 return (SCTP_ADDR_OVERLAP); 447 } 448 return (SCTP_ADDR_DISJOINT); 449 } 450 451 /* 452 * Returns 0 on success, ENOMEM on memory allocation failure, EHOSTUNREACH 453 * if the connection credentials fail remote host accreditation or 454 * if the new destination does not support the previously established 455 * connection security label. If sleep is true, this function should 456 * never fail for a memory allocation failure. The boolean parameter 457 * "first" decides whether the newly created faddr structure should be 458 * added at the beginning of the list or at the end. 459 * 460 * Note: caller must hold conn fanout lock. 461 */ 462 int 463 sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep, boolean_t first) 464 { 465 sctp_faddr_t *faddr; 466 mblk_t *timer_mp; 467 int err; 468 conn_t *connp = sctp->sctp_connp; 469 470 if (is_system_labeled()) { 471 ip_xmit_attr_t *ixa = connp->conn_ixa; 472 ts_label_t *effective_tsl = NULL; 473 474 ASSERT(ixa->ixa_tsl != NULL); 475 476 /* 477 * Verify the destination is allowed to receive packets 478 * at the security label of the connection we are initiating. 479 * 480 * tsol_check_dest() will create a new effective label for 481 * this connection with a modified label or label flags only 482 * if there are changes from the original label. 483 * 484 * Accept whatever label we get if this is the first 485 * destination address for this connection. The security 486 * label and label flags must match any previuous settings 487 * for all subsequent destination addresses. 
488 */ 489 if (IN6_IS_ADDR_V4MAPPED(addr)) { 490 uint32_t dst; 491 IN6_V4MAPPED_TO_IPADDR(addr, dst); 492 err = tsol_check_dest(ixa->ixa_tsl, 493 &dst, IPV4_VERSION, connp->conn_mac_mode, 494 connp->conn_zone_is_global, &effective_tsl); 495 } else { 496 err = tsol_check_dest(ixa->ixa_tsl, 497 addr, IPV6_VERSION, connp->conn_mac_mode, 498 connp->conn_zone_is_global, &effective_tsl); 499 } 500 if (err != 0) 501 return (err); 502 503 if (sctp->sctp_faddrs == NULL && effective_tsl != NULL) { 504 ip_xmit_attr_replace_tsl(ixa, effective_tsl); 505 } else if (effective_tsl != NULL) { 506 label_rele(effective_tsl); 507 return (EHOSTUNREACH); 508 } 509 } 510 511 if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) 512 return (ENOMEM); 513 bzero(faddr, sizeof (*faddr)); 514 timer_mp = sctp_timer_alloc((sctp), sctp_rexmit_timer, sleep); 515 if (timer_mp == NULL) { 516 kmem_cache_free(sctp_kmem_faddr_cache, faddr); 517 return (ENOMEM); 518 } 519 ((sctpt_t *)(timer_mp->b_rptr))->sctpt_faddr = faddr; 520 521 /* Start with any options set on the conn */ 522 faddr->ixa = conn_get_ixa_exclusive(connp); 523 if (faddr->ixa == NULL) { 524 freemsg(timer_mp); 525 kmem_cache_free(sctp_kmem_faddr_cache, faddr); 526 return (ENOMEM); 527 } 528 faddr->ixa->ixa_notify_cookie = connp->conn_sctp; 529 530 sctp_init_faddr(sctp, faddr, addr, timer_mp); 531 ASSERT(faddr->ixa->ixa_cred != NULL); 532 533 /* ip_attr_connect didn't allow broadcats/multicast dest */ 534 ASSERT(faddr->next == NULL); 535 536 if (sctp->sctp_faddrs == NULL) { 537 ASSERT(sctp->sctp_lastfaddr == NULL); 538 /* only element on list; first and last are same */ 539 sctp->sctp_faddrs = sctp->sctp_lastfaddr = faddr; 540 } else if (first) { 541 ASSERT(sctp->sctp_lastfaddr != NULL); 542 faddr->next = sctp->sctp_faddrs; 543 sctp->sctp_faddrs = faddr; 544 } else { 545 sctp->sctp_lastfaddr->next = faddr; 546 sctp->sctp_lastfaddr = faddr; 547 } 548 sctp->sctp_nfaddrs++; 549 550 return (0); 551 } 552 553 sctp_faddr_t * 554 
sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr) 555 { 556 sctp_faddr_t *fp; 557 558 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 559 if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) 560 break; 561 } 562 563 return (fp); 564 } 565 566 sctp_faddr_t * 567 sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr) 568 { 569 for (; fp; fp = fp->next) { 570 if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) { 571 break; 572 } 573 } 574 575 return (fp); 576 } 577 578 /* 579 * To change the currently used peer address to the specified one. 580 */ 581 void 582 sctp_set_faddr_current(sctp_t *sctp, sctp_faddr_t *fp) 583 { 584 /* Now setup the composite header. */ 585 if (fp->isv4) { 586 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, 587 sctp->sctp_ipha->ipha_dst); 588 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, sctp->sctp_ipha->ipha_src); 589 /* update don't fragment bit */ 590 if (fp->df) { 591 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 592 htons(IPH_DF); 593 } else { 594 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0; 595 } 596 } else { 597 sctp->sctp_ip6h->ip6_dst = fp->faddr; 598 sctp->sctp_ip6h->ip6_src = fp->saddr; 599 } 600 601 sctp->sctp_current = fp; 602 sctp->sctp_mss = fp->sfa_pmss; 603 604 /* Update the uppper layer for the change. 
*/ 605 if (!SCTP_IS_DETACHED(sctp)) 606 sctp_set_ulp_prop(sctp); 607 } 608 609 void 610 sctp_redo_faddr_srcs(sctp_t *sctp) 611 { 612 sctp_faddr_t *fp; 613 614 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 615 sctp_get_dest(sctp, fp); 616 } 617 } 618 619 void 620 sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp) 621 { 622 int64_t now = lbolt64; 623 624 fp->strikes = 0; 625 sctp->sctp_strikes = 0; 626 fp->lastactive = now; 627 fp->hb_expiry = now + SET_HB_INTVL(fp); 628 fp->hb_pending = B_FALSE; 629 if (fp->state != SCTP_FADDRS_ALIVE) { 630 fp->state = SCTP_FADDRS_ALIVE; 631 sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0); 632 /* Should have a full IRE now */ 633 sctp_get_dest(sctp, fp); 634 635 /* 636 * If this is the primary, switch back to it now. And 637 * we probably want to reset the source addr used to reach 638 * it. 639 * Note that if we didn't find a source in sctp_get_dest 640 * then we'd be unreachable at this point in time. 641 */ 642 if (fp == sctp->sctp_primary && 643 fp->state != SCTP_FADDRS_UNREACH) { 644 sctp_set_faddr_current(sctp, fp); 645 return; 646 } 647 } 648 } 649 650 int 651 sctp_is_a_faddr_clean(sctp_t *sctp) 652 { 653 sctp_faddr_t *fp; 654 655 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 656 if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) { 657 return (1); 658 } 659 } 660 661 return (0); 662 } 663 664 /* 665 * Returns 0 if there is at leave one other active faddr, -1 if there 666 * are none. If there are none left, faddr_dead() will start killing the 667 * association. 668 * If the downed faddr was the current faddr, a new current faddr 669 * will be chosen. 
670 */ 671 int 672 sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate) 673 { 674 sctp_faddr_t *ofp; 675 sctp_stack_t *sctps = sctp->sctp_sctps; 676 677 if (fp->state == SCTP_FADDRS_ALIVE) { 678 sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_UNREACHABLE, 0); 679 } 680 fp->state = newstate; 681 682 dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n", 683 SCTP_PRINTADDR(fp->faddr), newstate)); 684 685 if (fp == sctp->sctp_current) { 686 /* Current faddr down; need to switch it */ 687 sctp->sctp_current = NULL; 688 } 689 690 /* Find next alive faddr */ 691 ofp = fp; 692 for (fp = fp->next; fp != NULL; fp = fp->next) { 693 if (fp->state == SCTP_FADDRS_ALIVE) { 694 break; 695 } 696 } 697 698 if (fp == NULL) { 699 /* Continue from beginning of list */ 700 for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->next) { 701 if (fp->state == SCTP_FADDRS_ALIVE) { 702 break; 703 } 704 } 705 } 706 707 /* 708 * Find a new fp, so if the current faddr is dead, use the new fp 709 * as the current one. 710 */ 711 if (fp != ofp) { 712 if (sctp->sctp_current == NULL) { 713 dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n", 714 SCTP_PRINTADDR(fp->faddr))); 715 /* 716 * Note that we don't need to reset the source addr 717 * of the new fp. 718 */ 719 sctp_set_faddr_current(sctp, fp); 720 } 721 return (0); 722 } 723 724 725 /* All faddrs are down; kill the association */ 726 dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n")); 727 BUMP_MIB(&sctps->sctps_mib, sctpAborted); 728 sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ? 729 SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL); 730 sctp_clean_death(sctp, sctp->sctp_client_errno ? 
731 sctp->sctp_client_errno : ETIMEDOUT); 732 733 return (-1); 734 } 735 736 sctp_faddr_t * 737 sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp) 738 { 739 sctp_faddr_t *nfp = NULL; 740 741 if (ofp == NULL) { 742 ofp = sctp->sctp_current; 743 } 744 745 /* Find the next live one */ 746 for (nfp = ofp->next; nfp != NULL; nfp = nfp->next) { 747 if (nfp->state == SCTP_FADDRS_ALIVE) { 748 break; 749 } 750 } 751 752 if (nfp == NULL) { 753 /* Continue from beginning of list */ 754 for (nfp = sctp->sctp_faddrs; nfp != ofp; nfp = nfp->next) { 755 if (nfp->state == SCTP_FADDRS_ALIVE) { 756 break; 757 } 758 } 759 } 760 761 /* 762 * nfp could only be NULL if all faddrs are down, and when 763 * this happens, faddr_dead() should have killed the 764 * association. Hence this assertion... 765 */ 766 ASSERT(nfp != NULL); 767 return (nfp); 768 } 769 770 void 771 sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp) 772 { 773 sctp_faddr_t *fpp; 774 775 if (!sctp->sctp_faddrs) { 776 return; 777 } 778 779 if (fp->timer_mp != NULL) { 780 sctp_timer_free(fp->timer_mp); 781 fp->timer_mp = NULL; 782 fp->timer_running = 0; 783 } 784 if (fp->rc_timer_mp != NULL) { 785 sctp_timer_free(fp->rc_timer_mp); 786 fp->rc_timer_mp = NULL; 787 fp->rc_timer_running = 0; 788 } 789 if (fp->ixa != NULL) { 790 ixa_refrele(fp->ixa); 791 fp->ixa = NULL; 792 } 793 794 if (fp == sctp->sctp_faddrs) { 795 goto gotit; 796 } 797 798 for (fpp = sctp->sctp_faddrs; fpp->next != fp; fpp = fpp->next) 799 ; 800 801 gotit: 802 ASSERT(sctp->sctp_conn_tfp != NULL); 803 mutex_enter(&sctp->sctp_conn_tfp->tf_lock); 804 if (fp == sctp->sctp_faddrs) { 805 sctp->sctp_faddrs = fp->next; 806 } else { 807 fpp->next = fp->next; 808 } 809 mutex_exit(&sctp->sctp_conn_tfp->tf_lock); 810 kmem_cache_free(sctp_kmem_faddr_cache, fp); 811 sctp->sctp_nfaddrs--; 812 } 813 814 void 815 sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock) 816 { 817 sctp_faddr_t *fp, *fpn; 818 819 if (sctp->sctp_faddrs == NULL) { 820 ASSERT(sctp->sctp_lastfaddr == 
NULL); 821 return; 822 } 823 824 ASSERT(sctp->sctp_lastfaddr != NULL); 825 sctp->sctp_lastfaddr = NULL; 826 sctp->sctp_current = NULL; 827 sctp->sctp_primary = NULL; 828 829 sctp_free_faddr_timers(sctp); 830 831 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { 832 /* in conn fanout; need to hold lock */ 833 mutex_enter(&sctp->sctp_conn_tfp->tf_lock); 834 } 835 836 for (fp = sctp->sctp_faddrs; fp; fp = fpn) { 837 fpn = fp->next; 838 if (fp->ixa != NULL) { 839 ixa_refrele(fp->ixa); 840 fp->ixa = NULL; 841 } 842 kmem_cache_free(sctp_kmem_faddr_cache, fp); 843 sctp->sctp_nfaddrs--; 844 } 845 846 sctp->sctp_faddrs = NULL; 847 ASSERT(sctp->sctp_nfaddrs == 0); 848 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { 849 mutex_exit(&sctp->sctp_conn_tfp->tf_lock); 850 } 851 852 } 853 854 void 855 sctp_zap_addrs(sctp_t *sctp) 856 { 857 sctp_zap_faddrs(sctp, 0); 858 sctp_free_saddrs(sctp); 859 } 860 861 /* 862 * Build two SCTP header templates; one for IPv4 and one for IPv6. 863 * Store them in sctp_iphc and sctp_iphc6 respectively (and related fields). 864 * There are no IP addresses in the templates, but the port numbers and 865 * verifier are field in from the conn_t and sctp_t. 866 * 867 * Returns failure if can't allocate memory, or if there is a problem 868 * with a routing header/option. 869 * 870 * We allocate space for the minimum sctp header (sctp_hdr_t). 871 * 872 * We massage an routing option/header. There is no checksum implication 873 * for a routing header for sctp. 874 * 875 * Caller needs to update conn_wroff if desired. 876 * 877 * TSol notes: This assumes that a SCTP association has a single peer label 878 * since we only track a single pair of ipp_label_v4/v6 and not a separate one 879 * for each faddr. 
*/
int
sctp_build_hdrs(sctp_t *sctp, int sleep)
{
	conn_t		*connp = sctp->sctp_connp;
	ip_pkt_t	*ipp = &connp->conn_xmit_ipp;
	uint_t		ip_hdr_length;
	uchar_t		*hdrs;
	uint_t		hdrs_len;
	uint_t		ulp_hdr_length = sizeof (sctp_hdr_t);
	ipha_t		*ipha;
	ip6_t		*ip6h;
	sctp_hdr_t	*sctph;
	in6_addr_t	v6src, v6dst;
	ipaddr_t	v4src, v4dst;

	/* Addresses currently recorded on the conn are copied as they are. */
	v4src = connp->conn_saddr_v4;
	v4dst = connp->conn_faddr_v4;
	v6src = connp->conn_saddr_v6;
	v6dst = connp->conn_faddr_v6;

	/* First do IPv4 header */
	ip_hdr_length = ip_total_hdrs_len_v4(ipp);

	/* In case of TX label and IP options it can be too much */
	if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
		/* Preserves existing TX errno for this */
		return (EHOSTUNREACH);
	}
	hdrs_len = ip_hdr_length + ulp_hdr_length;
	ASSERT(hdrs_len != 0);

	/* The existing buffer is reused when the size has not changed. */
	if (hdrs_len != sctp->sctp_iphc_len) {
		/* Allocate new before we free any old */
		hdrs = kmem_alloc(hdrs_len, sleep);
		if (hdrs == NULL)
			return (ENOMEM);

		if (sctp->sctp_iphc != NULL)
			kmem_free(sctp->sctp_iphc, sctp->sctp_iphc_len);
		sctp->sctp_iphc = hdrs;
		sctp->sctp_iphc_len = hdrs_len;
	} else {
		hdrs = sctp->sctp_iphc;
	}
	sctp->sctp_hdr_len = sctp->sctp_iphc_len;
	sctp->sctp_ip_hdr_len = ip_hdr_length;

	/* The SCTP common header sits right behind the IP header+options. */
	sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length);
	sctp->sctp_sctph = sctph;
	sctph->sh_sport = connp->conn_lport;
	sctph->sh_dport = connp->conn_fport;
	sctph->sh_verf = sctp->sctp_fvtag;
	sctph->sh_chksum = 0;

	ipha = (ipha_t *)hdrs;
	sctp->sctp_ipha = ipha;

	ipha->ipha_src = v4src;
	ipha->ipha_dst = v4dst;
	ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
	ipha->ipha_length = htons(hdrs_len);
	ipha->ipha_fragment_offset_and_flags = 0;

	if (ipp->ipp_fields & IPPF_IPV4_OPTIONS)
		(void) ip_massage_options(ipha, connp->conn_netstack);

	/* Now IPv6 */
	ip_hdr_length = ip_total_hdrs_len_v6(ipp);
	hdrs_len = ip_hdr_length + ulp_hdr_length;
	ASSERT(hdrs_len != 0);

	/*
	 * NOTE(review): an ENOMEM return below leaves the IPv4 template
	 * already rebuilt while the IPv6 one is still the old one.
	 */
	if (hdrs_len != sctp->sctp_iphc6_len) {
		/* Allocate new before we free any old */
		hdrs = kmem_alloc(hdrs_len, sleep);
		if (hdrs == NULL)
			return (ENOMEM);

		if (sctp->sctp_iphc6 != NULL)
			kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
		sctp->sctp_iphc6 = hdrs;
		sctp->sctp_iphc6_len = hdrs_len;
	} else {
		hdrs = sctp->sctp_iphc6;
	}
	sctp->sctp_hdr6_len = sctp->sctp_iphc6_len;
	sctp->sctp_ip_hdr6_len = ip_hdr_length;

	sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length);
	sctp->sctp_sctph6 = sctph;
	sctph->sh_sport = connp->conn_lport;
	sctph->sh_dport = connp->conn_fport;
	sctph->sh_verf = sctp->sctp_fvtag;
	sctph->sh_chksum = 0;

	ip6h = (ip6_t *)hdrs;
	sctp->sctp_ip6h = ip6h;

	ip6h->ip6_src = v6src;
	ip6h->ip6_dst = v6dst;
	ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
	    connp->conn_flowinfo);
	ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);

	if (ipp->ipp_fields & IPPF_RTHDR) {
		uint8_t		*end;
		ip6_rthdr_t	*rth;

		end = (uint8_t *)ip6h + ip_hdr_length;
		rth = ip_find_rthdr_v6(ip6h, end);
		if (rth != NULL) {
			(void) ip_massage_options_v6(ip6h, rth,
			    connp->conn_netstack);
		}

		/*
		 * Verify that the first hop isn't a mapped address.
		 * Routers along the path need to do this verification
		 * for subsequent hops.
		 */
		if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
			return (EADDRNOTAVAIL);
	}
	return (0);
}

/*
 * Update the connection's label for the IPv4 peer address fp through
 * conn_update_label().  Returns whatever conn_update_label() returns;
 * sctp_set_hdraddrs() treats 0 as success.
 */
static int
sctp_v4_label(sctp_t *sctp, sctp_faddr_t *fp)
{
	conn_t *connp = sctp->sctp_connp;

	ASSERT(fp->ixa->ixa_flags & IXAF_IS_IPV4);
	return (conn_update_label(connp, fp->ixa, &fp->faddr,
	    &connp->conn_xmit_ipp));
}

/*
 * IPv6 counterpart of sctp_v4_label(); only the sanity check on the
 * address family of fp->ixa differs.
 */
static int
sctp_v6_label(sctp_t *sctp, sctp_faddr_t *fp)
{
	conn_t *connp = sctp->sctp_connp;

	ASSERT(!(fp->ixa->ixa_flags & IXAF_IS_IPV4));
	return (conn_update_label(connp, fp->ixa, &fp->faddr,
	    &connp->conn_xmit_ipp));
}

/*
 * XXX implement more sophisticated logic
 *
 * Tsol note: We have already verified the addresses using tsol_check_dest
 * in sctp_add_faddr, thus no need to redo that here.
 * We do setup ipp_label_v4 and ipp_label_v6 based on which addresses
 * we have.
 *
 * Copies the primary peer's addresses into the conn_t, then makes sure at
 * least one IPv4 or one IPv6 peer address is usable (on a labeled system:
 * one for which the label update succeeds).  Returns 0 on success and
 * EACCES when no peer address of either family qualified.
 */
int
sctp_set_hdraddrs(sctp_t *sctp)
{
	sctp_faddr_t *fp;
	int gotv4 = 0;
	int gotv6 = 0;
	conn_t *connp = sctp->sctp_connp;

	ASSERT(sctp->sctp_faddrs != NULL);
	ASSERT(sctp->sctp_nsaddrs > 0);

	/* Set up using the primary first */
	connp->conn_faddr_v6 = sctp->sctp_primary->faddr;
	/* saddr may be unspec; make_mp() will handle this */
	connp->conn_saddr_v6 = sctp->sctp_primary->saddr;
	connp->conn_laddr_v6 = connp->conn_saddr_v6;
	if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->faddr)) {
		if (!is_system_labeled() ||
		    sctp_v4_label(sctp, sctp->sctp_primary) == 0) {
			gotv4 = 1;
			/* An AF_INET endpoint needs nothing beyond v4. */
			if (connp->conn_family == AF_INET) {
				goto done;
			}
		}
	} else {
		if (!is_system_labeled() ||
		    sctp_v6_label(sctp, sctp->sctp_primary) == 0) {
			gotv6 = 1;
		}
	}

	/* Look through the peer list for the family (or families) missing. */
	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
		if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
			if (!is_system_labeled() ||
			    sctp_v4_label(sctp, fp) == 0) {
				gotv4 = 1;
				if (connp->conn_family == AF_INET || gotv6) {
					break;
				}
			}
		} else if (!gotv6 && !IN6_IS_ADDR_V4MAPPED(&fp->faddr)) {
			if (!is_system_labeled() ||
			    sctp_v6_label(sctp, fp) == 0) {
				gotv6 = 1;
				if (gotv4)
					break;
			}
		}
	}

done:
	if (!gotv4 && !gotv6)
		return (EACCES);

	return (0);
}

/*
 * got_errchunk is set B_TRUE only if called from validate_init_params(), when
 * an ERROR chunk is already prepended the size of which needs updating for
 * additional unrecognized parameters. Other callers either prepend the ERROR
 * chunk with the correct size after calling this function, or they are calling
 * to add an invalid parameter to an INIT_ACK chunk, in that case no ERROR chunk
 * exists, the CAUSE blocks go into the INIT_ACK directly.
 *
 * *errmp will be non-NULL both when adding an additional CAUSE block to an
 * existing prepended COOKIE ERROR chunk (processing params of an INIT_ACK),
 * and when adding unrecognized parameters after the first, to an INIT_ACK
 * (processing params of an INIT chunk).
1103 */ 1104 void 1105 sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp, 1106 boolean_t got_errchunk) 1107 { 1108 mblk_t *mp; 1109 sctp_parm_hdr_t *ph; 1110 size_t len; 1111 int pad; 1112 sctp_chunk_hdr_t *ecp; 1113 1114 len = sizeof (*ph) + ntohs(uph->sph_len); 1115 if ((pad = len % SCTP_ALIGN) != 0) { 1116 pad = SCTP_ALIGN - pad; 1117 len += pad; 1118 } 1119 mp = allocb(len, BPRI_MED); 1120 if (mp == NULL) { 1121 return; 1122 } 1123 1124 ph = (sctp_parm_hdr_t *)(mp->b_rptr); 1125 ph->sph_type = htons(PARM_UNRECOGNIZED); 1126 ph->sph_len = htons(len - pad); 1127 1128 /* copy in the unrecognized parameter */ 1129 bcopy(uph, ph + 1, ntohs(uph->sph_len)); 1130 1131 if (pad != 0) 1132 bzero((mp->b_rptr + len - pad), pad); 1133 1134 mp->b_wptr = mp->b_rptr + len; 1135 if (*errmp != NULL) { 1136 /* 1137 * Update total length if an ERROR chunk, then link 1138 * this CAUSE block to the possible chain of CAUSE 1139 * blocks attached to the ERROR chunk or INIT_ACK 1140 * being created. 1141 */ 1142 if (got_errchunk) { 1143 /* ERROR chunk already prepended */ 1144 ecp = (sctp_chunk_hdr_t *)((*errmp)->b_rptr); 1145 ecp->sch_len = htons(ntohs(ecp->sch_len) + len); 1146 } 1147 linkb(*errmp, mp); 1148 } else { 1149 *errmp = mp; 1150 } 1151 } 1152 1153 /* 1154 * o Bounds checking 1155 * o Updates remaining 1156 * o Checks alignment 1157 */ 1158 sctp_parm_hdr_t * 1159 sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining) 1160 { 1161 int pad; 1162 uint16_t len; 1163 1164 len = ntohs(current->sph_len); 1165 *remaining -= len; 1166 if (*remaining < sizeof (*current) || len < sizeof (*current)) { 1167 return (NULL); 1168 } 1169 if ((pad = len & (SCTP_ALIGN - 1)) != 0) { 1170 pad = SCTP_ALIGN - pad; 1171 *remaining -= pad; 1172 } 1173 /*LINTED pointer cast may result in improper alignment*/ 1174 current = (sctp_parm_hdr_t *)((char *)current + len + pad); 1175 return (current); 1176 } 1177 1178 /* 1179 * Sets the address parameters given in the INIT chunk into sctp's 1180 
* faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are 1181 * no address parameters in the INIT chunk, a single faddr is created 1182 * from the ip hdr at the beginning of pkt. 1183 * If there already are existing addresses hanging from sctp, merge 1184 * them in, if the old info contains addresses which are not present 1185 * in this new info, get rid of them, and clean the pointers if there's 1186 * messages which have this as their target address. 1187 * 1188 * We also re-adjust the source address list here since the list may 1189 * contain more than what is actually part of the association. If 1190 * we get here from sctp_send_cookie_echo(), we are on the active 1191 * side and psctp will be NULL and ich will be the INIT-ACK chunk. 1192 * If we get here from sctp_accept_comm(), ich will be the INIT chunk 1193 * and psctp will the listening endpoint. 1194 * 1195 * INIT processing: When processing the INIT we inherit the src address 1196 * list from the listener. For a loopback or linklocal association, we 1197 * delete the list and just take the address from the IP header (since 1198 * that's how we created the INIT-ACK). Additionally, for loopback we 1199 * ignore the address params in the INIT. For determining which address 1200 * types were sent in the INIT-ACK we follow the same logic as in 1201 * creating the INIT-ACK. We delete addresses of the type that are not 1202 * supported by the peer. 1203 * 1204 * INIT-ACK processing: When processing the INIT-ACK since we had not 1205 * included addr params for loopback or linklocal addresses when creating 1206 * the INIT, we just use the address from the IP header. Further, for 1207 * loopback we ignore the addr param list. We mark addresses of the 1208 * type not supported by the peer as unconfirmed. 1209 * 1210 * In case of INIT processing we look for supported address types in the 1211 * supported address param, if present. 
In both cases the address type in 1212 * the IP header is supported as well as types for addresses in the param 1213 * list, if any. 1214 * 1215 * Once we have the supported address types sctp_check_saddr() runs through 1216 * the source address list and deletes or marks as unconfirmed address of 1217 * types not supported by the peer. 1218 * 1219 * Returns 0 on success, sys errno on failure 1220 */ 1221 int 1222 sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt, 1223 sctp_chunk_hdr_t *ich, uint_t *sctp_options) 1224 { 1225 sctp_init_chunk_t *init; 1226 ipha_t *iph; 1227 ip6_t *ip6h; 1228 in6_addr_t hdrsaddr[1]; 1229 in6_addr_t hdrdaddr[1]; 1230 sctp_parm_hdr_t *ph; 1231 ssize_t remaining; 1232 int isv4; 1233 int err; 1234 sctp_faddr_t *fp; 1235 int supp_af = 0; 1236 boolean_t check_saddr = B_TRUE; 1237 in6_addr_t curaddr; 1238 sctp_stack_t *sctps = sctp->sctp_sctps; 1239 conn_t *connp = sctp->sctp_connp; 1240 1241 if (sctp_options != NULL) 1242 *sctp_options = 0; 1243 1244 /* extract the address from the IP header */ 1245 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); 1246 if (isv4) { 1247 iph = (ipha_t *)pkt->b_rptr; 1248 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr); 1249 IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr); 1250 supp_af |= PARM_SUPP_V4; 1251 } else { 1252 ip6h = (ip6_t *)pkt->b_rptr; 1253 hdrsaddr[0] = ip6h->ip6_src; 1254 hdrdaddr[0] = ip6h->ip6_dst; 1255 supp_af |= PARM_SUPP_V6; 1256 } 1257 1258 /* 1259 * Unfortunately, we can't delay this because adding an faddr 1260 * looks for the presence of the source address (from the ire 1261 * for the faddr) in the source address list. We could have 1262 * delayed this if, say, this was a loopback/linklocal connection. 1263 * Now, we just end up nuking this list and taking the addr from 1264 * the IP header for loopback/linklocal. 
1265 */ 1266 if (psctp != NULL && psctp->sctp_nsaddrs > 0) { 1267 ASSERT(sctp->sctp_nsaddrs == 0); 1268 1269 err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP); 1270 if (err != 0) 1271 return (err); 1272 } 1273 /* 1274 * We will add the faddr before parsing the address list as this 1275 * might be a loopback connection and we would not have to 1276 * go through the list. 1277 * 1278 * Make sure the header's addr is in the list 1279 */ 1280 fp = sctp_lookup_faddr(sctp, hdrsaddr); 1281 if (fp == NULL) { 1282 /* not included; add it now */ 1283 err = sctp_add_faddr(sctp, hdrsaddr, KM_NOSLEEP, B_TRUE); 1284 if (err != 0) 1285 return (err); 1286 1287 /* sctp_faddrs will be the hdr addr */ 1288 fp = sctp->sctp_faddrs; 1289 } 1290 /* make the header addr the primary */ 1291 1292 if (cl_sctp_assoc_change != NULL && psctp == NULL) 1293 curaddr = sctp->sctp_current->faddr; 1294 1295 sctp->sctp_primary = fp; 1296 sctp->sctp_current = fp; 1297 sctp->sctp_mss = fp->sfa_pmss; 1298 1299 /* For loopback connections & linklocal get address from the header */ 1300 if (sctp->sctp_loopback || sctp->sctp_linklocal) { 1301 if (sctp->sctp_nsaddrs != 0) 1302 sctp_free_saddrs(sctp); 1303 if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0) 1304 return (err); 1305 /* For loopback ignore address list */ 1306 if (sctp->sctp_loopback) 1307 return (0); 1308 check_saddr = B_FALSE; 1309 } 1310 1311 /* Walk the params in the INIT [ACK], pulling out addr params */ 1312 remaining = ntohs(ich->sch_len) - sizeof (*ich) - 1313 sizeof (sctp_init_chunk_t); 1314 if (remaining < sizeof (*ph)) { 1315 if (check_saddr) { 1316 sctp_check_saddr(sctp, supp_af, psctp == NULL ? 
1317 B_FALSE : B_TRUE, hdrdaddr); 1318 } 1319 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); 1320 return (0); 1321 } 1322 1323 init = (sctp_init_chunk_t *)(ich + 1); 1324 ph = (sctp_parm_hdr_t *)(init + 1); 1325 1326 /* params will have already been byteordered when validating */ 1327 while (ph != NULL) { 1328 if (ph->sph_type == htons(PARM_SUPP_ADDRS)) { 1329 int plen; 1330 uint16_t *p; 1331 uint16_t addrtype; 1332 1333 ASSERT(psctp != NULL); 1334 plen = ntohs(ph->sph_len); 1335 p = (uint16_t *)(ph + 1); 1336 while (plen > 0) { 1337 addrtype = ntohs(*p); 1338 switch (addrtype) { 1339 case PARM_ADDR6: 1340 supp_af |= PARM_SUPP_V6; 1341 break; 1342 case PARM_ADDR4: 1343 supp_af |= PARM_SUPP_V4; 1344 break; 1345 default: 1346 break; 1347 } 1348 p++; 1349 plen -= sizeof (*p); 1350 } 1351 } else if (ph->sph_type == htons(PARM_ADDR4)) { 1352 if (remaining >= PARM_ADDR4_LEN) { 1353 in6_addr_t addr; 1354 ipaddr_t ta; 1355 1356 supp_af |= PARM_SUPP_V4; 1357 /* 1358 * Screen out broad/multicasts & loopback. 1359 * If the endpoint only accepts v6 address, 1360 * go to the next one. 1361 * 1362 * Subnet broadcast check is done in 1363 * sctp_add_faddr(). If the address is 1364 * a broadcast address, it won't be added. 1365 */ 1366 bcopy(ph + 1, &ta, sizeof (ta)); 1367 if (ta == 0 || 1368 ta == INADDR_BROADCAST || 1369 ta == htonl(INADDR_LOOPBACK) || 1370 CLASSD(ta) || connp->conn_ipv6_v6only) { 1371 goto next; 1372 } 1373 IN6_INADDR_TO_V4MAPPED((struct in_addr *) 1374 (ph + 1), &addr); 1375 1376 /* Check for duplicate. */ 1377 if (sctp_lookup_faddr(sctp, &addr) != NULL) 1378 goto next; 1379 1380 /* OK, add it to the faddr set */ 1381 err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP, 1382 B_FALSE); 1383 /* Something is wrong... Try the next one. */ 1384 if (err != 0) 1385 goto next; 1386 } 1387 } else if (ph->sph_type == htons(PARM_ADDR6) && 1388 connp->conn_family == AF_INET6) { 1389 /* An v4 socket should not take v6 addresses. 
*/ 1390 if (remaining >= PARM_ADDR6_LEN) { 1391 in6_addr_t *addr6; 1392 1393 supp_af |= PARM_SUPP_V6; 1394 addr6 = (in6_addr_t *)(ph + 1); 1395 /* 1396 * Screen out link locals, mcast, loopback 1397 * and bogus v6 address. 1398 */ 1399 if (IN6_IS_ADDR_LINKLOCAL(addr6) || 1400 IN6_IS_ADDR_MULTICAST(addr6) || 1401 IN6_IS_ADDR_LOOPBACK(addr6) || 1402 IN6_IS_ADDR_V4MAPPED(addr6)) { 1403 goto next; 1404 } 1405 /* Check for duplicate. */ 1406 if (sctp_lookup_faddr(sctp, addr6) != NULL) 1407 goto next; 1408 1409 err = sctp_add_faddr(sctp, 1410 (in6_addr_t *)(ph + 1), KM_NOSLEEP, 1411 B_FALSE); 1412 /* Something is wrong... Try the next one. */ 1413 if (err != 0) 1414 goto next; 1415 } 1416 } else if (ph->sph_type == htons(PARM_FORWARD_TSN)) { 1417 if (sctp_options != NULL) 1418 *sctp_options |= SCTP_PRSCTP_OPTION; 1419 } /* else; skip */ 1420 1421 next: 1422 ph = sctp_next_parm(ph, &remaining); 1423 } 1424 if (check_saddr) { 1425 sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE : 1426 B_TRUE, hdrdaddr); 1427 } 1428 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); 1429 /* 1430 * We have the right address list now, update clustering's 1431 * knowledge because when we sent the INIT we had just added 1432 * the address the INIT was sent to. 1433 */ 1434 if (psctp == NULL && cl_sctp_assoc_change != NULL) { 1435 uchar_t *alist; 1436 size_t asize; 1437 uchar_t *dlist; 1438 size_t dsize; 1439 1440 asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 1441 alist = kmem_alloc(asize, KM_NOSLEEP); 1442 if (alist == NULL) { 1443 SCTP_KSTAT(sctps, sctp_cl_assoc_change); 1444 return (ENOMEM); 1445 } 1446 /* 1447 * Just include the address the INIT was sent to in the 1448 * delete list and send the entire faddr list. We could 1449 * do it differently (i.e include all the addresses in the 1450 * add list even if it contains the original address OR 1451 * remove the original address from the add list etc.), but 1452 * this seems reasonable enough. 
1453 */ 1454 dsize = sizeof (in6_addr_t); 1455 dlist = kmem_alloc(dsize, KM_NOSLEEP); 1456 if (dlist == NULL) { 1457 kmem_free(alist, asize); 1458 SCTP_KSTAT(sctps, sctp_cl_assoc_change); 1459 return (ENOMEM); 1460 } 1461 bcopy(&curaddr, dlist, sizeof (curaddr)); 1462 sctp_get_faddr_list(sctp, alist, asize); 1463 (*cl_sctp_assoc_change)(connp->conn_family, alist, asize, 1464 sctp->sctp_nfaddrs, dlist, dsize, 1, SCTP_CL_PADDR, 1465 (cl_sctp_handle_t)sctp); 1466 /* alist and dlist will be freed by the clustering module */ 1467 } 1468 return (0); 1469 } 1470 1471 /* 1472 * Returns 0 if the check failed and the restart should be refused, 1473 * 1 if the check succeeded. 1474 */ 1475 int 1476 sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports, 1477 int sleep, sctp_stack_t *sctps, ip_recv_attr_t *ira) 1478 { 1479 sctp_faddr_t *fp, *fphead = NULL; 1480 sctp_parm_hdr_t *ph; 1481 ssize_t remaining; 1482 int isv4; 1483 ipha_t *iph; 1484 ip6_t *ip6h; 1485 in6_addr_t hdraddr[1]; 1486 int retval = 0; 1487 sctp_tf_t *tf; 1488 sctp_t *sctp; 1489 int compres; 1490 sctp_init_chunk_t *init; 1491 int nadded = 0; 1492 1493 /* extract the address from the IP header */ 1494 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); 1495 if (isv4) { 1496 iph = (ipha_t *)pkt->b_rptr; 1497 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr); 1498 } else { 1499 ip6h = (ip6_t *)pkt->b_rptr; 1500 hdraddr[0] = ip6h->ip6_src; 1501 } 1502 1503 /* Walk the params in the INIT [ACK], pulling out addr params */ 1504 remaining = ntohs(ich->sch_len) - sizeof (*ich) - 1505 sizeof (sctp_init_chunk_t); 1506 if (remaining < sizeof (*ph)) { 1507 /* no parameters; restart OK */ 1508 return (1); 1509 } 1510 init = (sctp_init_chunk_t *)(ich + 1); 1511 ph = (sctp_parm_hdr_t *)(init + 1); 1512 1513 while (ph != NULL) { 1514 sctp_faddr_t *fpa = NULL; 1515 1516 /* params will have already been byteordered when validating */ 1517 if (ph->sph_type == htons(PARM_ADDR4)) { 1518 if (remaining >= 
PARM_ADDR4_LEN) { 1519 in6_addr_t addr; 1520 IN6_INADDR_TO_V4MAPPED((struct in_addr *) 1521 (ph + 1), &addr); 1522 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, 1523 sleep); 1524 if (fpa == NULL) { 1525 goto done; 1526 } 1527 bzero(fpa, sizeof (*fpa)); 1528 fpa->faddr = addr; 1529 fpa->next = NULL; 1530 } 1531 } else if (ph->sph_type == htons(PARM_ADDR6)) { 1532 if (remaining >= PARM_ADDR6_LEN) { 1533 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, 1534 sleep); 1535 if (fpa == NULL) { 1536 goto done; 1537 } 1538 bzero(fpa, sizeof (*fpa)); 1539 bcopy(ph + 1, &fpa->faddr, 1540 sizeof (fpa->faddr)); 1541 fpa->next = NULL; 1542 } 1543 } 1544 /* link in the new addr, if it was an addr param */ 1545 if (fpa != NULL) { 1546 if (fphead == NULL) { 1547 fphead = fpa; 1548 } else { 1549 fpa->next = fphead; 1550 fphead = fpa; 1551 } 1552 } 1553 1554 ph = sctp_next_parm(ph, &remaining); 1555 } 1556 1557 if (fphead == NULL) { 1558 /* no addr parameters; restart OK */ 1559 return (1); 1560 } 1561 1562 /* 1563 * got at least one; make sure the header's addr is 1564 * in the list 1565 */ 1566 fp = sctp_lookup_faddr_nosctp(fphead, hdraddr); 1567 if (fp == NULL) { 1568 /* not included; add it now */ 1569 fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep); 1570 if (fp == NULL) { 1571 goto done; 1572 } 1573 bzero(fp, sizeof (*fp)); 1574 fp->faddr = *hdraddr; 1575 fp->next = fphead; 1576 fphead = fp; 1577 } 1578 1579 /* 1580 * Now, we can finally do the check: For each sctp instance 1581 * on the hash line for ports, compare its faddr set against 1582 * the new one. If the new one is a strict subset of any 1583 * existing sctp's faddrs, the restart is OK. However, if there 1584 * is an overlap, this could be an attack, so return failure. 1585 * If all sctp's faddrs are disjoint, this is a legitimate new 1586 * association. 
1587 */ 1588 tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]); 1589 mutex_enter(&tf->tf_lock); 1590 1591 for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) { 1592 if (ports != sctp->sctp_connp->conn_ports) { 1593 continue; 1594 } 1595 compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs); 1596 if (compres <= SCTP_ADDR_SUBSET) { 1597 retval = 1; 1598 mutex_exit(&tf->tf_lock); 1599 goto done; 1600 } 1601 if (compres == SCTP_ADDR_OVERLAP) { 1602 dprint(1, 1603 ("new assoc from %x:%x:%x:%x overlaps with %p\n", 1604 SCTP_PRINTADDR(*hdraddr), (void *)sctp)); 1605 /* 1606 * While we still hold the lock, we need to 1607 * figure out which addresses have been 1608 * added so we can include them in the abort 1609 * we will send back. Since these faddrs will 1610 * never be used, we overload the rto field 1611 * here, setting it to 0 if the address was 1612 * not added, 1 if it was added. 1613 */ 1614 for (fp = fphead; fp; fp = fp->next) { 1615 if (sctp_lookup_faddr(sctp, &fp->faddr)) { 1616 fp->rto = 0; 1617 } else { 1618 fp->rto = 1; 1619 nadded++; 1620 } 1621 } 1622 mutex_exit(&tf->tf_lock); 1623 goto done; 1624 } 1625 } 1626 mutex_exit(&tf->tf_lock); 1627 1628 /* All faddrs are disjoint; legit new association */ 1629 retval = 1; 1630 1631 done: 1632 /* If are attempted adds, send back an abort listing the addrs */ 1633 if (nadded > 0) { 1634 void *dtail; 1635 size_t dlen; 1636 1637 dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP); 1638 if (dtail == NULL) { 1639 goto cleanup; 1640 } 1641 1642 ph = dtail; 1643 dlen = 0; 1644 for (fp = fphead; fp; fp = fp->next) { 1645 if (fp->rto == 0) { 1646 continue; 1647 } 1648 if (IN6_IS_ADDR_V4MAPPED(&fp->faddr)) { 1649 ipaddr_t addr4; 1650 1651 ph->sph_type = htons(PARM_ADDR4); 1652 ph->sph_len = htons(PARM_ADDR4_LEN); 1653 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4); 1654 ph++; 1655 bcopy(&addr4, ph, sizeof (addr4)); 1656 ph = (sctp_parm_hdr_t *) 1657 ((char *)ph + sizeof (addr4)); 1658 dlen += 
PARM_ADDR4_LEN; 1659 } else { 1660 ph->sph_type = htons(PARM_ADDR6); 1661 ph->sph_len = htons(PARM_ADDR6_LEN); 1662 ph++; 1663 bcopy(&fp->faddr, ph, sizeof (fp->faddr)); 1664 ph = (sctp_parm_hdr_t *) 1665 ((char *)ph + sizeof (fp->faddr)); 1666 dlen += PARM_ADDR6_LEN; 1667 } 1668 } 1669 1670 /* Send off the abort */ 1671 sctp_send_abort(sctp, sctp_init2vtag(ich), 1672 SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE, 1673 ira); 1674 1675 kmem_free(dtail, PARM_ADDR6_LEN * nadded); 1676 } 1677 1678 cleanup: 1679 /* Clean up */ 1680 if (fphead) { 1681 sctp_faddr_t *fpn; 1682 for (fp = fphead; fp; fp = fpn) { 1683 fpn = fp->next; 1684 if (fp->ixa != NULL) { 1685 ixa_refrele(fp->ixa); 1686 fp->ixa = NULL; 1687 } 1688 kmem_cache_free(sctp_kmem_faddr_cache, fp); 1689 } 1690 } 1691 1692 return (retval); 1693 } 1694 1695 /* 1696 * Reset any state related to transmitted chunks. 1697 */ 1698 void 1699 sctp_congest_reset(sctp_t *sctp) 1700 { 1701 sctp_faddr_t *fp; 1702 sctp_stack_t *sctps = sctp->sctp_sctps; 1703 mblk_t *mp; 1704 1705 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 1706 fp->ssthresh = sctps->sctps_initial_mtu; 1707 SET_CWND(fp, fp->sfa_pmss, sctps->sctps_slow_start_initial); 1708 fp->suna = 0; 1709 fp->pba = 0; 1710 } 1711 /* 1712 * Clean up the transmit list as well since we have reset accounting 1713 * on all the fps. Send event upstream, if required. 1714 */ 1715 while ((mp = sctp->sctp_xmit_head) != NULL) { 1716 sctp->sctp_xmit_head = mp->b_next; 1717 mp->b_next = NULL; 1718 if (sctp->sctp_xmit_head != NULL) 1719 sctp->sctp_xmit_head->b_prev = NULL; 1720 sctp_sendfail_event(sctp, mp, 0, B_TRUE); 1721 } 1722 sctp->sctp_xmit_head = NULL; 1723 sctp->sctp_xmit_tail = NULL; 1724 sctp->sctp_xmit_unacked = NULL; 1725 1726 sctp->sctp_unacked = 0; 1727 /* 1728 * Any control message as well. We will clean-up this list as well. 1729 * This contains any pending ASCONF request that we have queued/sent. 1730 * If we do get an ACK we will just drop it. 
However, given that 1731 * we are restarting chances are we aren't going to get any. 1732 */ 1733 if (sctp->sctp_cxmit_list != NULL) 1734 sctp_asconf_free_cxmit(sctp, NULL); 1735 sctp->sctp_cxmit_list = NULL; 1736 sctp->sctp_cchunk_pend = 0; 1737 1738 sctp->sctp_rexmitting = B_FALSE; 1739 sctp->sctp_rxt_nxttsn = 0; 1740 sctp->sctp_rxt_maxtsn = 0; 1741 1742 sctp->sctp_zero_win_probe = B_FALSE; 1743 } 1744 1745 static void 1746 sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr, 1747 mblk_t *timer_mp) 1748 { 1749 sctp_stack_t *sctps = sctp->sctp_sctps; 1750 1751 ASSERT(fp->ixa != NULL); 1752 1753 bcopy(addr, &fp->faddr, sizeof (*addr)); 1754 if (IN6_IS_ADDR_V4MAPPED(addr)) { 1755 fp->isv4 = 1; 1756 /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */ 1757 fp->sfa_pmss = 1758 (sctps->sctps_initial_mtu - sctp->sctp_hdr_len) & 1759 ~(SCTP_ALIGN - 1); 1760 fp->ixa->ixa_flags |= IXAF_IS_IPV4; 1761 } else { 1762 fp->isv4 = 0; 1763 fp->sfa_pmss = 1764 (sctps->sctps_initial_mtu - sctp->sctp_hdr6_len) & 1765 ~(SCTP_ALIGN - 1); 1766 fp->ixa->ixa_flags &= ~IXAF_IS_IPV4; 1767 } 1768 fp->cwnd = sctps->sctps_slow_start_initial * fp->sfa_pmss; 1769 fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max); 1770 SCTP_MAX_RTO(sctp, fp); 1771 fp->srtt = -1; 1772 fp->rtt_updates = 0; 1773 fp->strikes = 0; 1774 fp->max_retr = sctp->sctp_pp_max_rxt; 1775 /* Mark it as not confirmed. 
*/ 1776 fp->state = SCTP_FADDRS_UNCONFIRMED; 1777 fp->hb_interval = sctp->sctp_hb_interval; 1778 fp->ssthresh = sctps->sctps_initial_ssthresh; 1779 fp->suna = 0; 1780 fp->pba = 0; 1781 fp->acked = 0; 1782 fp->lastactive = lbolt64; 1783 fp->timer_mp = timer_mp; 1784 fp->hb_pending = B_FALSE; 1785 fp->hb_enabled = B_TRUE; 1786 fp->df = 1; 1787 fp->pmtu_discovered = 0; 1788 fp->next = NULL; 1789 fp->T3expire = 0; 1790 (void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret, 1791 sizeof (fp->hb_secret)); 1792 fp->hb_expiry = lbolt64; 1793 fp->rxt_unacked = 0; 1794 1795 sctp_get_dest(sctp, fp); 1796 } 1797 1798 /*ARGSUSED*/ 1799 static int 1800 faddr_constructor(void *buf, void *arg, int flags) 1801 { 1802 sctp_faddr_t *fp = buf; 1803 1804 fp->timer_mp = NULL; 1805 fp->timer_running = 0; 1806 1807 fp->rc_timer_mp = NULL; 1808 fp->rc_timer_running = 0; 1809 1810 return (0); 1811 } 1812 1813 /*ARGSUSED*/ 1814 static void 1815 faddr_destructor(void *buf, void *arg) 1816 { 1817 sctp_faddr_t *fp = buf; 1818 1819 ASSERT(fp->timer_mp == NULL); 1820 ASSERT(fp->timer_running == 0); 1821 1822 ASSERT(fp->rc_timer_mp == NULL); 1823 ASSERT(fp->rc_timer_running == 0); 1824 } 1825 1826 void 1827 sctp_faddr_init(void) 1828 { 1829 sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache", 1830 sizeof (sctp_faddr_t), 0, faddr_constructor, faddr_destructor, 1831 NULL, NULL, NULL, 0); 1832 } 1833 1834 void 1835 sctp_faddr_fini(void) 1836 { 1837 kmem_cache_destroy(sctp_kmem_faddr_cache); 1838 } 1839