1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/systm.h> 28 #include <sys/stream.h> 29 #include <sys/strsubr.h> 30 #include <sys/ddi.h> 31 #include <sys/sunddi.h> 32 #include <sys/kmem.h> 33 #include <sys/socket.h> 34 #include <sys/random.h> 35 #include <sys/tsol/tndb.h> 36 #include <sys/tsol/tnet.h> 37 38 #include <netinet/in.h> 39 #include <netinet/ip6.h> 40 #include <netinet/sctp.h> 41 42 #include <inet/common.h> 43 #include <inet/ip.h> 44 #include <inet/ip6.h> 45 #include <inet/ip_ire.h> 46 #include <inet/ip_if.h> 47 #include <inet/ip_ndp.h> 48 #include <inet/mib2.h> 49 #include <inet/nd.h> 50 #include <inet/optcom.h> 51 #include <inet/sctp_ip.h> 52 #include <inet/ipclassifier.h> 53 54 #include "sctp_impl.h" 55 #include "sctp_addr.h" 56 #include "sctp_asconf.h" 57 58 static struct kmem_cache *sctp_kmem_faddr_cache; 59 static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *, mblk_t *); 60 61 /* Set the source address. Refer to comments in sctp_get_dest(). 
*/ 62 void 63 sctp_set_saddr(sctp_t *sctp, sctp_faddr_t *fp) 64 { 65 boolean_t v6 = !fp->isv4; 66 boolean_t addr_set; 67 68 fp->saddr = sctp_get_valid_addr(sctp, v6, &addr_set); 69 /* 70 * If there is no source address avaialble, mark this peer address 71 * as unreachable for now. When the heartbeat timer fires, it will 72 * call sctp_get_dest() to re-check if there is any source address 73 * available. 74 */ 75 if (!addr_set) 76 fp->state = SCTP_FADDRS_UNREACH; 77 } 78 79 /* 80 * Call this function to get information about a peer addr fp. 81 * 82 * Uses ip_attr_connect to avoid explicit use of ire and source address 83 * selection. 84 */ 85 void 86 sctp_get_dest(sctp_t *sctp, sctp_faddr_t *fp) 87 { 88 in6_addr_t laddr; 89 in6_addr_t nexthop; 90 sctp_saddr_ipif_t *sp; 91 int hdrlen; 92 sctp_stack_t *sctps = sctp->sctp_sctps; 93 conn_t *connp = sctp->sctp_connp; 94 iulp_t uinfo; 95 uint_t pmtu; 96 int error; 97 uint32_t flags = IPDF_VERIFY_DST | IPDF_IPSEC | 98 IPDF_SELECT_SRC | IPDF_UNIQUE_DCE; 99 100 /* 101 * Tell sctp_make_mp it needs to call us again should we not 102 * complete and set the saddr. 103 */ 104 fp->saddr = ipv6_all_zeros; 105 106 /* 107 * If this addr is not reachable, mark it as unconfirmed for now, the 108 * state will be changed back to unreachable later in this function 109 * if it is still the case. 110 */ 111 if (fp->state == SCTP_FADDRS_UNREACH) { 112 fp->state = SCTP_FADDRS_UNCONFIRMED; 113 } 114 115 /* 116 * Socket is connected - enable PMTU discovery. 
117 */ 118 if (!sctps->sctps_ignore_path_mtu) 119 fp->ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 120 121 ip_attr_nexthop(&connp->conn_xmit_ipp, fp->ixa, &fp->faddr, 122 &nexthop); 123 124 laddr = fp->saddr; 125 error = ip_attr_connect(connp, fp->ixa, &laddr, &fp->faddr, &nexthop, 126 connp->conn_fport, &laddr, &uinfo, flags); 127 128 if (error != 0) { 129 dprint(3, ("sctp_get_dest: no ire for %x:%x:%x:%x\n", 130 SCTP_PRINTADDR(fp->faddr))); 131 /* 132 * It is tempting to just leave the src addr 133 * unspecified and let IP figure it out, but we 134 * *cannot* do this, since IP may choose a src addr 135 * that is not part of this association... unless 136 * this sctp has bound to all addrs. So if the dest 137 * lookup fails, try to find one in our src addr 138 * list, unless the sctp has bound to all addrs, in 139 * which case we change the src addr to unspec. 140 * 141 * Note that if this is a v6 endpoint but it does 142 * not have any v4 address at this point (e.g. may 143 * have been deleted), sctp_get_valid_addr() will 144 * return mapped INADDR_ANY. In this case, this 145 * address should be marked not reachable so that 146 * it won't be used to send data. 147 */ 148 sctp_set_saddr(sctp, fp); 149 if (fp->state == SCTP_FADDRS_UNREACH) 150 return; 151 goto check_current; 152 } 153 ASSERT(fp->ixa->ixa_ire != NULL); 154 ASSERT(!(fp->ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))); 155 156 if (!sctp->sctp_loopback) 157 sctp->sctp_loopback = uinfo.iulp_loopback; 158 159 /* Make sure the laddr is part of this association */ 160 if ((sp = sctp_saddr_lookup(sctp, &laddr, 0)) != NULL && 161 !sp->saddr_ipif_dontsrc) { 162 if (sp->saddr_ipif_unconfirmed == 1) 163 sp->saddr_ipif_unconfirmed = 0; 164 /* We did IPsec policy lookup for laddr already */ 165 fp->saddr = laddr; 166 } else { 167 dprint(2, ("sctp_get_dest: src addr is not part of assoc " 168 "%x:%x:%x:%x\n", SCTP_PRINTADDR(laddr))); 169 170 /* 171 * Set the src to the first saddr and hope for the best. 
172 * Note that this case should very seldomly 173 * happen. One scenario this can happen is an app 174 * explicitly bind() to an address. But that address is 175 * not the preferred source address to send to the peer. 176 */ 177 sctp_set_saddr(sctp, fp); 178 if (fp->state == SCTP_FADDRS_UNREACH) { 179 return; 180 } 181 } 182 183 /* 184 * Pull out RTO information for this faddr and use it if we don't 185 * have any yet. 186 */ 187 if (fp->srtt == -1 && uinfo.iulp_rtt != 0) { 188 /* The cached value is in ms. */ 189 fp->srtt = MSEC_TO_TICK(uinfo.iulp_rtt); 190 fp->rttvar = MSEC_TO_TICK(uinfo.iulp_rtt_sd); 191 fp->rto = 3 * fp->srtt; 192 193 /* Bound the RTO by configured min and max values */ 194 if (fp->rto < sctp->sctp_rto_min) { 195 fp->rto = sctp->sctp_rto_min; 196 } 197 if (fp->rto > sctp->sctp_rto_max) { 198 fp->rto = sctp->sctp_rto_max; 199 } 200 SCTP_MAX_RTO(sctp, fp); 201 } 202 pmtu = uinfo.iulp_mtu; 203 204 /* 205 * Record the MTU for this faddr. If the MTU for this faddr has 206 * changed, check if the assc MTU will also change. 207 */ 208 if (fp->isv4) { 209 hdrlen = sctp->sctp_hdr_len; 210 } else { 211 hdrlen = sctp->sctp_hdr6_len; 212 } 213 if ((fp->sfa_pmss + hdrlen) != pmtu) { 214 /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */ 215 fp->sfa_pmss = (pmtu - hdrlen) & ~(SCTP_ALIGN - 1); 216 if (fp->cwnd < (fp->sfa_pmss * 2)) { 217 SET_CWND(fp, fp->sfa_pmss, 218 sctps->sctps_slow_start_initial); 219 } 220 } 221 222 check_current: 223 if (fp == sctp->sctp_current) 224 sctp_set_faddr_current(sctp, fp); 225 } 226 227 void 228 sctp_update_dce(sctp_t *sctp) 229 { 230 sctp_faddr_t *fp; 231 sctp_stack_t *sctps = sctp->sctp_sctps; 232 iulp_t uinfo; 233 ip_stack_t *ipst = sctps->sctps_netstack->netstack_ip; 234 uint_t ifindex; 235 236 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 237 bzero(&uinfo, sizeof (uinfo)); 238 /* 239 * Only record the PMTU for this faddr if we actually have 240 * done discovery. 
This prevents initialized default from 241 * clobbering any real info that IP may have. 242 */ 243 if (fp->pmtu_discovered) { 244 if (fp->isv4) { 245 uinfo.iulp_mtu = fp->sfa_pmss + 246 sctp->sctp_hdr_len; 247 } else { 248 uinfo.iulp_mtu = fp->sfa_pmss + 249 sctp->sctp_hdr6_len; 250 } 251 } 252 if (sctps->sctps_rtt_updates != 0 && 253 fp->rtt_updates >= sctps->sctps_rtt_updates) { 254 /* 255 * dce_update_uinfo() merges these values with the 256 * old values. 257 */ 258 uinfo.iulp_rtt = TICK_TO_MSEC(fp->srtt); 259 uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->rttvar); 260 fp->rtt_updates = 0; 261 } 262 ifindex = 0; 263 if (IN6_IS_ADDR_LINKSCOPE(&fp->faddr)) { 264 /* 265 * If we are going to create a DCE we'd better have 266 * an ifindex 267 */ 268 if (fp->ixa->ixa_nce != NULL) { 269 ifindex = fp->ixa->ixa_nce->nce_common-> 270 ncec_ill->ill_phyint->phyint_ifindex; 271 } else { 272 continue; 273 } 274 } 275 276 (void) dce_update_uinfo(&fp->faddr, ifindex, &uinfo, ipst); 277 } 278 } 279 280 /* 281 * The sender must later set the total length in the IP header. 282 */ 283 mblk_t * 284 sctp_make_mp(sctp_t *sctp, sctp_faddr_t *fp, int trailer) 285 { 286 mblk_t *mp; 287 size_t ipsctplen; 288 int isv4; 289 sctp_stack_t *sctps = sctp->sctp_sctps; 290 boolean_t src_changed = B_FALSE; 291 292 ASSERT(fp != NULL); 293 isv4 = fp->isv4; 294 295 if (SCTP_IS_ADDR_UNSPEC(isv4, fp->saddr) || 296 (fp->ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 297 /* Need to pick a source */ 298 sctp_get_dest(sctp, fp); 299 /* 300 * Although we still may not get an IRE, the source address 301 * may be changed in sctp_get_ire(). Set src_changed to 302 * true so that the source address is copied again. 303 */ 304 src_changed = B_TRUE; 305 } 306 307 /* There is no suitable source address to use, return. 
*/ 308 if (fp->state == SCTP_FADDRS_UNREACH) 309 return (NULL); 310 311 ASSERT(fp->ixa->ixa_ire != NULL); 312 ASSERT(!SCTP_IS_ADDR_UNSPEC(isv4, fp->saddr)); 313 314 if (isv4) { 315 ipsctplen = sctp->sctp_hdr_len; 316 } else { 317 ipsctplen = sctp->sctp_hdr6_len; 318 } 319 320 mp = allocb(ipsctplen + sctps->sctps_wroff_xtra + trailer, BPRI_MED); 321 if (mp == NULL) { 322 ip1dbg(("sctp_make_mp: error making mp..\n")); 323 return (NULL); 324 } 325 mp->b_rptr += sctps->sctps_wroff_xtra; 326 mp->b_wptr = mp->b_rptr + ipsctplen; 327 328 ASSERT(OK_32PTR(mp->b_wptr)); 329 330 if (isv4) { 331 ipha_t *iph = (ipha_t *)mp->b_rptr; 332 333 bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen); 334 if (fp != sctp->sctp_current || src_changed) { 335 /* Fix the source and destination addresses. */ 336 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst); 337 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, iph->ipha_src); 338 } 339 /* set or clear the don't fragment bit */ 340 if (fp->df) { 341 iph->ipha_fragment_offset_and_flags = htons(IPH_DF); 342 } else { 343 iph->ipha_fragment_offset_and_flags = 0; 344 } 345 } else { 346 bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen); 347 if (fp != sctp->sctp_current || src_changed) { 348 /* Fix the source and destination addresses. */ 349 ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr; 350 ((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr; 351 } 352 } 353 ASSERT(sctp->sctp_connp != NULL); 354 return (mp); 355 } 356 357 /* 358 * Notify upper layers about preferred write offset, write size. 
359 */ 360 void 361 sctp_set_ulp_prop(sctp_t *sctp) 362 { 363 int hdrlen; 364 struct sock_proto_props sopp; 365 366 sctp_stack_t *sctps = sctp->sctp_sctps; 367 368 if (sctp->sctp_current->isv4) { 369 hdrlen = sctp->sctp_hdr_len; 370 } else { 371 hdrlen = sctp->sctp_hdr6_len; 372 } 373 ASSERT(sctp->sctp_ulpd); 374 375 sctp->sctp_connp->conn_wroff = sctps->sctps_wroff_xtra + hdrlen + 376 sizeof (sctp_data_hdr_t); 377 378 ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss); 379 bzero(&sopp, sizeof (sopp)); 380 sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF; 381 sopp.sopp_wroff = sctp->sctp_connp->conn_wroff; 382 sopp.sopp_maxblk = sctp->sctp_mss - sizeof (sctp_data_hdr_t); 383 sctp->sctp_ulp_prop(sctp->sctp_ulpd, &sopp); 384 } 385 386 /* 387 * Set the lengths in the packet and the transmit attributes. 388 */ 389 void 390 sctp_set_iplen(sctp_t *sctp, mblk_t *mp, ip_xmit_attr_t *ixa) 391 { 392 uint16_t sum = 0; 393 ipha_t *iph; 394 ip6_t *ip6h; 395 mblk_t *pmp = mp; 396 boolean_t isv4; 397 398 isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION); 399 for (; pmp; pmp = pmp->b_cont) 400 sum += pmp->b_wptr - pmp->b_rptr; 401 402 ixa->ixa_pktlen = sum; 403 if (isv4) { 404 iph = (ipha_t *)mp->b_rptr; 405 iph->ipha_length = htons(sum); 406 ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr_len; 407 } else { 408 ip6h = (ip6_t *)mp->b_rptr; 409 ip6h->ip6_plen = htons(sum - IPV6_HDR_LEN); 410 ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr6_len; 411 } 412 } 413 414 int 415 sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2) 416 { 417 int na1 = 0; 418 int overlap = 0; 419 int equal = 1; 420 int onematch; 421 sctp_faddr_t *fp1, *fp2; 422 423 for (fp1 = a1; fp1; fp1 = fp1->next) { 424 onematch = 0; 425 for (fp2 = a2; fp2; fp2 = fp2->next) { 426 if (IN6_ARE_ADDR_EQUAL(&fp1->faddr, &fp2->faddr)) { 427 overlap++; 428 onematch = 1; 429 break; 430 } 431 if (!onematch) { 432 equal = 0; 433 } 434 } 435 na1++; 436 } 437 438 if (equal) { 439 return (SCTP_ADDR_EQUAL); 440 } 441 if (overlap == 
na1) { 442 return (SCTP_ADDR_SUBSET); 443 } 444 if (overlap) { 445 return (SCTP_ADDR_OVERLAP); 446 } 447 return (SCTP_ADDR_DISJOINT); 448 } 449 450 /* 451 * Returns 0 on success, ENOMEM on memory allocation failure, EHOSTUNREACH 452 * if the connection credentials fail remote host accreditation or 453 * if the new destination does not support the previously established 454 * connection security label. If sleep is true, this function should 455 * never fail for a memory allocation failure. The boolean parameter 456 * "first" decides whether the newly created faddr structure should be 457 * added at the beginning of the list or at the end. 458 * 459 * Note: caller must hold conn fanout lock. 460 */ 461 int 462 sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep, boolean_t first) 463 { 464 sctp_faddr_t *faddr; 465 mblk_t *timer_mp; 466 int err; 467 conn_t *connp = sctp->sctp_connp; 468 469 if (is_system_labeled()) { 470 ip_xmit_attr_t *ixa = connp->conn_ixa; 471 ts_label_t *effective_tsl = NULL; 472 473 ASSERT(ixa->ixa_tsl != NULL); 474 475 /* 476 * Verify the destination is allowed to receive packets 477 * at the security label of the connection we are initiating. 478 * 479 * tsol_check_dest() will create a new effective label for 480 * this connection with a modified label or label flags only 481 * if there are changes from the original label. 482 * 483 * Accept whatever label we get if this is the first 484 * destination address for this connection. The security 485 * label and label flags must match any previuous settings 486 * for all subsequent destination addresses. 
487 */ 488 if (IN6_IS_ADDR_V4MAPPED(addr)) { 489 uint32_t dst; 490 IN6_V4MAPPED_TO_IPADDR(addr, dst); 491 err = tsol_check_dest(ixa->ixa_tsl, 492 &dst, IPV4_VERSION, connp->conn_mac_mode, 493 connp->conn_zone_is_global, &effective_tsl); 494 } else { 495 err = tsol_check_dest(ixa->ixa_tsl, 496 addr, IPV6_VERSION, connp->conn_mac_mode, 497 connp->conn_zone_is_global, &effective_tsl); 498 } 499 if (err != 0) 500 return (err); 501 502 if (sctp->sctp_faddrs == NULL && effective_tsl != NULL) { 503 ip_xmit_attr_replace_tsl(ixa, effective_tsl); 504 } else if (effective_tsl != NULL) { 505 label_rele(effective_tsl); 506 return (EHOSTUNREACH); 507 } 508 } 509 510 if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) 511 return (ENOMEM); 512 bzero(faddr, sizeof (*faddr)); 513 timer_mp = sctp_timer_alloc((sctp), sctp_rexmit_timer, sleep); 514 if (timer_mp == NULL) { 515 kmem_cache_free(sctp_kmem_faddr_cache, faddr); 516 return (ENOMEM); 517 } 518 ((sctpt_t *)(timer_mp->b_rptr))->sctpt_faddr = faddr; 519 520 /* Start with any options set on the conn */ 521 faddr->ixa = conn_get_ixa_exclusive(connp); 522 if (faddr->ixa == NULL) { 523 freemsg(timer_mp); 524 kmem_cache_free(sctp_kmem_faddr_cache, faddr); 525 return (ENOMEM); 526 } 527 faddr->ixa->ixa_notify_cookie = connp->conn_sctp; 528 529 sctp_init_faddr(sctp, faddr, addr, timer_mp); 530 ASSERT(faddr->ixa->ixa_cred != NULL); 531 532 /* ip_attr_connect didn't allow broadcats/multicast dest */ 533 ASSERT(faddr->next == NULL); 534 535 if (sctp->sctp_faddrs == NULL) { 536 ASSERT(sctp->sctp_lastfaddr == NULL); 537 /* only element on list; first and last are same */ 538 sctp->sctp_faddrs = sctp->sctp_lastfaddr = faddr; 539 } else if (first) { 540 ASSERT(sctp->sctp_lastfaddr != NULL); 541 faddr->next = sctp->sctp_faddrs; 542 sctp->sctp_faddrs = faddr; 543 } else { 544 sctp->sctp_lastfaddr->next = faddr; 545 sctp->sctp_lastfaddr = faddr; 546 } 547 sctp->sctp_nfaddrs++; 548 549 return (0); 550 } 551 552 sctp_faddr_t * 553 
sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr) 554 { 555 sctp_faddr_t *fp; 556 557 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 558 if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) 559 break; 560 } 561 562 return (fp); 563 } 564 565 sctp_faddr_t * 566 sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr) 567 { 568 for (; fp; fp = fp->next) { 569 if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) { 570 break; 571 } 572 } 573 574 return (fp); 575 } 576 577 /* 578 * To change the currently used peer address to the specified one. 579 */ 580 void 581 sctp_set_faddr_current(sctp_t *sctp, sctp_faddr_t *fp) 582 { 583 /* Now setup the composite header. */ 584 if (fp->isv4) { 585 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, 586 sctp->sctp_ipha->ipha_dst); 587 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, sctp->sctp_ipha->ipha_src); 588 /* update don't fragment bit */ 589 if (fp->df) { 590 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 591 htons(IPH_DF); 592 } else { 593 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0; 594 } 595 } else { 596 sctp->sctp_ip6h->ip6_dst = fp->faddr; 597 sctp->sctp_ip6h->ip6_src = fp->saddr; 598 } 599 600 sctp->sctp_current = fp; 601 sctp->sctp_mss = fp->sfa_pmss; 602 603 /* Update the uppper layer for the change. */ 604 if (!SCTP_IS_DETACHED(sctp)) 605 sctp_set_ulp_prop(sctp); 606 } 607 608 void 609 sctp_redo_faddr_srcs(sctp_t *sctp) 610 { 611 sctp_faddr_t *fp; 612 613 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 614 sctp_get_dest(sctp, fp); 615 } 616 } 617 618 void 619 sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp) 620 { 621 int64_t now = LBOLT_FASTPATH64; 622 623 /* 624 * If we are under memory pressure, we abort association waiting 625 * in zero window probing state for too long. We do this by not 626 * resetting sctp_strikes. So if sctp_zero_win_probe continues 627 * while under memory pressure, this association will eventually 628 * time out. 
629 */ 630 if (!sctp->sctp_zero_win_probe || !sctp->sctp_sctps->sctps_reclaim) { 631 sctp->sctp_strikes = 0; 632 } 633 fp->strikes = 0; 634 fp->lastactive = now; 635 fp->hb_expiry = now + SET_HB_INTVL(fp); 636 fp->hb_pending = B_FALSE; 637 if (fp->state != SCTP_FADDRS_ALIVE) { 638 fp->state = SCTP_FADDRS_ALIVE; 639 sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0); 640 /* Should have a full IRE now */ 641 sctp_get_dest(sctp, fp); 642 643 /* 644 * If this is the primary, switch back to it now. And 645 * we probably want to reset the source addr used to reach 646 * it. 647 * Note that if we didn't find a source in sctp_get_dest 648 * then we'd be unreachable at this point in time. 649 */ 650 if (fp == sctp->sctp_primary && 651 fp->state != SCTP_FADDRS_UNREACH) { 652 sctp_set_faddr_current(sctp, fp); 653 return; 654 } 655 } 656 } 657 658 /* 659 * Return B_TRUE if there is still an active peer address with zero strikes; 660 * otherwise rturn B_FALSE. 661 */ 662 boolean_t 663 sctp_is_a_faddr_clean(sctp_t *sctp) 664 { 665 sctp_faddr_t *fp; 666 667 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 668 if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) { 669 return (B_TRUE); 670 } 671 } 672 673 return (B_FALSE); 674 } 675 676 /* 677 * Returns 0 if there is at leave one other active faddr, -1 if there 678 * are none. If there are none left, faddr_dead() will start killing the 679 * association. 680 * If the downed faddr was the current faddr, a new current faddr 681 * will be chosen. 
682 */ 683 int 684 sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate) 685 { 686 sctp_faddr_t *ofp; 687 sctp_stack_t *sctps = sctp->sctp_sctps; 688 689 if (fp->state == SCTP_FADDRS_ALIVE) { 690 sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_UNREACHABLE, 0); 691 } 692 fp->state = newstate; 693 694 dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n", 695 SCTP_PRINTADDR(fp->faddr), newstate)); 696 697 if (fp == sctp->sctp_current) { 698 /* Current faddr down; need to switch it */ 699 sctp->sctp_current = NULL; 700 } 701 702 /* Find next alive faddr */ 703 ofp = fp; 704 for (fp = fp->next; fp != NULL; fp = fp->next) { 705 if (fp->state == SCTP_FADDRS_ALIVE) { 706 break; 707 } 708 } 709 710 if (fp == NULL) { 711 /* Continue from beginning of list */ 712 for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->next) { 713 if (fp->state == SCTP_FADDRS_ALIVE) { 714 break; 715 } 716 } 717 } 718 719 /* 720 * Find a new fp, so if the current faddr is dead, use the new fp 721 * as the current one. 722 */ 723 if (fp != ofp) { 724 if (sctp->sctp_current == NULL) { 725 dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n", 726 SCTP_PRINTADDR(fp->faddr))); 727 /* 728 * Note that we don't need to reset the source addr 729 * of the new fp. 730 */ 731 sctp_set_faddr_current(sctp, fp); 732 } 733 return (0); 734 } 735 736 737 /* All faddrs are down; kill the association */ 738 dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n")); 739 SCTPS_BUMP_MIB(sctps, sctpAborted); 740 sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ? 741 SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL); 742 sctp_clean_death(sctp, sctp->sctp_client_errno ? 
743 sctp->sctp_client_errno : ETIMEDOUT); 744 745 return (-1); 746 } 747 748 sctp_faddr_t * 749 sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp) 750 { 751 sctp_faddr_t *nfp = NULL; 752 sctp_faddr_t *saved_fp = NULL; 753 int min_strikes; 754 755 if (ofp == NULL) { 756 ofp = sctp->sctp_current; 757 } 758 /* Nothing to do */ 759 if (sctp->sctp_nfaddrs < 2) 760 return (ofp); 761 762 /* 763 * Find the next live peer address with zero strikes. In case 764 * there is none, find the one with the lowest number of strikes. 765 */ 766 min_strikes = ofp->strikes; 767 nfp = ofp->next; 768 while (nfp != ofp) { 769 /* If reached end of list, continue scan from the head */ 770 if (nfp == NULL) { 771 nfp = sctp->sctp_faddrs; 772 continue; 773 } 774 if (nfp->state == SCTP_FADDRS_ALIVE) { 775 if (nfp->strikes == 0) 776 break; 777 if (nfp->strikes < min_strikes) { 778 min_strikes = nfp->strikes; 779 saved_fp = nfp; 780 } 781 } 782 nfp = nfp->next; 783 } 784 /* If reached the old address, there is no zero strike path */ 785 if (nfp == ofp) 786 nfp = NULL; 787 788 /* 789 * If there is a peer address with zero strikes we use that, if not 790 * return a peer address with fewer strikes than the one last used, 791 * if neither exist we may as well stay with the old one. 
792 */ 793 if (nfp != NULL) 794 return (nfp); 795 if (saved_fp != NULL) 796 return (saved_fp); 797 return (ofp); 798 } 799 800 void 801 sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp) 802 { 803 sctp_faddr_t *fpp; 804 805 if (!sctp->sctp_faddrs) { 806 return; 807 } 808 809 if (fp->timer_mp != NULL) { 810 sctp_timer_free(fp->timer_mp); 811 fp->timer_mp = NULL; 812 fp->timer_running = 0; 813 } 814 if (fp->rc_timer_mp != NULL) { 815 sctp_timer_free(fp->rc_timer_mp); 816 fp->rc_timer_mp = NULL; 817 fp->rc_timer_running = 0; 818 } 819 if (fp->ixa != NULL) { 820 ixa_refrele(fp->ixa); 821 fp->ixa = NULL; 822 } 823 824 if (fp == sctp->sctp_faddrs) { 825 goto gotit; 826 } 827 828 for (fpp = sctp->sctp_faddrs; fpp->next != fp; fpp = fpp->next) 829 ; 830 831 gotit: 832 ASSERT(sctp->sctp_conn_tfp != NULL); 833 mutex_enter(&sctp->sctp_conn_tfp->tf_lock); 834 if (fp == sctp->sctp_faddrs) { 835 sctp->sctp_faddrs = fp->next; 836 } else { 837 fpp->next = fp->next; 838 } 839 mutex_exit(&sctp->sctp_conn_tfp->tf_lock); 840 kmem_cache_free(sctp_kmem_faddr_cache, fp); 841 sctp->sctp_nfaddrs--; 842 } 843 844 void 845 sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock) 846 { 847 sctp_faddr_t *fp, *fpn; 848 849 if (sctp->sctp_faddrs == NULL) { 850 ASSERT(sctp->sctp_lastfaddr == NULL); 851 return; 852 } 853 854 ASSERT(sctp->sctp_lastfaddr != NULL); 855 sctp->sctp_lastfaddr = NULL; 856 sctp->sctp_current = NULL; 857 sctp->sctp_primary = NULL; 858 859 sctp_free_faddr_timers(sctp); 860 861 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { 862 /* in conn fanout; need to hold lock */ 863 mutex_enter(&sctp->sctp_conn_tfp->tf_lock); 864 } 865 866 for (fp = sctp->sctp_faddrs; fp; fp = fpn) { 867 fpn = fp->next; 868 if (fp->ixa != NULL) { 869 ixa_refrele(fp->ixa); 870 fp->ixa = NULL; 871 } 872 kmem_cache_free(sctp_kmem_faddr_cache, fp); 873 sctp->sctp_nfaddrs--; 874 } 875 876 sctp->sctp_faddrs = NULL; 877 ASSERT(sctp->sctp_nfaddrs == 0); 878 if (sctp->sctp_conn_tfp != NULL && 
!caller_holds_lock) { 879 mutex_exit(&sctp->sctp_conn_tfp->tf_lock); 880 } 881 882 } 883 884 void 885 sctp_zap_addrs(sctp_t *sctp) 886 { 887 sctp_zap_faddrs(sctp, 0); 888 sctp_free_saddrs(sctp); 889 } 890 891 /* 892 * Build two SCTP header templates; one for IPv4 and one for IPv6. 893 * Store them in sctp_iphc and sctp_iphc6 respectively (and related fields). 894 * There are no IP addresses in the templates, but the port numbers and 895 * verifier are field in from the conn_t and sctp_t. 896 * 897 * Returns failure if can't allocate memory, or if there is a problem 898 * with a routing header/option. 899 * 900 * We allocate space for the minimum sctp header (sctp_hdr_t). 901 * 902 * We massage an routing option/header. There is no checksum implication 903 * for a routing header for sctp. 904 * 905 * Caller needs to update conn_wroff if desired. 906 * 907 * TSol notes: This assumes that a SCTP association has a single peer label 908 * since we only track a single pair of ipp_label_v4/v6 and not a separate one 909 * for each faddr. 
910 */ 911 int 912 sctp_build_hdrs(sctp_t *sctp, int sleep) 913 { 914 conn_t *connp = sctp->sctp_connp; 915 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 916 uint_t ip_hdr_length; 917 uchar_t *hdrs; 918 uint_t hdrs_len; 919 uint_t ulp_hdr_length = sizeof (sctp_hdr_t); 920 ipha_t *ipha; 921 ip6_t *ip6h; 922 sctp_hdr_t *sctph; 923 in6_addr_t v6src, v6dst; 924 ipaddr_t v4src, v4dst; 925 926 v4src = connp->conn_saddr_v4; 927 v4dst = connp->conn_faddr_v4; 928 v6src = connp->conn_saddr_v6; 929 v6dst = connp->conn_faddr_v6; 930 931 /* First do IPv4 header */ 932 ip_hdr_length = ip_total_hdrs_len_v4(ipp); 933 934 /* In case of TX label and IP options it can be too much */ 935 if (ip_hdr_length > IP_MAX_HDR_LENGTH) { 936 /* Preserves existing TX errno for this */ 937 return (EHOSTUNREACH); 938 } 939 hdrs_len = ip_hdr_length + ulp_hdr_length; 940 ASSERT(hdrs_len != 0); 941 942 if (hdrs_len != sctp->sctp_iphc_len) { 943 /* Allocate new before we free any old */ 944 hdrs = kmem_alloc(hdrs_len, sleep); 945 if (hdrs == NULL) 946 return (ENOMEM); 947 948 if (sctp->sctp_iphc != NULL) 949 kmem_free(sctp->sctp_iphc, sctp->sctp_iphc_len); 950 sctp->sctp_iphc = hdrs; 951 sctp->sctp_iphc_len = hdrs_len; 952 } else { 953 hdrs = sctp->sctp_iphc; 954 } 955 sctp->sctp_hdr_len = sctp->sctp_iphc_len; 956 sctp->sctp_ip_hdr_len = ip_hdr_length; 957 958 sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length); 959 sctp->sctp_sctph = sctph; 960 sctph->sh_sport = connp->conn_lport; 961 sctph->sh_dport = connp->conn_fport; 962 sctph->sh_verf = sctp->sctp_fvtag; 963 sctph->sh_chksum = 0; 964 965 ipha = (ipha_t *)hdrs; 966 sctp->sctp_ipha = ipha; 967 968 ipha->ipha_src = v4src; 969 ipha->ipha_dst = v4dst; 970 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto); 971 ipha->ipha_length = htons(hdrs_len); 972 ipha->ipha_fragment_offset_and_flags = 0; 973 974 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) 975 (void) ip_massage_options(ipha, connp->conn_netstack); 976 977 /* Now IPv6 */ 978 ip_hdr_length = 
ip_total_hdrs_len_v6(ipp); 979 hdrs_len = ip_hdr_length + ulp_hdr_length; 980 ASSERT(hdrs_len != 0); 981 982 if (hdrs_len != sctp->sctp_iphc6_len) { 983 /* Allocate new before we free any old */ 984 hdrs = kmem_alloc(hdrs_len, sleep); 985 if (hdrs == NULL) 986 return (ENOMEM); 987 988 if (sctp->sctp_iphc6 != NULL) 989 kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len); 990 sctp->sctp_iphc6 = hdrs; 991 sctp->sctp_iphc6_len = hdrs_len; 992 } else { 993 hdrs = sctp->sctp_iphc6; 994 } 995 sctp->sctp_hdr6_len = sctp->sctp_iphc6_len; 996 sctp->sctp_ip_hdr6_len = ip_hdr_length; 997 998 sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length); 999 sctp->sctp_sctph6 = sctph; 1000 sctph->sh_sport = connp->conn_lport; 1001 sctph->sh_dport = connp->conn_fport; 1002 sctph->sh_verf = sctp->sctp_fvtag; 1003 sctph->sh_chksum = 0; 1004 1005 ip6h = (ip6_t *)hdrs; 1006 sctp->sctp_ip6h = ip6h; 1007 1008 ip6h->ip6_src = v6src; 1009 ip6h->ip6_dst = v6dst; 1010 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto, 1011 connp->conn_flowinfo); 1012 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN); 1013 1014 if (ipp->ipp_fields & IPPF_RTHDR) { 1015 uint8_t *end; 1016 ip6_rthdr_t *rth; 1017 1018 end = (uint8_t *)ip6h + ip_hdr_length; 1019 rth = ip_find_rthdr_v6(ip6h, end); 1020 if (rth != NULL) { 1021 (void) ip_massage_options_v6(ip6h, rth, 1022 connp->conn_netstack); 1023 } 1024 1025 /* 1026 * Verify that the first hop isn't a mapped address. 1027 * Routers along the path need to do this verification 1028 * for subsequent hops. 
1029 */ 1030 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) 1031 return (EADDRNOTAVAIL); 1032 } 1033 return (0); 1034 } 1035 1036 static int 1037 sctp_v4_label(sctp_t *sctp, sctp_faddr_t *fp) 1038 { 1039 conn_t *connp = sctp->sctp_connp; 1040 1041 ASSERT(fp->ixa->ixa_flags & IXAF_IS_IPV4); 1042 return (conn_update_label(connp, fp->ixa, &fp->faddr, 1043 &connp->conn_xmit_ipp)); 1044 } 1045 1046 static int 1047 sctp_v6_label(sctp_t *sctp, sctp_faddr_t *fp) 1048 { 1049 conn_t *connp = sctp->sctp_connp; 1050 1051 ASSERT(!(fp->ixa->ixa_flags & IXAF_IS_IPV4)); 1052 return (conn_update_label(connp, fp->ixa, &fp->faddr, 1053 &connp->conn_xmit_ipp)); 1054 } 1055 1056 /* 1057 * XXX implement more sophisticated logic 1058 * 1059 * Tsol note: We have already verified the addresses using tsol_check_dest 1060 * in sctp_add_faddr, thus no need to redo that here. 1061 * We do setup ipp_label_v4 and ipp_label_v6 based on which addresses 1062 * we have. 1063 */ 1064 int 1065 sctp_set_hdraddrs(sctp_t *sctp) 1066 { 1067 sctp_faddr_t *fp; 1068 int gotv4 = 0; 1069 int gotv6 = 0; 1070 conn_t *connp = sctp->sctp_connp; 1071 1072 ASSERT(sctp->sctp_faddrs != NULL); 1073 ASSERT(sctp->sctp_nsaddrs > 0); 1074 1075 /* Set up using the primary first */ 1076 connp->conn_faddr_v6 = sctp->sctp_primary->faddr; 1077 /* saddr may be unspec; make_mp() will handle this */ 1078 connp->conn_saddr_v6 = sctp->sctp_primary->saddr; 1079 connp->conn_laddr_v6 = connp->conn_saddr_v6; 1080 if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->faddr)) { 1081 if (!is_system_labeled() || 1082 sctp_v4_label(sctp, sctp->sctp_primary) == 0) { 1083 gotv4 = 1; 1084 if (connp->conn_family == AF_INET) { 1085 goto done; 1086 } 1087 } 1088 } else { 1089 if (!is_system_labeled() || 1090 sctp_v6_label(sctp, sctp->sctp_primary) == 0) { 1091 gotv6 = 1; 1092 } 1093 } 1094 1095 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 1096 if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->faddr)) { 1097 if (!is_system_labeled() || 1098 sctp_v4_label(sctp, fp) 
== 0) { 1099 gotv4 = 1; 1100 if (connp->conn_family == AF_INET || gotv6) { 1101 break; 1102 } 1103 } 1104 } else if (!gotv6 && !IN6_IS_ADDR_V4MAPPED(&fp->faddr)) { 1105 if (!is_system_labeled() || 1106 sctp_v6_label(sctp, fp) == 0) { 1107 gotv6 = 1; 1108 if (gotv4) 1109 break; 1110 } 1111 } 1112 } 1113 1114 done: 1115 if (!gotv4 && !gotv6) 1116 return (EACCES); 1117 1118 return (0); 1119 } 1120 1121 /* 1122 * got_errchunk is set B_TRUE only if called from validate_init_params(), when 1123 * an ERROR chunk is already prepended the size of which needs updating for 1124 * additional unrecognized parameters. Other callers either prepend the ERROR 1125 * chunk with the correct size after calling this function, or they are calling 1126 * to add an invalid parameter to an INIT_ACK chunk, in that case no ERROR chunk 1127 * exists, the CAUSE blocks go into the INIT_ACK directly. 1128 * 1129 * *errmp will be non-NULL both when adding an additional CAUSE block to an 1130 * existing prepended COOKIE ERROR chunk (processing params of an INIT_ACK), 1131 * and when adding unrecognized parameters after the first, to an INIT_ACK 1132 * (processing params of an INIT chunk). 
1133 */ 1134 void 1135 sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp, 1136 boolean_t got_errchunk) 1137 { 1138 mblk_t *mp; 1139 sctp_parm_hdr_t *ph; 1140 size_t len; 1141 int pad; 1142 sctp_chunk_hdr_t *ecp; 1143 1144 len = sizeof (*ph) + ntohs(uph->sph_len); 1145 if ((pad = len % SCTP_ALIGN) != 0) { 1146 pad = SCTP_ALIGN - pad; 1147 len += pad; 1148 } 1149 mp = allocb(len, BPRI_MED); 1150 if (mp == NULL) { 1151 return; 1152 } 1153 1154 ph = (sctp_parm_hdr_t *)(mp->b_rptr); 1155 ph->sph_type = htons(PARM_UNRECOGNIZED); 1156 ph->sph_len = htons(len - pad); 1157 1158 /* copy in the unrecognized parameter */ 1159 bcopy(uph, ph + 1, ntohs(uph->sph_len)); 1160 1161 if (pad != 0) 1162 bzero((mp->b_rptr + len - pad), pad); 1163 1164 mp->b_wptr = mp->b_rptr + len; 1165 if (*errmp != NULL) { 1166 /* 1167 * Update total length if an ERROR chunk, then link 1168 * this CAUSE block to the possible chain of CAUSE 1169 * blocks attached to the ERROR chunk or INIT_ACK 1170 * being created. 1171 */ 1172 if (got_errchunk) { 1173 /* ERROR chunk already prepended */ 1174 ecp = (sctp_chunk_hdr_t *)((*errmp)->b_rptr); 1175 ecp->sch_len = htons(ntohs(ecp->sch_len) + len); 1176 } 1177 linkb(*errmp, mp); 1178 } else { 1179 *errmp = mp; 1180 } 1181 } 1182 1183 /* 1184 * o Bounds checking 1185 * o Updates remaining 1186 * o Checks alignment 1187 */ 1188 sctp_parm_hdr_t * 1189 sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining) 1190 { 1191 int pad; 1192 uint16_t len; 1193 1194 len = ntohs(current->sph_len); 1195 *remaining -= len; 1196 if (*remaining < sizeof (*current) || len < sizeof (*current)) { 1197 return (NULL); 1198 } 1199 if ((pad = len & (SCTP_ALIGN - 1)) != 0) { 1200 pad = SCTP_ALIGN - pad; 1201 *remaining -= pad; 1202 } 1203 /*LINTED pointer cast may result in improper alignment*/ 1204 current = (sctp_parm_hdr_t *)((char *)current + len + pad); 1205 return (current); 1206 } 1207 1208 /* 1209 * Sets the address parameters given in the INIT chunk into sctp's 1210 
* faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are
 * no address parameters in the INIT chunk, a single faddr is created
 * from the ip hdr at the beginning of pkt.
 * If there already are existing addresses hanging from sctp, merge
 * them in, if the old info contains addresses which are not present
 * in this new info, get rid of them, and clean the pointers if there are
 * messages which have this as their target address.
 *
 * We also re-adjust the source address list here since the list may
 * contain more than what is actually part of the association. If
 * we get here from sctp_send_cookie_echo(), we are on the active
 * side and psctp will be NULL and ich will be the INIT-ACK chunk.
 * If we get here from sctp_accept_comm(), ich will be the INIT chunk
 * and psctp will be the listening endpoint.
 *
 * INIT processing: When processing the INIT we inherit the src address
 * list from the listener. For a loopback or linklocal association, we
 * delete the list and just take the address from the IP header (since
 * that's how we created the INIT-ACK). Additionally, for loopback we
 * ignore the address params in the INIT. For determining which address
 * types were sent in the INIT-ACK we follow the same logic as in
 * creating the INIT-ACK. We delete addresses of the type that are not
 * supported by the peer.
 *
 * INIT-ACK processing: When processing the INIT-ACK since we had not
 * included addr params for loopback or linklocal addresses when creating
 * the INIT, we just use the address from the IP header. Further, for
 * loopback we ignore the addr param list. We mark addresses of the
 * type not supported by the peer as unconfirmed.
 *
 * In case of INIT processing we look for supported address types in the
 * supported address param, if present.
In both cases the address type in 1242 * the IP header is supported as well as types for addresses in the param 1243 * list, if any. 1244 * 1245 * Once we have the supported address types sctp_check_saddr() runs through 1246 * the source address list and deletes or marks as unconfirmed address of 1247 * types not supported by the peer. 1248 * 1249 * Returns 0 on success, sys errno on failure 1250 */ 1251 int 1252 sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt, 1253 sctp_chunk_hdr_t *ich, uint_t *sctp_options) 1254 { 1255 sctp_init_chunk_t *init; 1256 ipha_t *iph; 1257 ip6_t *ip6h; 1258 in6_addr_t hdrsaddr[1]; 1259 in6_addr_t hdrdaddr[1]; 1260 sctp_parm_hdr_t *ph; 1261 ssize_t remaining; 1262 int isv4; 1263 int err; 1264 sctp_faddr_t *fp; 1265 int supp_af = 0; 1266 boolean_t check_saddr = B_TRUE; 1267 in6_addr_t curaddr; 1268 sctp_stack_t *sctps = sctp->sctp_sctps; 1269 conn_t *connp = sctp->sctp_connp; 1270 1271 if (sctp_options != NULL) 1272 *sctp_options = 0; 1273 1274 /* extract the address from the IP header */ 1275 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); 1276 if (isv4) { 1277 iph = (ipha_t *)pkt->b_rptr; 1278 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr); 1279 IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr); 1280 supp_af |= PARM_SUPP_V4; 1281 } else { 1282 ip6h = (ip6_t *)pkt->b_rptr; 1283 hdrsaddr[0] = ip6h->ip6_src; 1284 hdrdaddr[0] = ip6h->ip6_dst; 1285 supp_af |= PARM_SUPP_V6; 1286 } 1287 1288 /* 1289 * Unfortunately, we can't delay this because adding an faddr 1290 * looks for the presence of the source address (from the ire 1291 * for the faddr) in the source address list. We could have 1292 * delayed this if, say, this was a loopback/linklocal connection. 1293 * Now, we just end up nuking this list and taking the addr from 1294 * the IP header for loopback/linklocal. 
1295 */ 1296 if (psctp != NULL && psctp->sctp_nsaddrs > 0) { 1297 ASSERT(sctp->sctp_nsaddrs == 0); 1298 1299 err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP); 1300 if (err != 0) 1301 return (err); 1302 } 1303 /* 1304 * We will add the faddr before parsing the address list as this 1305 * might be a loopback connection and we would not have to 1306 * go through the list. 1307 * 1308 * Make sure the header's addr is in the list 1309 */ 1310 fp = sctp_lookup_faddr(sctp, hdrsaddr); 1311 if (fp == NULL) { 1312 /* not included; add it now */ 1313 err = sctp_add_faddr(sctp, hdrsaddr, KM_NOSLEEP, B_TRUE); 1314 if (err != 0) 1315 return (err); 1316 1317 /* sctp_faddrs will be the hdr addr */ 1318 fp = sctp->sctp_faddrs; 1319 } 1320 /* make the header addr the primary */ 1321 1322 if (cl_sctp_assoc_change != NULL && psctp == NULL) 1323 curaddr = sctp->sctp_current->faddr; 1324 1325 sctp->sctp_primary = fp; 1326 sctp->sctp_current = fp; 1327 sctp->sctp_mss = fp->sfa_pmss; 1328 1329 /* For loopback connections & linklocal get address from the header */ 1330 if (sctp->sctp_loopback || sctp->sctp_linklocal) { 1331 if (sctp->sctp_nsaddrs != 0) 1332 sctp_free_saddrs(sctp); 1333 if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0) 1334 return (err); 1335 /* For loopback ignore address list */ 1336 if (sctp->sctp_loopback) 1337 return (0); 1338 check_saddr = B_FALSE; 1339 } 1340 1341 /* Walk the params in the INIT [ACK], pulling out addr params */ 1342 remaining = ntohs(ich->sch_len) - sizeof (*ich) - 1343 sizeof (sctp_init_chunk_t); 1344 if (remaining < sizeof (*ph)) { 1345 if (check_saddr) { 1346 sctp_check_saddr(sctp, supp_af, psctp == NULL ? 
1347 B_FALSE : B_TRUE, hdrdaddr); 1348 } 1349 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); 1350 return (0); 1351 } 1352 1353 init = (sctp_init_chunk_t *)(ich + 1); 1354 ph = (sctp_parm_hdr_t *)(init + 1); 1355 1356 /* params will have already been byteordered when validating */ 1357 while (ph != NULL) { 1358 if (ph->sph_type == htons(PARM_SUPP_ADDRS)) { 1359 int plen; 1360 uint16_t *p; 1361 uint16_t addrtype; 1362 1363 ASSERT(psctp != NULL); 1364 plen = ntohs(ph->sph_len); 1365 p = (uint16_t *)(ph + 1); 1366 while (plen > 0) { 1367 addrtype = ntohs(*p); 1368 switch (addrtype) { 1369 case PARM_ADDR6: 1370 supp_af |= PARM_SUPP_V6; 1371 break; 1372 case PARM_ADDR4: 1373 supp_af |= PARM_SUPP_V4; 1374 break; 1375 default: 1376 break; 1377 } 1378 p++; 1379 plen -= sizeof (*p); 1380 } 1381 } else if (ph->sph_type == htons(PARM_ADDR4)) { 1382 if (remaining >= PARM_ADDR4_LEN) { 1383 in6_addr_t addr; 1384 ipaddr_t ta; 1385 1386 supp_af |= PARM_SUPP_V4; 1387 /* 1388 * Screen out broad/multicasts & loopback. 1389 * If the endpoint only accepts v6 address, 1390 * go to the next one. 1391 * 1392 * Subnet broadcast check is done in 1393 * sctp_add_faddr(). If the address is 1394 * a broadcast address, it won't be added. 1395 */ 1396 bcopy(ph + 1, &ta, sizeof (ta)); 1397 if (ta == 0 || 1398 ta == INADDR_BROADCAST || 1399 ta == htonl(INADDR_LOOPBACK) || 1400 CLASSD(ta) || connp->conn_ipv6_v6only) { 1401 goto next; 1402 } 1403 IN6_INADDR_TO_V4MAPPED((struct in_addr *) 1404 (ph + 1), &addr); 1405 1406 /* Check for duplicate. */ 1407 if (sctp_lookup_faddr(sctp, &addr) != NULL) 1408 goto next; 1409 1410 /* OK, add it to the faddr set */ 1411 err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP, 1412 B_FALSE); 1413 /* Something is wrong... Try the next one. */ 1414 if (err != 0) 1415 goto next; 1416 } 1417 } else if (ph->sph_type == htons(PARM_ADDR6) && 1418 connp->conn_family == AF_INET6) { 1419 /* An v4 socket should not take v6 addresses. 
*/ 1420 if (remaining >= PARM_ADDR6_LEN) { 1421 in6_addr_t *addr6; 1422 1423 supp_af |= PARM_SUPP_V6; 1424 addr6 = (in6_addr_t *)(ph + 1); 1425 /* 1426 * Screen out link locals, mcast, loopback 1427 * and bogus v6 address. 1428 */ 1429 if (IN6_IS_ADDR_LINKLOCAL(addr6) || 1430 IN6_IS_ADDR_MULTICAST(addr6) || 1431 IN6_IS_ADDR_LOOPBACK(addr6) || 1432 IN6_IS_ADDR_V4MAPPED(addr6)) { 1433 goto next; 1434 } 1435 /* Check for duplicate. */ 1436 if (sctp_lookup_faddr(sctp, addr6) != NULL) 1437 goto next; 1438 1439 err = sctp_add_faddr(sctp, 1440 (in6_addr_t *)(ph + 1), KM_NOSLEEP, 1441 B_FALSE); 1442 /* Something is wrong... Try the next one. */ 1443 if (err != 0) 1444 goto next; 1445 } 1446 } else if (ph->sph_type == htons(PARM_FORWARD_TSN)) { 1447 if (sctp_options != NULL) 1448 *sctp_options |= SCTP_PRSCTP_OPTION; 1449 } /* else; skip */ 1450 1451 next: 1452 ph = sctp_next_parm(ph, &remaining); 1453 } 1454 if (check_saddr) { 1455 sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE : 1456 B_TRUE, hdrdaddr); 1457 } 1458 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); 1459 /* 1460 * We have the right address list now, update clustering's 1461 * knowledge because when we sent the INIT we had just added 1462 * the address the INIT was sent to. 1463 */ 1464 if (psctp == NULL && cl_sctp_assoc_change != NULL) { 1465 uchar_t *alist; 1466 size_t asize; 1467 uchar_t *dlist; 1468 size_t dsize; 1469 1470 asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 1471 alist = kmem_alloc(asize, KM_NOSLEEP); 1472 if (alist == NULL) { 1473 SCTP_KSTAT(sctps, sctp_cl_assoc_change); 1474 return (ENOMEM); 1475 } 1476 /* 1477 * Just include the address the INIT was sent to in the 1478 * delete list and send the entire faddr list. We could 1479 * do it differently (i.e include all the addresses in the 1480 * add list even if it contains the original address OR 1481 * remove the original address from the add list etc.), but 1482 * this seems reasonable enough. 
1483 */ 1484 dsize = sizeof (in6_addr_t); 1485 dlist = kmem_alloc(dsize, KM_NOSLEEP); 1486 if (dlist == NULL) { 1487 kmem_free(alist, asize); 1488 SCTP_KSTAT(sctps, sctp_cl_assoc_change); 1489 return (ENOMEM); 1490 } 1491 bcopy(&curaddr, dlist, sizeof (curaddr)); 1492 sctp_get_faddr_list(sctp, alist, asize); 1493 (*cl_sctp_assoc_change)(connp->conn_family, alist, asize, 1494 sctp->sctp_nfaddrs, dlist, dsize, 1, SCTP_CL_PADDR, 1495 (cl_sctp_handle_t)sctp); 1496 /* alist and dlist will be freed by the clustering module */ 1497 } 1498 return (0); 1499 } 1500 1501 /* 1502 * Returns 0 if the check failed and the restart should be refused, 1503 * 1 if the check succeeded. 1504 */ 1505 int 1506 sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports, 1507 int sleep, sctp_stack_t *sctps, ip_recv_attr_t *ira) 1508 { 1509 sctp_faddr_t *fp, *fphead = NULL; 1510 sctp_parm_hdr_t *ph; 1511 ssize_t remaining; 1512 int isv4; 1513 ipha_t *iph; 1514 ip6_t *ip6h; 1515 in6_addr_t hdraddr[1]; 1516 int retval = 0; 1517 sctp_tf_t *tf; 1518 sctp_t *sctp; 1519 int compres; 1520 sctp_init_chunk_t *init; 1521 int nadded = 0; 1522 1523 /* extract the address from the IP header */ 1524 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); 1525 if (isv4) { 1526 iph = (ipha_t *)pkt->b_rptr; 1527 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr); 1528 } else { 1529 ip6h = (ip6_t *)pkt->b_rptr; 1530 hdraddr[0] = ip6h->ip6_src; 1531 } 1532 1533 /* Walk the params in the INIT [ACK], pulling out addr params */ 1534 remaining = ntohs(ich->sch_len) - sizeof (*ich) - 1535 sizeof (sctp_init_chunk_t); 1536 if (remaining < sizeof (*ph)) { 1537 /* no parameters; restart OK */ 1538 return (1); 1539 } 1540 init = (sctp_init_chunk_t *)(ich + 1); 1541 ph = (sctp_parm_hdr_t *)(init + 1); 1542 1543 while (ph != NULL) { 1544 sctp_faddr_t *fpa = NULL; 1545 1546 /* params will have already been byteordered when validating */ 1547 if (ph->sph_type == htons(PARM_ADDR4)) { 1548 if (remaining >= 
PARM_ADDR4_LEN) { 1549 in6_addr_t addr; 1550 IN6_INADDR_TO_V4MAPPED((struct in_addr *) 1551 (ph + 1), &addr); 1552 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, 1553 sleep); 1554 if (fpa == NULL) { 1555 goto done; 1556 } 1557 bzero(fpa, sizeof (*fpa)); 1558 fpa->faddr = addr; 1559 fpa->next = NULL; 1560 } 1561 } else if (ph->sph_type == htons(PARM_ADDR6)) { 1562 if (remaining >= PARM_ADDR6_LEN) { 1563 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, 1564 sleep); 1565 if (fpa == NULL) { 1566 goto done; 1567 } 1568 bzero(fpa, sizeof (*fpa)); 1569 bcopy(ph + 1, &fpa->faddr, 1570 sizeof (fpa->faddr)); 1571 fpa->next = NULL; 1572 } 1573 } 1574 /* link in the new addr, if it was an addr param */ 1575 if (fpa != NULL) { 1576 if (fphead == NULL) { 1577 fphead = fpa; 1578 } else { 1579 fpa->next = fphead; 1580 fphead = fpa; 1581 } 1582 } 1583 1584 ph = sctp_next_parm(ph, &remaining); 1585 } 1586 1587 if (fphead == NULL) { 1588 /* no addr parameters; restart OK */ 1589 return (1); 1590 } 1591 1592 /* 1593 * got at least one; make sure the header's addr is 1594 * in the list 1595 */ 1596 fp = sctp_lookup_faddr_nosctp(fphead, hdraddr); 1597 if (fp == NULL) { 1598 /* not included; add it now */ 1599 fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep); 1600 if (fp == NULL) { 1601 goto done; 1602 } 1603 bzero(fp, sizeof (*fp)); 1604 fp->faddr = *hdraddr; 1605 fp->next = fphead; 1606 fphead = fp; 1607 } 1608 1609 /* 1610 * Now, we can finally do the check: For each sctp instance 1611 * on the hash line for ports, compare its faddr set against 1612 * the new one. If the new one is a strict subset of any 1613 * existing sctp's faddrs, the restart is OK. However, if there 1614 * is an overlap, this could be an attack, so return failure. 1615 * If all sctp's faddrs are disjoint, this is a legitimate new 1616 * association. 
1617 */ 1618 tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]); 1619 mutex_enter(&tf->tf_lock); 1620 1621 for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) { 1622 if (ports != sctp->sctp_connp->conn_ports) { 1623 continue; 1624 } 1625 compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs); 1626 if (compres <= SCTP_ADDR_SUBSET) { 1627 retval = 1; 1628 mutex_exit(&tf->tf_lock); 1629 goto done; 1630 } 1631 if (compres == SCTP_ADDR_OVERLAP) { 1632 dprint(1, 1633 ("new assoc from %x:%x:%x:%x overlaps with %p\n", 1634 SCTP_PRINTADDR(*hdraddr), (void *)sctp)); 1635 /* 1636 * While we still hold the lock, we need to 1637 * figure out which addresses have been 1638 * added so we can include them in the abort 1639 * we will send back. Since these faddrs will 1640 * never be used, we overload the rto field 1641 * here, setting it to 0 if the address was 1642 * not added, 1 if it was added. 1643 */ 1644 for (fp = fphead; fp; fp = fp->next) { 1645 if (sctp_lookup_faddr(sctp, &fp->faddr)) { 1646 fp->rto = 0; 1647 } else { 1648 fp->rto = 1; 1649 nadded++; 1650 } 1651 } 1652 mutex_exit(&tf->tf_lock); 1653 goto done; 1654 } 1655 } 1656 mutex_exit(&tf->tf_lock); 1657 1658 /* All faddrs are disjoint; legit new association */ 1659 retval = 1; 1660 1661 done: 1662 /* If are attempted adds, send back an abort listing the addrs */ 1663 if (nadded > 0) { 1664 void *dtail; 1665 size_t dlen; 1666 1667 dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP); 1668 if (dtail == NULL) { 1669 goto cleanup; 1670 } 1671 1672 ph = dtail; 1673 dlen = 0; 1674 for (fp = fphead; fp; fp = fp->next) { 1675 if (fp->rto == 0) { 1676 continue; 1677 } 1678 if (IN6_IS_ADDR_V4MAPPED(&fp->faddr)) { 1679 ipaddr_t addr4; 1680 1681 ph->sph_type = htons(PARM_ADDR4); 1682 ph->sph_len = htons(PARM_ADDR4_LEN); 1683 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4); 1684 ph++; 1685 bcopy(&addr4, ph, sizeof (addr4)); 1686 ph = (sctp_parm_hdr_t *) 1687 ((char *)ph + sizeof (addr4)); 1688 dlen += 
PARM_ADDR4_LEN; 1689 } else { 1690 ph->sph_type = htons(PARM_ADDR6); 1691 ph->sph_len = htons(PARM_ADDR6_LEN); 1692 ph++; 1693 bcopy(&fp->faddr, ph, sizeof (fp->faddr)); 1694 ph = (sctp_parm_hdr_t *) 1695 ((char *)ph + sizeof (fp->faddr)); 1696 dlen += PARM_ADDR6_LEN; 1697 } 1698 } 1699 1700 /* Send off the abort */ 1701 sctp_send_abort(sctp, sctp_init2vtag(ich), 1702 SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE, 1703 ira); 1704 1705 kmem_free(dtail, PARM_ADDR6_LEN * nadded); 1706 } 1707 1708 cleanup: 1709 /* Clean up */ 1710 if (fphead) { 1711 sctp_faddr_t *fpn; 1712 for (fp = fphead; fp; fp = fpn) { 1713 fpn = fp->next; 1714 if (fp->ixa != NULL) { 1715 ixa_refrele(fp->ixa); 1716 fp->ixa = NULL; 1717 } 1718 kmem_cache_free(sctp_kmem_faddr_cache, fp); 1719 } 1720 } 1721 1722 return (retval); 1723 } 1724 1725 /* 1726 * Reset any state related to transmitted chunks. 1727 */ 1728 void 1729 sctp_congest_reset(sctp_t *sctp) 1730 { 1731 sctp_faddr_t *fp; 1732 sctp_stack_t *sctps = sctp->sctp_sctps; 1733 mblk_t *mp; 1734 1735 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 1736 fp->ssthresh = sctps->sctps_initial_mtu; 1737 SET_CWND(fp, fp->sfa_pmss, sctps->sctps_slow_start_initial); 1738 fp->suna = 0; 1739 fp->pba = 0; 1740 } 1741 /* 1742 * Clean up the transmit list as well since we have reset accounting 1743 * on all the fps. Send event upstream, if required. 1744 */ 1745 while ((mp = sctp->sctp_xmit_head) != NULL) { 1746 sctp->sctp_xmit_head = mp->b_next; 1747 mp->b_next = NULL; 1748 if (sctp->sctp_xmit_head != NULL) 1749 sctp->sctp_xmit_head->b_prev = NULL; 1750 sctp_sendfail_event(sctp, mp, 0, B_TRUE); 1751 } 1752 sctp->sctp_xmit_head = NULL; 1753 sctp->sctp_xmit_tail = NULL; 1754 sctp->sctp_xmit_unacked = NULL; 1755 1756 sctp->sctp_unacked = 0; 1757 /* 1758 * Any control message as well. We will clean-up this list as well. 1759 * This contains any pending ASCONF request that we have queued/sent. 1760 * If we do get an ACK we will just drop it. 
However, given that 1761 * we are restarting chances are we aren't going to get any. 1762 */ 1763 if (sctp->sctp_cxmit_list != NULL) 1764 sctp_asconf_free_cxmit(sctp, NULL); 1765 sctp->sctp_cxmit_list = NULL; 1766 sctp->sctp_cchunk_pend = 0; 1767 1768 sctp->sctp_rexmitting = B_FALSE; 1769 sctp->sctp_rxt_nxttsn = 0; 1770 sctp->sctp_rxt_maxtsn = 0; 1771 1772 sctp->sctp_zero_win_probe = B_FALSE; 1773 } 1774 1775 static void 1776 sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr, 1777 mblk_t *timer_mp) 1778 { 1779 sctp_stack_t *sctps = sctp->sctp_sctps; 1780 1781 ASSERT(fp->ixa != NULL); 1782 1783 bcopy(addr, &fp->faddr, sizeof (*addr)); 1784 if (IN6_IS_ADDR_V4MAPPED(addr)) { 1785 fp->isv4 = 1; 1786 /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */ 1787 fp->sfa_pmss = 1788 (sctps->sctps_initial_mtu - sctp->sctp_hdr_len) & 1789 ~(SCTP_ALIGN - 1); 1790 fp->ixa->ixa_flags |= IXAF_IS_IPV4; 1791 } else { 1792 fp->isv4 = 0; 1793 fp->sfa_pmss = 1794 (sctps->sctps_initial_mtu - sctp->sctp_hdr6_len) & 1795 ~(SCTP_ALIGN - 1); 1796 fp->ixa->ixa_flags &= ~IXAF_IS_IPV4; 1797 } 1798 fp->cwnd = sctps->sctps_slow_start_initial * fp->sfa_pmss; 1799 fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_rto_max_init); 1800 SCTP_MAX_RTO(sctp, fp); 1801 fp->srtt = -1; 1802 fp->rtt_updates = 0; 1803 fp->strikes = 0; 1804 fp->max_retr = sctp->sctp_pp_max_rxt; 1805 /* Mark it as not confirmed. 
*/ 1806 fp->state = SCTP_FADDRS_UNCONFIRMED; 1807 fp->hb_interval = sctp->sctp_hb_interval; 1808 fp->ssthresh = sctps->sctps_initial_ssthresh; 1809 fp->suna = 0; 1810 fp->pba = 0; 1811 fp->acked = 0; 1812 fp->lastactive = fp->hb_expiry = ddi_get_lbolt64(); 1813 fp->timer_mp = timer_mp; 1814 fp->hb_pending = B_FALSE; 1815 fp->hb_enabled = B_TRUE; 1816 fp->df = 1; 1817 fp->pmtu_discovered = 0; 1818 fp->next = NULL; 1819 fp->T3expire = 0; 1820 (void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret, 1821 sizeof (fp->hb_secret)); 1822 fp->rxt_unacked = 0; 1823 1824 sctp_get_dest(sctp, fp); 1825 } 1826 1827 /*ARGSUSED*/ 1828 static int 1829 faddr_constructor(void *buf, void *arg, int flags) 1830 { 1831 sctp_faddr_t *fp = buf; 1832 1833 fp->timer_mp = NULL; 1834 fp->timer_running = 0; 1835 1836 fp->rc_timer_mp = NULL; 1837 fp->rc_timer_running = 0; 1838 1839 return (0); 1840 } 1841 1842 /*ARGSUSED*/ 1843 static void 1844 faddr_destructor(void *buf, void *arg) 1845 { 1846 sctp_faddr_t *fp = buf; 1847 1848 ASSERT(fp->timer_mp == NULL); 1849 ASSERT(fp->timer_running == 0); 1850 1851 ASSERT(fp->rc_timer_mp == NULL); 1852 ASSERT(fp->rc_timer_running == 0); 1853 } 1854 1855 void 1856 sctp_faddr_init(void) 1857 { 1858 sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache", 1859 sizeof (sctp_faddr_t), 0, faddr_constructor, faddr_destructor, 1860 NULL, NULL, NULL, 0); 1861 } 1862 1863 void 1864 sctp_faddr_fini(void) 1865 { 1866 kmem_cache_destroy(sctp_kmem_faddr_cache); 1867 } 1868