1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/strsubr.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/kmem.h> 36 #include <sys/socket.h> 37 #include <sys/random.h> 38 #include <sys/tsol/tndb.h> 39 #include <sys/tsol/tnet.h> 40 41 #include <netinet/in.h> 42 #include <netinet/ip6.h> 43 #include <netinet/sctp.h> 44 45 #include <inet/common.h> 46 #include <inet/ip.h> 47 #include <inet/ip6.h> 48 #include <inet/ip_ire.h> 49 #include <inet/mib2.h> 50 #include <inet/nd.h> 51 #include <inet/optcom.h> 52 #include <inet/sctp_ip.h> 53 #include <inet/ipclassifier.h> 54 55 #include "sctp_impl.h" 56 #include "sctp_addr.h" 57 58 static struct kmem_cache *sctp_kmem_faddr_cache; 59 static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *, mblk_t *); 60 61 /* Set the source address. Refer to comments in sctp_get_ire(). */ 62 static void 63 set_saddr(sctp_t *sctp, sctp_faddr_t *fp) 64 { 65 boolean_t v6 = !fp->isv4; 66 67 if (sctp->sctp_bound_to_all) { 68 V6_SET_ZERO(fp->saddr); 69 } else { 70 fp->saddr = sctp_get_valid_addr(sctp, v6); 71 if (!v6 && IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) || 72 v6 && IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 73 fp->state = SCTP_FADDRS_UNREACH; 74 /* Disable heartbeat. */ 75 fp->hb_expiry = 0; 76 fp->hb_pending = B_FALSE; 77 fp->strikes = 0; 78 } 79 } 80 } 81 82 /* 83 * Call this function to update the cached IRE of a peer addr fp. 84 */ 85 void 86 sctp_get_ire(sctp_t *sctp, sctp_faddr_t *fp) 87 { 88 ire_t *ire; 89 ipaddr_t addr4; 90 in6_addr_t laddr; 91 sctp_saddr_ipif_t *sp; 92 uint_t ipif_seqid; 93 int hdrlen; 94 ts_label_t *tsl; 95 96 /* Remove the previous cache IRE */ 97 if ((ire = fp->ire) != NULL) { 98 IRE_REFRELE_NOTR(ire); 99 fp->ire = NULL; 100 } 101 102 /* 103 * If this addr is not reachable, mark it as unconfirmed for now, the 104 * state will be changed back to unreachable later in this function 105 * if it is still the case. 106 */ 107 if (fp->state == SCTP_FADDRS_UNREACH) { 108 fp->state = SCTP_FADDRS_UNCONFIRMED; 109 } 110 111 tsl = crgetlabel(CONN_CRED(sctp->sctp_connp)); 112 113 if (fp->isv4) { 114 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4); 115 ire = ire_cache_lookup(addr4, sctp->sctp_zoneid, tsl); 116 if (ire != NULL) 117 IN6_IPADDR_TO_V4MAPPED(ire->ire_src_addr, &laddr); 118 } else { 119 ire = ire_cache_lookup_v6(&fp->faddr, sctp->sctp_zoneid, tsl); 120 if (ire != NULL) 121 laddr = ire->ire_src_addr_v6; 122 } 123 124 if (ire == NULL) { 125 dprint(3, ("ire2faddr: no ire for %x:%x:%x:%x\n", 126 SCTP_PRINTADDR(fp->faddr))); 127 /* 128 * It is tempting to just leave the src addr 129 * unspecified and let IP figure it out, but we 130 * *cannot* do this, since IP may choose a src addr 131 * that is not part of this association... unless 132 * this sctp has bound to all addrs. So if the ire 133 * lookup fails, try to find one in our src addr 134 * list, unless the sctp has bound to all addrs, in 135 * which case we change the src addr to unspec. 136 * 137 * Note that if this is a v6 endpoint but it does 138 * not have any v4 address at this point (e.g. may 139 * have been deleted), sctp_get_valid_addr() will 140 * return mapped INADDR_ANY. In this case, this 141 * address should be marked not reachable so that 142 * it won't be used to send data. 143 */ 144 set_saddr(sctp, fp); 145 if (fp->state == SCTP_FADDRS_UNREACH) 146 return; 147 goto check_current; 148 } 149 150 ipif_seqid = ire->ire_ipif->ipif_seqid; 151 dprint(2, ("ire2faddr: got ire for %x:%x:%x:%x, ", 152 SCTP_PRINTADDR(fp->faddr))); 153 if (fp->isv4) { 154 dprint(2, ("src = %x\n", ire->ire_src_addr)); 155 } else { 156 dprint(2, ("src=%x:%x:%x:%x\n", 157 SCTP_PRINTADDR(ire->ire_src_addr_v6))); 158 } 159 160 /* Make sure the laddr is part of this association */ 161 if ((sp = sctp_ipif_lookup(sctp, ipif_seqid)) != NULL && 162 !sp->saddr_ipif_dontsrc) { 163 if (sp->saddr_ipif_unconfirmed == 1) 164 sp->saddr_ipif_unconfirmed = 0; 165 fp->saddr = laddr; 166 } else { 167 dprint(2, ("ire2faddr: src addr is not part of assc\n")); 168 169 /* 170 * Set the src to the first saddr and hope for the best. 171 * Note that we will still do the ire caching below. 172 * Otherwise, whenever we send a packet, we need to do 173 * the ire lookup again and still may not get the correct 174 * source address. Note that this case should very seldomly 175 * happen. One scenario this can happen is an app 176 * explicitly bind() to an address. But that address is 177 * not the preferred source address to send to the peer. 178 */ 179 set_saddr(sctp, fp); 180 if (fp->state == SCTP_FADDRS_UNREACH) { 181 IRE_REFRELE(ire); 182 return; 183 } 184 } 185 186 /* 187 * Note that ire_cache_lookup_*() returns an ire with the tracing 188 * bits enabled. This requires the thread holding the ire also 189 * do the IRE_REFRELE(). Thus we need to do IRE_REFHOLD_NOTR() 190 * and then IRE_REFRELE() the ire here to make the tracing bits 191 * work. 192 */ 193 IRE_REFHOLD_NOTR(ire); 194 IRE_REFRELE(ire); 195 196 /* Cache the IRE */ 197 fp->ire = ire; 198 if (fp->ire->ire_type == IRE_LOOPBACK && !sctp->sctp_loopback) 199 sctp->sctp_loopback = 1; 200 201 /* 202 * Pull out RTO information for this faddr and use it if we don't 203 * have any yet. 204 */ 205 if (fp->srtt == -1 && ire->ire_uinfo.iulp_rtt != 0) { 206 /* The cached value is in ms. */ 207 fp->srtt = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt); 208 fp->rttvar = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt_sd); 209 fp->rto = 3 * fp->srtt; 210 211 /* Bound the RTO by configured min and max values */ 212 if (fp->rto < sctp->sctp_rto_min) { 213 fp->rto = sctp->sctp_rto_min; 214 } 215 if (fp->rto > sctp->sctp_rto_max) { 216 fp->rto = sctp->sctp_rto_max; 217 } 218 } 219 220 /* 221 * Record the MTU for this faddr. If the MTU for this faddr has 222 * changed, check if the assc MTU will also change. 223 */ 224 if (fp->isv4) { 225 hdrlen = sctp->sctp_hdr_len; 226 } else { 227 hdrlen = sctp->sctp_hdr6_len; 228 } 229 if ((fp->sfa_pmss + hdrlen) != ire->ire_max_frag) { 230 /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */ 231 fp->sfa_pmss = (ire->ire_max_frag - hdrlen) & ~(SCTP_ALIGN - 1); 232 if (fp->cwnd < (fp->sfa_pmss * 2)) { 233 fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial; 234 } 235 } 236 237 check_current: 238 if (fp == sctp->sctp_current) 239 sctp_set_faddr_current(sctp, fp); 240 } 241 242 void 243 sctp_update_ire(sctp_t *sctp) 244 { 245 ire_t *ire; 246 sctp_faddr_t *fp; 247 248 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 249 if ((ire = fp->ire) == NULL) 250 continue; 251 mutex_enter(&ire->ire_lock); 252 253 /* 254 * If the cached IRE is going away, there is no point to 255 * update it. 256 */ 257 if (ire->ire_marks & IRE_MARK_CONDEMNED) { 258 mutex_exit(&ire->ire_lock); 259 IRE_REFRELE_NOTR(ire); 260 fp->ire = NULL; 261 continue; 262 } 263 264 /* 265 * Only record the PMTU for this faddr if we actually have 266 * done discovery. This prevents initialized default from 267 * clobbering any real info that IP may have. 268 */ 269 if (fp->pmtu_discovered) { 270 if (fp->isv4) { 271 ire->ire_max_frag = fp->sfa_pmss + 272 sctp->sctp_hdr_len; 273 } else { 274 ire->ire_max_frag = fp->sfa_pmss + 275 sctp->sctp_hdr6_len; 276 } 277 } 278 279 if (sctp_rtt_updates != 0 && 280 fp->rtt_updates >= sctp_rtt_updates) { 281 /* 282 * If there is no old cached values, initialize them 283 * conservatively. Set them to be (1.5 * new value). 284 * This code copied from ip_ire_advise(). The cached 285 * value is in ms. 286 */ 287 if (ire->ire_uinfo.iulp_rtt != 0) { 288 ire->ire_uinfo.iulp_rtt = 289 (ire->ire_uinfo.iulp_rtt + 290 TICK_TO_MSEC(fp->srtt)) >> 1; 291 } else { 292 ire->ire_uinfo.iulp_rtt = 293 TICK_TO_MSEC(fp->srtt + (fp->srtt >> 1)); 294 } 295 if (ire->ire_uinfo.iulp_rtt_sd != 0) { 296 ire->ire_uinfo.iulp_rtt_sd = 297 (ire->ire_uinfo.iulp_rtt_sd + 298 TICK_TO_MSEC(fp->rttvar)) >> 1; 299 } else { 300 ire->ire_uinfo.iulp_rtt_sd = 301 TICK_TO_MSEC(fp->rttvar + 302 (fp->rttvar >> 1)); 303 } 304 fp->rtt_updates = 0; 305 } 306 mutex_exit(&ire->ire_lock); 307 } 308 } 309 310 /* 311 * The sender must set the total length in the IP header. 312 * If sendto == NULL, the current will be used. 313 */ 314 mblk_t * 315 sctp_make_mp(sctp_t *sctp, sctp_faddr_t *sendto, int trailer) 316 { 317 mblk_t *mp; 318 size_t ipsctplen; 319 int isv4; 320 sctp_faddr_t *fp; 321 322 ASSERT(sctp->sctp_current != NULL || sendto != NULL); 323 if (sendto == NULL) { 324 fp = sctp->sctp_current; 325 } else { 326 fp = sendto; 327 } 328 isv4 = fp->isv4; 329 330 /* Try to look for another IRE again. */ 331 if (fp->ire == NULL) 332 sctp_get_ire(sctp, fp); 333 334 /* There is no suitable source address to use, return. */ 335 if (fp->state == SCTP_FADDRS_UNREACH) 336 return (NULL); 337 338 if (isv4) { 339 ipsctplen = sctp->sctp_hdr_len; 340 } else { 341 ipsctplen = sctp->sctp_hdr6_len; 342 } 343 344 mp = allocb_cred(ipsctplen + sctp_wroff_xtra + trailer, 345 CONN_CRED(sctp->sctp_connp)); 346 if (mp == NULL) { 347 ip1dbg(("sctp_make_mp: error making mp..\n")); 348 return (NULL); 349 } 350 mp->b_rptr += sctp_wroff_xtra; 351 mp->b_wptr = mp->b_rptr + ipsctplen; 352 353 ASSERT(OK_32PTR(mp->b_wptr)); 354 355 if (isv4) { 356 ipha_t *iph = (ipha_t *)mp->b_rptr; 357 358 bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen); 359 if (fp != sctp->sctp_current) { 360 /* fiddle with the dst addr */ 361 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst); 362 /* fix up src addr */ 363 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) { 364 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 365 iph->ipha_src); 366 } else if (sctp->sctp_bound_to_all) { 367 iph->ipha_src = INADDR_ANY; 368 } 369 } 370 /* set or clear the don't fragment bit */ 371 if (fp->df) { 372 iph->ipha_fragment_offset_and_flags = htons(IPH_DF); 373 } else { 374 iph->ipha_fragment_offset_and_flags = 0; 375 } 376 } else { 377 bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen); 378 if (fp != sctp->sctp_current) { 379 /* fiddle with the dst addr */ 380 ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr; 381 /* fix up src addr */ 382 if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 383 ((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr; 384 } else if (sctp->sctp_bound_to_all) { 385 bzero(&((ip6_t *)(mp->b_rptr))->ip6_src, 386 sizeof (in6_addr_t)); 387 } 388 } 389 } 390 ASSERT(sctp->sctp_connp != NULL); 391 392 /* 393 * IP will not free this IRE if it is condemned. SCTP needs to 394 * free it. 395 */ 396 if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) { 397 IRE_REFRELE_NOTR(fp->ire); 398 fp->ire = NULL; 399 } 400 /* Stash the conn and ire ptr info. for IP */ 401 SCTP_STASH_IPINFO(mp, fp->ire); 402 403 return (mp); 404 } 405 406 /* 407 * Notify upper layers about preferred write offset, write size. 408 */ 409 void 410 sctp_set_ulp_prop(sctp_t *sctp) 411 { 412 int hdrlen; 413 414 if (sctp->sctp_current->isv4) { 415 hdrlen = sctp->sctp_hdr_len; 416 } else { 417 hdrlen = sctp->sctp_hdr6_len; 418 } 419 ASSERT(sctp->sctp_ulpd); 420 421 ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss); 422 sctp->sctp_ulp_prop(sctp->sctp_ulpd, 423 sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 424 sctp->sctp_mss - sizeof (sctp_data_hdr_t)); 425 } 426 427 void 428 sctp_set_iplen(sctp_t *sctp, mblk_t *mp) 429 { 430 uint16_t sum = 0; 431 ipha_t *iph; 432 ip6_t *ip6h; 433 mblk_t *pmp = mp; 434 boolean_t isv4; 435 436 isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION); 437 for (; pmp; pmp = pmp->b_cont) 438 sum += pmp->b_wptr - pmp->b_rptr; 439 440 if (isv4) { 441 iph = (ipha_t *)mp->b_rptr; 442 iph->ipha_length = htons(sum); 443 } else { 444 ip6h = (ip6_t *)mp->b_rptr; 445 /* 446 * If an ip6i_t is present, the real IPv6 header 447 * immediately follows. 448 */ 449 if (ip6h->ip6_nxt == IPPROTO_RAW) 450 ip6h = (ip6_t *)&ip6h[1]; 451 ip6h->ip6_plen = htons(sum - ((char *)&sctp->sctp_ip6h[1] - 452 sctp->sctp_iphc6)); 453 } 454 } 455 456 int 457 sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2) 458 { 459 int na1 = 0; 460 int overlap = 0; 461 int equal = 1; 462 int onematch; 463 sctp_faddr_t *fp1, *fp2; 464 465 for (fp1 = a1; fp1; fp1 = fp1->next) { 466 onematch = 0; 467 for (fp2 = a2; fp2; fp2 = fp2->next) { 468 if (IN6_ARE_ADDR_EQUAL(&fp1->faddr, &fp2->faddr)) { 469 overlap++; 470 onematch = 1; 471 break; 472 } 473 if (!onematch) { 474 equal = 0; 475 } 476 } 477 na1++; 478 } 479 480 if (equal) { 481 return (SCTP_ADDR_EQUAL); 482 } 483 if (overlap == na1) { 484 return (SCTP_ADDR_SUBSET); 485 } 486 if (overlap) { 487 return (SCTP_ADDR_OVERLAP); 488 } 489 return (SCTP_ADDR_DISJOINT); 490 } 491 492 /* 493 * Returns 0 on success, -1 on memory allocation failure. If sleep 494 * is true, this function should never fail. The boolean parameter 495 * first decides whether the newly created faddr structure should be 496 * added at the beginning of the list or at the end. 497 * 498 * Note: caller must hold conn fanout lock. 499 */ 500 int 501 sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep, boolean_t first) 502 { 503 sctp_faddr_t *faddr; 504 mblk_t *timer_mp; 505 506 if (is_system_labeled()) { 507 ts_label_t *tsl; 508 tsol_tpc_t *rhtp; 509 int retv; 510 511 tsl = crgetlabel(CONN_CRED(sctp->sctp_connp)); 512 ASSERT(tsl != NULL); 513 514 /* find_tpc automatically does the right thing with IPv4 */ 515 rhtp = find_tpc(addr, IPV6_VERSION, B_FALSE); 516 if (rhtp == NULL) 517 return (EACCES); 518 519 retv = EACCES; 520 if (tsl->tsl_doi == rhtp->tpc_tp.tp_doi) { 521 switch (rhtp->tpc_tp.host_type) { 522 case UNLABELED: 523 /* 524 * Can talk to unlabeled hosts if any of the 525 * following are true: 526 * 1. zone's label matches the remote host's 527 * default label, 528 * 2. mac_exempt is on and the zone dominates 529 * the remote host's label, or 530 * 3. mac_exempt is on and the socket is from 531 * the global zone. 532 */ 533 if (blequal(&rhtp->tpc_tp.tp_def_label, 534 &tsl->tsl_label) || 535 (sctp->sctp_mac_exempt && 536 (sctp->sctp_zoneid == GLOBAL_ZONEID || 537 bldominates(&tsl->tsl_label, 538 &rhtp->tpc_tp.tp_def_label)))) 539 retv = 0; 540 break; 541 case SUN_CIPSO: 542 if (_blinrange(&tsl->tsl_label, 543 &rhtp->tpc_tp.tp_sl_range_cipso) || 544 blinlset(&tsl->tsl_label, 545 rhtp->tpc_tp.tp_sl_set_cipso)) 546 retv = 0; 547 break; 548 } 549 } 550 TPC_RELE(rhtp); 551 if (retv != 0) 552 return (retv); 553 } 554 555 if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) 556 return (ENOMEM); 557 timer_mp = sctp_timer_alloc((sctp), sctp_rexmit_timer); 558 if (timer_mp == NULL) { 559 kmem_cache_free(sctp_kmem_faddr_cache, faddr); 560 return (ENOMEM); 561 } 562 ((sctpt_t *)(timer_mp->b_rptr))->sctpt_faddr = faddr; 563 564 sctp_init_faddr(sctp, faddr, addr, timer_mp); 565 ASSERT(faddr->next == NULL); 566 567 if (sctp->sctp_faddrs == NULL) { 568 ASSERT(sctp->sctp_lastfaddr == NULL); 569 /* only element on list; first and last are same */ 570 sctp->sctp_faddrs = sctp->sctp_lastfaddr = faddr; 571 } else if (first) { 572 ASSERT(sctp->sctp_lastfaddr != NULL); 573 faddr->next = sctp->sctp_faddrs; 574 sctp->sctp_faddrs = faddr; 575 } else { 576 sctp->sctp_lastfaddr->next = faddr; 577 sctp->sctp_lastfaddr = faddr; 578 } 579 sctp->sctp_nfaddrs++; 580 581 return (0); 582 } 583 584 sctp_faddr_t * 585 sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr) 586 { 587 sctp_faddr_t *fp; 588 589 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 590 if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) 591 break; 592 } 593 594 return (fp); 595 } 596 597 sctp_faddr_t * 598 sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr) 599 { 600 for (; fp; fp = fp->next) { 601 if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) { 602 break; 603 } 604 } 605 606 return (fp); 607 } 608 609 /* 610 * To change the currently used peer address to the specified one. 611 */ 612 void 613 sctp_set_faddr_current(sctp_t *sctp, sctp_faddr_t *fp) 614 { 615 /* Now setup the composite header. */ 616 if (fp->isv4) { 617 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, 618 sctp->sctp_ipha->ipha_dst); 619 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, sctp->sctp_ipha->ipha_src); 620 /* update don't fragment bit */ 621 if (fp->df) { 622 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 623 htons(IPH_DF); 624 } else { 625 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0; 626 } 627 } else { 628 sctp->sctp_ip6h->ip6_dst = fp->faddr; 629 sctp->sctp_ip6h->ip6_src = fp->saddr; 630 } 631 632 sctp->sctp_current = fp; 633 sctp->sctp_mss = fp->sfa_pmss; 634 635 /* Update the uppper layer for the change. */ 636 if (!SCTP_IS_DETACHED(sctp)) 637 sctp_set_ulp_prop(sctp); 638 } 639 640 void 641 sctp_redo_faddr_srcs(sctp_t *sctp) 642 { 643 sctp_faddr_t *fp; 644 645 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 646 sctp_get_ire(sctp, fp); 647 } 648 } 649 650 void 651 sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp) 652 { 653 int64_t now = lbolt64; 654 655 fp->strikes = 0; 656 sctp->sctp_strikes = 0; 657 fp->lastactive = now; 658 fp->hb_expiry = now + SET_HB_INTVL(fp); 659 fp->hb_pending = B_FALSE; 660 if (fp->state != SCTP_FADDRS_ALIVE) { 661 fp->state = SCTP_FADDRS_ALIVE; 662 sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0); 663 664 /* 665 * If this is the primary, switch back to it now. And 666 * we probably want to reset the source addr used to reach 667 * it. 668 */ 669 if (fp == sctp->sctp_primary) { 670 sctp_set_faddr_current(sctp, fp); 671 sctp_get_ire(sctp, fp); 672 return; 673 } 674 } 675 if (fp->ire == NULL) { 676 /* Should have a full IRE now */ 677 sctp_get_ire(sctp, fp); 678 } 679 } 680 681 int 682 sctp_is_a_faddr_clean(sctp_t *sctp) 683 { 684 sctp_faddr_t *fp; 685 686 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 687 if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) { 688 return (1); 689 } 690 } 691 692 return (0); 693 } 694 695 /* 696 * Returns 0 if there is at leave one other active faddr, -1 if there 697 * are none. If there are none left, faddr_dead() will start killing the 698 * association. 699 * If the downed faddr was the current faddr, a new current faddr 700 * will be chosen. 701 */ 702 int 703 sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate) 704 { 705 sctp_faddr_t *ofp; 706 707 if (fp->state == SCTP_FADDRS_ALIVE) { 708 sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_UNREACHABLE, 0); 709 } 710 fp->state = newstate; 711 712 dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n", 713 SCTP_PRINTADDR(fp->faddr), newstate)); 714 715 if (fp == sctp->sctp_current) { 716 /* Current faddr down; need to switch it */ 717 sctp->sctp_current = NULL; 718 } 719 720 /* Find next alive faddr */ 721 ofp = fp; 722 for (fp = fp->next; fp != NULL; fp = fp->next) { 723 if (fp->state == SCTP_FADDRS_ALIVE) { 724 break; 725 } 726 } 727 728 if (fp == NULL) { 729 /* Continue from beginning of list */ 730 for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->next) { 731 if (fp->state == SCTP_FADDRS_ALIVE) { 732 break; 733 } 734 } 735 } 736 737 /* 738 * Find a new fp, so if the current faddr is dead, use the new fp 739 * as the current one. 740 */ 741 if (fp != ofp) { 742 if (sctp->sctp_current == NULL) { 743 dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n", 744 SCTP_PRINTADDR(fp->faddr))); 745 /* 746 * Note that we don't need to reset the source addr 747 * of the new fp. 748 */ 749 sctp_set_faddr_current(sctp, fp); 750 } 751 return (0); 752 } 753 754 755 /* All faddrs are down; kill the association */ 756 dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n")); 757 BUMP_MIB(&sctp_mib, sctpAborted); 758 sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ? 759 SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL); 760 sctp_clean_death(sctp, sctp->sctp_client_errno ? 761 sctp->sctp_client_errno : ETIMEDOUT); 762 763 return (-1); 764 } 765 766 sctp_faddr_t * 767 sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp) 768 { 769 sctp_faddr_t *nfp = NULL; 770 771 if (ofp == NULL) { 772 ofp = sctp->sctp_current; 773 } 774 775 /* Find the next live one */ 776 for (nfp = ofp->next; nfp != NULL; nfp = nfp->next) { 777 if (nfp->state == SCTP_FADDRS_ALIVE) { 778 break; 779 } 780 } 781 782 if (nfp == NULL) { 783 /* Continue from beginning of list */ 784 for (nfp = sctp->sctp_faddrs; nfp != ofp; nfp = nfp->next) { 785 if (nfp->state == SCTP_FADDRS_ALIVE) { 786 break; 787 } 788 } 789 } 790 791 /* 792 * nfp could only be NULL if all faddrs are down, and when 793 * this happens, faddr_dead() should have killed the 794 * association. Hence this assertion... 795 */ 796 ASSERT(nfp != NULL); 797 return (nfp); 798 } 799 800 void 801 sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp) 802 { 803 sctp_faddr_t *fpp; 804 805 if (!sctp->sctp_faddrs) { 806 return; 807 } 808 809 if (fp->timer_mp != NULL) { 810 sctp_timer_free(fp->timer_mp); 811 fp->timer_mp = NULL; 812 fp->timer_running = 0; 813 } 814 if (fp->rc_timer_mp != NULL) { 815 sctp_timer_free(fp->rc_timer_mp); 816 fp->rc_timer_mp = NULL; 817 fp->rc_timer_running = 0; 818 } 819 if (fp->ire != NULL) { 820 IRE_REFRELE_NOTR(fp->ire); 821 fp->ire = NULL; 822 } 823 824 if (fp == sctp->sctp_faddrs) { 825 goto gotit; 826 } 827 828 for (fpp = sctp->sctp_faddrs; fpp->next != fp; fpp = fpp->next) 829 ; 830 831 gotit: 832 ASSERT(sctp->sctp_conn_tfp != NULL); 833 mutex_enter(&sctp->sctp_conn_tfp->tf_lock); 834 if (fp == sctp->sctp_faddrs) { 835 sctp->sctp_faddrs = fp->next; 836 } else { 837 fpp->next = fp->next; 838 } 839 mutex_exit(&sctp->sctp_conn_tfp->tf_lock); 840 /* XXX faddr2ire? */ 841 kmem_cache_free(sctp_kmem_faddr_cache, fp); 842 sctp->sctp_nfaddrs--; 843 } 844 845 void 846 sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock) 847 { 848 sctp_faddr_t *fp, *fpn; 849 850 if (sctp->sctp_faddrs == NULL) { 851 ASSERT(sctp->sctp_lastfaddr == NULL); 852 return; 853 } 854 855 ASSERT(sctp->sctp_lastfaddr != NULL); 856 sctp->sctp_lastfaddr = NULL; 857 sctp->sctp_current = NULL; 858 sctp->sctp_primary = NULL; 859 860 sctp_free_faddr_timers(sctp); 861 862 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { 863 /* in conn fanout; need to hold lock */ 864 mutex_enter(&sctp->sctp_conn_tfp->tf_lock); 865 } 866 867 for (fp = sctp->sctp_faddrs; fp; fp = fpn) { 868 fpn = fp->next; 869 if (fp->ire != NULL) 870 IRE_REFRELE_NOTR(fp->ire); 871 kmem_cache_free(sctp_kmem_faddr_cache, fp); 872 sctp->sctp_nfaddrs--; 873 } 874 875 sctp->sctp_faddrs = NULL; 876 ASSERT(sctp->sctp_nfaddrs == 0); 877 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { 878 mutex_exit(&sctp->sctp_conn_tfp->tf_lock); 879 } 880 881 } 882 883 void 884 sctp_zap_addrs(sctp_t *sctp) 885 { 886 sctp_zap_faddrs(sctp, 0); 887 sctp_free_saddrs(sctp); 888 } 889 890 /* 891 * Initialize the IPv4 header. Loses any record of any IP options. 892 */ 893 int 894 sctp_header_init_ipv4(sctp_t *sctp, int sleep) 895 { 896 sctp_hdr_t *sctph; 897 898 /* 899 * This is a simple initialization. If there's 900 * already a template, it should never be too small, 901 * so reuse it. Otherwise, allocate space for the new one. 902 */ 903 if (sctp->sctp_iphc != NULL) { 904 ASSERT(sctp->sctp_iphc_len >= SCTP_MAX_COMBINED_HEADER_LENGTH); 905 bzero(sctp->sctp_iphc, sctp->sctp_iphc_len); 906 } else { 907 sctp->sctp_iphc_len = SCTP_MAX_COMBINED_HEADER_LENGTH; 908 sctp->sctp_iphc = kmem_zalloc(sctp->sctp_iphc_len, sleep); 909 if (sctp->sctp_iphc == NULL) { 910 sctp->sctp_iphc_len = 0; 911 return (ENOMEM); 912 } 913 } 914 915 sctp->sctp_ipha = (ipha_t *)sctp->sctp_iphc; 916 917 sctp->sctp_hdr_len = sizeof (ipha_t) + sizeof (sctp_hdr_t); 918 sctp->sctp_ip_hdr_len = sizeof (ipha_t); 919 sctp->sctp_ipha->ipha_length = htons(sizeof (ipha_t) + 920 sizeof (sctp_hdr_t)); 921 sctp->sctp_ipha->ipha_version_and_hdr_length 922 = (IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS; 923 924 /* 925 * These two fields should be zero, and are already set above. 926 * 927 * sctp->sctp_ipha->ipha_ident, 928 * sctp->sctp_ipha->ipha_fragment_offset_and_flags. 929 */ 930 931 sctp->sctp_ipha->ipha_ttl = sctp_ipv4_ttl; 932 sctp->sctp_ipha->ipha_protocol = IPPROTO_SCTP; 933 934 sctph = (sctp_hdr_t *)(sctp->sctp_iphc + sizeof (ipha_t)); 935 sctp->sctp_sctph = sctph; 936 937 return (0); 938 } 939 940 /* 941 * Update sctp_sticky_hdrs based on sctp_sticky_ipp. 942 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 943 * headers, and the maximum size sctp header (to avoid reallocation 944 * on the fly for additional sctp options). 945 * Returns failure if can't allocate memory. 946 */ 947 int 948 sctp_build_hdrs(sctp_t *sctp) 949 { 950 char *hdrs; 951 uint_t hdrs_len; 952 ip6i_t *ip6i; 953 char buf[SCTP_MAX_HDR_LENGTH]; 954 ip6_pkt_t *ipp = &sctp->sctp_sticky_ipp; 955 in6_addr_t src; 956 in6_addr_t dst; 957 958 /* 959 * save the existing sctp header and source/dest IP addresses 960 */ 961 bcopy(sctp->sctp_sctph6, buf, sizeof (sctp_hdr_t)); 962 src = sctp->sctp_ip6h->ip6_src; 963 dst = sctp->sctp_ip6h->ip6_dst; 964 hdrs_len = ip_total_hdrs_len_v6(ipp) + SCTP_MAX_HDR_LENGTH; 965 ASSERT(hdrs_len != 0); 966 if (hdrs_len > sctp->sctp_iphc6_len) { 967 /* Need to reallocate */ 968 hdrs = kmem_zalloc(hdrs_len, KM_NOSLEEP); 969 if (hdrs == NULL) 970 return (ENOMEM); 971 972 if (sctp->sctp_iphc6_len != 0) 973 kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len); 974 sctp->sctp_iphc6 = hdrs; 975 sctp->sctp_iphc6_len = hdrs_len; 976 } 977 ip_build_hdrs_v6((uchar_t *)sctp->sctp_iphc6, 978 hdrs_len - SCTP_MAX_HDR_LENGTH, ipp, IPPROTO_SCTP); 979 980 /* Set header fields not in ipp */ 981 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 982 ip6i = (ip6i_t *)sctp->sctp_iphc6; 983 sctp->sctp_ip6h = (ip6_t *)&ip6i[1]; 984 } else { 985 sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6; 986 } 987 /* 988 * sctp->sctp_ip_hdr_len will include ip6i_t if there is one. 989 */ 990 sctp->sctp_ip_hdr6_len = hdrs_len - SCTP_MAX_HDR_LENGTH; 991 sctp->sctp_sctph6 = (sctp_hdr_t *)(sctp->sctp_iphc6 + 992 sctp->sctp_ip_hdr6_len); 993 sctp->sctp_hdr6_len = sctp->sctp_ip_hdr6_len + sizeof (sctp_hdr_t); 994 995 bcopy(buf, sctp->sctp_sctph6, sizeof (sctp_hdr_t)); 996 997 sctp->sctp_ip6h->ip6_src = src; 998 sctp->sctp_ip6h->ip6_dst = dst; 999 /* 1000 * If the hoplimit was not set by ip_build_hdrs_v6(), we need to 1001 * set it to the default value for SCTP. 1002 */ 1003 if (!(ipp->ipp_fields & IPPF_UNICAST_HOPS)) 1004 sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit; 1005 /* 1006 * If we're setting extension headers after a connection 1007 * has been established, and if we have a routing header 1008 * among the extension headers, call ip_massage_options_v6 to 1009 * manipulate the routing header/ip6_dst set the checksum 1010 * difference in the sctp header template. 1011 * (This happens in sctp_connect_ipv6 if the routing header 1012 * is set prior to the connect.) 1013 */ 1014 1015 if ((sctp->sctp_state >= SCTPS_COOKIE_WAIT) && 1016 (sctp->sctp_sticky_ipp.ipp_fields & IPPF_RTHDR)) { 1017 ip6_rthdr_t *rth; 1018 1019 rth = ip_find_rthdr_v6(sctp->sctp_ip6h, 1020 (uint8_t *)sctp->sctp_sctph6); 1021 if (rth != NULL) 1022 (void) ip_massage_options_v6(sctp->sctp_ip6h, rth); 1023 } 1024 return (0); 1025 } 1026 1027 /* 1028 * Initialize the IPv6 header. Loses any record of any IPv6 extension headers. 1029 */ 1030 int 1031 sctp_header_init_ipv6(sctp_t *sctp, int sleep) 1032 { 1033 sctp_hdr_t *sctph; 1034 1035 /* 1036 * This is a simple initialization. If there's 1037 * already a template, it should never be too small, 1038 * so reuse it. Otherwise, allocate space for the new one. 1039 * Ensure that there is enough space to "downgrade" the sctp_t 1040 * to an IPv4 sctp_t. This requires having space for a full load 1041 * of IPv4 options 1042 */ 1043 if (sctp->sctp_iphc6 != NULL) { 1044 ASSERT(sctp->sctp_iphc6_len >= 1045 SCTP_MAX_COMBINED_HEADER_LENGTH); 1046 bzero(sctp->sctp_iphc6, sctp->sctp_iphc6_len); 1047 } else { 1048 sctp->sctp_iphc6_len = SCTP_MAX_COMBINED_HEADER_LENGTH; 1049 sctp->sctp_iphc6 = kmem_zalloc(sctp->sctp_iphc_len, sleep); 1050 if (sctp->sctp_iphc6 == NULL) { 1051 sctp->sctp_iphc6_len = 0; 1052 return (ENOMEM); 1053 } 1054 } 1055 sctp->sctp_hdr6_len = IPV6_HDR_LEN + sizeof (sctp_hdr_t); 1056 sctp->sctp_ip_hdr6_len = IPV6_HDR_LEN; 1057 sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6; 1058 1059 /* Initialize the header template */ 1060 1061 sctp->sctp_ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1062 sctp->sctp_ip6h->ip6_plen = ntohs(sizeof (sctp_hdr_t)); 1063 sctp->sctp_ip6h->ip6_nxt = IPPROTO_SCTP; 1064 sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit; 1065 1066 sctph = (sctp_hdr_t *)(sctp->sctp_iphc6 + IPV6_HDR_LEN); 1067 sctp->sctp_sctph6 = sctph; 1068 1069 return (0); 1070 } 1071 1072 static int 1073 sctp_v4_label(sctp_t *sctp) 1074 { 1075 uchar_t optbuf[IP_MAX_OPT_LENGTH]; 1076 const cred_t *cr = CONN_CRED(sctp->sctp_connp); 1077 int added; 1078 1079 if (tsol_compute_label(cr, sctp->sctp_ipha->ipha_dst, optbuf, 1080 sctp->sctp_mac_exempt) != 0) 1081 return (EACCES); 1082 1083 added = tsol_remove_secopt(sctp->sctp_ipha, sctp->sctp_hdr_len); 1084 if (added == -1) 1085 return (EACCES); 1086 sctp->sctp_hdr_len += added; 1087 sctp->sctp_sctph = (sctp_hdr_t *)((uchar_t *)sctp->sctp_sctph + added); 1088 sctp->sctp_ip_hdr_len += added; 1089 if ((sctp->sctp_v4label_len = optbuf[IPOPT_OLEN]) != 0) { 1090 sctp->sctp_v4label_len = (sctp->sctp_v4label_len + 3) & ~3; 1091 added = tsol_prepend_option(optbuf, sctp->sctp_ipha, 1092 sctp->sctp_hdr_len); 1093 if (added == -1) 1094 return (EACCES); 1095 sctp->sctp_hdr_len += added; 1096 sctp->sctp_sctph = (sctp_hdr_t *)((uchar_t *)sctp->sctp_sctph + 1097 added); 1098 sctp->sctp_ip_hdr_len += added; 1099 } 1100 return (0); 1101 } 1102 1103 static int 1104 sctp_v6_label(sctp_t *sctp) 1105 { 1106 uchar_t optbuf[TSOL_MAX_IPV6_OPTION]; 1107 const cred_t *cr = CONN_CRED(sctp->sctp_connp); 1108 1109 if (tsol_compute_label_v6(cr, &sctp->sctp_ip6h->ip6_dst, optbuf, 1110 sctp->sctp_mac_exempt) != 0) 1111 return (EACCES); 1112 if (tsol_update_sticky(&sctp->sctp_sticky_ipp, &sctp->sctp_v6label_len, 1113 optbuf) != 0) 1114 return (EACCES); 1115 if (sctp_build_hdrs(sctp) != 0) 1116 return (EACCES); 1117 return (0); 1118 } 1119 1120 /* 1121 * XXX implement more sophisticated logic 1122 */ 1123 int 1124 sctp_set_hdraddrs(sctp_t *sctp) 1125 { 1126 sctp_faddr_t *fp; 1127 int gotv4 = 0; 1128 int gotv6 = 0; 1129 1130 ASSERT(sctp->sctp_faddrs != NULL); 1131 ASSERT(sctp->sctp_nsaddrs > 0); 1132 1133 /* Set up using the primary first */ 1134 if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->faddr)) { 1135 IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->faddr, 1136 sctp->sctp_ipha->ipha_dst); 1137 /* saddr may be unspec; make_mp() will handle this */ 1138 IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->saddr, 1139 sctp->sctp_ipha->ipha_src); 1140 if (!is_system_labeled() || sctp_v4_label(sctp) == 0) { 1141 gotv4 = 1; 1142 if (sctp->sctp_ipversion == IPV4_VERSION) { 1143 goto copyports; 1144 } 1145 } 1146 } else { 1147 sctp->sctp_ip6h->ip6_dst = sctp->sctp_primary->faddr; 1148 /* saddr may be unspec; make_mp() will handle this */ 1149 sctp->sctp_ip6h->ip6_src = sctp->sctp_primary->saddr; 1150 if (!is_system_labeled() || sctp_v6_label(sctp) == 0) 1151 gotv6 = 1; 1152 } 1153 1154 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 1155 if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->faddr)) { 1156 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, 1157 sctp->sctp_ipha->ipha_dst); 1158 /* copy in the faddr_t's saddr */ 1159 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 1160 sctp->sctp_ipha->ipha_src); 1161 if (!is_system_labeled() || sctp_v4_label(sctp) == 0) { 1162 gotv4 = 1; 1163 if (sctp->sctp_ipversion == IPV4_VERSION || 1164 gotv6) { 1165 break; 1166 } 1167 } 1168 } else if (!gotv6) { 1169 sctp->sctp_ip6h->ip6_dst = fp->faddr; 1170 /* copy in the faddr_t's saddr */ 1171 sctp->sctp_ip6h->ip6_src = fp->saddr; 1172 if (!is_system_labeled() || sctp_v6_label(sctp) == 0) { 1173 gotv6 = 1; 1174 if (gotv4) 1175 break; 1176 } 1177 } 1178 } 1179 1180 copyports: 1181 if (!gotv4 && !gotv6) 1182 return (EACCES); 1183 1184 /* copy in the ports for good measure */ 1185 sctp->sctp_sctph->sh_sport = sctp->sctp_lport; 1186 sctp->sctp_sctph->sh_dport = sctp->sctp_fport; 1187 1188 sctp->sctp_sctph6->sh_sport = sctp->sctp_lport; 1189 sctp->sctp_sctph6->sh_dport = sctp->sctp_fport; 1190 return (0); 1191 } 1192 1193 void 1194 sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp) 1195 { 1196 mblk_t *mp; 1197 sctp_parm_hdr_t *ph; 1198 size_t len; 1199 int pad; 1200 1201 len = sizeof (*ph) + ntohs(uph->sph_len); 1202 if ((pad = len % 4) != 0) { 1203 pad = 4 - pad; 1204 len += pad; 1205 } 1206 mp = allocb(len, BPRI_MED); 1207 if (mp == NULL) { 1208 return; 1209 } 1210 1211 ph = (sctp_parm_hdr_t *)(mp->b_rptr); 1212 ph->sph_type = htons(PARM_UNRECOGNIZED); 1213 ph->sph_len = htons(len - pad); 1214 1215 /* copy in the unrecognized parameter */ 1216 bcopy(uph, ph + 1, ntohs(uph->sph_len)); 1217 1218 mp->b_wptr = mp->b_rptr + len; 1219 if (*errmp != NULL) { 1220 linkb(*errmp, mp); 1221 } else { 1222 *errmp = mp; 1223 } 1224 } 1225 1226 /* 1227 * o Bounds checking 1228 * o Updates remaining 1229 * o Checks alignment 1230 */ 1231 sctp_parm_hdr_t * 1232 sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining) 1233 { 1234 int pad; 1235 uint16_t len; 1236 1237 len = ntohs(current->sph_len); 1238 *remaining -= len; 1239 if (*remaining < sizeof (*current) || len < sizeof (*current)) { 1240 return (NULL); 1241 } 1242 if ((pad = len & (SCTP_ALIGN - 1)) != 0) { 1243 pad = SCTP_ALIGN - pad; 1244 *remaining -= pad; 1245 } 1246 /*LINTED pointer cast may result in improper alignment*/ 1247 current = (sctp_parm_hdr_t *)((char *)current + len + pad); 1248 return (current); 1249 } 1250 1251 /* 1252 * Sets the address parameters given in the INIT chunk into sctp's 1253 * faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are 1254 * no address parameters in the INIT chunk, a single faddr is created 1255 * from the ip hdr at the beginning of pkt. 1256 * If there already are existing addresses hanging from sctp, merge 1257 * them in, if the old info contains addresses which are not present 1258 * in this new info, get rid of them, and clean the pointers if there's 1259 * messages which have this as their target address. 1260 * 1261 * We also re-adjust the source address list here since the list may 1262 * contain more than what is actually part of the association. If 1263 * we get here from sctp_send_cookie_echo(), we are on the active 1264 * side and psctp will be NULL and ich will be the INIT-ACK chunk. 1265 * If we get here from sctp_accept_comm(), ich will be the INIT chunk 1266 * and psctp will the listening endpoint. 1267 * 1268 * INIT processing: When processing the INIT we inherit the src address 1269 * list from the listener. For a loopback or linklocal association, we 1270 * delete the list and just take the address from the IP header (since 1271 * that's how we created the INIT-ACK). Additionally, for loopback we 1272 * ignore the address params in the INIT. For determining which address 1273 * types were sent in the INIT-ACK we follow the same logic as in 1274 * creating the INIT-ACK. We delete addresses of the type that are not 1275 * supported by the peer. 1276 * 1277 * INIT-ACK processing: When processing the INIT-ACK since we had not 1278 * included addr params for loopback or linklocal addresses when creating 1279 * the INIT, we just use the address from the IP header. Further, for 1280 * loopback we ignore the addr param list. We mark addresses of the 1281 * type not supported by the peer as unconfirmed. 1282 * 1283 * In case of INIT processing we look for supported address types in the 1284 * supported address param, if present. In both cases the address type in 1285 * the IP header is supported as well as types for addresses in the param 1286 * list, if any. 1287 * 1288 * Once we have the supported address types sctp_check_saddr() runs through 1289 * the source address list and deletes or marks as unconfirmed address of 1290 * types not supported by the peer. 1291 * 1292 * Returns 0 on success, sys errno on failure 1293 */ 1294 int 1295 sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt, 1296 sctp_chunk_hdr_t *ich, uint_t *sctp_options) 1297 { 1298 sctp_init_chunk_t *init; 1299 ipha_t *iph; 1300 ip6_t *ip6h; 1301 in6_addr_t hdrsaddr[1]; 1302 in6_addr_t hdrdaddr[1]; 1303 sctp_parm_hdr_t *ph; 1304 ssize_t remaining; 1305 int isv4; 1306 int err; 1307 sctp_faddr_t *fp; 1308 int supp_af = 0; 1309 boolean_t check_saddr = B_TRUE; 1310 in6_addr_t curaddr; 1311 1312 if (sctp_options != NULL) 1313 *sctp_options = 0; 1314 1315 /* extract the address from the IP header */ 1316 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); 1317 if (isv4) { 1318 iph = (ipha_t *)pkt->b_rptr; 1319 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr); 1320 IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr); 1321 supp_af |= PARM_SUPP_V4; 1322 } else { 1323 ip6h = (ip6_t *)pkt->b_rptr; 1324 hdrsaddr[0] = ip6h->ip6_src; 1325 hdrdaddr[0] = ip6h->ip6_dst; 1326 supp_af |= PARM_SUPP_V6; 1327 } 1328 1329 /* 1330 * Unfortunately, we can't delay this because adding an faddr 1331 * looks for the presence of the source address (from the ire 1332 * for the faddr) in the source address list. We could have 1333 * delayed this if, say, this was a loopback/linklocal connection. 1334 * Now, we just end up nuking this list and taking the addr from 1335 * the IP header for loopback/linklocal. 1336 */ 1337 if (psctp != NULL && psctp->sctp_nsaddrs > 0) { 1338 ASSERT(sctp->sctp_nsaddrs == 0); 1339 1340 err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP); 1341 if (err != 0) 1342 return (err); 1343 } 1344 /* 1345 * We will add the faddr before parsing the address list as this 1346 * might be a loopback connection and we would not have to 1347 * go through the list. 1348 * 1349 * Make sure the header's addr is in the list 1350 */ 1351 fp = sctp_lookup_faddr(sctp, hdrsaddr); 1352 if (fp == NULL) { 1353 /* not included; add it now */ 1354 err = sctp_add_faddr(sctp, hdrsaddr, KM_NOSLEEP, B_TRUE); 1355 if (err != 0) 1356 return (err); 1357 1358 /* sctp_faddrs will be the hdr addr */ 1359 fp = sctp->sctp_faddrs; 1360 } 1361 /* make the header addr the primary */ 1362 1363 if (cl_sctp_assoc_change != NULL && psctp == NULL) 1364 curaddr = sctp->sctp_current->faddr; 1365 1366 sctp->sctp_primary = fp; 1367 sctp->sctp_current = fp; 1368 sctp->sctp_mss = fp->sfa_pmss; 1369 1370 /* For loopback connections & linklocal get address from the header */ 1371 if (sctp->sctp_loopback || sctp->sctp_linklocal) { 1372 if (sctp->sctp_nsaddrs != 0) 1373 sctp_free_saddrs(sctp); 1374 if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0) 1375 return (err); 1376 /* For loopback ignore address list */ 1377 if (sctp->sctp_loopback) 1378 return (0); 1379 check_saddr = B_FALSE; 1380 } 1381 1382 /* Walk the params in the INIT [ACK], pulling out addr params */ 1383 remaining = ntohs(ich->sch_len) - sizeof (*ich) - 1384 sizeof (sctp_init_chunk_t); 1385 if (remaining < sizeof (*ph)) { 1386 if (check_saddr) { 1387 sctp_check_saddr(sctp, supp_af, psctp == NULL ? 1388 B_FALSE : B_TRUE); 1389 } 1390 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); 1391 return (0); 1392 } 1393 1394 init = (sctp_init_chunk_t *)(ich + 1); 1395 ph = (sctp_parm_hdr_t *)(init + 1); 1396 1397 /* params will have already been byteordered when validating */ 1398 while (ph != NULL) { 1399 if (ph->sph_type == htons(PARM_SUPP_ADDRS)) { 1400 int plen; 1401 uint16_t *p; 1402 uint16_t addrtype; 1403 1404 ASSERT(psctp != NULL); 1405 plen = ntohs(ph->sph_len); 1406 p = (uint16_t *)(ph + 1); 1407 while (plen > 0) { 1408 addrtype = ntohs(*p); 1409 switch (addrtype) { 1410 case PARM_ADDR6: 1411 supp_af |= PARM_SUPP_V6; 1412 break; 1413 case PARM_ADDR4: 1414 supp_af |= PARM_SUPP_V4; 1415 break; 1416 default: 1417 break; 1418 } 1419 p++; 1420 plen -= sizeof (*p); 1421 } 1422 } else if (ph->sph_type == htons(PARM_ADDR4)) { 1423 if (remaining >= PARM_ADDR4_LEN) { 1424 in6_addr_t addr; 1425 ipaddr_t ta; 1426 1427 supp_af |= PARM_SUPP_V4; 1428 /* 1429 * Screen out broad/multicasts & loopback. 1430 * If the endpoint only accepts v6 address, 1431 * go to the next one. 1432 */ 1433 bcopy(ph + 1, &ta, sizeof (ta)); 1434 if (ta == 0 || 1435 ta == INADDR_BROADCAST || 1436 ta == htonl(INADDR_LOOPBACK) || 1437 IN_MULTICAST(ta) || 1438 sctp->sctp_connp->conn_ipv6_v6only) { 1439 goto next; 1440 } 1441 /* 1442 * XXX also need to check for subnet 1443 * broadcasts. This should probably 1444 * wait until we have full access 1445 * to the ILL tables. 1446 */ 1447 1448 IN6_INADDR_TO_V4MAPPED((struct in_addr *) 1449 (ph + 1), &addr); 1450 /* Check for duplicate. */ 1451 if (sctp_lookup_faddr(sctp, &addr) != NULL) 1452 goto next; 1453 1454 /* OK, add it to the faddr set */ 1455 err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP, 1456 B_FALSE); 1457 if (err != 0) 1458 return (err); 1459 } 1460 } else if (ph->sph_type == htons(PARM_ADDR6) && 1461 sctp->sctp_family == AF_INET6) { 1462 /* An v4 socket should not take v6 addresses. */ 1463 if (remaining >= PARM_ADDR6_LEN) { 1464 in6_addr_t *addr6; 1465 1466 supp_af |= PARM_SUPP_V6; 1467 addr6 = (in6_addr_t *)(ph + 1); 1468 /* 1469 * Screen out link locals, mcast, loopback 1470 * and bogus v6 address. 1471 */ 1472 if (IN6_IS_ADDR_LINKLOCAL(addr6) || 1473 IN6_IS_ADDR_MULTICAST(addr6) || 1474 IN6_IS_ADDR_LOOPBACK(addr6) || 1475 IN6_IS_ADDR_V4MAPPED(addr6)) { 1476 goto next; 1477 } 1478 /* Check for duplicate. */ 1479 if (sctp_lookup_faddr(sctp, addr6) != NULL) 1480 goto next; 1481 1482 err = sctp_add_faddr(sctp, 1483 (in6_addr_t *)(ph + 1), KM_NOSLEEP, 1484 B_FALSE); 1485 if (err != 0) 1486 return (err); 1487 } 1488 } else if (ph->sph_type == htons(PARM_FORWARD_TSN)) { 1489 if (sctp_options != NULL) 1490 *sctp_options |= SCTP_PRSCTP_OPTION; 1491 } /* else; skip */ 1492 1493 next: 1494 ph = sctp_next_parm(ph, &remaining); 1495 } 1496 if (check_saddr) { 1497 sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE : 1498 B_TRUE); 1499 } 1500 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); 1501 /* 1502 * We have the right address list now, update clustering's 1503 * knowledge because when we sent the INIT we had just added 1504 * the address the INIT was sent to. 1505 */ 1506 if (psctp == NULL && cl_sctp_assoc_change != NULL) { 1507 uchar_t *alist; 1508 size_t asize; 1509 uchar_t *dlist; 1510 size_t dsize; 1511 1512 asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 1513 alist = kmem_alloc(asize, KM_NOSLEEP); 1514 if (alist == NULL) { 1515 SCTP_KSTAT(sctp_cl_assoc_change); 1516 return (ENOMEM); 1517 } 1518 /* 1519 * Just include the address the INIT was sent to in the 1520 * delete list and send the entire faddr list. We could 1521 * do it differently (i.e include all the addresses in the 1522 * add list even if it contains the original address OR 1523 * remove the original address from the add list etc.), but 1524 * this seems reasonable enough. 1525 */ 1526 dsize = sizeof (in6_addr_t); 1527 dlist = kmem_alloc(dsize, KM_NOSLEEP); 1528 if (dlist == NULL) { 1529 kmem_free(alist, asize); 1530 SCTP_KSTAT(sctp_cl_assoc_change); 1531 return (ENOMEM); 1532 } 1533 bcopy(&curaddr, dlist, sizeof (curaddr)); 1534 sctp_get_faddr_list(sctp, alist, asize); 1535 (*cl_sctp_assoc_change)(sctp->sctp_family, alist, asize, 1536 sctp->sctp_nfaddrs, dlist, dsize, 1, SCTP_CL_PADDR, 1537 (cl_sctp_handle_t)sctp); 1538 /* alist and dlist will be freed by the clustering module */ 1539 } 1540 return (0); 1541 } 1542 1543 /* 1544 * Returns 0 if the check failed and the restart should be refused, 1545 * 1 if the check succeeded. 1546 */ 1547 int 1548 sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports, 1549 int sleep) 1550 { 1551 sctp_faddr_t *fp, *fpa, *fphead = NULL; 1552 sctp_parm_hdr_t *ph; 1553 ssize_t remaining; 1554 int isv4; 1555 ipha_t *iph; 1556 ip6_t *ip6h; 1557 in6_addr_t hdraddr[1]; 1558 int retval = 0; 1559 sctp_tf_t *tf; 1560 sctp_t *sctp; 1561 int compres; 1562 sctp_init_chunk_t *init; 1563 int nadded = 0; 1564 1565 /* extract the address from the IP header */ 1566 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); 1567 if (isv4) { 1568 iph = (ipha_t *)pkt->b_rptr; 1569 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr); 1570 } else { 1571 ip6h = (ip6_t *)pkt->b_rptr; 1572 hdraddr[0] = ip6h->ip6_src; 1573 } 1574 1575 /* Walk the params in the INIT [ACK], pulling out addr params */ 1576 remaining = ntohs(ich->sch_len) - sizeof (*ich) - 1577 sizeof (sctp_init_chunk_t); 1578 if (remaining < sizeof (*ph)) { 1579 /* no parameters; restart OK */ 1580 return (1); 1581 } 1582 init = (sctp_init_chunk_t *)(ich + 1); 1583 ph = (sctp_parm_hdr_t *)(init + 1); 1584 1585 while (ph != NULL) { 1586 /* params will have already been byteordered when validating */ 1587 if (ph->sph_type == htons(PARM_ADDR4)) { 1588 if (remaining >= PARM_ADDR4_LEN) { 1589 in6_addr_t addr; 1590 IN6_INADDR_TO_V4MAPPED((struct in_addr *) 1591 (ph + 1), &addr); 1592 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, 1593 sleep); 1594 if (!fpa) { 1595 goto done; 1596 } 1597 bzero(fpa, sizeof (*fpa)); 1598 fpa->faddr = addr; 1599 fpa->next = NULL; 1600 } 1601 } else if (ph->sph_type == htons(PARM_ADDR6)) { 1602 if (remaining >= PARM_ADDR6_LEN) { 1603 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, 1604 sleep); 1605 if (!fpa) { 1606 goto done; 1607 } 1608 bzero(fpa, sizeof (*fpa)); 1609 bcopy(ph + 1, &fpa->faddr, 1610 sizeof (fpa->faddr)); 1611 fpa->next = NULL; 1612 } 1613 } else { 1614 /* else not addr param; skip */ 1615 fpa = NULL; 1616 } 1617 /* link in the new addr, if it was an addr param */ 1618 if (fpa) { 1619 if (!fphead) { 1620 fphead = fpa; 1621 fp = fphead; 1622 } else { 1623 fp->next = fpa; 1624 fp = fpa; 1625 } 1626 } 1627 1628 ph = sctp_next_parm(ph, &remaining); 1629 } 1630 1631 if (fphead == NULL) { 1632 /* no addr parameters; restart OK */ 1633 return (1); 1634 } 1635 1636 /* 1637 * got at least one; make sure the header's addr is 1638 * in the list 1639 */ 1640 fp = sctp_lookup_faddr_nosctp(fphead, hdraddr); 1641 if (!fp) { 1642 /* not included; add it now */ 1643 fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep); 1644 if (!fp) { 1645 goto done; 1646 } 1647 bzero(fp, sizeof (*fp)); 1648 fp->faddr = *hdraddr; 1649 fp->next = fphead; 1650 fphead = fp; 1651 } 1652 1653 /* 1654 * Now, we can finally do the check: For each sctp instance 1655 * on the hash line for ports, compare its faddr set against 1656 * the new one. If the new one is a strict subset of any 1657 * existing sctp's faddrs, the restart is OK. However, if there 1658 * is an overlap, this could be an attack, so return failure. 1659 * If all sctp's faddrs are disjoint, this is a legitimate new 1660 * association. 1661 */ 1662 tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]); 1663 mutex_enter(&tf->tf_lock); 1664 1665 for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) { 1666 if (ports != sctp->sctp_ports) { 1667 continue; 1668 } 1669 compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs); 1670 if (compres <= SCTP_ADDR_SUBSET) { 1671 retval = 1; 1672 mutex_exit(&tf->tf_lock); 1673 goto done; 1674 } 1675 if (compres == SCTP_ADDR_OVERLAP) { 1676 dprint(1, 1677 ("new assoc from %x:%x:%x:%x overlaps with %p\n", 1678 SCTP_PRINTADDR(*hdraddr), (void *)sctp)); 1679 /* 1680 * While we still hold the lock, we need to 1681 * figure out which addresses have been 1682 * added so we can include them in the abort 1683 * we will send back. Since these faddrs will 1684 * never be used, we overload the rto field 1685 * here, setting it to 0 if the address was 1686 * not added, 1 if it was added. 1687 */ 1688 for (fp = fphead; fp; fp = fp->next) { 1689 if (sctp_lookup_faddr(sctp, &fp->faddr)) { 1690 fp->rto = 0; 1691 } else { 1692 fp->rto = 1; 1693 nadded++; 1694 } 1695 } 1696 mutex_exit(&tf->tf_lock); 1697 goto done; 1698 } 1699 } 1700 mutex_exit(&tf->tf_lock); 1701 1702 /* All faddrs are disjoint; legit new association */ 1703 retval = 1; 1704 1705 done: 1706 /* If are attempted adds, send back an abort listing the addrs */ 1707 if (nadded > 0) { 1708 void *dtail; 1709 size_t dlen; 1710 1711 dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP); 1712 if (dtail == NULL) { 1713 goto cleanup; 1714 } 1715 1716 ph = dtail; 1717 dlen = 0; 1718 for (fp = fphead; fp; fp = fp->next) { 1719 if (fp->rto == 0) { 1720 continue; 1721 } 1722 if (IN6_IS_ADDR_V4MAPPED(&fp->faddr)) { 1723 ipaddr_t addr4; 1724 1725 ph->sph_type = htons(PARM_ADDR4); 1726 ph->sph_len = htons(PARM_ADDR4_LEN); 1727 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4); 1728 ph++; 1729 bcopy(&addr4, ph, sizeof (addr4)); 1730 ph = (sctp_parm_hdr_t *) 1731 ((char *)ph + sizeof (addr4)); 1732 dlen += PARM_ADDR4_LEN; 1733 } else { 1734 ph->sph_type = htons(PARM_ADDR6); 1735 ph->sph_len = htons(PARM_ADDR6_LEN); 1736 ph++; 1737 bcopy(&fp->faddr, ph, sizeof (fp->faddr)); 1738 ph = (sctp_parm_hdr_t *) 1739 ((char *)ph + sizeof (fp->faddr)); 1740 dlen += PARM_ADDR6_LEN; 1741 } 1742 } 1743 1744 /* Send off the abort */ 1745 sctp_send_abort(sctp, sctp_init2vtag(ich), 1746 SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE); 1747 1748 kmem_free(dtail, PARM_ADDR6_LEN * nadded); 1749 } 1750 1751 cleanup: 1752 /* Clean up */ 1753 if (fphead) { 1754 sctp_faddr_t *fpn; 1755 for (fp = fphead; fp; fp = fpn) { 1756 fpn = fp->next; 1757 kmem_cache_free(sctp_kmem_faddr_cache, fp); 1758 } 1759 } 1760 1761 return (retval); 1762 } 1763 1764 void 1765 sctp_congest_reset(sctp_t *sctp) 1766 { 1767 sctp_faddr_t *fp; 1768 1769 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 1770 fp->ssthresh = sctp_initial_mtu; 1771 fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial; 1772 fp->suna = 0; 1773 fp->pba = 0; 1774 } 1775 } 1776 1777 static void 1778 sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr, 1779 mblk_t *timer_mp) 1780 { 1781 bcopy(addr, &fp->faddr, sizeof (*addr)); 1782 if (IN6_IS_ADDR_V4MAPPED(addr)) { 1783 fp->isv4 = 1; 1784 /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */ 1785 fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr_len) & 1786 ~(SCTP_ALIGN - 1); 1787 } else { 1788 fp->isv4 = 0; 1789 fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr6_len) & 1790 ~(SCTP_ALIGN - 1); 1791 } 1792 fp->cwnd = sctp_slow_start_initial * fp->sfa_pmss; 1793 fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max); 1794 fp->srtt = -1; 1795 fp->rtt_updates = 0; 1796 fp->strikes = 0; 1797 fp->max_retr = sctp->sctp_pp_max_rxt; 1798 /* Mark it as not confirmed. */ 1799 fp->state = SCTP_FADDRS_UNCONFIRMED; 1800 fp->hb_interval = sctp->sctp_hb_interval; 1801 fp->ssthresh = sctp_initial_ssthresh; 1802 fp->suna = 0; 1803 fp->pba = 0; 1804 fp->acked = 0; 1805 fp->lastactive = lbolt64; 1806 fp->timer_mp = timer_mp; 1807 fp->hb_pending = B_FALSE; 1808 fp->timer_running = 0; 1809 fp->df = 1; 1810 fp->pmtu_discovered = 0; 1811 fp->rc_timer_mp = NULL; 1812 fp->rc_timer_running = 0; 1813 fp->next = NULL; 1814 fp->ire = NULL; 1815 fp->T3expire = 0; 1816 (void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret, 1817 sizeof (fp->hb_secret)); 1818 fp->hb_expiry = lbolt64; 1819 1820 sctp_get_ire(sctp, fp); 1821 } 1822 1823 /*ARGSUSED*/ 1824 static void 1825 faddr_destructor(void *buf, void *cdrarg) 1826 { 1827 sctp_faddr_t *fp = buf; 1828 1829 ASSERT(fp->timer_mp == NULL); 1830 ASSERT(fp->timer_running == 0); 1831 1832 ASSERT(fp->rc_timer_mp == NULL); 1833 ASSERT(fp->rc_timer_running == 0); 1834 } 1835 1836 void 1837 sctp_faddr_init(void) 1838 { 1839 sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache", 1840 sizeof (sctp_faddr_t), 0, NULL, faddr_destructor, 1841 NULL, NULL, NULL, 0); 1842 } 1843 1844 void 1845 sctp_faddr_fini(void) 1846 { 1847 kmem_cache_destroy(sctp_kmem_faddr_cache); 1848 } 1849