1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/systm.h> 30 #include <sys/stream.h> 31 #include <sys/strsubr.h> 32 #include <sys/ddi.h> 33 #include <sys/sunddi.h> 34 #include <sys/kmem.h> 35 #include <sys/socket.h> 36 #include <sys/random.h> 37 #include <sys/tsol/tndb.h> 38 #include <sys/tsol/tnet.h> 39 40 #include <netinet/in.h> 41 #include <netinet/ip6.h> 42 #include <netinet/sctp.h> 43 44 #include <inet/common.h> 45 #include <inet/ip.h> 46 #include <inet/ip6.h> 47 #include <inet/ip_ire.h> 48 #include <inet/mib2.h> 49 #include <inet/nd.h> 50 #include <inet/optcom.h> 51 #include <inet/sctp_ip.h> 52 #include <inet/ipclassifier.h> 53 54 #include "sctp_impl.h" 55 #include "sctp_addr.h" 56 57 static struct kmem_cache *sctp_kmem_faddr_cache; 58 static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *); 59 60 /* Set the source address. Refer to comments in sctp_ire2faddr(). */ 61 static void 62 set_saddr(sctp_t *sctp, sctp_faddr_t *fp, boolean_t v6) 63 { 64 if (sctp->sctp_bound_to_all) { 65 V6_SET_ZERO(fp->saddr); 66 } else { 67 fp->saddr = sctp_get_valid_addr(sctp, v6); 68 if (!v6 && IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) || 69 v6 && IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 70 fp->state = SCTP_FADDRS_UNREACH; 71 /* Disable heartbeat. */ 72 fp->hb_expiry = 0; 73 fp->hb_pending = B_FALSE; 74 fp->strikes = 0; 75 } 76 } 77 } 78 79 /* 80 * Call this function to update the cached IRE of a peer addr fp. 81 */ 82 void 83 sctp_ire2faddr(sctp_t *sctp, sctp_faddr_t *fp) 84 { 85 ire_t *ire; 86 ipaddr_t addr4; 87 in6_addr_t laddr; 88 sctp_saddr_ipif_t *sp; 89 uint_t ipif_seqid; 90 int hdrlen; 91 ts_label_t *tsl; 92 93 /* Remove the previous cache IRE */ 94 if ((ire = fp->ire) != NULL) { 95 IRE_REFRELE_NOTR(ire); 96 fp->ire = NULL; 97 } 98 99 /* 100 * If this addr is not reachable, mark it as unconfirmed for now, the 101 * state will be changed back to unreachable later in this function 102 * if it is still the case. 103 */ 104 if (fp->state == SCTP_FADDRS_UNREACH) { 105 fp->state = SCTP_FADDRS_UNCONFIRMED; 106 } 107 108 tsl = crgetlabel(CONN_CRED(sctp->sctp_connp)); 109 110 if (fp->isv4) { 111 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4); 112 ire = ire_cache_lookup(addr4, sctp->sctp_zoneid, tsl); 113 if (ire != NULL) 114 IN6_IPADDR_TO_V4MAPPED(ire->ire_src_addr, &laddr); 115 } else { 116 ire = ire_cache_lookup_v6(&fp->faddr, sctp->sctp_zoneid, tsl); 117 if (ire != NULL) 118 laddr = ire->ire_src_addr_v6; 119 } 120 121 if (ire == NULL) { 122 dprint(3, ("ire2faddr: no ire for %x:%x:%x:%x\n", 123 SCTP_PRINTADDR(fp->faddr))); 124 /* 125 * It is tempting to just leave the src addr 126 * unspecified and let IP figure it out, but we 127 * *cannot* do this, since IP may choose a src addr 128 * that is not part of this association... unless 129 * this sctp has bound to all addrs. So if the ire 130 * lookup fails, try to find one in our src addr 131 * list, unless the sctp has bound to all addrs, in 132 * which case we change the src addr to unspec. 133 * 134 * Note that if this is a v6 endpoint but it does 135 * not have any v4 address at this point (e.g. may 136 * have been deleted), sctp_get_valid_addr() will 137 * return mapped INADDR_ANY. In this case, this 138 * address should be marked not reachable so that 139 * it won't be used to send data. 140 */ 141 set_saddr(sctp, fp, B_FALSE); 142 goto set_current; 143 } 144 145 ipif_seqid = ire->ire_ipif->ipif_seqid; 146 dprint(2, ("ire2faddr: got ire for %x:%x:%x:%x, ", 147 SCTP_PRINTADDR(fp->faddr))); 148 if (fp->isv4) { 149 dprint(2, ("src = %x\n", ire->ire_src_addr)); 150 } else { 151 dprint(2, ("src=%x:%x:%x:%x\n", 152 SCTP_PRINTADDR(ire->ire_src_addr_v6))); 153 } 154 155 /* make sure the laddr is part of this association */ 156 if ((sp = sctp_ipif_lookup(sctp, ipif_seqid)) != NULL && 157 !sp->saddr_ipif_dontsrc) { 158 if (sp->saddr_ipif_unconfirmed == 1) 159 sp->saddr_ipif_unconfirmed = 0; 160 fp->saddr = laddr; 161 } else { 162 dprint(2, ("ire2faddr: src addr is not part of assc\n")); 163 /* set the src to the first saddr and hope for the best */ 164 set_saddr(sctp, fp, B_TRUE); 165 } 166 167 /* Cache the IRE */ 168 IRE_REFHOLD_NOTR(ire); 169 fp->ire = ire; 170 if (fp->ire->ire_type == IRE_LOOPBACK && !sctp->sctp_loopback) 171 sctp->sctp_loopback = 1; 172 IRE_REFRELE(ire); 173 174 /* 175 * Pull out RTO information for this faddr and use it if we don't 176 * have any yet. 177 */ 178 if (fp->srtt == -1 && ire->ire_uinfo.iulp_rtt != 0) { 179 /* The cached value is in ms. */ 180 fp->srtt = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt); 181 fp->rttvar = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt_sd); 182 fp->rto = 3 * fp->srtt; 183 184 /* Bound the RTO by configured min and max values */ 185 if (fp->rto < sctp->sctp_rto_min) { 186 fp->rto = sctp->sctp_rto_min; 187 } 188 if (fp->rto > sctp->sctp_rto_max) { 189 fp->rto = sctp->sctp_rto_max; 190 } 191 } 192 193 /* 194 * Record the MTU for this faddr. If the MTU for this faddr has 195 * changed, check if the assc MTU will also change. 196 */ 197 if (fp->isv4) { 198 hdrlen = sctp->sctp_hdr_len; 199 } else { 200 hdrlen = sctp->sctp_hdr6_len; 201 } 202 if ((fp->sfa_pmss + hdrlen) != ire->ire_max_frag) { 203 /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */ 204 fp->sfa_pmss = (ire->ire_max_frag - hdrlen) & ~(SCTP_ALIGN - 1); 205 if (fp->cwnd < (fp->sfa_pmss * 2)) { 206 fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial; 207 } 208 } 209 210 set_current: 211 if (fp == sctp->sctp_current) { 212 sctp_faddr2hdraddr(fp, sctp); 213 sctp->sctp_mss = fp->sfa_pmss; 214 if (!SCTP_IS_DETACHED(sctp)) { 215 sctp_set_ulp_prop(sctp); 216 } 217 } 218 } 219 220 void 221 sctp_faddr2ire(sctp_t *sctp, sctp_faddr_t *fp) 222 { 223 ire_t *ire; 224 225 if ((ire = fp->ire) == NULL) { 226 return; 227 } 228 229 mutex_enter(&ire->ire_lock); 230 231 /* If the cached IRE is going away, there is no point to update it. */ 232 if (ire->ire_marks & IRE_MARK_CONDEMNED) { 233 mutex_exit(&ire->ire_lock); 234 IRE_REFRELE_NOTR(ire); 235 fp->ire = NULL; 236 return; 237 } 238 239 /* 240 * Only record the PMTU for this faddr if we actually have 241 * done discovery. This prevents initialized default from 242 * clobbering any real info that IP may have. 243 */ 244 if (fp->pmtu_discovered) { 245 if (fp->isv4) { 246 ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr_len; 247 } else { 248 ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr6_len; 249 } 250 } 251 252 if (sctp_rtt_updates != 0 && fp->rtt_updates >= sctp_rtt_updates) { 253 /* 254 * If there is no old cached values, initialize them 255 * conservatively. Set them to be (1.5 * new value). 256 * This code copied from ip_ire_advise(). The cached 257 * value is in ms. 258 */ 259 if (ire->ire_uinfo.iulp_rtt != 0) { 260 ire->ire_uinfo.iulp_rtt = (ire->ire_uinfo.iulp_rtt + 261 TICK_TO_MSEC(fp->srtt)) >> 1; 262 } else { 263 ire->ire_uinfo.iulp_rtt = TICK_TO_MSEC(fp->srtt + 264 (fp->srtt >> 1)); 265 } 266 if (ire->ire_uinfo.iulp_rtt_sd != 0) { 267 ire->ire_uinfo.iulp_rtt_sd = 268 (ire->ire_uinfo.iulp_rtt_sd + 269 TICK_TO_MSEC(fp->rttvar)) >> 1; 270 } else { 271 ire->ire_uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->rttvar + 272 (fp->rttvar >> 1)); 273 } 274 fp->rtt_updates = 0; 275 } 276 277 mutex_exit(&ire->ire_lock); 278 } 279 280 /* 281 * The sender must set the total length in the IP header. 282 * If sendto == NULL, the current will be used. 283 */ 284 mblk_t * 285 sctp_make_mp(sctp_t *sctp, sctp_faddr_t *sendto, int trailer) 286 { 287 mblk_t *mp; 288 size_t ipsctplen; 289 int isv4; 290 sctp_faddr_t *fp; 291 292 ASSERT(sctp->sctp_current != NULL || sendto != NULL); 293 if (sendto == NULL) { 294 fp = sctp->sctp_current; 295 } else { 296 fp = sendto; 297 } 298 isv4 = fp->isv4; 299 300 /* Try to look for another IRE again. */ 301 if (fp->ire == NULL) 302 sctp_ire2faddr(sctp, fp); 303 304 /* There is no suitable source address to use, return. */ 305 if (fp->state == SCTP_FADDRS_UNREACH) 306 return (NULL); 307 308 if (isv4) { 309 ipsctplen = sctp->sctp_hdr_len; 310 } else { 311 ipsctplen = sctp->sctp_hdr6_len; 312 } 313 314 mp = allocb_cred(ipsctplen + sctp_wroff_xtra + trailer, 315 CONN_CRED(sctp->sctp_connp)); 316 if (mp == NULL) { 317 ip1dbg(("sctp_make_mp: error making mp..\n")); 318 return (NULL); 319 } 320 mp->b_rptr += sctp_wroff_xtra; 321 mp->b_wptr = mp->b_rptr + ipsctplen; 322 323 ASSERT(OK_32PTR(mp->b_wptr)); 324 325 if (isv4) { 326 ipha_t *iph = (ipha_t *)mp->b_rptr; 327 328 bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen); 329 if (fp != sctp->sctp_current) { 330 /* fiddle with the dst addr */ 331 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst); 332 /* fix up src addr */ 333 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) { 334 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 335 iph->ipha_src); 336 } else if (sctp->sctp_bound_to_all) { 337 iph->ipha_src = INADDR_ANY; 338 } 339 } 340 /* set or clear the don't fragment bit */ 341 if (fp->df) { 342 iph->ipha_fragment_offset_and_flags = htons(IPH_DF); 343 } else { 344 iph->ipha_fragment_offset_and_flags = 0; 345 } 346 } else { 347 bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen); 348 if (fp != sctp->sctp_current) { 349 /* fiddle with the dst addr */ 350 ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr; 351 /* fix up src addr */ 352 if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 353 ((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr; 354 } else if (sctp->sctp_bound_to_all) { 355 bzero(&((ip6_t *)(mp->b_rptr))->ip6_src, 356 sizeof (in6_addr_t)); 357 } 358 } 359 } 360 ASSERT(sctp->sctp_connp != NULL); 361 362 /* 363 * IP will not free this IRE if it is condemned. SCTP needs to 364 * free it. 365 */ 366 if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) { 367 IRE_REFRELE_NOTR(fp->ire); 368 fp->ire = NULL; 369 } 370 /* Stash the conn and ire ptr info. for IP */ 371 SCTP_STASH_IPINFO(mp, fp->ire); 372 373 return (mp); 374 } 375 376 /* 377 * Notify upper layers about preferred write offset, write size. 378 */ 379 void 380 sctp_set_ulp_prop(sctp_t *sctp) 381 { 382 int hdrlen; 383 384 if (sctp->sctp_current->isv4) { 385 hdrlen = sctp->sctp_hdr_len; 386 } else { 387 hdrlen = sctp->sctp_hdr6_len; 388 } 389 ASSERT(sctp->sctp_ulpd); 390 391 ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss); 392 sctp->sctp_ulp_prop(sctp->sctp_ulpd, 393 sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 394 sctp->sctp_mss - sizeof (sctp_data_hdr_t)); 395 } 396 397 void 398 sctp_set_iplen(sctp_t *sctp, mblk_t *mp) 399 { 400 uint16_t sum = 0; 401 ipha_t *iph; 402 ip6_t *ip6h; 403 mblk_t *pmp = mp; 404 boolean_t isv4; 405 406 isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION); 407 for (; pmp; pmp = pmp->b_cont) 408 sum += pmp->b_wptr - pmp->b_rptr; 409 410 if (isv4) { 411 iph = (ipha_t *)mp->b_rptr; 412 iph->ipha_length = htons(sum); 413 } else { 414 ip6h = (ip6_t *)mp->b_rptr; 415 /* 416 * If an ip6i_t is present, the real IPv6 header 417 * immediately follows. 418 */ 419 if (ip6h->ip6_nxt == IPPROTO_RAW) 420 ip6h = (ip6_t *)&ip6h[1]; 421 ip6h->ip6_plen = htons(sum - ((char *)&sctp->sctp_ip6h[1] - 422 sctp->sctp_iphc6)); 423 } 424 } 425 426 int 427 sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2) 428 { 429 int na1 = 0; 430 int overlap = 0; 431 int equal = 1; 432 int onematch; 433 sctp_faddr_t *fp1, *fp2; 434 435 for (fp1 = a1; fp1; fp1 = fp1->next) { 436 onematch = 0; 437 for (fp2 = a2; fp2; fp2 = fp2->next) { 438 if (IN6_ARE_ADDR_EQUAL(&fp1->faddr, &fp2->faddr)) { 439 overlap++; 440 onematch = 1; 441 break; 442 } 443 if (!onematch) { 444 equal = 0; 445 } 446 } 447 na1++; 448 } 449 450 if (equal) { 451 return (SCTP_ADDR_EQUAL); 452 } 453 if (overlap == na1) { 454 return (SCTP_ADDR_SUBSET); 455 } 456 if (overlap) { 457 return (SCTP_ADDR_OVERLAP); 458 } 459 return (SCTP_ADDR_DISJOINT); 460 } 461 462 /* 463 * Caller must hold conn fanout lock. 464 */ 465 static int 466 sctp_add_faddr_entry(sctp_t *sctp, in6_addr_t *addr, int sleep, 467 boolean_t first) 468 { 469 sctp_faddr_t *faddr; 470 471 if (is_system_labeled()) { 472 ts_label_t *tsl; 473 tsol_tpc_t *rhtp; 474 int retv; 475 476 tsl = crgetlabel(CONN_CRED(sctp->sctp_connp)); 477 ASSERT(tsl != NULL); 478 479 /* find_tpc automatically does the right thing with IPv4 */ 480 rhtp = find_tpc(addr, IPV6_VERSION, B_FALSE); 481 if (rhtp == NULL) 482 return (EACCES); 483 484 retv = EACCES; 485 if (tsl->tsl_doi == rhtp->tpc_tp.tp_doi) { 486 switch (rhtp->tpc_tp.host_type) { 487 case UNLABELED: 488 /* 489 * Can talk to unlabeled hosts if any of the 490 * following are true: 491 * 1. zone's label matches the remote host's 492 * default label, 493 * 2. mac_exempt is on and the zone dominates 494 * the remote host's label, or 495 * 3. mac_exempt is on and the socket is from 496 * the global zone. 497 */ 498 if (blequal(&rhtp->tpc_tp.tp_def_label, 499 &tsl->tsl_label) || 500 (sctp->sctp_mac_exempt && 501 (sctp->sctp_zoneid == GLOBAL_ZONEID || 502 bldominates(&tsl->tsl_label, 503 &rhtp->tpc_tp.tp_def_label)))) 504 retv = 0; 505 break; 506 case SUN_CIPSO: 507 if (_blinrange(&tsl->tsl_label, 508 &rhtp->tpc_tp.tp_sl_range_cipso) || 509 blinlset(&tsl->tsl_label, 510 rhtp->tpc_tp.tp_sl_set_cipso)) 511 retv = 0; 512 break; 513 } 514 } 515 TPC_RELE(rhtp); 516 if (retv != 0) 517 return (retv); 518 } 519 520 if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) 521 return (ENOMEM); 522 523 sctp_init_faddr(sctp, faddr, addr); 524 ASSERT(faddr->next == NULL); 525 526 if (sctp->sctp_faddrs == NULL) { 527 ASSERT(sctp->sctp_lastfaddr == NULL); 528 /* only element on list; first and last are same */ 529 sctp->sctp_faddrs = sctp->sctp_lastfaddr = faddr; 530 } else if (first) { 531 ASSERT(sctp->sctp_lastfaddr != NULL); 532 faddr->next = sctp->sctp_faddrs; 533 sctp->sctp_faddrs = faddr; 534 } else { 535 sctp->sctp_lastfaddr->next = faddr; 536 sctp->sctp_lastfaddr = faddr; 537 } 538 sctp->sctp_nfaddrs++; 539 540 return (0); 541 } 542 543 /* 544 * Add new address to end of list. 545 * Returns 0 on success, or errno on failure: 546 * ENOMEM - allocation failure; only for sleep==KM_NOSLEEP 547 * EACCES - label is incompatible with caller or connection 548 * (labeled [trusted] solaris only) 549 * Caller must hold conn fanout lock. 550 */ 551 int 552 sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep) 553 { 554 dprint(4, ("add_faddr: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr), 555 sleep)); 556 557 return (sctp_add_faddr_entry(sctp, addr, sleep, B_FALSE)); 558 } 559 560 /* 561 * Same as sctp_add_faddr above, but put new entry at front of list. 562 * Caller must hold conn fanout lock. 563 */ 564 int 565 sctp_add_faddr_first(sctp_t *sctp, in6_addr_t *addr, int sleep) 566 { 567 dprint(4, ("add_faddr_first: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr), 568 sleep)); 569 570 return (sctp_add_faddr_entry(sctp, addr, sleep, B_TRUE)); 571 } 572 573 sctp_faddr_t * 574 sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr) 575 { 576 sctp_faddr_t *fp; 577 578 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 579 if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) 580 break; 581 } 582 583 return (fp); 584 } 585 586 sctp_faddr_t * 587 sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr) 588 { 589 for (; fp; fp = fp->next) { 590 if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) { 591 break; 592 } 593 } 594 595 return (fp); 596 } 597 598 void 599 sctp_faddr2hdraddr(sctp_faddr_t *fp, sctp_t *sctp) 600 { 601 if (fp->isv4) { 602 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, 603 sctp->sctp_ipha->ipha_dst); 604 /* Must not allow unspec src addr if not bound to all */ 605 if (IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) && 606 !sctp->sctp_bound_to_all) { 607 /* 608 * set the src to the first v4 saddr and hope 609 * for the best 610 */ 611 fp->saddr = sctp_get_valid_addr(sctp, B_FALSE); 612 } 613 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, sctp->sctp_ipha->ipha_src); 614 /* update don't fragment bit */ 615 if (fp->df) { 616 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 617 htons(IPH_DF); 618 } else { 619 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0; 620 } 621 } else { 622 sctp->sctp_ip6h->ip6_dst = fp->faddr; 623 /* Must not allow unspec src addr if not bound to all */ 624 if (IN6_IS_ADDR_UNSPECIFIED(&fp->saddr) && 625 !sctp->sctp_bound_to_all) { 626 /* 627 * set the src to the first v6 saddr and hope 628 * for the best 629 */ 630 fp->saddr = sctp_get_valid_addr(sctp, B_TRUE); 631 } 632 sctp->sctp_ip6h->ip6_src = fp->saddr; 633 } 634 } 635 636 void 637 sctp_redo_faddr_srcs(sctp_t *sctp) 638 { 639 sctp_faddr_t *fp; 640 641 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 642 sctp_ire2faddr(sctp, fp); 643 } 644 645 sctp_faddr2hdraddr(sctp->sctp_current, sctp); 646 } 647 648 void 649 sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp) 650 { 651 int64_t now = lbolt64; 652 653 fp->strikes = 0; 654 sctp->sctp_strikes = 0; 655 fp->lastactive = now; 656 fp->hb_expiry = now + SET_HB_INTVL(fp); 657 fp->hb_pending = B_FALSE; 658 if (fp->state != SCTP_FADDRS_ALIVE) { 659 fp->state = SCTP_FADDRS_ALIVE; 660 sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0); 661 662 /* If this is the primary, switch back to it now */ 663 if (fp == sctp->sctp_primary) { 664 sctp->sctp_current = fp; 665 sctp->sctp_mss = fp->sfa_pmss; 666 /* Reset the addrs in the composite header */ 667 sctp_faddr2hdraddr(fp, sctp); 668 if (!SCTP_IS_DETACHED(sctp)) { 669 sctp_set_ulp_prop(sctp); 670 } 671 } 672 } 673 if (fp->ire == NULL) { 674 /* Should have a full IRE now */ 675 sctp_ire2faddr(sctp, fp); 676 } 677 } 678 679 int 680 sctp_is_a_faddr_clean(sctp_t *sctp) 681 { 682 sctp_faddr_t *fp; 683 684 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 685 if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) { 686 return (1); 687 } 688 } 689 690 return (0); 691 } 692 693 /* 694 * Returns 0 if there is at leave one other active faddr, -1 if there 695 * are none. If there are none left, faddr_dead() will start killing the 696 * association. 697 * If the downed faddr was the current faddr, a new current faddr 698 * will be chosen. 699 */ 700 int 701 sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate) 702 { 703 sctp_faddr_t *ofp; 704 705 if (fp->state == SCTP_FADDRS_ALIVE) { 706 sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_UNREACHABLE, 0); 707 } 708 fp->state = newstate; 709 710 dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n", 711 SCTP_PRINTADDR(fp->faddr), newstate)); 712 713 if (fp == sctp->sctp_current) { 714 /* Current faddr down; need to switch it */ 715 sctp->sctp_current = NULL; 716 } 717 718 /* Find next alive faddr */ 719 ofp = fp; 720 for (fp = fp->next; fp; fp = fp->next) { 721 if (fp->state == SCTP_FADDRS_ALIVE) { 722 break; 723 } 724 } 725 726 if (fp == NULL) { 727 /* Continue from beginning of list */ 728 for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->next) { 729 if (fp->state == SCTP_FADDRS_ALIVE) { 730 break; 731 } 732 } 733 } 734 735 if (fp != ofp) { 736 if (sctp->sctp_current == NULL) { 737 dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n", 738 SCTP_PRINTADDR(fp->faddr))); 739 sctp->sctp_current = fp; 740 sctp->sctp_mss = fp->sfa_pmss; 741 742 /* Reset the addrs in the composite header */ 743 sctp_faddr2hdraddr(fp, sctp); 744 745 if (!SCTP_IS_DETACHED(sctp)) { 746 sctp_set_ulp_prop(sctp); 747 } 748 } 749 return (0); 750 } 751 752 753 /* All faddrs are down; kill the association */ 754 dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n")); 755 BUMP_MIB(&sctp_mib, sctpAborted); 756 sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ? 757 SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL); 758 sctp_clean_death(sctp, sctp->sctp_client_errno ? 759 sctp->sctp_client_errno : ETIMEDOUT); 760 761 return (-1); 762 } 763 764 sctp_faddr_t * 765 sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp) 766 { 767 sctp_faddr_t *nfp = NULL; 768 769 if (ofp == NULL) { 770 ofp = sctp->sctp_current; 771 } 772 773 /* Find the next live one */ 774 for (nfp = ofp->next; nfp != NULL; nfp = nfp->next) { 775 if (nfp->state == SCTP_FADDRS_ALIVE) { 776 break; 777 } 778 } 779 780 if (nfp == NULL) { 781 /* Continue from beginning of list */ 782 for (nfp = sctp->sctp_faddrs; nfp != ofp; nfp = nfp->next) { 783 if (nfp->state == SCTP_FADDRS_ALIVE) { 784 break; 785 } 786 } 787 } 788 789 /* 790 * nfp could only be NULL if all faddrs are down, and when 791 * this happens, faddr_dead() should have killed the 792 * association. Hence this assertion... 793 */ 794 ASSERT(nfp != NULL); 795 return (nfp); 796 } 797 798 void 799 sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp) 800 { 801 sctp_faddr_t *fpp; 802 803 if (!sctp->sctp_faddrs) { 804 return; 805 } 806 807 if (fp->timer_mp != NULL) { 808 sctp_timer_free(fp->timer_mp); 809 fp->timer_mp = NULL; 810 fp->timer_running = 0; 811 } 812 if (fp->rc_timer_mp != NULL) { 813 sctp_timer_free(fp->rc_timer_mp); 814 fp->rc_timer_mp = NULL; 815 fp->rc_timer_running = 0; 816 } 817 if (fp->ire != NULL) { 818 IRE_REFRELE_NOTR(fp->ire); 819 fp->ire = NULL; 820 } 821 822 if (fp == sctp->sctp_faddrs) { 823 goto gotit; 824 } 825 826 for (fpp = sctp->sctp_faddrs; fpp->next != fp; fpp = fpp->next) 827 ; 828 829 gotit: 830 ASSERT(sctp->sctp_conn_tfp != NULL); 831 mutex_enter(&sctp->sctp_conn_tfp->tf_lock); 832 if (fp == sctp->sctp_faddrs) { 833 sctp->sctp_faddrs = fp->next; 834 } else { 835 fpp->next = fp->next; 836 } 837 mutex_exit(&sctp->sctp_conn_tfp->tf_lock); 838 /* XXX faddr2ire? */ 839 kmem_cache_free(sctp_kmem_faddr_cache, fp); 840 sctp->sctp_nfaddrs--; 841 } 842 843 void 844 sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock) 845 { 846 sctp_faddr_t *fp, *fpn; 847 848 if (sctp->sctp_faddrs == NULL) { 849 ASSERT(sctp->sctp_lastfaddr == NULL); 850 return; 851 } 852 853 ASSERT(sctp->sctp_lastfaddr != NULL); 854 sctp->sctp_lastfaddr = NULL; 855 sctp->sctp_current = NULL; 856 sctp->sctp_primary = NULL; 857 858 sctp_free_faddr_timers(sctp); 859 860 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { 861 /* in conn fanout; need to hold lock */ 862 mutex_enter(&sctp->sctp_conn_tfp->tf_lock); 863 } 864 865 for (fp = sctp->sctp_faddrs; fp; fp = fpn) { 866 fpn = fp->next; 867 if (fp->ire != NULL) 868 IRE_REFRELE_NOTR(fp->ire); 869 kmem_cache_free(sctp_kmem_faddr_cache, fp); 870 sctp->sctp_nfaddrs--; 871 } 872 873 sctp->sctp_faddrs = NULL; 874 ASSERT(sctp->sctp_nfaddrs == 0); 875 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { 876 mutex_exit(&sctp->sctp_conn_tfp->tf_lock); 877 } 878 879 } 880 881 void 882 sctp_zap_addrs(sctp_t *sctp) 883 { 884 sctp_zap_faddrs(sctp, 0); 885 sctp_free_saddrs(sctp); 886 } 887 888 /* 889 * Initialize the IPv4 header. Loses any record of any IP options. 890 */ 891 int 892 sctp_header_init_ipv4(sctp_t *sctp, int sleep) 893 { 894 sctp_hdr_t *sctph; 895 896 /* 897 * This is a simple initialization. If there's 898 * already a template, it should never be too small, 899 * so reuse it. Otherwise, allocate space for the new one. 900 */ 901 if (sctp->sctp_iphc != NULL) { 902 ASSERT(sctp->sctp_iphc_len >= SCTP_MAX_COMBINED_HEADER_LENGTH); 903 bzero(sctp->sctp_iphc, sctp->sctp_iphc_len); 904 } else { 905 sctp->sctp_iphc_len = SCTP_MAX_COMBINED_HEADER_LENGTH; 906 sctp->sctp_iphc = kmem_zalloc(sctp->sctp_iphc_len, sleep); 907 if (sctp->sctp_iphc == NULL) { 908 sctp->sctp_iphc_len = 0; 909 return (ENOMEM); 910 } 911 } 912 913 sctp->sctp_ipha = (ipha_t *)sctp->sctp_iphc; 914 915 sctp->sctp_hdr_len = sizeof (ipha_t) + sizeof (sctp_hdr_t); 916 sctp->sctp_ip_hdr_len = sizeof (ipha_t); 917 sctp->sctp_ipha->ipha_length = htons(sizeof (ipha_t) + 918 sizeof (sctp_hdr_t)); 919 sctp->sctp_ipha->ipha_version_and_hdr_length 920 = (IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS; 921 922 /* 923 * These two fields should be zero, and are already set above. 924 * 925 * sctp->sctp_ipha->ipha_ident, 926 * sctp->sctp_ipha->ipha_fragment_offset_and_flags. 927 */ 928 929 sctp->sctp_ipha->ipha_ttl = sctp_ipv4_ttl; 930 sctp->sctp_ipha->ipha_protocol = IPPROTO_SCTP; 931 932 sctph = (sctp_hdr_t *)(sctp->sctp_iphc + sizeof (ipha_t)); 933 sctp->sctp_sctph = sctph; 934 935 return (0); 936 } 937 938 /* 939 * Update sctp_sticky_hdrs based on sctp_sticky_ipp. 940 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 941 * headers, and the maximum size sctp header (to avoid reallocation 942 * on the fly for additional sctp options). 943 * Returns failure if can't allocate memory. 944 */ 945 int 946 sctp_build_hdrs(sctp_t *sctp) 947 { 948 char *hdrs; 949 uint_t hdrs_len; 950 ip6i_t *ip6i; 951 char buf[SCTP_MAX_HDR_LENGTH]; 952 ip6_pkt_t *ipp = &sctp->sctp_sticky_ipp; 953 in6_addr_t src; 954 in6_addr_t dst; 955 956 /* 957 * save the existing sctp header and source/dest IP addresses 958 */ 959 bcopy(sctp->sctp_sctph6, buf, sizeof (sctp_hdr_t)); 960 src = sctp->sctp_ip6h->ip6_src; 961 dst = sctp->sctp_ip6h->ip6_dst; 962 hdrs_len = ip_total_hdrs_len_v6(ipp) + SCTP_MAX_HDR_LENGTH; 963 ASSERT(hdrs_len != 0); 964 if (hdrs_len > sctp->sctp_iphc6_len) { 965 /* Need to reallocate */ 966 hdrs = kmem_zalloc(hdrs_len, KM_NOSLEEP); 967 if (hdrs == NULL) 968 return (ENOMEM); 969 970 if (sctp->sctp_iphc6_len != 0) 971 kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len); 972 sctp->sctp_iphc6 = hdrs; 973 sctp->sctp_iphc6_len = hdrs_len; 974 } 975 ip_build_hdrs_v6((uchar_t *)sctp->sctp_iphc6, 976 hdrs_len - SCTP_MAX_HDR_LENGTH, ipp, IPPROTO_SCTP); 977 978 /* Set header fields not in ipp */ 979 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 980 ip6i = (ip6i_t *)sctp->sctp_iphc6; 981 sctp->sctp_ip6h = (ip6_t *)&ip6i[1]; 982 } else { 983 sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6; 984 } 985 /* 986 * sctp->sctp_ip_hdr_len will include ip6i_t if there is one. 987 */ 988 sctp->sctp_ip_hdr6_len = hdrs_len - SCTP_MAX_HDR_LENGTH; 989 sctp->sctp_sctph6 = (sctp_hdr_t *)(sctp->sctp_iphc6 + 990 sctp->sctp_ip_hdr6_len); 991 sctp->sctp_hdr6_len = sctp->sctp_ip_hdr6_len + sizeof (sctp_hdr_t); 992 993 bcopy(buf, sctp->sctp_sctph6, sizeof (sctp_hdr_t)); 994 995 sctp->sctp_ip6h->ip6_src = src; 996 sctp->sctp_ip6h->ip6_dst = dst; 997 /* 998 * If the hoplimit was not set by ip_build_hdrs_v6(), we need to 999 * set it to the default value for SCTP. 1000 */ 1001 if (!(ipp->ipp_fields & IPPF_UNICAST_HOPS)) 1002 sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit; 1003 /* 1004 * If we're setting extension headers after a connection 1005 * has been established, and if we have a routing header 1006 * among the extension headers, call ip_massage_options_v6 to 1007 * manipulate the routing header/ip6_dst set the checksum 1008 * difference in the sctp header template. 1009 * (This happens in sctp_connect_ipv6 if the routing header 1010 * is set prior to the connect.) 1011 */ 1012 1013 if ((sctp->sctp_state >= SCTPS_COOKIE_WAIT) && 1014 (sctp->sctp_sticky_ipp.ipp_fields & IPPF_RTHDR)) { 1015 ip6_rthdr_t *rth; 1016 1017 rth = ip_find_rthdr_v6(sctp->sctp_ip6h, 1018 (uint8_t *)sctp->sctp_sctph6); 1019 if (rth != NULL) 1020 (void) ip_massage_options_v6(sctp->sctp_ip6h, rth); 1021 } 1022 return (0); 1023 } 1024 1025 /* 1026 * Initialize the IPv6 header. Loses any record of any IPv6 extension headers. 1027 */ 1028 int 1029 sctp_header_init_ipv6(sctp_t *sctp, int sleep) 1030 { 1031 sctp_hdr_t *sctph; 1032 1033 /* 1034 * This is a simple initialization. If there's 1035 * already a template, it should never be too small, 1036 * so reuse it. Otherwise, allocate space for the new one. 1037 * Ensure that there is enough space to "downgrade" the sctp_t 1038 * to an IPv4 sctp_t. This requires having space for a full load 1039 * of IPv4 options 1040 */ 1041 if (sctp->sctp_iphc6 != NULL) { 1042 ASSERT(sctp->sctp_iphc6_len >= 1043 SCTP_MAX_COMBINED_HEADER_LENGTH); 1044 bzero(sctp->sctp_iphc6, sctp->sctp_iphc6_len); 1045 } else { 1046 sctp->sctp_iphc6_len = SCTP_MAX_COMBINED_HEADER_LENGTH; 1047 sctp->sctp_iphc6 = kmem_zalloc(sctp->sctp_iphc_len, sleep); 1048 if (sctp->sctp_iphc6 == NULL) { 1049 sctp->sctp_iphc6_len = 0; 1050 return (ENOMEM); 1051 } 1052 } 1053 sctp->sctp_hdr6_len = IPV6_HDR_LEN + sizeof (sctp_hdr_t); 1054 sctp->sctp_ip_hdr6_len = IPV6_HDR_LEN; 1055 sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6; 1056 1057 /* Initialize the header template */ 1058 1059 sctp->sctp_ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1060 sctp->sctp_ip6h->ip6_plen = ntohs(sizeof (sctp_hdr_t)); 1061 sctp->sctp_ip6h->ip6_nxt = IPPROTO_SCTP; 1062 sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit; 1063 1064 sctph = (sctp_hdr_t *)(sctp->sctp_iphc6 + IPV6_HDR_LEN); 1065 sctp->sctp_sctph6 = sctph; 1066 1067 return (0); 1068 } 1069 1070 static int 1071 sctp_v4_label(sctp_t *sctp) 1072 { 1073 uchar_t optbuf[IP_MAX_OPT_LENGTH]; 1074 const cred_t *cr = CONN_CRED(sctp->sctp_connp); 1075 int added; 1076 1077 if (tsol_compute_label(cr, sctp->sctp_ipha->ipha_dst, optbuf, 1078 sctp->sctp_mac_exempt) != 0) 1079 return (EACCES); 1080 1081 added = tsol_remove_secopt(sctp->sctp_ipha, sctp->sctp_hdr_len); 1082 if (added == -1) 1083 return (EACCES); 1084 sctp->sctp_hdr_len += added; 1085 sctp->sctp_sctph = (sctp_hdr_t *)((uchar_t *)sctp->sctp_sctph + added); 1086 sctp->sctp_ip_hdr_len += added; 1087 if ((sctp->sctp_v4label_len = optbuf[IPOPT_OLEN]) != 0) { 1088 sctp->sctp_v4label_len = (sctp->sctp_v4label_len + 3) & ~3; 1089 added = tsol_prepend_option(optbuf, sctp->sctp_ipha, 1090 sctp->sctp_hdr_len); 1091 if (added == -1) 1092 return (EACCES); 1093 sctp->sctp_hdr_len += added; 1094 sctp->sctp_sctph = (sctp_hdr_t *)((uchar_t *)sctp->sctp_sctph + 1095 added); 1096 sctp->sctp_ip_hdr_len += added; 1097 } 1098 return (0); 1099 } 1100 1101 static int 1102 sctp_v6_label(sctp_t *sctp) 1103 { 1104 uchar_t optbuf[TSOL_MAX_IPV6_OPTION]; 1105 const cred_t *cr = CONN_CRED(sctp->sctp_connp); 1106 1107 if (tsol_compute_label_v6(cr, &sctp->sctp_ip6h->ip6_dst, optbuf, 1108 sctp->sctp_mac_exempt) != 0) 1109 return (EACCES); 1110 if (tsol_update_sticky(&sctp->sctp_sticky_ipp, &sctp->sctp_v6label_len, 1111 optbuf) != 0) 1112 return (EACCES); 1113 if (sctp_build_hdrs(sctp) != 0) 1114 return (EACCES); 1115 return (0); 1116 } 1117 1118 /* 1119 * XXX implement more sophisticated logic 1120 */ 1121 /* ARGSUSED */ 1122 int 1123 sctp_set_hdraddrs(sctp_t *sctp, cred_t *cr) 1124 { 1125 sctp_faddr_t *fp; 1126 int gotv4 = 0; 1127 int gotv6 = 0; 1128 1129 ASSERT(sctp->sctp_faddrs != NULL); 1130 ASSERT(sctp->sctp_nsaddrs > 0); 1131 1132 /* Set up using the primary first */ 1133 if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->faddr)) { 1134 IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->faddr, 1135 sctp->sctp_ipha->ipha_dst); 1136 /* saddr may be unspec; make_mp() will handle this */ 1137 IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->saddr, 1138 sctp->sctp_ipha->ipha_src); 1139 if (!is_system_labeled() || sctp_v4_label(sctp) == 0) { 1140 gotv4 = 1; 1141 if (sctp->sctp_ipversion == IPV4_VERSION) { 1142 goto copyports; 1143 } 1144 } 1145 } else { 1146 sctp->sctp_ip6h->ip6_dst = sctp->sctp_primary->faddr; 1147 /* saddr may be unspec; make_mp() will handle this */ 1148 sctp->sctp_ip6h->ip6_src = sctp->sctp_primary->saddr; 1149 if (!is_system_labeled() || sctp_v6_label(sctp) == 0) 1150 gotv6 = 1; 1151 } 1152 1153 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 1154 if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->faddr)) { 1155 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, 1156 sctp->sctp_ipha->ipha_dst); 1157 /* copy in the faddr_t's saddr */ 1158 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 1159 sctp->sctp_ipha->ipha_src); 1160 if (!is_system_labeled() || sctp_v4_label(sctp) == 0) { 1161 gotv4 = 1; 1162 if (sctp->sctp_ipversion == IPV4_VERSION || 1163 gotv6) { 1164 break; 1165 } 1166 } 1167 } else if (!gotv6) { 1168 sctp->sctp_ip6h->ip6_dst = fp->faddr; 1169 /* copy in the faddr_t's saddr */ 1170 sctp->sctp_ip6h->ip6_src = fp->saddr; 1171 if (!is_system_labeled() || sctp_v6_label(sctp) == 0) { 1172 gotv6 = 1; 1173 if (gotv4) 1174 break; 1175 } 1176 } 1177 } 1178 1179 copyports: 1180 if (!gotv4 && !gotv6) 1181 return (EACCES); 1182 1183 /* copy in the ports for good measure */ 1184 sctp->sctp_sctph->sh_sport = sctp->sctp_lport; 1185 sctp->sctp_sctph->sh_dport = sctp->sctp_fport; 1186 1187 sctp->sctp_sctph6->sh_sport = sctp->sctp_lport; 1188 sctp->sctp_sctph6->sh_dport = sctp->sctp_fport; 1189 return (0); 1190 } 1191 1192 void 1193 sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp) 1194 { 1195 mblk_t *mp; 1196 sctp_parm_hdr_t *ph; 1197 size_t len; 1198 int pad; 1199 1200 len = sizeof (*ph) + ntohs(uph->sph_len); 1201 if ((pad = len % 4) != 0) { 1202 pad = 4 - pad; 1203 len += pad; 1204 } 1205 mp = allocb(len, BPRI_MED); 1206 if (mp == NULL) { 1207 return; 1208 } 1209 1210 ph = (sctp_parm_hdr_t *)(mp->b_rptr); 1211 ph->sph_type = htons(PARM_UNRECOGNIZED); 1212 ph->sph_len = htons(len - pad); 1213 1214 /* copy in the unrecognized parameter */ 1215 bcopy(uph, ph + 1, ntohs(uph->sph_len)); 1216 1217 mp->b_wptr = mp->b_rptr + len; 1218 if (*errmp != NULL) { 1219 linkb(*errmp, mp); 1220 } else { 1221 *errmp = mp; 1222 } 1223 } 1224 1225 /* 1226 * o Bounds checking 1227 * o Updates remaining 1228 * o Checks alignment 1229 */ 1230 sctp_parm_hdr_t * 1231 sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining) 1232 { 1233 int pad; 1234 uint16_t len; 1235 1236 len = ntohs(current->sph_len); 1237 *remaining -= len; 1238 if (*remaining < sizeof (*current) || len < sizeof (*current)) { 1239 return (NULL); 1240 } 1241 if ((pad = len & (SCTP_ALIGN - 1)) != 0) { 1242 pad = SCTP_ALIGN - pad; 1243 *remaining -= pad; 1244 } 1245 /*LINTED pointer cast may result in improper alignment*/ 1246 current = (sctp_parm_hdr_t *)((char *)current + len + pad); 1247 return (current); 1248 } 1249 1250 /* 1251 * Sets the address parameters given in the INIT chunk into sctp's 1252 * faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are 1253 * no address parameters in the INIT chunk, a single faddr is created 1254 * from the ip hdr at the beginning of pkt. 1255 * If there already are existing addresses hanging from sctp, merge 1256 * them in, if the old info contains addresses which are not present 1257 * in this new info, get rid of them, and clean the pointers if there's 1258 * messages which have this as their target address. 1259 * 1260 * We also re-adjust the source address list here since the list may 1261 * contain more than what is actually part of the association. If 1262 * we get here from sctp_send_cookie_echo(), we are on the active 1263 * side and psctp will be NULL and ich will be the INIT-ACK chunk. 1264 * If we get here from sctp_accept_comm(), ich will be the INIT chunk 1265 * and psctp will the listening endpoint. 1266 * 1267 * INIT processing: When processing the INIT we inherit the src address 1268 * list from the listener. For a loopback or linklocal association, we 1269 * delete the list and just take the address from the IP header (since 1270 * that's how we created the INIT-ACK). Additionally, for loopback we 1271 * ignore the address params in the INIT. For determining which address 1272 * types were sent in the INIT-ACK we follow the same logic as in 1273 * creating the INIT-ACK. We delete addresses of the type that are not 1274 * supported by the peer. 1275 * 1276 * INIT-ACK processing: When processing the INIT-ACK since we had not 1277 * included addr params for loopback or linklocal addresses when creating 1278 * the INIT, we just use the address from the IP header. Further, for 1279 * loopback we ignore the addr param list. We mark addresses of the 1280 * type not supported by the peer as unconfirmed. 1281 * 1282 * In case of INIT processing we look for supported address types in the 1283 * supported address param, if present. In both cases the address type in 1284 * the IP header is supported as well as types for addresses in the param 1285 * list, if any. 1286 * 1287 * Once we have the supported address types sctp_check_saddr() runs through 1288 * the source address list and deletes or marks as unconfirmed address of 1289 * types not supported by the peer. 1290 * 1291 * Returns 0 on success, sys errno on failure 1292 */ 1293 int 1294 sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt, 1295 sctp_chunk_hdr_t *ich, uint_t *sctp_options) 1296 { 1297 sctp_init_chunk_t *init; 1298 ipha_t *iph; 1299 ip6_t *ip6h; 1300 in6_addr_t hdrsaddr[1]; 1301 in6_addr_t hdrdaddr[1]; 1302 sctp_parm_hdr_t *ph; 1303 ssize_t remaining; 1304 int isv4; 1305 int err; 1306 sctp_faddr_t *fp; 1307 int supp_af = 0; 1308 boolean_t check_saddr = B_TRUE; 1309 in6_addr_t curaddr; 1310 1311 if (sctp_options != NULL) 1312 *sctp_options = 0; 1313 1314 /* extract the address from the IP header */ 1315 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); 1316 if (isv4) { 1317 iph = (ipha_t *)pkt->b_rptr; 1318 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr); 1319 IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr); 1320 supp_af |= PARM_SUPP_V4; 1321 } else { 1322 ip6h = (ip6_t *)pkt->b_rptr; 1323 hdrsaddr[0] = ip6h->ip6_src; 1324 hdrdaddr[0] = ip6h->ip6_dst; 1325 supp_af |= PARM_SUPP_V6; 1326 } 1327 1328 /* 1329 * Unfortunately, we can't delay this because adding an faddr 1330 * looks for the presence of the source address (from the ire 1331 * for the faddr) in the source address list. We could have 1332 * delayed this if, say, this was a loopback/linklocal connection. 1333 * Now, we just end up nuking this list and taking the addr from 1334 * the IP header for loopback/linklocal. 1335 */ 1336 if (psctp != NULL && psctp->sctp_nsaddrs > 0) { 1337 ASSERT(sctp->sctp_nsaddrs == 0); 1338 1339 err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP); 1340 if (err != 0) 1341 return (err); 1342 } 1343 /* 1344 * We will add the faddr before parsing the address list as this 1345 * might be a loopback connection and we would not have to 1346 * go through the list. 1347 * 1348 * Make sure the header's addr is in the list 1349 */ 1350 fp = sctp_lookup_faddr(sctp, hdrsaddr); 1351 if (fp == NULL) { 1352 /* not included; add it now */ 1353 err = sctp_add_faddr_first(sctp, hdrsaddr, KM_NOSLEEP); 1354 if (err != 0) 1355 return (err); 1356 1357 /* sctp_faddrs will be the hdr addr */ 1358 fp = sctp->sctp_faddrs; 1359 } 1360 /* make the header addr the primary */ 1361 1362 if (cl_sctp_assoc_change != NULL && psctp == NULL) 1363 curaddr = sctp->sctp_current->faddr; 1364 1365 sctp->sctp_primary = fp; 1366 sctp->sctp_current = fp; 1367 sctp->sctp_mss = fp->sfa_pmss; 1368 1369 /* For loopback connections & linklocal get address from the header */ 1370 if (sctp->sctp_loopback || sctp->sctp_linklocal) { 1371 if (sctp->sctp_nsaddrs != 0) 1372 sctp_free_saddrs(sctp); 1373 if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0) 1374 return (err); 1375 /* For loopback ignore address list */ 1376 if (sctp->sctp_loopback) 1377 return (0); 1378 check_saddr = B_FALSE; 1379 } 1380 1381 /* Walk the params in the INIT [ACK], pulling out addr params */ 1382 remaining = ntohs(ich->sch_len) - sizeof (*ich) - 1383 sizeof (sctp_init_chunk_t); 1384 if (remaining < sizeof (*ph)) { 1385 if (check_saddr) { 1386 sctp_check_saddr(sctp, supp_af, psctp == NULL ? 1387 B_FALSE : B_TRUE); 1388 } 1389 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); 1390 return (0); 1391 } 1392 1393 init = (sctp_init_chunk_t *)(ich + 1); 1394 ph = (sctp_parm_hdr_t *)(init + 1); 1395 1396 /* params will have already been byteordered when validating */ 1397 while (ph != NULL) { 1398 if (ph->sph_type == htons(PARM_SUPP_ADDRS)) { 1399 int plen; 1400 uint16_t *p; 1401 uint16_t addrtype; 1402 1403 ASSERT(psctp != NULL); 1404 plen = ntohs(ph->sph_len); 1405 p = (uint16_t *)(ph + 1); 1406 while (plen > 0) { 1407 addrtype = ntohs(*p); 1408 switch (addrtype) { 1409 case PARM_ADDR6: 1410 supp_af |= PARM_SUPP_V6; 1411 break; 1412 case PARM_ADDR4: 1413 supp_af |= PARM_SUPP_V4; 1414 break; 1415 default: 1416 break; 1417 } 1418 p++; 1419 plen -= sizeof (*p); 1420 } 1421 } else if (ph->sph_type == htons(PARM_ADDR4)) { 1422 if (remaining >= PARM_ADDR4_LEN) { 1423 in6_addr_t addr; 1424 ipaddr_t ta; 1425 1426 supp_af |= PARM_SUPP_V4; 1427 /* 1428 * Screen out broad/multicasts & loopback. 1429 * If the endpoint only accepts v6 address, 1430 * go to the next one. 1431 */ 1432 bcopy(ph + 1, &ta, sizeof (ta)); 1433 if (ta == 0 || 1434 ta == INADDR_BROADCAST || 1435 ta == htonl(INADDR_LOOPBACK) || 1436 IN_MULTICAST(ta) || 1437 sctp->sctp_connp->conn_ipv6_v6only) { 1438 goto next; 1439 } 1440 /* 1441 * XXX also need to check for subnet 1442 * broadcasts. This should probably 1443 * wait until we have full access 1444 * to the ILL tables. 1445 */ 1446 1447 IN6_INADDR_TO_V4MAPPED((struct in_addr *) 1448 (ph + 1), &addr); 1449 /* Check for duplicate. */ 1450 if (sctp_lookup_faddr(sctp, &addr) != NULL) 1451 goto next; 1452 1453 /* OK, add it to the faddr set */ 1454 err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP); 1455 if (err != 0) 1456 return (err); 1457 } 1458 } else if (ph->sph_type == htons(PARM_ADDR6) && 1459 sctp->sctp_family == AF_INET6) { 1460 /* An v4 socket should not take v6 addresses. */ 1461 if (remaining >= PARM_ADDR6_LEN) { 1462 in6_addr_t *addr6; 1463 1464 supp_af |= PARM_SUPP_V6; 1465 addr6 = (in6_addr_t *)(ph + 1); 1466 /* 1467 * Screen out link locals, mcast, loopback 1468 * and bogus v6 address. 1469 */ 1470 if (IN6_IS_ADDR_LINKLOCAL(addr6) || 1471 IN6_IS_ADDR_MULTICAST(addr6) || 1472 IN6_IS_ADDR_LOOPBACK(addr6) || 1473 IN6_IS_ADDR_V4MAPPED(addr6)) { 1474 goto next; 1475 } 1476 /* Check for duplicate. */ 1477 if (sctp_lookup_faddr(sctp, addr6) != NULL) 1478 goto next; 1479 1480 err = sctp_add_faddr(sctp, 1481 (in6_addr_t *)(ph + 1), KM_NOSLEEP); 1482 if (err != 0) 1483 return (err); 1484 } 1485 } else if (ph->sph_type == htons(PARM_FORWARD_TSN)) { 1486 if (sctp_options != NULL) 1487 *sctp_options |= SCTP_PRSCTP_OPTION; 1488 } /* else; skip */ 1489 1490 next: 1491 ph = sctp_next_parm(ph, &remaining); 1492 } 1493 if (check_saddr) { 1494 sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE : 1495 B_TRUE); 1496 } 1497 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); 1498 /* 1499 * We have the right address list now, update clustering's 1500 * knowledge because when we sent the INIT we had just added 1501 * the address the INIT was sent to. 1502 */ 1503 if (psctp == NULL && cl_sctp_assoc_change != NULL) { 1504 uchar_t *alist; 1505 size_t asize; 1506 uchar_t *dlist; 1507 size_t dsize; 1508 1509 asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 1510 alist = kmem_alloc(asize, KM_NOSLEEP); 1511 if (alist == NULL) 1512 return (ENOMEM); 1513 /* 1514 * Just include the address the INIT was sent to in the 1515 * delete list and send the entire faddr list. We could 1516 * do it differently (i.e include all the addresses in the 1517 * add list even if it contains the original address OR 1518 * remove the original address from the add list etc.), but 1519 * this seems reasonable enough. 1520 */ 1521 dsize = sizeof (in6_addr_t); 1522 dlist = kmem_alloc(dsize, KM_NOSLEEP); 1523 if (dlist == NULL) { 1524 kmem_free(alist, asize); 1525 return (ENOMEM); 1526 } 1527 bcopy(&curaddr, dlist, sizeof (curaddr)); 1528 sctp_get_faddr_list(sctp, alist, asize); 1529 (*cl_sctp_assoc_change)(sctp->sctp_family, alist, asize, 1530 sctp->sctp_nfaddrs, dlist, dsize, 1, SCTP_CL_PADDR, 1531 (cl_sctp_handle_t)sctp); 1532 /* alist and dlist will be freed by the clustering module */ 1533 } 1534 return (0); 1535 } 1536 1537 /* 1538 * Returns 0 if the check failed and the restart should be refused, 1539 * 1 if the check succeeded. 1540 */ 1541 int 1542 sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports, 1543 int sleep) 1544 { 1545 sctp_faddr_t *fp, *fpa, *fphead = NULL; 1546 sctp_parm_hdr_t *ph; 1547 ssize_t remaining; 1548 int isv4; 1549 ipha_t *iph; 1550 ip6_t *ip6h; 1551 in6_addr_t hdraddr[1]; 1552 int retval = 0; 1553 sctp_tf_t *tf; 1554 sctp_t *sctp; 1555 int compres; 1556 sctp_init_chunk_t *init; 1557 int nadded = 0; 1558 1559 /* extract the address from the IP header */ 1560 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); 1561 if (isv4) { 1562 iph = (ipha_t *)pkt->b_rptr; 1563 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr); 1564 } else { 1565 ip6h = (ip6_t *)pkt->b_rptr; 1566 hdraddr[0] = ip6h->ip6_src; 1567 } 1568 1569 /* Walk the params in the INIT [ACK], pulling out addr params */ 1570 remaining = ntohs(ich->sch_len) - sizeof (*ich) - 1571 sizeof (sctp_init_chunk_t); 1572 if (remaining < sizeof (*ph)) { 1573 /* no parameters; restart OK */ 1574 return (1); 1575 } 1576 init = (sctp_init_chunk_t *)(ich + 1); 1577 ph = (sctp_parm_hdr_t *)(init + 1); 1578 1579 while (ph != NULL) { 1580 /* params will have already been byteordered when validating */ 1581 if (ph->sph_type == htons(PARM_ADDR4)) { 1582 if (remaining >= PARM_ADDR4_LEN) { 1583 in6_addr_t addr; 1584 IN6_INADDR_TO_V4MAPPED((struct in_addr *) 1585 (ph + 1), &addr); 1586 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, 1587 sleep); 1588 if (!fpa) { 1589 goto done; 1590 } 1591 bzero(fpa, sizeof (*fpa)); 1592 fpa->faddr = addr; 1593 fpa->next = NULL; 1594 } 1595 } else if (ph->sph_type == htons(PARM_ADDR6)) { 1596 if (remaining >= PARM_ADDR6_LEN) { 1597 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, 1598 sleep); 1599 if (!fpa) { 1600 goto done; 1601 } 1602 bzero(fpa, sizeof (*fpa)); 1603 bcopy(ph + 1, &fpa->faddr, 1604 sizeof (fpa->faddr)); 1605 fpa->next = NULL; 1606 } 1607 } else { 1608 /* else not addr param; skip */ 1609 fpa = NULL; 1610 } 1611 /* link in the new addr, if it was an addr param */ 1612 if (fpa) { 1613 if (!fphead) { 1614 fphead = fpa; 1615 fp = fphead; 1616 } else { 1617 fp->next = fpa; 1618 fp = fpa; 1619 } 1620 } 1621 1622 ph = sctp_next_parm(ph, &remaining); 1623 } 1624 1625 if (fphead == NULL) { 1626 /* no addr parameters; restart OK */ 1627 return (1); 1628 } 1629 1630 /* 1631 * got at least one; make sure the header's addr is 1632 * in the list 1633 */ 1634 fp = sctp_lookup_faddr_nosctp(fphead, hdraddr); 1635 if (!fp) { 1636 /* not included; add it now */ 1637 fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep); 1638 if (!fp) { 1639 goto done; 1640 } 1641 bzero(fp, sizeof (*fp)); 1642 fp->faddr = *hdraddr; 1643 fp->next = fphead; 1644 fphead = fp; 1645 } 1646 1647 /* 1648 * Now, we can finally do the check: For each sctp instance 1649 * on the hash line for ports, compare its faddr set against 1650 * the new one. If the new one is a strict subset of any 1651 * existing sctp's faddrs, the restart is OK. However, if there 1652 * is an overlap, this could be an attack, so return failure. 1653 * If all sctp's faddrs are disjoint, this is a legitimate new 1654 * association. 1655 */ 1656 tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]); 1657 mutex_enter(&tf->tf_lock); 1658 1659 for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) { 1660 if (ports != sctp->sctp_ports) { 1661 continue; 1662 } 1663 compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs); 1664 if (compres <= SCTP_ADDR_SUBSET) { 1665 retval = 1; 1666 mutex_exit(&tf->tf_lock); 1667 goto done; 1668 } 1669 if (compres == SCTP_ADDR_OVERLAP) { 1670 dprint(1, 1671 ("new assoc from %x:%x:%x:%x overlaps with %p\n", 1672 SCTP_PRINTADDR(*hdraddr), (void *)sctp)); 1673 /* 1674 * While we still hold the lock, we need to 1675 * figure out which addresses have been 1676 * added so we can include them in the abort 1677 * we will send back. Since these faddrs will 1678 * never be used, we overload the rto field 1679 * here, setting it to 0 if the address was 1680 * not added, 1 if it was added. 1681 */ 1682 for (fp = fphead; fp; fp = fp->next) { 1683 if (sctp_lookup_faddr(sctp, &fp->faddr)) { 1684 fp->rto = 0; 1685 } else { 1686 fp->rto = 1; 1687 nadded++; 1688 } 1689 } 1690 mutex_exit(&tf->tf_lock); 1691 goto done; 1692 } 1693 } 1694 mutex_exit(&tf->tf_lock); 1695 1696 /* All faddrs are disjoint; legit new association */ 1697 retval = 1; 1698 1699 done: 1700 /* If are attempted adds, send back an abort listing the addrs */ 1701 if (nadded > 0) { 1702 void *dtail; 1703 size_t dlen; 1704 1705 dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP); 1706 if (dtail == NULL) { 1707 goto cleanup; 1708 } 1709 1710 ph = dtail; 1711 dlen = 0; 1712 for (fp = fphead; fp; fp = fp->next) { 1713 if (fp->rto == 0) { 1714 continue; 1715 } 1716 if (IN6_IS_ADDR_V4MAPPED(&fp->faddr)) { 1717 ipaddr_t addr4; 1718 1719 ph->sph_type = htons(PARM_ADDR4); 1720 ph->sph_len = htons(PARM_ADDR4_LEN); 1721 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4); 1722 ph++; 1723 bcopy(&addr4, ph, sizeof (addr4)); 1724 ph = (sctp_parm_hdr_t *) 1725 ((char *)ph + sizeof (addr4)); 1726 dlen += PARM_ADDR4_LEN; 1727 } else { 1728 ph->sph_type = htons(PARM_ADDR6); 1729 ph->sph_len = htons(PARM_ADDR6_LEN); 1730 ph++; 1731 bcopy(&fp->faddr, ph, sizeof (fp->faddr)); 1732 ph = (sctp_parm_hdr_t *) 1733 ((char *)ph + sizeof (fp->faddr)); 1734 dlen += PARM_ADDR6_LEN; 1735 } 1736 } 1737 1738 /* Send off the abort */ 1739 sctp_send_abort(sctp, sctp_init2vtag(ich), 1740 SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE); 1741 1742 kmem_free(dtail, PARM_ADDR6_LEN * nadded); 1743 } 1744 1745 cleanup: 1746 /* Clean up */ 1747 if (fphead) { 1748 sctp_faddr_t *fpn; 1749 for (fp = fphead; fp; fp = fpn) { 1750 fpn = fp->next; 1751 kmem_cache_free(sctp_kmem_faddr_cache, fp); 1752 } 1753 } 1754 1755 return (retval); 1756 } 1757 1758 void 1759 sctp_congest_reset(sctp_t *sctp) 1760 { 1761 sctp_faddr_t *fp; 1762 1763 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 1764 fp->ssthresh = sctp_initial_mtu; 1765 fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial; 1766 fp->suna = 0; 1767 fp->pba = 0; 1768 } 1769 } 1770 1771 static void 1772 sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr) 1773 { 1774 bcopy(addr, &fp->faddr, sizeof (*addr)); 1775 if (IN6_IS_ADDR_V4MAPPED(addr)) { 1776 fp->isv4 = 1; 1777 /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */ 1778 fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr_len) & 1779 ~(SCTP_ALIGN - 1); 1780 } else { 1781 fp->isv4 = 0; 1782 fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr6_len) & 1783 ~(SCTP_ALIGN - 1); 1784 } 1785 fp->cwnd = sctp_slow_start_initial * fp->sfa_pmss; 1786 fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max); 1787 fp->srtt = -1; 1788 fp->rtt_updates = 0; 1789 fp->strikes = 0; 1790 fp->max_retr = sctp->sctp_pp_max_rxt; 1791 /* Mark it as not confirmed. */ 1792 fp->state = SCTP_FADDRS_UNCONFIRMED; 1793 fp->hb_interval = sctp->sctp_hb_interval; 1794 fp->ssthresh = sctp_initial_ssthresh; 1795 fp->suna = 0; 1796 fp->pba = 0; 1797 fp->acked = 0; 1798 fp->lastactive = lbolt64; 1799 fp->timer_mp = NULL; 1800 fp->hb_pending = B_FALSE; 1801 fp->timer_running = 0; 1802 fp->df = 1; 1803 fp->pmtu_discovered = 0; 1804 fp->rc_timer_mp = NULL; 1805 fp->rc_timer_running = 0; 1806 fp->next = NULL; 1807 fp->ire = NULL; 1808 fp->T3expire = 0; 1809 (void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret, 1810 sizeof (fp->hb_secret)); 1811 fp->hb_expiry = lbolt64; 1812 1813 sctp_ire2faddr(sctp, fp); 1814 } 1815 1816 /*ARGSUSED*/ 1817 static void 1818 faddr_destructor(void *buf, void *cdrarg) 1819 { 1820 sctp_faddr_t *fp = buf; 1821 1822 ASSERT(fp->timer_mp == NULL); 1823 ASSERT(fp->timer_running == 0); 1824 1825 ASSERT(fp->rc_timer_mp == NULL); 1826 ASSERT(fp->rc_timer_running == 0); 1827 } 1828 1829 void 1830 sctp_faddr_init(void) 1831 { 1832 sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache", 1833 sizeof (sctp_faddr_t), 0, NULL, faddr_destructor, 1834 NULL, NULL, NULL, 0); 1835 } 1836 1837 void 1838 sctp_faddr_fini(void) 1839 { 1840 kmem_cache_destroy(sctp_kmem_faddr_cache); 1841 } 1842