1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/systm.h> 30 #include <sys/stream.h> 31 #include <sys/strsubr.h> 32 #include <sys/ddi.h> 33 #include <sys/sunddi.h> 34 #include <sys/kmem.h> 35 #include <sys/socket.h> 36 #include <sys/random.h> 37 #include <sys/tsol/tndb.h> 38 #include <sys/tsol/tnet.h> 39 40 #include <netinet/in.h> 41 #include <netinet/ip6.h> 42 #include <netinet/sctp.h> 43 44 #include <inet/common.h> 45 #include <inet/ip.h> 46 #include <inet/ip6.h> 47 #include <inet/ip_ire.h> 48 #include <inet/mib2.h> 49 #include <inet/nd.h> 50 #include <inet/optcom.h> 51 #include <inet/sctp_ip.h> 52 #include <inet/ipclassifier.h> 53 54 #include "sctp_impl.h" 55 #include "sctp_addr.h" 56 57 static struct kmem_cache *sctp_kmem_faddr_cache; 58 static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *); 59 60 /* Set the source address. Refer to comments in sctp_ire2faddr(). */ 61 static void 62 set_saddr(sctp_t *sctp, sctp_faddr_t *fp) 63 { 64 boolean_t v6 = !fp->isv4; 65 66 if (sctp->sctp_bound_to_all) { 67 V6_SET_ZERO(fp->saddr); 68 } else { 69 fp->saddr = sctp_get_valid_addr(sctp, v6); 70 if (!v6 && IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) || 71 v6 && IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 72 fp->state = SCTP_FADDRS_UNREACH; 73 /* Disable heartbeat. */ 74 fp->hb_expiry = 0; 75 fp->hb_pending = B_FALSE; 76 fp->strikes = 0; 77 } 78 } 79 } 80 81 /* 82 * Call this function to update the cached IRE of a peer addr fp. 83 */ 84 void 85 sctp_ire2faddr(sctp_t *sctp, sctp_faddr_t *fp) 86 { 87 ire_t *ire; 88 ipaddr_t addr4; 89 in6_addr_t laddr; 90 sctp_saddr_ipif_t *sp; 91 uint_t ipif_seqid; 92 int hdrlen; 93 ts_label_t *tsl; 94 95 /* Remove the previous cache IRE */ 96 if ((ire = fp->ire) != NULL) { 97 IRE_REFRELE_NOTR(ire); 98 fp->ire = NULL; 99 } 100 101 /* 102 * If this addr is not reachable, mark it as unconfirmed for now, the 103 * state will be changed back to unreachable later in this function 104 * if it is still the case. 105 */ 106 if (fp->state == SCTP_FADDRS_UNREACH) { 107 fp->state = SCTP_FADDRS_UNCONFIRMED; 108 } 109 110 tsl = crgetlabel(CONN_CRED(sctp->sctp_connp)); 111 112 if (fp->isv4) { 113 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4); 114 ire = ire_cache_lookup(addr4, sctp->sctp_zoneid, tsl); 115 if (ire != NULL) 116 IN6_IPADDR_TO_V4MAPPED(ire->ire_src_addr, &laddr); 117 } else { 118 ire = ire_cache_lookup_v6(&fp->faddr, sctp->sctp_zoneid, tsl); 119 if (ire != NULL) 120 laddr = ire->ire_src_addr_v6; 121 } 122 123 if (ire == NULL) { 124 dprint(3, ("ire2faddr: no ire for %x:%x:%x:%x\n", 125 SCTP_PRINTADDR(fp->faddr))); 126 /* 127 * It is tempting to just leave the src addr 128 * unspecified and let IP figure it out, but we 129 * *cannot* do this, since IP may choose a src addr 130 * that is not part of this association... unless 131 * this sctp has bound to all addrs. So if the ire 132 * lookup fails, try to find one in our src addr 133 * list, unless the sctp has bound to all addrs, in 134 * which case we change the src addr to unspec. 135 * 136 * Note that if this is a v6 endpoint but it does 137 * not have any v4 address at this point (e.g. may 138 * have been deleted), sctp_get_valid_addr() will 139 * return mapped INADDR_ANY. In this case, this 140 * address should be marked not reachable so that 141 * it won't be used to send data. 142 */ 143 set_saddr(sctp, fp); 144 goto set_current; 145 } 146 147 ipif_seqid = ire->ire_ipif->ipif_seqid; 148 dprint(2, ("ire2faddr: got ire for %x:%x:%x:%x, ", 149 SCTP_PRINTADDR(fp->faddr))); 150 if (fp->isv4) { 151 dprint(2, ("src = %x\n", ire->ire_src_addr)); 152 } else { 153 dprint(2, ("src=%x:%x:%x:%x\n", 154 SCTP_PRINTADDR(ire->ire_src_addr_v6))); 155 } 156 157 /* make sure the laddr is part of this association */ 158 if ((sp = sctp_ipif_lookup(sctp, ipif_seqid)) != NULL && 159 !sp->saddr_ipif_dontsrc) { 160 if (sp->saddr_ipif_unconfirmed == 1) 161 sp->saddr_ipif_unconfirmed = 0; 162 fp->saddr = laddr; 163 } else { 164 dprint(2, ("ire2faddr: src addr is not part of assc\n")); 165 /* set the src to the first saddr and hope for the best */ 166 set_saddr(sctp, fp); 167 } 168 169 /* Cache the IRE */ 170 IRE_REFHOLD_NOTR(ire); 171 fp->ire = ire; 172 if (fp->ire->ire_type == IRE_LOOPBACK && !sctp->sctp_loopback) 173 sctp->sctp_loopback = 1; 174 IRE_REFRELE(ire); 175 176 /* 177 * Pull out RTO information for this faddr and use it if we don't 178 * have any yet. 179 */ 180 if (fp->srtt == -1 && ire->ire_uinfo.iulp_rtt != 0) { 181 /* The cached value is in ms. */ 182 fp->srtt = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt); 183 fp->rttvar = MSEC_TO_TICK(ire->ire_uinfo.iulp_rtt_sd); 184 fp->rto = 3 * fp->srtt; 185 186 /* Bound the RTO by configured min and max values */ 187 if (fp->rto < sctp->sctp_rto_min) { 188 fp->rto = sctp->sctp_rto_min; 189 } 190 if (fp->rto > sctp->sctp_rto_max) { 191 fp->rto = sctp->sctp_rto_max; 192 } 193 } 194 195 /* 196 * Record the MTU for this faddr. If the MTU for this faddr has 197 * changed, check if the assc MTU will also change. 198 */ 199 if (fp->isv4) { 200 hdrlen = sctp->sctp_hdr_len; 201 } else { 202 hdrlen = sctp->sctp_hdr6_len; 203 } 204 if ((fp->sfa_pmss + hdrlen) != ire->ire_max_frag) { 205 /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */ 206 fp->sfa_pmss = (ire->ire_max_frag - hdrlen) & ~(SCTP_ALIGN - 1); 207 if (fp->cwnd < (fp->sfa_pmss * 2)) { 208 fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial; 209 } 210 } 211 212 set_current: 213 if (fp == sctp->sctp_current) { 214 sctp_faddr2hdraddr(fp, sctp); 215 sctp->sctp_mss = fp->sfa_pmss; 216 if (!SCTP_IS_DETACHED(sctp)) { 217 sctp_set_ulp_prop(sctp); 218 } 219 } 220 } 221 222 void 223 sctp_faddr2ire(sctp_t *sctp, sctp_faddr_t *fp) 224 { 225 ire_t *ire; 226 227 if ((ire = fp->ire) == NULL) { 228 return; 229 } 230 231 mutex_enter(&ire->ire_lock); 232 233 /* If the cached IRE is going away, there is no point to update it. */ 234 if (ire->ire_marks & IRE_MARK_CONDEMNED) { 235 mutex_exit(&ire->ire_lock); 236 IRE_REFRELE_NOTR(ire); 237 fp->ire = NULL; 238 return; 239 } 240 241 /* 242 * Only record the PMTU for this faddr if we actually have 243 * done discovery. This prevents initialized default from 244 * clobbering any real info that IP may have. 245 */ 246 if (fp->pmtu_discovered) { 247 if (fp->isv4) { 248 ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr_len; 249 } else { 250 ire->ire_max_frag = fp->sfa_pmss + sctp->sctp_hdr6_len; 251 } 252 } 253 254 if (sctp_rtt_updates != 0 && fp->rtt_updates >= sctp_rtt_updates) { 255 /* 256 * If there is no old cached values, initialize them 257 * conservatively. Set them to be (1.5 * new value). 258 * This code copied from ip_ire_advise(). The cached 259 * value is in ms. 260 */ 261 if (ire->ire_uinfo.iulp_rtt != 0) { 262 ire->ire_uinfo.iulp_rtt = (ire->ire_uinfo.iulp_rtt + 263 TICK_TO_MSEC(fp->srtt)) >> 1; 264 } else { 265 ire->ire_uinfo.iulp_rtt = TICK_TO_MSEC(fp->srtt + 266 (fp->srtt >> 1)); 267 } 268 if (ire->ire_uinfo.iulp_rtt_sd != 0) { 269 ire->ire_uinfo.iulp_rtt_sd = 270 (ire->ire_uinfo.iulp_rtt_sd + 271 TICK_TO_MSEC(fp->rttvar)) >> 1; 272 } else { 273 ire->ire_uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->rttvar + 274 (fp->rttvar >> 1)); 275 } 276 fp->rtt_updates = 0; 277 } 278 279 mutex_exit(&ire->ire_lock); 280 } 281 282 /* 283 * The sender must set the total length in the IP header. 284 * If sendto == NULL, the current will be used. 285 */ 286 mblk_t * 287 sctp_make_mp(sctp_t *sctp, sctp_faddr_t *sendto, int trailer) 288 { 289 mblk_t *mp; 290 size_t ipsctplen; 291 int isv4; 292 sctp_faddr_t *fp; 293 294 ASSERT(sctp->sctp_current != NULL || sendto != NULL); 295 if (sendto == NULL) { 296 fp = sctp->sctp_current; 297 } else { 298 fp = sendto; 299 } 300 isv4 = fp->isv4; 301 302 /* Try to look for another IRE again. */ 303 if (fp->ire == NULL) 304 sctp_ire2faddr(sctp, fp); 305 306 /* There is no suitable source address to use, return. */ 307 if (fp->state == SCTP_FADDRS_UNREACH) 308 return (NULL); 309 310 if (isv4) { 311 ipsctplen = sctp->sctp_hdr_len; 312 } else { 313 ipsctplen = sctp->sctp_hdr6_len; 314 } 315 316 mp = allocb_cred(ipsctplen + sctp_wroff_xtra + trailer, 317 CONN_CRED(sctp->sctp_connp)); 318 if (mp == NULL) { 319 ip1dbg(("sctp_make_mp: error making mp..\n")); 320 return (NULL); 321 } 322 mp->b_rptr += sctp_wroff_xtra; 323 mp->b_wptr = mp->b_rptr + ipsctplen; 324 325 ASSERT(OK_32PTR(mp->b_wptr)); 326 327 if (isv4) { 328 ipha_t *iph = (ipha_t *)mp->b_rptr; 329 330 bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen); 331 if (fp != sctp->sctp_current) { 332 /* fiddle with the dst addr */ 333 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst); 334 /* fix up src addr */ 335 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) { 336 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 337 iph->ipha_src); 338 } else if (sctp->sctp_bound_to_all) { 339 iph->ipha_src = INADDR_ANY; 340 } 341 } 342 /* set or clear the don't fragment bit */ 343 if (fp->df) { 344 iph->ipha_fragment_offset_and_flags = htons(IPH_DF); 345 } else { 346 iph->ipha_fragment_offset_and_flags = 0; 347 } 348 } else { 349 bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen); 350 if (fp != sctp->sctp_current) { 351 /* fiddle with the dst addr */ 352 ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr; 353 /* fix up src addr */ 354 if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 355 ((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr; 356 } else if (sctp->sctp_bound_to_all) { 357 bzero(&((ip6_t *)(mp->b_rptr))->ip6_src, 358 sizeof (in6_addr_t)); 359 } 360 } 361 } 362 ASSERT(sctp->sctp_connp != NULL); 363 364 /* 365 * IP will not free this IRE if it is condemned. SCTP needs to 366 * free it. 367 */ 368 if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) { 369 IRE_REFRELE_NOTR(fp->ire); 370 fp->ire = NULL; 371 } 372 /* Stash the conn and ire ptr info. for IP */ 373 SCTP_STASH_IPINFO(mp, fp->ire); 374 375 return (mp); 376 } 377 378 /* 379 * Notify upper layers about preferred write offset, write size. 380 */ 381 void 382 sctp_set_ulp_prop(sctp_t *sctp) 383 { 384 int hdrlen; 385 386 if (sctp->sctp_current->isv4) { 387 hdrlen = sctp->sctp_hdr_len; 388 } else { 389 hdrlen = sctp->sctp_hdr6_len; 390 } 391 ASSERT(sctp->sctp_ulpd); 392 393 ASSERT(sctp->sctp_current->sfa_pmss == sctp->sctp_mss); 394 sctp->sctp_ulp_prop(sctp->sctp_ulpd, 395 sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 396 sctp->sctp_mss - sizeof (sctp_data_hdr_t)); 397 } 398 399 void 400 sctp_set_iplen(sctp_t *sctp, mblk_t *mp) 401 { 402 uint16_t sum = 0; 403 ipha_t *iph; 404 ip6_t *ip6h; 405 mblk_t *pmp = mp; 406 boolean_t isv4; 407 408 isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION); 409 for (; pmp; pmp = pmp->b_cont) 410 sum += pmp->b_wptr - pmp->b_rptr; 411 412 if (isv4) { 413 iph = (ipha_t *)mp->b_rptr; 414 iph->ipha_length = htons(sum); 415 } else { 416 ip6h = (ip6_t *)mp->b_rptr; 417 /* 418 * If an ip6i_t is present, the real IPv6 header 419 * immediately follows. 420 */ 421 if (ip6h->ip6_nxt == IPPROTO_RAW) 422 ip6h = (ip6_t *)&ip6h[1]; 423 ip6h->ip6_plen = htons(sum - ((char *)&sctp->sctp_ip6h[1] - 424 sctp->sctp_iphc6)); 425 } 426 } 427 428 int 429 sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2) 430 { 431 int na1 = 0; 432 int overlap = 0; 433 int equal = 1; 434 int onematch; 435 sctp_faddr_t *fp1, *fp2; 436 437 for (fp1 = a1; fp1; fp1 = fp1->next) { 438 onematch = 0; 439 for (fp2 = a2; fp2; fp2 = fp2->next) { 440 if (IN6_ARE_ADDR_EQUAL(&fp1->faddr, &fp2->faddr)) { 441 overlap++; 442 onematch = 1; 443 break; 444 } 445 if (!onematch) { 446 equal = 0; 447 } 448 } 449 na1++; 450 } 451 452 if (equal) { 453 return (SCTP_ADDR_EQUAL); 454 } 455 if (overlap == na1) { 456 return (SCTP_ADDR_SUBSET); 457 } 458 if (overlap) { 459 return (SCTP_ADDR_OVERLAP); 460 } 461 return (SCTP_ADDR_DISJOINT); 462 } 463 464 /* 465 * Caller must hold conn fanout lock. 466 */ 467 static int 468 sctp_add_faddr_entry(sctp_t *sctp, in6_addr_t *addr, int sleep, 469 boolean_t first) 470 { 471 sctp_faddr_t *faddr; 472 473 if (is_system_labeled()) { 474 ts_label_t *tsl; 475 tsol_tpc_t *rhtp; 476 int retv; 477 478 tsl = crgetlabel(CONN_CRED(sctp->sctp_connp)); 479 ASSERT(tsl != NULL); 480 481 /* find_tpc automatically does the right thing with IPv4 */ 482 rhtp = find_tpc(addr, IPV6_VERSION, B_FALSE); 483 if (rhtp == NULL) 484 return (EACCES); 485 486 retv = EACCES; 487 if (tsl->tsl_doi == rhtp->tpc_tp.tp_doi) { 488 switch (rhtp->tpc_tp.host_type) { 489 case UNLABELED: 490 /* 491 * Can talk to unlabeled hosts if any of the 492 * following are true: 493 * 1. zone's label matches the remote host's 494 * default label, 495 * 2. mac_exempt is on and the zone dominates 496 * the remote host's label, or 497 * 3. mac_exempt is on and the socket is from 498 * the global zone. 499 */ 500 if (blequal(&rhtp->tpc_tp.tp_def_label, 501 &tsl->tsl_label) || 502 (sctp->sctp_mac_exempt && 503 (sctp->sctp_zoneid == GLOBAL_ZONEID || 504 bldominates(&tsl->tsl_label, 505 &rhtp->tpc_tp.tp_def_label)))) 506 retv = 0; 507 break; 508 case SUN_CIPSO: 509 if (_blinrange(&tsl->tsl_label, 510 &rhtp->tpc_tp.tp_sl_range_cipso) || 511 blinlset(&tsl->tsl_label, 512 rhtp->tpc_tp.tp_sl_set_cipso)) 513 retv = 0; 514 break; 515 } 516 } 517 TPC_RELE(rhtp); 518 if (retv != 0) 519 return (retv); 520 } 521 522 if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL) 523 return (ENOMEM); 524 525 sctp_init_faddr(sctp, faddr, addr); 526 ASSERT(faddr->next == NULL); 527 528 if (sctp->sctp_faddrs == NULL) { 529 ASSERT(sctp->sctp_lastfaddr == NULL); 530 /* only element on list; first and last are same */ 531 sctp->sctp_faddrs = sctp->sctp_lastfaddr = faddr; 532 } else if (first) { 533 ASSERT(sctp->sctp_lastfaddr != NULL); 534 faddr->next = sctp->sctp_faddrs; 535 sctp->sctp_faddrs = faddr; 536 } else { 537 sctp->sctp_lastfaddr->next = faddr; 538 sctp->sctp_lastfaddr = faddr; 539 } 540 sctp->sctp_nfaddrs++; 541 542 return (0); 543 } 544 545 /* 546 * Add new address to end of list. 547 * Returns 0 on success, or errno on failure: 548 * ENOMEM - allocation failure; only for sleep==KM_NOSLEEP 549 * EACCES - label is incompatible with caller or connection 550 * (labeled [trusted] solaris only) 551 * Caller must hold conn fanout lock. 552 */ 553 int 554 sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep) 555 { 556 dprint(4, ("add_faddr: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr), 557 sleep)); 558 559 return (sctp_add_faddr_entry(sctp, addr, sleep, B_FALSE)); 560 } 561 562 /* 563 * Same as sctp_add_faddr above, but put new entry at front of list. 564 * Caller must hold conn fanout lock. 565 */ 566 int 567 sctp_add_faddr_first(sctp_t *sctp, in6_addr_t *addr, int sleep) 568 { 569 dprint(4, ("add_faddr_first: %x:%x:%x:%x %d\n", SCTP_PRINTADDR(*addr), 570 sleep)); 571 572 return (sctp_add_faddr_entry(sctp, addr, sleep, B_TRUE)); 573 } 574 575 sctp_faddr_t * 576 sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr) 577 { 578 sctp_faddr_t *fp; 579 580 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 581 if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) 582 break; 583 } 584 585 return (fp); 586 } 587 588 sctp_faddr_t * 589 sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr) 590 { 591 for (; fp; fp = fp->next) { 592 if (IN6_ARE_ADDR_EQUAL(&fp->faddr, addr)) { 593 break; 594 } 595 } 596 597 return (fp); 598 } 599 600 void 601 sctp_faddr2hdraddr(sctp_faddr_t *fp, sctp_t *sctp) 602 { 603 if (fp->isv4) { 604 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, 605 sctp->sctp_ipha->ipha_dst); 606 /* Must not allow unspec src addr if not bound to all */ 607 if (IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr) && 608 !sctp->sctp_bound_to_all) { 609 /* 610 * set the src to the first v4 saddr and hope 611 * for the best 612 */ 613 fp->saddr = sctp_get_valid_addr(sctp, B_FALSE); 614 } 615 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, sctp->sctp_ipha->ipha_src); 616 /* update don't fragment bit */ 617 if (fp->df) { 618 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 619 htons(IPH_DF); 620 } else { 621 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0; 622 } 623 } else { 624 sctp->sctp_ip6h->ip6_dst = fp->faddr; 625 /* Must not allow unspec src addr if not bound to all */ 626 if (IN6_IS_ADDR_UNSPECIFIED(&fp->saddr) && 627 !sctp->sctp_bound_to_all) { 628 /* 629 * set the src to the first v6 saddr and hope 630 * for the best 631 */ 632 fp->saddr = sctp_get_valid_addr(sctp, B_TRUE); 633 } 634 sctp->sctp_ip6h->ip6_src = fp->saddr; 635 } 636 } 637 638 void 639 sctp_redo_faddr_srcs(sctp_t *sctp) 640 { 641 sctp_faddr_t *fp; 642 643 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 644 sctp_ire2faddr(sctp, fp); 645 } 646 647 sctp_faddr2hdraddr(sctp->sctp_current, sctp); 648 } 649 650 void 651 sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp) 652 { 653 int64_t now = lbolt64; 654 655 fp->strikes = 0; 656 sctp->sctp_strikes = 0; 657 fp->lastactive = now; 658 fp->hb_expiry = now + SET_HB_INTVL(fp); 659 fp->hb_pending = B_FALSE; 660 if (fp->state != SCTP_FADDRS_ALIVE) { 661 fp->state = SCTP_FADDRS_ALIVE; 662 sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0); 663 664 /* If this is the primary, switch back to it now */ 665 if (fp == sctp->sctp_primary) { 666 sctp->sctp_current = fp; 667 sctp->sctp_mss = fp->sfa_pmss; 668 /* Reset the addrs in the composite header */ 669 sctp_faddr2hdraddr(fp, sctp); 670 if (!SCTP_IS_DETACHED(sctp)) { 671 sctp_set_ulp_prop(sctp); 672 } 673 } 674 } 675 if (fp->ire == NULL) { 676 /* Should have a full IRE now */ 677 sctp_ire2faddr(sctp, fp); 678 } 679 } 680 681 int 682 sctp_is_a_faddr_clean(sctp_t *sctp) 683 { 684 sctp_faddr_t *fp; 685 686 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 687 if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) { 688 return (1); 689 } 690 } 691 692 return (0); 693 } 694 695 /* 696 * Returns 0 if there is at leave one other active faddr, -1 if there 697 * are none. If there are none left, faddr_dead() will start killing the 698 * association. 699 * If the downed faddr was the current faddr, a new current faddr 700 * will be chosen. 701 */ 702 int 703 sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate) 704 { 705 sctp_faddr_t *ofp; 706 707 if (fp->state == SCTP_FADDRS_ALIVE) { 708 sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_UNREACHABLE, 0); 709 } 710 fp->state = newstate; 711 712 dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n", 713 SCTP_PRINTADDR(fp->faddr), newstate)); 714 715 if (fp == sctp->sctp_current) { 716 /* Current faddr down; need to switch it */ 717 sctp->sctp_current = NULL; 718 } 719 720 /* Find next alive faddr */ 721 ofp = fp; 722 for (fp = fp->next; fp; fp = fp->next) { 723 if (fp->state == SCTP_FADDRS_ALIVE) { 724 break; 725 } 726 } 727 728 if (fp == NULL) { 729 /* Continue from beginning of list */ 730 for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->next) { 731 if (fp->state == SCTP_FADDRS_ALIVE) { 732 break; 733 } 734 } 735 } 736 737 if (fp != ofp) { 738 if (sctp->sctp_current == NULL) { 739 dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n", 740 SCTP_PRINTADDR(fp->faddr))); 741 sctp->sctp_current = fp; 742 sctp->sctp_mss = fp->sfa_pmss; 743 744 /* Reset the addrs in the composite header */ 745 sctp_faddr2hdraddr(fp, sctp); 746 747 if (!SCTP_IS_DETACHED(sctp)) { 748 sctp_set_ulp_prop(sctp); 749 } 750 } 751 return (0); 752 } 753 754 755 /* All faddrs are down; kill the association */ 756 dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n")); 757 BUMP_MIB(&sctp_mib, sctpAborted); 758 sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ? 759 SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL); 760 sctp_clean_death(sctp, sctp->sctp_client_errno ? 761 sctp->sctp_client_errno : ETIMEDOUT); 762 763 return (-1); 764 } 765 766 sctp_faddr_t * 767 sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp) 768 { 769 sctp_faddr_t *nfp = NULL; 770 771 if (ofp == NULL) { 772 ofp = sctp->sctp_current; 773 } 774 775 /* Find the next live one */ 776 for (nfp = ofp->next; nfp != NULL; nfp = nfp->next) { 777 if (nfp->state == SCTP_FADDRS_ALIVE) { 778 break; 779 } 780 } 781 782 if (nfp == NULL) { 783 /* Continue from beginning of list */ 784 for (nfp = sctp->sctp_faddrs; nfp != ofp; nfp = nfp->next) { 785 if (nfp->state == SCTP_FADDRS_ALIVE) { 786 break; 787 } 788 } 789 } 790 791 /* 792 * nfp could only be NULL if all faddrs are down, and when 793 * this happens, faddr_dead() should have killed the 794 * association. Hence this assertion... 795 */ 796 ASSERT(nfp != NULL); 797 return (nfp); 798 } 799 800 void 801 sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp) 802 { 803 sctp_faddr_t *fpp; 804 805 if (!sctp->sctp_faddrs) { 806 return; 807 } 808 809 if (fp->timer_mp != NULL) { 810 sctp_timer_free(fp->timer_mp); 811 fp->timer_mp = NULL; 812 fp->timer_running = 0; 813 } 814 if (fp->rc_timer_mp != NULL) { 815 sctp_timer_free(fp->rc_timer_mp); 816 fp->rc_timer_mp = NULL; 817 fp->rc_timer_running = 0; 818 } 819 if (fp->ire != NULL) { 820 IRE_REFRELE_NOTR(fp->ire); 821 fp->ire = NULL; 822 } 823 824 if (fp == sctp->sctp_faddrs) { 825 goto gotit; 826 } 827 828 for (fpp = sctp->sctp_faddrs; fpp->next != fp; fpp = fpp->next) 829 ; 830 831 gotit: 832 ASSERT(sctp->sctp_conn_tfp != NULL); 833 mutex_enter(&sctp->sctp_conn_tfp->tf_lock); 834 if (fp == sctp->sctp_faddrs) { 835 sctp->sctp_faddrs = fp->next; 836 } else { 837 fpp->next = fp->next; 838 } 839 mutex_exit(&sctp->sctp_conn_tfp->tf_lock); 840 /* XXX faddr2ire? */ 841 kmem_cache_free(sctp_kmem_faddr_cache, fp); 842 sctp->sctp_nfaddrs--; 843 } 844 845 void 846 sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock) 847 { 848 sctp_faddr_t *fp, *fpn; 849 850 if (sctp->sctp_faddrs == NULL) { 851 ASSERT(sctp->sctp_lastfaddr == NULL); 852 return; 853 } 854 855 ASSERT(sctp->sctp_lastfaddr != NULL); 856 sctp->sctp_lastfaddr = NULL; 857 sctp->sctp_current = NULL; 858 sctp->sctp_primary = NULL; 859 860 sctp_free_faddr_timers(sctp); 861 862 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { 863 /* in conn fanout; need to hold lock */ 864 mutex_enter(&sctp->sctp_conn_tfp->tf_lock); 865 } 866 867 for (fp = sctp->sctp_faddrs; fp; fp = fpn) { 868 fpn = fp->next; 869 if (fp->ire != NULL) 870 IRE_REFRELE_NOTR(fp->ire); 871 kmem_cache_free(sctp_kmem_faddr_cache, fp); 872 sctp->sctp_nfaddrs--; 873 } 874 875 sctp->sctp_faddrs = NULL; 876 ASSERT(sctp->sctp_nfaddrs == 0); 877 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { 878 mutex_exit(&sctp->sctp_conn_tfp->tf_lock); 879 } 880 881 } 882 883 void 884 sctp_zap_addrs(sctp_t *sctp) 885 { 886 sctp_zap_faddrs(sctp, 0); 887 sctp_free_saddrs(sctp); 888 } 889 890 /* 891 * Initialize the IPv4 header. Loses any record of any IP options. 892 */ 893 int 894 sctp_header_init_ipv4(sctp_t *sctp, int sleep) 895 { 896 sctp_hdr_t *sctph; 897 898 /* 899 * This is a simple initialization. If there's 900 * already a template, it should never be too small, 901 * so reuse it. Otherwise, allocate space for the new one. 902 */ 903 if (sctp->sctp_iphc != NULL) { 904 ASSERT(sctp->sctp_iphc_len >= SCTP_MAX_COMBINED_HEADER_LENGTH); 905 bzero(sctp->sctp_iphc, sctp->sctp_iphc_len); 906 } else { 907 sctp->sctp_iphc_len = SCTP_MAX_COMBINED_HEADER_LENGTH; 908 sctp->sctp_iphc = kmem_zalloc(sctp->sctp_iphc_len, sleep); 909 if (sctp->sctp_iphc == NULL) { 910 sctp->sctp_iphc_len = 0; 911 return (ENOMEM); 912 } 913 } 914 915 sctp->sctp_ipha = (ipha_t *)sctp->sctp_iphc; 916 917 sctp->sctp_hdr_len = sizeof (ipha_t) + sizeof (sctp_hdr_t); 918 sctp->sctp_ip_hdr_len = sizeof (ipha_t); 919 sctp->sctp_ipha->ipha_length = htons(sizeof (ipha_t) + 920 sizeof (sctp_hdr_t)); 921 sctp->sctp_ipha->ipha_version_and_hdr_length 922 = (IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS; 923 924 /* 925 * These two fields should be zero, and are already set above. 926 * 927 * sctp->sctp_ipha->ipha_ident, 928 * sctp->sctp_ipha->ipha_fragment_offset_and_flags. 929 */ 930 931 sctp->sctp_ipha->ipha_ttl = sctp_ipv4_ttl; 932 sctp->sctp_ipha->ipha_protocol = IPPROTO_SCTP; 933 934 sctph = (sctp_hdr_t *)(sctp->sctp_iphc + sizeof (ipha_t)); 935 sctp->sctp_sctph = sctph; 936 937 return (0); 938 } 939 940 /* 941 * Update sctp_sticky_hdrs based on sctp_sticky_ipp. 942 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 943 * headers, and the maximum size sctp header (to avoid reallocation 944 * on the fly for additional sctp options). 945 * Returns failure if can't allocate memory. 946 */ 947 int 948 sctp_build_hdrs(sctp_t *sctp) 949 { 950 char *hdrs; 951 uint_t hdrs_len; 952 ip6i_t *ip6i; 953 char buf[SCTP_MAX_HDR_LENGTH]; 954 ip6_pkt_t *ipp = &sctp->sctp_sticky_ipp; 955 in6_addr_t src; 956 in6_addr_t dst; 957 958 /* 959 * save the existing sctp header and source/dest IP addresses 960 */ 961 bcopy(sctp->sctp_sctph6, buf, sizeof (sctp_hdr_t)); 962 src = sctp->sctp_ip6h->ip6_src; 963 dst = sctp->sctp_ip6h->ip6_dst; 964 hdrs_len = ip_total_hdrs_len_v6(ipp) + SCTP_MAX_HDR_LENGTH; 965 ASSERT(hdrs_len != 0); 966 if (hdrs_len > sctp->sctp_iphc6_len) { 967 /* Need to reallocate */ 968 hdrs = kmem_zalloc(hdrs_len, KM_NOSLEEP); 969 if (hdrs == NULL) 970 return (ENOMEM); 971 972 if (sctp->sctp_iphc6_len != 0) 973 kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len); 974 sctp->sctp_iphc6 = hdrs; 975 sctp->sctp_iphc6_len = hdrs_len; 976 } 977 ip_build_hdrs_v6((uchar_t *)sctp->sctp_iphc6, 978 hdrs_len - SCTP_MAX_HDR_LENGTH, ipp, IPPROTO_SCTP); 979 980 /* Set header fields not in ipp */ 981 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 982 ip6i = (ip6i_t *)sctp->sctp_iphc6; 983 sctp->sctp_ip6h = (ip6_t *)&ip6i[1]; 984 } else { 985 sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6; 986 } 987 /* 988 * sctp->sctp_ip_hdr_len will include ip6i_t if there is one. 989 */ 990 sctp->sctp_ip_hdr6_len = hdrs_len - SCTP_MAX_HDR_LENGTH; 991 sctp->sctp_sctph6 = (sctp_hdr_t *)(sctp->sctp_iphc6 + 992 sctp->sctp_ip_hdr6_len); 993 sctp->sctp_hdr6_len = sctp->sctp_ip_hdr6_len + sizeof (sctp_hdr_t); 994 995 bcopy(buf, sctp->sctp_sctph6, sizeof (sctp_hdr_t)); 996 997 sctp->sctp_ip6h->ip6_src = src; 998 sctp->sctp_ip6h->ip6_dst = dst; 999 /* 1000 * If the hoplimit was not set by ip_build_hdrs_v6(), we need to 1001 * set it to the default value for SCTP. 1002 */ 1003 if (!(ipp->ipp_fields & IPPF_UNICAST_HOPS)) 1004 sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit; 1005 /* 1006 * If we're setting extension headers after a connection 1007 * has been established, and if we have a routing header 1008 * among the extension headers, call ip_massage_options_v6 to 1009 * manipulate the routing header/ip6_dst set the checksum 1010 * difference in the sctp header template. 1011 * (This happens in sctp_connect_ipv6 if the routing header 1012 * is set prior to the connect.) 1013 */ 1014 1015 if ((sctp->sctp_state >= SCTPS_COOKIE_WAIT) && 1016 (sctp->sctp_sticky_ipp.ipp_fields & IPPF_RTHDR)) { 1017 ip6_rthdr_t *rth; 1018 1019 rth = ip_find_rthdr_v6(sctp->sctp_ip6h, 1020 (uint8_t *)sctp->sctp_sctph6); 1021 if (rth != NULL) 1022 (void) ip_massage_options_v6(sctp->sctp_ip6h, rth); 1023 } 1024 return (0); 1025 } 1026 1027 /* 1028 * Initialize the IPv6 header. Loses any record of any IPv6 extension headers. 1029 */ 1030 int 1031 sctp_header_init_ipv6(sctp_t *sctp, int sleep) 1032 { 1033 sctp_hdr_t *sctph; 1034 1035 /* 1036 * This is a simple initialization. If there's 1037 * already a template, it should never be too small, 1038 * so reuse it. Otherwise, allocate space for the new one. 1039 * Ensure that there is enough space to "downgrade" the sctp_t 1040 * to an IPv4 sctp_t. This requires having space for a full load 1041 * of IPv4 options 1042 */ 1043 if (sctp->sctp_iphc6 != NULL) { 1044 ASSERT(sctp->sctp_iphc6_len >= 1045 SCTP_MAX_COMBINED_HEADER_LENGTH); 1046 bzero(sctp->sctp_iphc6, sctp->sctp_iphc6_len); 1047 } else { 1048 sctp->sctp_iphc6_len = SCTP_MAX_COMBINED_HEADER_LENGTH; 1049 sctp->sctp_iphc6 = kmem_zalloc(sctp->sctp_iphc_len, sleep); 1050 if (sctp->sctp_iphc6 == NULL) { 1051 sctp->sctp_iphc6_len = 0; 1052 return (ENOMEM); 1053 } 1054 } 1055 sctp->sctp_hdr6_len = IPV6_HDR_LEN + sizeof (sctp_hdr_t); 1056 sctp->sctp_ip_hdr6_len = IPV6_HDR_LEN; 1057 sctp->sctp_ip6h = (ip6_t *)sctp->sctp_iphc6; 1058 1059 /* Initialize the header template */ 1060 1061 sctp->sctp_ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1062 sctp->sctp_ip6h->ip6_plen = ntohs(sizeof (sctp_hdr_t)); 1063 sctp->sctp_ip6h->ip6_nxt = IPPROTO_SCTP; 1064 sctp->sctp_ip6h->ip6_hops = sctp_ipv6_hoplimit; 1065 1066 sctph = (sctp_hdr_t *)(sctp->sctp_iphc6 + IPV6_HDR_LEN); 1067 sctp->sctp_sctph6 = sctph; 1068 1069 return (0); 1070 } 1071 1072 static int 1073 sctp_v4_label(sctp_t *sctp) 1074 { 1075 uchar_t optbuf[IP_MAX_OPT_LENGTH]; 1076 const cred_t *cr = CONN_CRED(sctp->sctp_connp); 1077 int added; 1078 1079 if (tsol_compute_label(cr, sctp->sctp_ipha->ipha_dst, optbuf, 1080 sctp->sctp_mac_exempt) != 0) 1081 return (EACCES); 1082 1083 added = tsol_remove_secopt(sctp->sctp_ipha, sctp->sctp_hdr_len); 1084 if (added == -1) 1085 return (EACCES); 1086 sctp->sctp_hdr_len += added; 1087 sctp->sctp_sctph = (sctp_hdr_t *)((uchar_t *)sctp->sctp_sctph + added); 1088 sctp->sctp_ip_hdr_len += added; 1089 if ((sctp->sctp_v4label_len = optbuf[IPOPT_OLEN]) != 0) { 1090 sctp->sctp_v4label_len = (sctp->sctp_v4label_len + 3) & ~3; 1091 added = tsol_prepend_option(optbuf, sctp->sctp_ipha, 1092 sctp->sctp_hdr_len); 1093 if (added == -1) 1094 return (EACCES); 1095 sctp->sctp_hdr_len += added; 1096 sctp->sctp_sctph = (sctp_hdr_t *)((uchar_t *)sctp->sctp_sctph + 1097 added); 1098 sctp->sctp_ip_hdr_len += added; 1099 } 1100 return (0); 1101 } 1102 1103 static int 1104 sctp_v6_label(sctp_t *sctp) 1105 { 1106 uchar_t optbuf[TSOL_MAX_IPV6_OPTION]; 1107 const cred_t *cr = CONN_CRED(sctp->sctp_connp); 1108 1109 if (tsol_compute_label_v6(cr, &sctp->sctp_ip6h->ip6_dst, optbuf, 1110 sctp->sctp_mac_exempt) != 0) 1111 return (EACCES); 1112 if (tsol_update_sticky(&sctp->sctp_sticky_ipp, &sctp->sctp_v6label_len, 1113 optbuf) != 0) 1114 return (EACCES); 1115 if (sctp_build_hdrs(sctp) != 0) 1116 return (EACCES); 1117 return (0); 1118 } 1119 1120 /* 1121 * XXX implement more sophisticated logic 1122 */ 1123 /* ARGSUSED */ 1124 int 1125 sctp_set_hdraddrs(sctp_t *sctp, cred_t *cr) 1126 { 1127 sctp_faddr_t *fp; 1128 int gotv4 = 0; 1129 int gotv6 = 0; 1130 1131 ASSERT(sctp->sctp_faddrs != NULL); 1132 ASSERT(sctp->sctp_nsaddrs > 0); 1133 1134 /* Set up using the primary first */ 1135 if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->faddr)) { 1136 IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->faddr, 1137 sctp->sctp_ipha->ipha_dst); 1138 /* saddr may be unspec; make_mp() will handle this */ 1139 IN6_V4MAPPED_TO_IPADDR(&sctp->sctp_primary->saddr, 1140 sctp->sctp_ipha->ipha_src); 1141 if (!is_system_labeled() || sctp_v4_label(sctp) == 0) { 1142 gotv4 = 1; 1143 if (sctp->sctp_ipversion == IPV4_VERSION) { 1144 goto copyports; 1145 } 1146 } 1147 } else { 1148 sctp->sctp_ip6h->ip6_dst = sctp->sctp_primary->faddr; 1149 /* saddr may be unspec; make_mp() will handle this */ 1150 sctp->sctp_ip6h->ip6_src = sctp->sctp_primary->saddr; 1151 if (!is_system_labeled() || sctp_v6_label(sctp) == 0) 1152 gotv6 = 1; 1153 } 1154 1155 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 1156 if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->faddr)) { 1157 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, 1158 sctp->sctp_ipha->ipha_dst); 1159 /* copy in the faddr_t's saddr */ 1160 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 1161 sctp->sctp_ipha->ipha_src); 1162 if (!is_system_labeled() || sctp_v4_label(sctp) == 0) { 1163 gotv4 = 1; 1164 if (sctp->sctp_ipversion == IPV4_VERSION || 1165 gotv6) { 1166 break; 1167 } 1168 } 1169 } else if (!gotv6) { 1170 sctp->sctp_ip6h->ip6_dst = fp->faddr; 1171 /* copy in the faddr_t's saddr */ 1172 sctp->sctp_ip6h->ip6_src = fp->saddr; 1173 if (!is_system_labeled() || sctp_v6_label(sctp) == 0) { 1174 gotv6 = 1; 1175 if (gotv4) 1176 break; 1177 } 1178 } 1179 } 1180 1181 copyports: 1182 if (!gotv4 && !gotv6) 1183 return (EACCES); 1184 1185 /* copy in the ports for good measure */ 1186 sctp->sctp_sctph->sh_sport = sctp->sctp_lport; 1187 sctp->sctp_sctph->sh_dport = sctp->sctp_fport; 1188 1189 sctp->sctp_sctph6->sh_sport = sctp->sctp_lport; 1190 sctp->sctp_sctph6->sh_dport = sctp->sctp_fport; 1191 return (0); 1192 } 1193 1194 void 1195 sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp) 1196 { 1197 mblk_t *mp; 1198 sctp_parm_hdr_t *ph; 1199 size_t len; 1200 int pad; 1201 1202 len = sizeof (*ph) + ntohs(uph->sph_len); 1203 if ((pad = len % 4) != 0) { 1204 pad = 4 - pad; 1205 len += pad; 1206 } 1207 mp = allocb(len, BPRI_MED); 1208 if (mp == NULL) { 1209 return; 1210 } 1211 1212 ph = (sctp_parm_hdr_t *)(mp->b_rptr); 1213 ph->sph_type = htons(PARM_UNRECOGNIZED); 1214 ph->sph_len = htons(len - pad); 1215 1216 /* copy in the unrecognized parameter */ 1217 bcopy(uph, ph + 1, ntohs(uph->sph_len)); 1218 1219 mp->b_wptr = mp->b_rptr + len; 1220 if (*errmp != NULL) { 1221 linkb(*errmp, mp); 1222 } else { 1223 *errmp = mp; 1224 } 1225 } 1226 1227 /* 1228 * o Bounds checking 1229 * o Updates remaining 1230 * o Checks alignment 1231 */ 1232 sctp_parm_hdr_t * 1233 sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining) 1234 { 1235 int pad; 1236 uint16_t len; 1237 1238 len = ntohs(current->sph_len); 1239 *remaining -= len; 1240 if (*remaining < sizeof (*current) || len < sizeof (*current)) { 1241 return (NULL); 1242 } 1243 if ((pad = len & (SCTP_ALIGN - 1)) != 0) { 1244 pad = SCTP_ALIGN - pad; 1245 *remaining -= pad; 1246 } 1247 /*LINTED pointer cast may result in improper alignment*/ 1248 current = (sctp_parm_hdr_t *)((char *)current + len + pad); 1249 return (current); 1250 } 1251 1252 /* 1253 * Sets the address parameters given in the INIT chunk into sctp's 1254 * faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are 1255 * no address parameters in the INIT chunk, a single faddr is created 1256 * from the ip hdr at the beginning of pkt. 1257 * If there already are existing addresses hanging from sctp, merge 1258 * them in, if the old info contains addresses which are not present 1259 * in this new info, get rid of them, and clean the pointers if there's 1260 * messages which have this as their target address. 1261 * 1262 * We also re-adjust the source address list here since the list may 1263 * contain more than what is actually part of the association. If 1264 * we get here from sctp_send_cookie_echo(), we are on the active 1265 * side and psctp will be NULL and ich will be the INIT-ACK chunk. 1266 * If we get here from sctp_accept_comm(), ich will be the INIT chunk 1267 * and psctp will the listening endpoint. 1268 * 1269 * INIT processing: When processing the INIT we inherit the src address 1270 * list from the listener. For a loopback or linklocal association, we 1271 * delete the list and just take the address from the IP header (since 1272 * that's how we created the INIT-ACK). Additionally, for loopback we 1273 * ignore the address params in the INIT. For determining which address 1274 * types were sent in the INIT-ACK we follow the same logic as in 1275 * creating the INIT-ACK. We delete addresses of the type that are not 1276 * supported by the peer. 1277 * 1278 * INIT-ACK processing: When processing the INIT-ACK since we had not 1279 * included addr params for loopback or linklocal addresses when creating 1280 * the INIT, we just use the address from the IP header. Further, for 1281 * loopback we ignore the addr param list. We mark addresses of the 1282 * type not supported by the peer as unconfirmed. 1283 * 1284 * In case of INIT processing we look for supported address types in the 1285 * supported address param, if present. In both cases the address type in 1286 * the IP header is supported as well as types for addresses in the param 1287 * list, if any. 1288 * 1289 * Once we have the supported address types sctp_check_saddr() runs through 1290 * the source address list and deletes or marks as unconfirmed address of 1291 * types not supported by the peer. 1292 * 1293 * Returns 0 on success, sys errno on failure 1294 */ 1295 int 1296 sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt, 1297 sctp_chunk_hdr_t *ich, uint_t *sctp_options) 1298 { 1299 sctp_init_chunk_t *init; 1300 ipha_t *iph; 1301 ip6_t *ip6h; 1302 in6_addr_t hdrsaddr[1]; 1303 in6_addr_t hdrdaddr[1]; 1304 sctp_parm_hdr_t *ph; 1305 ssize_t remaining; 1306 int isv4; 1307 int err; 1308 sctp_faddr_t *fp; 1309 int supp_af = 0; 1310 boolean_t check_saddr = B_TRUE; 1311 in6_addr_t curaddr; 1312 1313 if (sctp_options != NULL) 1314 *sctp_options = 0; 1315 1316 /* extract the address from the IP header */ 1317 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); 1318 if (isv4) { 1319 iph = (ipha_t *)pkt->b_rptr; 1320 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr); 1321 IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr); 1322 supp_af |= PARM_SUPP_V4; 1323 } else { 1324 ip6h = (ip6_t *)pkt->b_rptr; 1325 hdrsaddr[0] = ip6h->ip6_src; 1326 hdrdaddr[0] = ip6h->ip6_dst; 1327 supp_af |= PARM_SUPP_V6; 1328 } 1329 1330 /* 1331 * Unfortunately, we can't delay this because adding an faddr 1332 * looks for the presence of the source address (from the ire 1333 * for the faddr) in the source address list. We could have 1334 * delayed this if, say, this was a loopback/linklocal connection. 1335 * Now, we just end up nuking this list and taking the addr from 1336 * the IP header for loopback/linklocal. 1337 */ 1338 if (psctp != NULL && psctp->sctp_nsaddrs > 0) { 1339 ASSERT(sctp->sctp_nsaddrs == 0); 1340 1341 err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP); 1342 if (err != 0) 1343 return (err); 1344 } 1345 /* 1346 * We will add the faddr before parsing the address list as this 1347 * might be a loopback connection and we would not have to 1348 * go through the list. 1349 * 1350 * Make sure the header's addr is in the list 1351 */ 1352 fp = sctp_lookup_faddr(sctp, hdrsaddr); 1353 if (fp == NULL) { 1354 /* not included; add it now */ 1355 err = sctp_add_faddr_first(sctp, hdrsaddr, KM_NOSLEEP); 1356 if (err != 0) 1357 return (err); 1358 1359 /* sctp_faddrs will be the hdr addr */ 1360 fp = sctp->sctp_faddrs; 1361 } 1362 /* make the header addr the primary */ 1363 1364 if (cl_sctp_assoc_change != NULL && psctp == NULL) 1365 curaddr = sctp->sctp_current->faddr; 1366 1367 sctp->sctp_primary = fp; 1368 sctp->sctp_current = fp; 1369 sctp->sctp_mss = fp->sfa_pmss; 1370 1371 /* For loopback connections & linklocal get address from the header */ 1372 if (sctp->sctp_loopback || sctp->sctp_linklocal) { 1373 if (sctp->sctp_nsaddrs != 0) 1374 sctp_free_saddrs(sctp); 1375 if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0) 1376 return (err); 1377 /* For loopback ignore address list */ 1378 if (sctp->sctp_loopback) 1379 return (0); 1380 check_saddr = B_FALSE; 1381 } 1382 1383 /* Walk the params in the INIT [ACK], pulling out addr params */ 1384 remaining = ntohs(ich->sch_len) - sizeof (*ich) - 1385 sizeof (sctp_init_chunk_t); 1386 if (remaining < sizeof (*ph)) { 1387 if (check_saddr) { 1388 sctp_check_saddr(sctp, supp_af, psctp == NULL ? 1389 B_FALSE : B_TRUE); 1390 } 1391 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); 1392 return (0); 1393 } 1394 1395 init = (sctp_init_chunk_t *)(ich + 1); 1396 ph = (sctp_parm_hdr_t *)(init + 1); 1397 1398 /* params will have already been byteordered when validating */ 1399 while (ph != NULL) { 1400 if (ph->sph_type == htons(PARM_SUPP_ADDRS)) { 1401 int plen; 1402 uint16_t *p; 1403 uint16_t addrtype; 1404 1405 ASSERT(psctp != NULL); 1406 plen = ntohs(ph->sph_len); 1407 p = (uint16_t *)(ph + 1); 1408 while (plen > 0) { 1409 addrtype = ntohs(*p); 1410 switch (addrtype) { 1411 case PARM_ADDR6: 1412 supp_af |= PARM_SUPP_V6; 1413 break; 1414 case PARM_ADDR4: 1415 supp_af |= PARM_SUPP_V4; 1416 break; 1417 default: 1418 break; 1419 } 1420 p++; 1421 plen -= sizeof (*p); 1422 } 1423 } else if (ph->sph_type == htons(PARM_ADDR4)) { 1424 if (remaining >= PARM_ADDR4_LEN) { 1425 in6_addr_t addr; 1426 ipaddr_t ta; 1427 1428 supp_af |= PARM_SUPP_V4; 1429 /* 1430 * Screen out broad/multicasts & loopback. 1431 * If the endpoint only accepts v6 address, 1432 * go to the next one. 1433 */ 1434 bcopy(ph + 1, &ta, sizeof (ta)); 1435 if (ta == 0 || 1436 ta == INADDR_BROADCAST || 1437 ta == htonl(INADDR_LOOPBACK) || 1438 IN_MULTICAST(ta) || 1439 sctp->sctp_connp->conn_ipv6_v6only) { 1440 goto next; 1441 } 1442 /* 1443 * XXX also need to check for subnet 1444 * broadcasts. This should probably 1445 * wait until we have full access 1446 * to the ILL tables. 1447 */ 1448 1449 IN6_INADDR_TO_V4MAPPED((struct in_addr *) 1450 (ph + 1), &addr); 1451 /* Check for duplicate. */ 1452 if (sctp_lookup_faddr(sctp, &addr) != NULL) 1453 goto next; 1454 1455 /* OK, add it to the faddr set */ 1456 err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP); 1457 if (err != 0) 1458 return (err); 1459 } 1460 } else if (ph->sph_type == htons(PARM_ADDR6) && 1461 sctp->sctp_family == AF_INET6) { 1462 /* An v4 socket should not take v6 addresses. */ 1463 if (remaining >= PARM_ADDR6_LEN) { 1464 in6_addr_t *addr6; 1465 1466 supp_af |= PARM_SUPP_V6; 1467 addr6 = (in6_addr_t *)(ph + 1); 1468 /* 1469 * Screen out link locals, mcast, loopback 1470 * and bogus v6 address. 1471 */ 1472 if (IN6_IS_ADDR_LINKLOCAL(addr6) || 1473 IN6_IS_ADDR_MULTICAST(addr6) || 1474 IN6_IS_ADDR_LOOPBACK(addr6) || 1475 IN6_IS_ADDR_V4MAPPED(addr6)) { 1476 goto next; 1477 } 1478 /* Check for duplicate. */ 1479 if (sctp_lookup_faddr(sctp, addr6) != NULL) 1480 goto next; 1481 1482 err = sctp_add_faddr(sctp, 1483 (in6_addr_t *)(ph + 1), KM_NOSLEEP); 1484 if (err != 0) 1485 return (err); 1486 } 1487 } else if (ph->sph_type == htons(PARM_FORWARD_TSN)) { 1488 if (sctp_options != NULL) 1489 *sctp_options |= SCTP_PRSCTP_OPTION; 1490 } /* else; skip */ 1491 1492 next: 1493 ph = sctp_next_parm(ph, &remaining); 1494 } 1495 if (check_saddr) { 1496 sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE : 1497 B_TRUE); 1498 } 1499 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); 1500 /* 1501 * We have the right address list now, update clustering's 1502 * knowledge because when we sent the INIT we had just added 1503 * the address the INIT was sent to. 1504 */ 1505 if (psctp == NULL && cl_sctp_assoc_change != NULL) { 1506 uchar_t *alist; 1507 size_t asize; 1508 uchar_t *dlist; 1509 size_t dsize; 1510 1511 asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 1512 alist = kmem_alloc(asize, KM_NOSLEEP); 1513 if (alist == NULL) 1514 return (ENOMEM); 1515 /* 1516 * Just include the address the INIT was sent to in the 1517 * delete list and send the entire faddr list. We could 1518 * do it differently (i.e include all the addresses in the 1519 * add list even if it contains the original address OR 1520 * remove the original address from the add list etc.), but 1521 * this seems reasonable enough. 1522 */ 1523 dsize = sizeof (in6_addr_t); 1524 dlist = kmem_alloc(dsize, KM_NOSLEEP); 1525 if (dlist == NULL) { 1526 kmem_free(alist, asize); 1527 return (ENOMEM); 1528 } 1529 bcopy(&curaddr, dlist, sizeof (curaddr)); 1530 sctp_get_faddr_list(sctp, alist, asize); 1531 (*cl_sctp_assoc_change)(sctp->sctp_family, alist, asize, 1532 sctp->sctp_nfaddrs, dlist, dsize, 1, SCTP_CL_PADDR, 1533 (cl_sctp_handle_t)sctp); 1534 /* alist and dlist will be freed by the clustering module */ 1535 } 1536 return (0); 1537 } 1538 1539 /* 1540 * Returns 0 if the check failed and the restart should be refused, 1541 * 1 if the check succeeded. 1542 */ 1543 int 1544 sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports, 1545 int sleep) 1546 { 1547 sctp_faddr_t *fp, *fpa, *fphead = NULL; 1548 sctp_parm_hdr_t *ph; 1549 ssize_t remaining; 1550 int isv4; 1551 ipha_t *iph; 1552 ip6_t *ip6h; 1553 in6_addr_t hdraddr[1]; 1554 int retval = 0; 1555 sctp_tf_t *tf; 1556 sctp_t *sctp; 1557 int compres; 1558 sctp_init_chunk_t *init; 1559 int nadded = 0; 1560 1561 /* extract the address from the IP header */ 1562 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); 1563 if (isv4) { 1564 iph = (ipha_t *)pkt->b_rptr; 1565 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr); 1566 } else { 1567 ip6h = (ip6_t *)pkt->b_rptr; 1568 hdraddr[0] = ip6h->ip6_src; 1569 } 1570 1571 /* Walk the params in the INIT [ACK], pulling out addr params */ 1572 remaining = ntohs(ich->sch_len) - sizeof (*ich) - 1573 sizeof (sctp_init_chunk_t); 1574 if (remaining < sizeof (*ph)) { 1575 /* no parameters; restart OK */ 1576 return (1); 1577 } 1578 init = (sctp_init_chunk_t *)(ich + 1); 1579 ph = (sctp_parm_hdr_t *)(init + 1); 1580 1581 while (ph != NULL) { 1582 /* params will have already been byteordered when validating */ 1583 if (ph->sph_type == htons(PARM_ADDR4)) { 1584 if (remaining >= PARM_ADDR4_LEN) { 1585 in6_addr_t addr; 1586 IN6_INADDR_TO_V4MAPPED((struct in_addr *) 1587 (ph + 1), &addr); 1588 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, 1589 sleep); 1590 if (!fpa) { 1591 goto done; 1592 } 1593 bzero(fpa, sizeof (*fpa)); 1594 fpa->faddr = addr; 1595 fpa->next = NULL; 1596 } 1597 } else if (ph->sph_type == htons(PARM_ADDR6)) { 1598 if (remaining >= PARM_ADDR6_LEN) { 1599 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, 1600 sleep); 1601 if (!fpa) { 1602 goto done; 1603 } 1604 bzero(fpa, sizeof (*fpa)); 1605 bcopy(ph + 1, &fpa->faddr, 1606 sizeof (fpa->faddr)); 1607 fpa->next = NULL; 1608 } 1609 } else { 1610 /* else not addr param; skip */ 1611 fpa = NULL; 1612 } 1613 /* link in the new addr, if it was an addr param */ 1614 if (fpa) { 1615 if (!fphead) { 1616 fphead = fpa; 1617 fp = fphead; 1618 } else { 1619 fp->next = fpa; 1620 fp = fpa; 1621 } 1622 } 1623 1624 ph = sctp_next_parm(ph, &remaining); 1625 } 1626 1627 if (fphead == NULL) { 1628 /* no addr parameters; restart OK */ 1629 return (1); 1630 } 1631 1632 /* 1633 * got at least one; make sure the header's addr is 1634 * in the list 1635 */ 1636 fp = sctp_lookup_faddr_nosctp(fphead, hdraddr); 1637 if (!fp) { 1638 /* not included; add it now */ 1639 fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep); 1640 if (!fp) { 1641 goto done; 1642 } 1643 bzero(fp, sizeof (*fp)); 1644 fp->faddr = *hdraddr; 1645 fp->next = fphead; 1646 fphead = fp; 1647 } 1648 1649 /* 1650 * Now, we can finally do the check: For each sctp instance 1651 * on the hash line for ports, compare its faddr set against 1652 * the new one. If the new one is a strict subset of any 1653 * existing sctp's faddrs, the restart is OK. However, if there 1654 * is an overlap, this could be an attack, so return failure. 1655 * If all sctp's faddrs are disjoint, this is a legitimate new 1656 * association. 1657 */ 1658 tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]); 1659 mutex_enter(&tf->tf_lock); 1660 1661 for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) { 1662 if (ports != sctp->sctp_ports) { 1663 continue; 1664 } 1665 compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs); 1666 if (compres <= SCTP_ADDR_SUBSET) { 1667 retval = 1; 1668 mutex_exit(&tf->tf_lock); 1669 goto done; 1670 } 1671 if (compres == SCTP_ADDR_OVERLAP) { 1672 dprint(1, 1673 ("new assoc from %x:%x:%x:%x overlaps with %p\n", 1674 SCTP_PRINTADDR(*hdraddr), (void *)sctp)); 1675 /* 1676 * While we still hold the lock, we need to 1677 * figure out which addresses have been 1678 * added so we can include them in the abort 1679 * we will send back. Since these faddrs will 1680 * never be used, we overload the rto field 1681 * here, setting it to 0 if the address was 1682 * not added, 1 if it was added. 1683 */ 1684 for (fp = fphead; fp; fp = fp->next) { 1685 if (sctp_lookup_faddr(sctp, &fp->faddr)) { 1686 fp->rto = 0; 1687 } else { 1688 fp->rto = 1; 1689 nadded++; 1690 } 1691 } 1692 mutex_exit(&tf->tf_lock); 1693 goto done; 1694 } 1695 } 1696 mutex_exit(&tf->tf_lock); 1697 1698 /* All faddrs are disjoint; legit new association */ 1699 retval = 1; 1700 1701 done: 1702 /* If are attempted adds, send back an abort listing the addrs */ 1703 if (nadded > 0) { 1704 void *dtail; 1705 size_t dlen; 1706 1707 dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP); 1708 if (dtail == NULL) { 1709 goto cleanup; 1710 } 1711 1712 ph = dtail; 1713 dlen = 0; 1714 for (fp = fphead; fp; fp = fp->next) { 1715 if (fp->rto == 0) { 1716 continue; 1717 } 1718 if (IN6_IS_ADDR_V4MAPPED(&fp->faddr)) { 1719 ipaddr_t addr4; 1720 1721 ph->sph_type = htons(PARM_ADDR4); 1722 ph->sph_len = htons(PARM_ADDR4_LEN); 1723 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, addr4); 1724 ph++; 1725 bcopy(&addr4, ph, sizeof (addr4)); 1726 ph = (sctp_parm_hdr_t *) 1727 ((char *)ph + sizeof (addr4)); 1728 dlen += PARM_ADDR4_LEN; 1729 } else { 1730 ph->sph_type = htons(PARM_ADDR6); 1731 ph->sph_len = htons(PARM_ADDR6_LEN); 1732 ph++; 1733 bcopy(&fp->faddr, ph, sizeof (fp->faddr)); 1734 ph = (sctp_parm_hdr_t *) 1735 ((char *)ph + sizeof (fp->faddr)); 1736 dlen += PARM_ADDR6_LEN; 1737 } 1738 } 1739 1740 /* Send off the abort */ 1741 sctp_send_abort(sctp, sctp_init2vtag(ich), 1742 SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE); 1743 1744 kmem_free(dtail, PARM_ADDR6_LEN * nadded); 1745 } 1746 1747 cleanup: 1748 /* Clean up */ 1749 if (fphead) { 1750 sctp_faddr_t *fpn; 1751 for (fp = fphead; fp; fp = fpn) { 1752 fpn = fp->next; 1753 kmem_cache_free(sctp_kmem_faddr_cache, fp); 1754 } 1755 } 1756 1757 return (retval); 1758 } 1759 1760 void 1761 sctp_congest_reset(sctp_t *sctp) 1762 { 1763 sctp_faddr_t *fp; 1764 1765 for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { 1766 fp->ssthresh = sctp_initial_mtu; 1767 fp->cwnd = fp->sfa_pmss * sctp_slow_start_initial; 1768 fp->suna = 0; 1769 fp->pba = 0; 1770 } 1771 } 1772 1773 static void 1774 sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr) 1775 { 1776 bcopy(addr, &fp->faddr, sizeof (*addr)); 1777 if (IN6_IS_ADDR_V4MAPPED(addr)) { 1778 fp->isv4 = 1; 1779 /* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */ 1780 fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr_len) & 1781 ~(SCTP_ALIGN - 1); 1782 } else { 1783 fp->isv4 = 0; 1784 fp->sfa_pmss = (sctp_initial_mtu - sctp->sctp_hdr6_len) & 1785 ~(SCTP_ALIGN - 1); 1786 } 1787 fp->cwnd = sctp_slow_start_initial * fp->sfa_pmss; 1788 fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max); 1789 fp->srtt = -1; 1790 fp->rtt_updates = 0; 1791 fp->strikes = 0; 1792 fp->max_retr = sctp->sctp_pp_max_rxt; 1793 /* Mark it as not confirmed. */ 1794 fp->state = SCTP_FADDRS_UNCONFIRMED; 1795 fp->hb_interval = sctp->sctp_hb_interval; 1796 fp->ssthresh = sctp_initial_ssthresh; 1797 fp->suna = 0; 1798 fp->pba = 0; 1799 fp->acked = 0; 1800 fp->lastactive = lbolt64; 1801 fp->timer_mp = NULL; 1802 fp->hb_pending = B_FALSE; 1803 fp->timer_running = 0; 1804 fp->df = 1; 1805 fp->pmtu_discovered = 0; 1806 fp->rc_timer_mp = NULL; 1807 fp->rc_timer_running = 0; 1808 fp->next = NULL; 1809 fp->ire = NULL; 1810 fp->T3expire = 0; 1811 (void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret, 1812 sizeof (fp->hb_secret)); 1813 fp->hb_expiry = lbolt64; 1814 1815 sctp_ire2faddr(sctp, fp); 1816 } 1817 1818 /*ARGSUSED*/ 1819 static void 1820 faddr_destructor(void *buf, void *cdrarg) 1821 { 1822 sctp_faddr_t *fp = buf; 1823 1824 ASSERT(fp->timer_mp == NULL); 1825 ASSERT(fp->timer_running == 0); 1826 1827 ASSERT(fp->rc_timer_mp == NULL); 1828 ASSERT(fp->rc_timer_running == 0); 1829 } 1830 1831 void 1832 sctp_faddr_init(void) 1833 { 1834 sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache", 1835 sizeof (sctp_faddr_t), 0, NULL, faddr_destructor, 1836 NULL, NULL, NULL, 0); 1837 } 1838 1839 void 1840 sctp_faddr_fini(void) 1841 { 1842 kmem_cache_destroy(sctp_kmem_faddr_cache); 1843 } 1844