/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * NOTE(review): the header names of the following #include directives are
 * missing in this copy of the file; confirm against the repository.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "sctp_impl.h"
#include "sctp_addr.h"
#include "sctp_asconf.h"

/* Cache from which all sctp_faddr_t structures in this file are allocated. */
static struct kmem_cache *sctp_kmem_faddr_cache;
static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *, mblk_t *);

/*
 * Set the source address.  Refer to comments in sctp_get_dest().
 *
 * Picks a source address of the same family as the peer address fp via
 * sctp_get_valid_addr() and stores it in fp->sf_saddr.
 */
void
sctp_set_saddr(sctp_t *sctp, sctp_faddr_t *fp)
{
	boolean_t v6 = !fp->sf_isv4;
	boolean_t addr_set;

	fp->sf_saddr = sctp_get_valid_addr(sctp, v6, &addr_set);
	/*
	 * If there is no source address available, mark this peer address
	 * as unreachable for now.  When the heartbeat timer fires, it will
	 * call sctp_get_dest() to re-check if there is any source address
	 * available.
	 */
	if (!addr_set)
		fp->sf_state = SCTP_FADDRS_UNREACH;
}

/*
 * Call this function to get information about a peer addr fp.
 *
 * Uses ip_attr_connect to avoid explicit use of ire and source address
 * selection.
*/
void
sctp_get_dest(sctp_t *sctp, sctp_faddr_t *fp)
{
	in6_addr_t laddr;
	in6_addr_t nexthop;
	sctp_saddr_ipif_t *sp;
	int hdrlen;
	sctp_stack_t *sctps = sctp->sctp_sctps;
	conn_t *connp = sctp->sctp_connp;
	iulp_t uinfo;
	uint_t pmtu;
	int error;
	uint32_t flags = IPDF_VERIFY_DST | IPDF_IPSEC |
	    IPDF_SELECT_SRC | IPDF_UNIQUE_DCE;

	/*
	 * Tell sctp_make_mp it needs to call us again should we not
	 * complete and set the saddr.
	 */
	fp->sf_saddr = ipv6_all_zeros;

	/*
	 * If this addr is not reachable, mark it as unconfirmed for now, the
	 * state will be changed back to unreachable later in this function
	 * if it is still the case.
	 */
	if (fp->sf_state == SCTP_FADDRS_UNREACH) {
		fp->sf_state = SCTP_FADDRS_UNCONFIRMED;
	}

	/*
	 * Socket is connected - enable PMTU discovery.
	 */
	if (!sctps->sctps_ignore_path_mtu)
		fp->sf_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;

	ip_attr_nexthop(&connp->conn_xmit_ipp, fp->sf_ixa, &fp->sf_faddr,
	    &nexthop);

	/*
	 * laddr is passed both as the requested source (unspecified at this
	 * point) and as the location receiving the source IP selected.
	 */
	laddr = fp->sf_saddr;
	error = ip_attr_connect(connp, fp->sf_ixa, &laddr, &fp->sf_faddr,
	    &nexthop, connp->conn_fport, &laddr, &uinfo, flags);

	if (error != 0) {
		dprint(3, ("sctp_get_dest: no ire for %x:%x:%x:%x\n",
		    SCTP_PRINTADDR(fp->sf_faddr)));
		/*
		 * It is tempting to just leave the src addr
		 * unspecified and let IP figure it out, but we
		 * *cannot* do this, since IP may choose a src addr
		 * that is not part of this association... unless
		 * this sctp has bound to all addrs.  So if the dest
		 * lookup fails, try to find one in our src addr
		 * list, unless the sctp has bound to all addrs, in
		 * which case we change the src addr to unspec.
		 *
		 * Note that if this is a v6 endpoint but it does
		 * not have any v4 address at this point (e.g. may
		 * have been deleted), sctp_get_valid_addr() will
		 * return mapped INADDR_ANY.  In this case, this
		 * address should be marked not reachable so that
		 * it won't be used to send data.
		 */
		sctp_set_saddr(sctp, fp);
		if (fp->sf_state == SCTP_FADDRS_UNREACH)
			return;
		/* uinfo was not filled in; skip the RTT/MTU processing. */
		goto check_current;
	}
	ASSERT(fp->sf_ixa->ixa_ire != NULL);
	ASSERT(!(fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)));

	/* Once set, the loopback flag is never cleared here. */
	if (!sctp->sctp_loopback)
		sctp->sctp_loopback = uinfo.iulp_loopback;

	/* Make sure the laddr is part of this association */
	if ((sp = sctp_saddr_lookup(sctp, &laddr, 0)) != NULL &&
	    !sp->saddr_ipif_dontsrc) {
		if (sp->saddr_ipif_unconfirmed == 1)
			sp->saddr_ipif_unconfirmed = 0;
		/* We did IPsec policy lookup for laddr already */
		fp->sf_saddr = laddr;
	} else {
		dprint(2, ("sctp_get_dest: src addr is not part of assoc "
		    "%x:%x:%x:%x\n", SCTP_PRINTADDR(laddr)));

		/*
		 * Set the src to the first saddr and hope for the best.
		 * Note that this case should happen very seldom.  One
		 * scenario in which it can happen is an app explicitly
		 * bind()ing to an address that is not the preferred
		 * source address to send to the peer.
		 */
		sctp_set_saddr(sctp, fp);
		if (fp->sf_state == SCTP_FADDRS_UNREACH) {
			return;
		}
	}

	/*
	 * Pull out RTO information for this faddr and use it if we don't
	 * have any yet.
	 */
	if (fp->sf_srtt == -1 && uinfo.iulp_rtt != 0) {
		/* The cached value is in ms. */
		fp->sf_srtt = MSEC_TO_TICK(uinfo.iulp_rtt);
		fp->sf_rttvar = MSEC_TO_TICK(uinfo.iulp_rtt_sd);
		fp->sf_rto = 3 * fp->sf_srtt;

		/* Bound the RTO by configured min and max values */
		if (fp->sf_rto < sctp->sctp_rto_min) {
			fp->sf_rto = sctp->sctp_rto_min;
		}
		if (fp->sf_rto > sctp->sctp_rto_max) {
			fp->sf_rto = sctp->sctp_rto_max;
		}
		SCTP_MAX_RTO(sctp, fp);
	}
	pmtu = uinfo.iulp_mtu;

	/*
	 * Record the MTU for this faddr.  If the MTU for this faddr has
	 * changed, check if the assoc MTU will also change.
	 */
	if (fp->sf_isv4) {
		hdrlen = sctp->sctp_hdr_len;
	} else {
		hdrlen = sctp->sctp_hdr6_len;
	}
	if ((fp->sf_pmss + hdrlen) != pmtu) {
		/* Make sure that sf_pmss is a multiple of SCTP_ALIGN. */
		fp->sf_pmss = (pmtu - hdrlen) & ~(SCTP_ALIGN - 1);
		if (fp->sf_cwnd < (fp->sf_pmss * 2)) {
			SET_CWND(fp, fp->sf_pmss,
			    sctps->sctps_slow_start_initial);
		}
	}

check_current:
	/* Refresh the composite header if fp is the address in use. */
	if (fp == sctp->sctp_current)
		sctp_set_faddr_current(sctp, fp);
}

/*
 * Walk every peer address of the association and hand the path MTU and
 * RTT information gathered for it to dce_update_uinfo().
 */
void
sctp_update_dce(sctp_t *sctp)
{
	sctp_faddr_t *fp;
	sctp_stack_t *sctps = sctp->sctp_sctps;
	iulp_t uinfo;
	ip_stack_t *ipst = sctps->sctps_netstack->netstack_ip;
	uint_t ifindex;

	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
		bzero(&uinfo, sizeof (uinfo));
		/*
		 * Only record the PMTU for this faddr if we actually have
		 * done discovery.  This prevents initialized default from
		 * clobbering any real info that IP may have.
		 */
		if (fp->sf_pmtu_discovered) {
			if (fp->sf_isv4) {
				uinfo.iulp_mtu = fp->sf_pmss +
				    sctp->sctp_hdr_len;
			} else {
				uinfo.iulp_mtu = fp->sf_pmss +
				    sctp->sctp_hdr6_len;
			}
		}
		if (sctps->sctps_rtt_updates != 0 &&
		    fp->sf_rtt_updates >= sctps->sctps_rtt_updates) {
			/*
			 * dce_update_uinfo() merges these values with the
			 * old values.
			 */
			uinfo.iulp_rtt = TICK_TO_MSEC(fp->sf_srtt);
			uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->sf_rttvar);
			fp->sf_rtt_updates = 0;
		}
		ifindex = 0;
		if (IN6_IS_ADDR_LINKSCOPE(&fp->sf_faddr)) {
			/*
			 * If we are going to create a DCE we'd better have
			 * an ifindex
			 */
			if (fp->sf_ixa->ixa_nce != NULL) {
				ifindex = fp->sf_ixa->ixa_nce->nce_common->
				    ncec_ill->ill_phyint->phyint_ifindex;
			} else {
				/* No ifindex available; skip this address. */
				continue;
			}
		}

		(void) dce_update_uinfo(&fp->sf_faddr, ifindex, &uinfo, ipst);
	}
}

/*
 * The sender must later set the total length in the IP header.
*/
mblk_t *
sctp_make_mp(sctp_t *sctp, sctp_faddr_t *fp, int trailer)
{
	sctp_stack_t	*stack = sctp->sctp_sctps;
	boolean_t	refreshed = B_FALSE;
	boolean_t	fix_addrs;
	size_t		hdrlen;
	mblk_t		*pkt;
	int		v4;

	ASSERT(fp != NULL);
	v4 = fp->sf_isv4;

	/*
	 * Without a source address, or with a route that rejects or
	 * blackholes traffic, the destination lookup has to be redone.
	 * Even if that does not yield an IRE, sctp_get_dest() may have
	 * picked a different source, so remember to copy the addresses
	 * into the new header again.
	 */
	if (SCTP_IS_ADDR_UNSPEC(v4, fp->sf_saddr) ||
	    (fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
		sctp_get_dest(sctp, fp);
		refreshed = B_TRUE;
	}

	/* Give up when no suitable source address exists. */
	if (fp->sf_state == SCTP_FADDRS_UNREACH)
		return (NULL);

	ASSERT(fp->sf_ixa->ixa_ire != NULL);
	ASSERT(!SCTP_IS_ADDR_UNSPEC(v4, fp->sf_saddr));

	/* Length of the header template matching fp's address family. */
	hdrlen = v4 ? sctp->sctp_hdr_len : sctp->sctp_hdr6_len;

	pkt = allocb(hdrlen + stack->sctps_wroff_xtra + trailer, BPRI_MED);
	if (pkt == NULL) {
		ip1dbg(("sctp_make_mp: error making mp..\n"));
		return (NULL);
	}

	/* Leave the extra write offset in front of the headers. */
	pkt->b_rptr += stack->sctps_wroff_xtra;
	pkt->b_wptr = pkt->b_rptr + hdrlen;
	ASSERT(OK_32PTR(pkt->b_wptr));

	/*
	 * The template carries the addresses of the current peer; they
	 * must be overwritten for any other peer or after a re-lookup.
	 */
	fix_addrs = (fp != sctp->sctp_current || refreshed) ?
	    B_TRUE : B_FALSE;

	if (!v4) {
		ip6_t *ip6h = (ip6_t *)(pkt->b_rptr);

		bcopy(sctp->sctp_iphc6, pkt->b_rptr, hdrlen);
		if (fix_addrs) {
			ip6h->ip6_dst = fp->sf_faddr;
			ip6h->ip6_src = fp->sf_saddr;
		}
	} else {
		ipha_t *iph = (ipha_t *)pkt->b_rptr;

		bcopy(sctp->sctp_iphc, pkt->b_rptr, hdrlen);
		if (fix_addrs) {
			IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, iph->ipha_dst);
			IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr, iph->ipha_src);
		}
		/* Set or clear the don't-fragment bit for this peer. */
		iph->ipha_fragment_offset_and_flags =
		    fp->sf_df ? htons(IPH_DF) : 0;
	}

	ASSERT(sctp->sctp_connp != NULL);
	return (pkt);
}

/*
 * Notify upper layers about preferred write offset, write size.
*/ void sctp_set_ulp_prop(sctp_t *sctp) { int hdrlen; struct sock_proto_props sopp; sctp_stack_t *sctps = sctp->sctp_sctps; if (sctp->sctp_current->sf_isv4) { hdrlen = sctp->sctp_hdr_len; } else { hdrlen = sctp->sctp_hdr6_len; } ASSERT(sctp->sctp_ulpd); sctp->sctp_connp->conn_wroff = sctps->sctps_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t); ASSERT(sctp->sctp_current->sf_pmss == sctp->sctp_mss); bzero(&sopp, sizeof (sopp)); sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF; sopp.sopp_wroff = sctp->sctp_connp->conn_wroff; sopp.sopp_maxblk = sctp->sctp_mss - sizeof (sctp_data_hdr_t); sctp->sctp_ulp_prop(sctp->sctp_ulpd, &sopp); } /* * Set the lengths in the packet and the transmit attributes. */ void sctp_set_iplen(sctp_t *sctp, mblk_t *mp, ip_xmit_attr_t *ixa) { uint16_t sum = 0; ipha_t *iph; ip6_t *ip6h; mblk_t *pmp = mp; boolean_t isv4; isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION); for (; pmp; pmp = pmp->b_cont) sum += pmp->b_wptr - pmp->b_rptr; ixa->ixa_pktlen = sum; if (isv4) { iph = (ipha_t *)mp->b_rptr; iph->ipha_length = htons(sum); ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr_len; } else { ip6h = (ip6_t *)mp->b_rptr; ip6h->ip6_plen = htons(sum - IPV6_HDR_LEN); ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr6_len; } } int sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2) { int na1 = 0; int overlap = 0; int equal = 1; int onematch; sctp_faddr_t *fp1, *fp2; for (fp1 = a1; fp1; fp1 = fp1->sf_next) { onematch = 0; for (fp2 = a2; fp2; fp2 = fp2->sf_next) { if (IN6_ARE_ADDR_EQUAL(&fp1->sf_faddr, &fp2->sf_faddr)) { overlap++; onematch = 1; break; } if (!onematch) { equal = 0; } } na1++; } if (equal) { return (SCTP_ADDR_EQUAL); } if (overlap == na1) { return (SCTP_ADDR_SUBSET); } if (overlap) { return (SCTP_ADDR_OVERLAP); } return (SCTP_ADDR_DISJOINT); } /* * Returns 0 on success, ENOMEM on memory allocation failure, EHOSTUNREACH * if the connection credentials fail remote host accreditation or * if the new destination does not support the previously 
established
 * connection security label.  If sleep is true, this function should
 * never fail for a memory allocation failure.  The boolean parameter
 * "first" decides whether the newly created faddr structure should be
 * added at the beginning of the list or at the end.
 *
 * Note: caller must hold conn fanout lock.
 */
int
sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep, boolean_t first)
{
	sctp_faddr_t *faddr;
	mblk_t *timer_mp;
	int err;
	conn_t *connp = sctp->sctp_connp;

	if (is_system_labeled()) {
		ip_xmit_attr_t *ixa = connp->conn_ixa;
		ts_label_t *effective_tsl = NULL;

		ASSERT(ixa->ixa_tsl != NULL);

		/*
		 * Verify the destination is allowed to receive packets
		 * at the security label of the connection we are initiating.
		 *
		 * tsol_check_dest() will create a new effective label for
		 * this connection with a modified label or label flags only
		 * if there are changes from the original label.
		 *
		 * Accept whatever label we get if this is the first
		 * destination address for this connection.  The security
		 * label and label flags must match any previous settings
		 * for all subsequent destination addresses.
		 */
		if (IN6_IS_ADDR_V4MAPPED(addr)) {
			uint32_t dst;
			IN6_V4MAPPED_TO_IPADDR(addr, dst);
			err = tsol_check_dest(ixa->ixa_tsl,
			    &dst, IPV4_VERSION, connp->conn_mac_mode,
			    connp->conn_zone_is_global, &effective_tsl);
		} else {
			err = tsol_check_dest(ixa->ixa_tsl,
			    addr, IPV6_VERSION, connp->conn_mac_mode,
			    connp->conn_zone_is_global, &effective_tsl);
		}
		if (err != 0)
			return (err);

		if (sctp->sctp_faddrs == NULL && effective_tsl != NULL) {
			/* First destination: adopt the effective label. */
			ip_xmit_attr_replace_tsl(ixa, effective_tsl);
		} else if (effective_tsl != NULL) {
			/* A later destination needs a different label. */
			label_rele(effective_tsl);
			return (EHOSTUNREACH);
		}
	}

	if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL)
		return (ENOMEM);
	bzero(faddr, sizeof (*faddr));
	timer_mp = sctp_timer_alloc((sctp), sctp_rexmit_timer, sleep);
	if (timer_mp == NULL) {
		kmem_cache_free(sctp_kmem_faddr_cache, faddr);
		return (ENOMEM);
	}
	/* Let the timer message find the faddr it belongs to. */
	((sctpt_t *)(timer_mp->b_rptr))->sctpt_faddr = faddr;

	/* Start with any options set on the conn */
	faddr->sf_ixa = conn_get_ixa_exclusive(connp);
	if (faddr->sf_ixa == NULL) {
		freemsg(timer_mp);
		kmem_cache_free(sctp_kmem_faddr_cache, faddr);
		return (ENOMEM);
	}
	faddr->sf_ixa->ixa_notify_cookie = connp->conn_sctp;

	sctp_init_faddr(sctp, faddr, addr, timer_mp);
	ASSERT(faddr->sf_ixa->ixa_cred != NULL);

	/* ip_attr_connect didn't allow broadcasts/multicast dest */
	ASSERT(faddr->sf_next == NULL);

	if (sctp->sctp_faddrs == NULL) {
		ASSERT(sctp->sctp_lastfaddr == NULL);
		/* only element on list; first and last are same */
		sctp->sctp_faddrs = sctp->sctp_lastfaddr = faddr;
	} else if (first) {
		ASSERT(sctp->sctp_lastfaddr != NULL);
		faddr->sf_next = sctp->sctp_faddrs;
		sctp->sctp_faddrs = faddr;
	} else {
		sctp->sctp_lastfaddr->sf_next = faddr;
		sctp->sctp_lastfaddr = faddr;
	}
	sctp->sctp_nfaddrs++;

	return (0);
}

/*
 * Return the peer address entry of sctp whose address equals addr, or
 * NULL if the association has no such peer address.
 */
sctp_faddr_t *
sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr)
{
	sctp_faddr_t *fp;

	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
		if (IN6_ARE_ADDR_EQUAL(&fp->sf_faddr, addr))
			break;
	}

	return (fp);
}

sctp_faddr_t *
sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr)
{
	/*
	 * Same search as sctp_lookup_faddr(), but starting from an arbitrary
	 * list element instead of an association.  Returns NULL if no entry
	 * from fp onwards carries addr.
	 */
	for (; fp; fp = fp->sf_next) {
		if (IN6_ARE_ADDR_EQUAL(&fp->sf_faddr, addr)) {
			break;
		}
	}

	return (fp);
}

/*
 * To change the currently used peer address to the specified one.
 */
void
sctp_set_faddr_current(sctp_t *sctp, sctp_faddr_t *fp)
{
	/* Now setup the composite header. */
	if (fp->sf_isv4) {
		IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr,
		    sctp->sctp_ipha->ipha_dst);
		IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr, sctp->sctp_ipha->ipha_src);
		/* update don't fragment bit */
		if (fp->sf_df) {
			sctp->sctp_ipha->ipha_fragment_offset_and_flags =
			    htons(IPH_DF);
		} else {
			sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0;
		}
	} else {
		sctp->sctp_ip6h->ip6_dst = fp->sf_faddr;
		sctp->sctp_ip6h->ip6_src = fp->sf_saddr;
	}

	sctp->sctp_current = fp;
	sctp->sctp_mss = fp->sf_pmss;

	/* Update the upper layer for the change. */
	if (!SCTP_IS_DETACHED(sctp))
		sctp_set_ulp_prop(sctp);
}

/*
 * Redo the destination lookup (sctp_get_dest()) for every peer address
 * of the association.
 */
void
sctp_redo_faddr_srcs(sctp_t *sctp)
{
	sctp_faddr_t *fp;

	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
		sctp_get_dest(sctp, fp);
	}
}

/*
 * Record that the peer address fp is alive: clear its strike count,
 * refresh its last-active and heartbeat-expiry times, and, if it was not
 * already in the ALIVE state, move it there, raise SCTP_ADDR_AVAILABLE
 * and redo its destination lookup.
 */
void
sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
{
	int64_t now = LBOLT_FASTPATH64;

	/*
	 * If we are under memory pressure, we abort association waiting
	 * in zero window probing state for too long.  We do this by not
	 * resetting sctp_strikes.  So if sctp_zero_win_probe continues
	 * while under memory pressure, this association will eventually
	 * time out.
	 */
	if (!sctp->sctp_zero_win_probe || !sctp->sctp_sctps->sctps_reclaim) {
		sctp->sctp_strikes = 0;
	}
	fp->sf_strikes = 0;
	fp->sf_lastactive = now;
	fp->sf_hb_expiry = now + SET_HB_INTVL(fp);
	fp->sf_hb_pending = B_FALSE;
	if (fp->sf_state != SCTP_FADDRS_ALIVE) {
		fp->sf_state = SCTP_FADDRS_ALIVE;
		sctp_intf_event(sctp, fp->sf_faddr, SCTP_ADDR_AVAILABLE, 0);
		/* Should have a full IRE now */
		sctp_get_dest(sctp, fp);

		/*
		 * If this is the primary, switch back to it now.  And
		 * we probably want to reset the source addr used to reach
		 * it.
		 * Note that if we didn't find a source in sctp_get_dest
		 * then we'd be unreachable at this point in time.
		 */
		if (fp == sctp->sctp_primary &&
		    fp->sf_state != SCTP_FADDRS_UNREACH) {
			sctp_set_faddr_current(sctp, fp);
			return;
		}
	}
}

/*
 * Return B_TRUE if there is still an active peer address with zero strikes;
 * otherwise return B_FALSE.
 */
boolean_t
sctp_is_a_faddr_clean(sctp_t *sctp)
{
	sctp_faddr_t *fp;

	for (fp = sctp->sctp_faddrs; fp; fp = fp->sf_next) {
		if (fp->sf_state == SCTP_FADDRS_ALIVE && fp->sf_strikes == 0) {
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}

/*
 * Returns 0 if there is at least one other active faddr, -1 if there
 * are none.  If there are none left, faddr_dead() will start killing the
 * association.
 * If the downed faddr was the current faddr, a new current faddr
 * will be chosen.
 */
int
sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate)
{
	sctp_faddr_t *ofp;
	sctp_stack_t *sctps = sctp->sctp_sctps;

	/* Only an ALIVE -> not-alive transition is reported upwards. */
	if (fp->sf_state == SCTP_FADDRS_ALIVE) {
		sctp_intf_event(sctp, fp->sf_faddr, SCTP_ADDR_UNREACHABLE, 0);
	}
	fp->sf_state = newstate;

	dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n",
	    SCTP_PRINTADDR(fp->sf_faddr), newstate));

	if (fp == sctp->sctp_current) {
		/* Current faddr down; need to switch it */
		sctp->sctp_current = NULL;
	}

	/* Find next alive faddr */
	ofp = fp;
	for (fp = fp->sf_next; fp != NULL; fp = fp->sf_next) {
		if (fp->sf_state == SCTP_FADDRS_ALIVE) {
			break;
		}
	}

	if (fp == NULL) {
		/* Continue from beginning of list */
		for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->sf_next) {
			if (fp->sf_state == SCTP_FADDRS_ALIVE) {
				break;
			}
		}
	}

	/*
	 * Find a new fp, so if the current faddr is dead, use the new fp
	 * as the current one.
	 */
	if (fp != ofp) {
		if (sctp->sctp_current == NULL) {
			dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n",
			    SCTP_PRINTADDR(fp->sf_faddr)));
			/*
			 * Note that we don't need to reset the source addr
			 * of the new fp.
			 */
			sctp_set_faddr_current(sctp, fp);
		}
		return (0);
	}

	/* All faddrs are down; kill the association */
	dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n"));
	SCTPS_BUMP_MIB(sctps, sctpAborted);
	sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ?
	    SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL);
	sctp_clean_death(sctp, sctp->sctp_client_errno ?
	    sctp->sctp_client_errno : ETIMEDOUT);

	return (-1);
}

/*
 * Pick the peer address to use after ofp (or after the current address
 * when ofp is NULL).  The list is scanned circularly starting at the
 * element following ofp.
 */
sctp_faddr_t *
sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp)
{
	sctp_faddr_t *nfp = NULL;
	sctp_faddr_t *saved_fp = NULL;
	int min_strikes;

	if (ofp == NULL) {
		ofp = sctp->sctp_current;
	}
	/* Nothing to do */
	if (sctp->sctp_nfaddrs < 2)
		return (ofp);

	/*
	 * Find the next live peer address with zero strikes.  In case
	 * there is none, find the one with the lowest number of strikes.
	 */
	min_strikes = ofp->sf_strikes;
	nfp = ofp->sf_next;
	while (nfp != ofp) {
		/* If reached end of list, continue scan from the head */
		if (nfp == NULL) {
			nfp = sctp->sctp_faddrs;
			continue;
		}
		if (nfp->sf_state == SCTP_FADDRS_ALIVE) {
			if (nfp->sf_strikes == 0)
				break;
			if (nfp->sf_strikes < min_strikes) {
				min_strikes = nfp->sf_strikes;
				saved_fp = nfp;
			}
		}
		nfp = nfp->sf_next;
	}
	/* If reached the old address, there is no zero strike path */
	if (nfp == ofp)
		nfp = NULL;

	/*
	 * If there is a peer address with zero strikes we use that, if not
	 * return a peer address with fewer strikes than the one last used,
	 * if neither exist we may as well stay with the old one.
*/ if (nfp != NULL) return (nfp); if (saved_fp != NULL) return (saved_fp); return (ofp); } void sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp) { sctp_faddr_t *fpp; if (!sctp->sctp_faddrs) { return; } if (fp->sf_timer_mp != NULL) { sctp_timer_free(fp->sf_timer_mp); fp->sf_timer_mp = NULL; fp->sf_timer_running = 0; } if (fp->sf_rc_timer_mp != NULL) { sctp_timer_free(fp->sf_rc_timer_mp); fp->sf_rc_timer_mp = NULL; fp->sf_rc_timer_running = 0; } if (fp->sf_ixa != NULL) { ixa_refrele(fp->sf_ixa); fp->sf_ixa = NULL; } if (fp == sctp->sctp_faddrs) { goto gotit; } for (fpp = sctp->sctp_faddrs; fpp->sf_next != fp; fpp = fpp->sf_next) ; gotit: ASSERT(sctp->sctp_conn_tfp != NULL); mutex_enter(&sctp->sctp_conn_tfp->tf_lock); if (fp == sctp->sctp_faddrs) { sctp->sctp_faddrs = fp->sf_next; } else { fpp->sf_next = fp->sf_next; } mutex_exit(&sctp->sctp_conn_tfp->tf_lock); kmem_cache_free(sctp_kmem_faddr_cache, fp); sctp->sctp_nfaddrs--; } void sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock) { sctp_faddr_t *fp, *fpn; if (sctp->sctp_faddrs == NULL) { ASSERT(sctp->sctp_lastfaddr == NULL); return; } ASSERT(sctp->sctp_lastfaddr != NULL); sctp->sctp_lastfaddr = NULL; sctp->sctp_current = NULL; sctp->sctp_primary = NULL; sctp_free_faddr_timers(sctp); if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { /* in conn fanout; need to hold lock */ mutex_enter(&sctp->sctp_conn_tfp->tf_lock); } for (fp = sctp->sctp_faddrs; fp; fp = fpn) { fpn = fp->sf_next; if (fp->sf_ixa != NULL) { ixa_refrele(fp->sf_ixa); fp->sf_ixa = NULL; } kmem_cache_free(sctp_kmem_faddr_cache, fp); sctp->sctp_nfaddrs--; } sctp->sctp_faddrs = NULL; ASSERT(sctp->sctp_nfaddrs == 0); if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) { mutex_exit(&sctp->sctp_conn_tfp->tf_lock); } } void sctp_zap_addrs(sctp_t *sctp) { sctp_zap_faddrs(sctp, 0); sctp_free_saddrs(sctp); } /* * Build two SCTP header templates; one for IPv4 and one for IPv6. * Store them in sctp_iphc and sctp_iphc6 respectively (and related fields). 
* There are no IP addresses in the templates, but the port numbers and
 * verifier are filled in from the conn_t and sctp_t.
 *
 * Returns failure if can't allocate memory, or if there is a problem
 * with a routing header/option.
 *
 * We allocate space for the minimum sctp header (sctp_hdr_t).
 *
 * We massage a routing option/header.  There is no checksum implication
 * for a routing header for sctp.
 *
 * Caller needs to update conn_wroff if desired.
 *
 * TSol notes: This assumes that a SCTP association has a single peer label
 * since we only track a single pair of ipp_label_v4/v6 and not a separate one
 * for each faddr.
 */
int
sctp_build_hdrs(sctp_t *sctp, int sleep)
{
	conn_t *connp = sctp->sctp_connp;
	ip_pkt_t *ipp = &connp->conn_xmit_ipp;
	uint_t ip_hdr_length;
	uchar_t *hdrs;
	uint_t hdrs_len;
	uint_t ulp_hdr_length = sizeof (sctp_hdr_t);
	ipha_t *ipha;
	ip6_t *ip6h;
	sctp_hdr_t *sctph;
	in6_addr_t v6src, v6dst;
	ipaddr_t v4src, v4dst;

	v4src = connp->conn_saddr_v4;
	v4dst = connp->conn_faddr_v4;
	v6src = connp->conn_saddr_v6;
	v6dst = connp->conn_faddr_v6;

	/* First do IPv4 header */
	ip_hdr_length = ip_total_hdrs_len_v4(ipp);

	/* In case of TX label and IP options it can be too much */
	if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
		/* Preserves existing TX errno for this */
		return (EHOSTUNREACH);
	}
	hdrs_len = ip_hdr_length + ulp_hdr_length;
	ASSERT(hdrs_len != 0);

	/* Reuse the existing template buffer when its size still fits. */
	if (hdrs_len != sctp->sctp_iphc_len) {
		/* Allocate new before we free any old */
		hdrs = kmem_alloc(hdrs_len, sleep);
		if (hdrs == NULL)
			return (ENOMEM);

		if (sctp->sctp_iphc != NULL)
			kmem_free(sctp->sctp_iphc, sctp->sctp_iphc_len);
		sctp->sctp_iphc = hdrs;
		sctp->sctp_iphc_len = hdrs_len;
	} else {
		hdrs = sctp->sctp_iphc;
	}
	sctp->sctp_hdr_len = sctp->sctp_iphc_len;
	sctp->sctp_ip_hdr_len = ip_hdr_length;

	/* The SCTP common header follows the IP header and its options. */
	sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length);
	sctp->sctp_sctph = sctph;
	sctph->sh_sport = connp->conn_lport;
	sctph->sh_dport = connp->conn_fport;
	sctph->sh_verf = sctp->sctp_fvtag;
	sctph->sh_chksum = 0;

	ipha = (ipha_t *)hdrs;
	sctp->sctp_ipha = ipha;

	ipha->ipha_src = v4src;
	ipha->ipha_dst = v4dst;
	ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
	ipha->ipha_length = htons(hdrs_len);
	ipha->ipha_fragment_offset_and_flags = 0;

	if (ipp->ipp_fields & IPPF_IPV4_OPTIONS)
		(void) ip_massage_options(ipha, connp->conn_netstack);

	/* Now IPv6 */
	ip_hdr_length = ip_total_hdrs_len_v6(ipp);
	hdrs_len = ip_hdr_length + ulp_hdr_length;
	ASSERT(hdrs_len != 0);

	if (hdrs_len != sctp->sctp_iphc6_len) {
		/* Allocate new before we free any old */
		hdrs = kmem_alloc(hdrs_len, sleep);
		if (hdrs == NULL)
			return (ENOMEM);

		if (sctp->sctp_iphc6 != NULL)
			kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
		sctp->sctp_iphc6 = hdrs;
		sctp->sctp_iphc6_len = hdrs_len;
	} else {
		hdrs = sctp->sctp_iphc6;
	}
	sctp->sctp_hdr6_len = sctp->sctp_iphc6_len;
	sctp->sctp_ip_hdr6_len = ip_hdr_length;

	sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length);
	sctp->sctp_sctph6 = sctph;
	sctph->sh_sport = connp->conn_lport;
	sctph->sh_dport = connp->conn_fport;
	sctph->sh_verf = sctp->sctp_fvtag;
	sctph->sh_chksum = 0;

	ip6h = (ip6_t *)hdrs;
	sctp->sctp_ip6h = ip6h;

	ip6h->ip6_src = v6src;
	ip6h->ip6_dst = v6dst;
	ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
	    connp->conn_flowinfo);
	ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);

	if (ipp->ipp_fields & IPPF_RTHDR) {
		uint8_t *end;
		ip6_rthdr_t *rth;

		end = (uint8_t *)ip6h + ip_hdr_length;
		rth = ip_find_rthdr_v6(ip6h, end);
		if (rth != NULL) {
			(void) ip_massage_options_v6(ip6h, rth,
			    connp->conn_netstack);
		}

		/*
		 * Verify that the first hop isn't a mapped address.
		 * Routers along the path need to do this verification
		 * for subsequent hops.
		 */
		if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
			return (EADDRNOTAVAIL);
	}
	return (0);
}

/*
 * Update the label state of the connection for the IPv4 peer address fp
 * by way of conn_update_label().  Returns that function's result.
 */
static int
sctp_v4_label(sctp_t *sctp, sctp_faddr_t *fp)
{
	conn_t *connp = sctp->sctp_connp;

	ASSERT(fp->sf_ixa->ixa_flags & IXAF_IS_IPV4);
	return (conn_update_label(connp, fp->sf_ixa, &fp->sf_faddr,
	    &connp->conn_xmit_ipp));
}

/*
 * IPv6 counterpart of sctp_v4_label(): same call, but fp must not be an
 * IPv4 peer address.
 */
static int
sctp_v6_label(sctp_t *sctp, sctp_faddr_t *fp)
{
	conn_t *connp = sctp->sctp_connp;

	ASSERT(!(fp->sf_ixa->ixa_flags & IXAF_IS_IPV4));
	return (conn_update_label(connp, fp->sf_ixa, &fp->sf_faddr,
	    &connp->conn_xmit_ipp));
}

/*
 * XXX implement more sophisticated logic
 *
 * Tsol note: We have already verified the addresses using tsol_check_dest
 * in sctp_add_faddr, thus no need to redo that here.
 * We do setup ipp_label_v4 and ipp_label_v6 based on which addresses
 * we have.
 *
 * Returns 0 if at least one IPv4 or IPv6 peer address was accepted,
 * EACCES otherwise.
 */
int
sctp_set_hdraddrs(sctp_t *sctp)
{
	sctp_faddr_t *fp;
	int gotv4 = 0;
	int gotv6 = 0;
	conn_t *connp = sctp->sctp_connp;

	ASSERT(sctp->sctp_faddrs != NULL);
	ASSERT(sctp->sctp_nsaddrs > 0);

	/* Set up using the primary first */
	connp->conn_faddr_v6 = sctp->sctp_primary->sf_faddr;
	/* saddr may be unspec; make_mp() will handle this */
	connp->conn_saddr_v6 = sctp->sctp_primary->sf_saddr;
	connp->conn_laddr_v6 = connp->conn_saddr_v6;
	if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->sf_faddr)) {
		if (!is_system_labeled() ||
		    sctp_v4_label(sctp, sctp->sctp_primary) == 0) {
			gotv4 = 1;
			/* An AF_INET endpoint needs nothing beyond v4. */
			if (connp->conn_family == AF_INET) {
				goto done;
			}
		}
	} else {
		if (!is_system_labeled() ||
		    sctp_v6_label(sctp, sctp->sctp_primary) == 0) {
			gotv6 = 1;
		}
	}

	/* Look through the peer list for the family still missing. */
	for (fp = sctp->sctp_faddrs; fp; fp = fp->sf_next) {
		if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
			if (!is_system_labeled() ||
			    sctp_v4_label(sctp, fp) == 0) {
				gotv4 = 1;
				if (connp->conn_family == AF_INET || gotv6) {
					break;
				}
			}
		} else if (!gotv6 && !IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
			if (!is_system_labeled() ||
			    sctp_v6_label(sctp, fp) == 0) {
				gotv6 = 1;
				if (gotv4)
					break;
			}
		}
	}

done:
	if (!gotv4 && !gotv6)
		return (EACCES);

	return (0);
}

/*
 * got_errchunk is set
B_TRUE only if called from validate_init_params(), when * an ERROR chunk is already prepended the size of which needs updating for * additional unrecognized parameters. Other callers either prepend the ERROR * chunk with the correct size after calling this function, or they are calling * to add an invalid parameter to an INIT_ACK chunk, in that case no ERROR chunk * exists, the CAUSE blocks go into the INIT_ACK directly. * * *errmp will be non-NULL both when adding an additional CAUSE block to an * existing prepended COOKIE ERROR chunk (processing params of an INIT_ACK), * and when adding unrecognized parameters after the first, to an INIT_ACK * (processing params of an INIT chunk). */ void sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp, boolean_t got_errchunk) { mblk_t *mp; sctp_parm_hdr_t *ph; size_t len; int pad; sctp_chunk_hdr_t *ecp; len = sizeof (*ph) + ntohs(uph->sph_len); if ((pad = len % SCTP_ALIGN) != 0) { pad = SCTP_ALIGN - pad; len += pad; } mp = allocb(len, BPRI_MED); if (mp == NULL) { return; } ph = (sctp_parm_hdr_t *)(mp->b_rptr); ph->sph_type = htons(PARM_UNRECOGNIZED); ph->sph_len = htons(len - pad); /* copy in the unrecognized parameter */ bcopy(uph, ph + 1, ntohs(uph->sph_len)); if (pad != 0) bzero((mp->b_rptr + len - pad), pad); mp->b_wptr = mp->b_rptr + len; if (*errmp != NULL) { /* * Update total length if an ERROR chunk, then link * this CAUSE block to the possible chain of CAUSE * blocks attached to the ERROR chunk or INIT_ACK * being created. 
*/ if (got_errchunk) { /* ERROR chunk already prepended */ ecp = (sctp_chunk_hdr_t *)((*errmp)->b_rptr); ecp->sch_len = htons(ntohs(ecp->sch_len) + len); } linkb(*errmp, mp); } else { *errmp = mp; } } /* * o Bounds checking * o Updates remaining * o Checks alignment */ sctp_parm_hdr_t * sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining) { int pad; uint16_t len; len = ntohs(current->sph_len); *remaining -= len; if (*remaining < sizeof (*current) || len < sizeof (*current)) { return (NULL); } if ((pad = len & (SCTP_ALIGN - 1)) != 0) { pad = SCTP_ALIGN - pad; *remaining -= pad; } /*LINTED pointer cast may result in improper alignment*/ current = (sctp_parm_hdr_t *)((char *)current + len + pad); return (current); } /* * Sets the address parameters given in the INIT chunk into sctp's * faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are * no address parameters in the INIT chunk, a single faddr is created * from the ip hdr at the beginning of pkt. * If there already are existing addresses hanging from sctp, merge * them in, if the old info contains addresses which are not present * in this new info, get rid of them, and clean the pointers if there's * messages which have this as their target address. * * We also re-adjust the source address list here since the list may * contain more than what is actually part of the association. If * we get here from sctp_send_cookie_echo(), we are on the active * side and psctp will be NULL and ich will be the INIT-ACK chunk. * If we get here from sctp_accept_comm(), ich will be the INIT chunk * and psctp will the listening endpoint. * * INIT processing: When processing the INIT we inherit the src address * list from the listener. For a loopback or linklocal association, we * delete the list and just take the address from the IP header (since * that's how we created the INIT-ACK). Additionally, for loopback we * ignore the address params in the INIT. 
For determining which address * types were sent in the INIT-ACK we follow the same logic as in * creating the INIT-ACK. We delete addresses of the type that are not * supported by the peer. * * INIT-ACK processing: When processing the INIT-ACK since we had not * included addr params for loopback or linklocal addresses when creating * the INIT, we just use the address from the IP header. Further, for * loopback we ignore the addr param list. We mark addresses of the * type not supported by the peer as unconfirmed. * * In case of INIT processing we look for supported address types in the * supported address param, if present. In both cases the address type in * the IP header is supported as well as types for addresses in the param * list, if any. * * Once we have the supported address types sctp_check_saddr() runs through * the source address list and deletes or marks as unconfirmed address of * types not supported by the peer. * * Returns 0 on success, sys errno on failure */ int sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt, sctp_chunk_hdr_t *ich, uint_t *sctp_options) { sctp_init_chunk_t *init; ipha_t *iph; ip6_t *ip6h; in6_addr_t hdrsaddr[1]; in6_addr_t hdrdaddr[1]; sctp_parm_hdr_t *ph; ssize_t remaining; int isv4; int err; sctp_faddr_t *fp; int supp_af = 0; boolean_t check_saddr = B_TRUE; in6_addr_t curaddr; sctp_stack_t *sctps = sctp->sctp_sctps; conn_t *connp = sctp->sctp_connp; if (sctp_options != NULL) *sctp_options = 0; /* extract the address from the IP header */ isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); if (isv4) { iph = (ipha_t *)pkt->b_rptr; IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr); IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr); supp_af |= PARM_SUPP_V4; } else { ip6h = (ip6_t *)pkt->b_rptr; hdrsaddr[0] = ip6h->ip6_src; hdrdaddr[0] = ip6h->ip6_dst; supp_af |= PARM_SUPP_V6; } /* * Unfortunately, we can't delay this because adding an faddr * looks for the presence of the source address (from the ire * for the faddr) in 
the source address list. We could have * delayed this if, say, this was a loopback/linklocal connection. * Now, we just end up nuking this list and taking the addr from * the IP header for loopback/linklocal. */ if (psctp != NULL && psctp->sctp_nsaddrs > 0) { ASSERT(sctp->sctp_nsaddrs == 0); err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP); if (err != 0) return (err); } /* * We will add the faddr before parsing the address list as this * might be a loopback connection and we would not have to * go through the list. * * Make sure the header's addr is in the list */ fp = sctp_lookup_faddr(sctp, hdrsaddr); if (fp == NULL) { /* not included; add it now */ err = sctp_add_faddr(sctp, hdrsaddr, KM_NOSLEEP, B_TRUE); if (err != 0) return (err); /* sctp_faddrs will be the hdr addr */ fp = sctp->sctp_faddrs; } /* make the header addr the primary */ if (cl_sctp_assoc_change != NULL && psctp == NULL) curaddr = sctp->sctp_current->sf_faddr; sctp->sctp_primary = fp; sctp->sctp_current = fp; sctp->sctp_mss = fp->sf_pmss; /* For loopback connections & linklocal get address from the header */ if (sctp->sctp_loopback || sctp->sctp_linklocal) { if (sctp->sctp_nsaddrs != 0) sctp_free_saddrs(sctp); if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0) return (err); /* For loopback ignore address list */ if (sctp->sctp_loopback) return (0); check_saddr = B_FALSE; } /* Walk the params in the INIT [ACK], pulling out addr params */ remaining = ntohs(ich->sch_len) - sizeof (*ich) - sizeof (sctp_init_chunk_t); if (remaining < sizeof (*ph)) { if (check_saddr) { sctp_check_saddr(sctp, supp_af, psctp == NULL ? 
B_FALSE : B_TRUE, hdrdaddr); } ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); return (0); } init = (sctp_init_chunk_t *)(ich + 1); ph = (sctp_parm_hdr_t *)(init + 1); /* params will have already been byteordered when validating */ while (ph != NULL) { if (ph->sph_type == htons(PARM_SUPP_ADDRS)) { int plen; uint16_t *p; uint16_t addrtype; ASSERT(psctp != NULL); plen = ntohs(ph->sph_len); p = (uint16_t *)(ph + 1); while (plen > 0) { addrtype = ntohs(*p); switch (addrtype) { case PARM_ADDR6: supp_af |= PARM_SUPP_V6; break; case PARM_ADDR4: supp_af |= PARM_SUPP_V4; break; default: break; } p++; plen -= sizeof (*p); } } else if (ph->sph_type == htons(PARM_ADDR4)) { if (remaining >= PARM_ADDR4_LEN) { in6_addr_t addr; ipaddr_t ta; supp_af |= PARM_SUPP_V4; /* * Screen out broad/multicasts & loopback. * If the endpoint only accepts v6 address, * go to the next one. * * Subnet broadcast check is done in * sctp_add_faddr(). If the address is * a broadcast address, it won't be added. */ bcopy(ph + 1, &ta, sizeof (ta)); if (ta == 0 || ta == INADDR_BROADCAST || ta == htonl(INADDR_LOOPBACK) || CLASSD(ta) || connp->conn_ipv6_v6only) { goto next; } IN6_INADDR_TO_V4MAPPED((struct in_addr *) (ph + 1), &addr); /* Check for duplicate. */ if (sctp_lookup_faddr(sctp, &addr) != NULL) goto next; /* OK, add it to the faddr set */ err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP, B_FALSE); /* Something is wrong... Try the next one. */ if (err != 0) goto next; } } else if (ph->sph_type == htons(PARM_ADDR6) && connp->conn_family == AF_INET6) { /* An v4 socket should not take v6 addresses. */ if (remaining >= PARM_ADDR6_LEN) { in6_addr_t *addr6; supp_af |= PARM_SUPP_V6; addr6 = (in6_addr_t *)(ph + 1); /* * Screen out link locals, mcast, loopback * and bogus v6 address. */ if (IN6_IS_ADDR_LINKLOCAL(addr6) || IN6_IS_ADDR_MULTICAST(addr6) || IN6_IS_ADDR_LOOPBACK(addr6) || IN6_IS_ADDR_V4MAPPED(addr6)) { goto next; } /* Check for duplicate. 
*/ if (sctp_lookup_faddr(sctp, addr6) != NULL) goto next; err = sctp_add_faddr(sctp, (in6_addr_t *)(ph + 1), KM_NOSLEEP, B_FALSE); /* Something is wrong... Try the next one. */ if (err != 0) goto next; } } else if (ph->sph_type == htons(PARM_FORWARD_TSN)) { if (sctp_options != NULL) *sctp_options |= SCTP_PRSCTP_OPTION; } /* else; skip */ next: ph = sctp_next_parm(ph, &remaining); } if (check_saddr) { sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE : B_TRUE, hdrdaddr); } ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL); /* * We have the right address list now, update clustering's * knowledge because when we sent the INIT we had just added * the address the INIT was sent to. */ if (psctp == NULL && cl_sctp_assoc_change != NULL) { uchar_t *alist; size_t asize; uchar_t *dlist; size_t dsize; asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; alist = kmem_alloc(asize, KM_NOSLEEP); if (alist == NULL) { SCTP_KSTAT(sctps, sctp_cl_assoc_change); return (ENOMEM); } /* * Just include the address the INIT was sent to in the * delete list and send the entire faddr list. We could * do it differently (i.e include all the addresses in the * add list even if it contains the original address OR * remove the original address from the add list etc.), but * this seems reasonable enough. */ dsize = sizeof (in6_addr_t); dlist = kmem_alloc(dsize, KM_NOSLEEP); if (dlist == NULL) { kmem_free(alist, asize); SCTP_KSTAT(sctps, sctp_cl_assoc_change); return (ENOMEM); } bcopy(&curaddr, dlist, sizeof (curaddr)); sctp_get_faddr_list(sctp, alist, asize); (*cl_sctp_assoc_change)(connp->conn_family, alist, asize, sctp->sctp_nfaddrs, dlist, dsize, 1, SCTP_CL_PADDR, (cl_sctp_handle_t)sctp); /* alist and dlist will be freed by the clustering module */ } return (0); } /* * Returns 0 if the check failed and the restart should be refused, * 1 if the check succeeded. 
*/ int sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports, int sleep, sctp_stack_t *sctps, ip_recv_attr_t *ira) { sctp_faddr_t *fp, *fphead = NULL; sctp_parm_hdr_t *ph; ssize_t remaining; int isv4; ipha_t *iph; ip6_t *ip6h; in6_addr_t hdraddr[1]; int retval = 0; sctp_tf_t *tf; sctp_t *sctp; int compres; sctp_init_chunk_t *init; int nadded = 0; /* extract the address from the IP header */ isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION); if (isv4) { iph = (ipha_t *)pkt->b_rptr; IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr); } else { ip6h = (ip6_t *)pkt->b_rptr; hdraddr[0] = ip6h->ip6_src; } /* Walk the params in the INIT [ACK], pulling out addr params */ remaining = ntohs(ich->sch_len) - sizeof (*ich) - sizeof (sctp_init_chunk_t); if (remaining < sizeof (*ph)) { /* no parameters; restart OK */ return (1); } init = (sctp_init_chunk_t *)(ich + 1); ph = (sctp_parm_hdr_t *)(init + 1); while (ph != NULL) { sctp_faddr_t *fpa = NULL; /* params will have already been byteordered when validating */ if (ph->sph_type == htons(PARM_ADDR4)) { if (remaining >= PARM_ADDR4_LEN) { in6_addr_t addr; IN6_INADDR_TO_V4MAPPED((struct in_addr *) (ph + 1), &addr); fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep); if (fpa == NULL) { goto done; } bzero(fpa, sizeof (*fpa)); fpa->sf_faddr = addr; fpa->sf_next = NULL; } } else if (ph->sph_type == htons(PARM_ADDR6)) { if (remaining >= PARM_ADDR6_LEN) { fpa = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep); if (fpa == NULL) { goto done; } bzero(fpa, sizeof (*fpa)); bcopy(ph + 1, &fpa->sf_faddr, sizeof (fpa->sf_faddr)); fpa->sf_next = NULL; } } /* link in the new addr, if it was an addr param */ if (fpa != NULL) { if (fphead == NULL) { fphead = fpa; } else { fpa->sf_next = fphead; fphead = fpa; } } ph = sctp_next_parm(ph, &remaining); } if (fphead == NULL) { /* no addr parameters; restart OK */ return (1); } /* * got at least one; make sure the header's addr is * in the list */ fp = 
sctp_lookup_faddr_nosctp(fphead, hdraddr); if (fp == NULL) { /* not included; add it now */ fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep); if (fp == NULL) { goto done; } bzero(fp, sizeof (*fp)); fp->sf_faddr = *hdraddr; fp->sf_next = fphead; fphead = fp; } /* * Now, we can finally do the check: For each sctp instance * on the hash line for ports, compare its faddr set against * the new one. If the new one is a strict subset of any * existing sctp's faddrs, the restart is OK. However, if there * is an overlap, this could be an attack, so return failure. * If all sctp's faddrs are disjoint, this is a legitimate new * association. */ tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]); mutex_enter(&tf->tf_lock); for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) { if (ports != sctp->sctp_connp->conn_ports) { continue; } compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs); if (compres <= SCTP_ADDR_SUBSET) { retval = 1; mutex_exit(&tf->tf_lock); goto done; } if (compres == SCTP_ADDR_OVERLAP) { dprint(1, ("new assoc from %x:%x:%x:%x overlaps with %p\n", SCTP_PRINTADDR(*hdraddr), (void *)sctp)); /* * While we still hold the lock, we need to * figure out which addresses have been * added so we can include them in the abort * we will send back. Since these faddrs will * never be used, we overload the rto field * here, setting it to 0 if the address was * not added, 1 if it was added. 
*/ for (fp = fphead; fp; fp = fp->sf_next) { if (sctp_lookup_faddr(sctp, &fp->sf_faddr)) { fp->sf_rto = 0; } else { fp->sf_rto = 1; nadded++; } } mutex_exit(&tf->tf_lock); goto done; } } mutex_exit(&tf->tf_lock); /* All faddrs are disjoint; legit new association */ retval = 1; done: /* If are attempted adds, send back an abort listing the addrs */ if (nadded > 0) { void *dtail; size_t dlen; dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP); if (dtail == NULL) { goto cleanup; } ph = dtail; dlen = 0; for (fp = fphead; fp; fp = fp->sf_next) { if (fp->sf_rto == 0) { continue; } if (IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) { ipaddr_t addr4; ph->sph_type = htons(PARM_ADDR4); ph->sph_len = htons(PARM_ADDR4_LEN); IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, addr4); ph++; bcopy(&addr4, ph, sizeof (addr4)); ph = (sctp_parm_hdr_t *) ((char *)ph + sizeof (addr4)); dlen += PARM_ADDR4_LEN; } else { ph->sph_type = htons(PARM_ADDR6); ph->sph_len = htons(PARM_ADDR6_LEN); ph++; bcopy(&fp->sf_faddr, ph, sizeof (fp->sf_faddr)); ph = (sctp_parm_hdr_t *) ((char *)ph + sizeof (fp->sf_faddr)); dlen += PARM_ADDR6_LEN; } } /* Send off the abort */ sctp_send_abort(sctp, sctp_init2vtag(ich), SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE, ira); kmem_free(dtail, PARM_ADDR6_LEN * nadded); } cleanup: /* Clean up */ if (fphead) { sctp_faddr_t *fpn; for (fp = fphead; fp; fp = fpn) { fpn = fp->sf_next; if (fp->sf_ixa != NULL) { ixa_refrele(fp->sf_ixa); fp->sf_ixa = NULL; } kmem_cache_free(sctp_kmem_faddr_cache, fp); } } return (retval); } /* * Reset any state related to transmitted chunks. */ void sctp_congest_reset(sctp_t *sctp) { sctp_faddr_t *fp; sctp_stack_t *sctps = sctp->sctp_sctps; mblk_t *mp; for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) { fp->sf_ssthresh = sctps->sctps_initial_mtu; SET_CWND(fp, fp->sf_pmss, sctps->sctps_slow_start_initial); fp->sf_suna = 0; fp->sf_pba = 0; } /* * Clean up the transmit list as well since we have reset accounting * on all the fps. 
Send event upstream, if required. */ while ((mp = sctp->sctp_xmit_head) != NULL) { sctp->sctp_xmit_head = mp->b_next; mp->b_next = NULL; if (sctp->sctp_xmit_head != NULL) sctp->sctp_xmit_head->b_prev = NULL; sctp_sendfail_event(sctp, mp, 0, B_TRUE); } sctp->sctp_xmit_head = NULL; sctp->sctp_xmit_tail = NULL; sctp->sctp_xmit_unacked = NULL; sctp->sctp_unacked = 0; /* * Any control message as well. We will clean-up this list as well. * This contains any pending ASCONF request that we have queued/sent. * If we do get an ACK we will just drop it. However, given that * we are restarting chances are we aren't going to get any. */ if (sctp->sctp_cxmit_list != NULL) sctp_asconf_free_cxmit(sctp, NULL); sctp->sctp_cxmit_list = NULL; sctp->sctp_cchunk_pend = 0; sctp->sctp_rexmitting = B_FALSE; sctp->sctp_rxt_nxttsn = 0; sctp->sctp_rxt_maxtsn = 0; sctp->sctp_zero_win_probe = B_FALSE; } static void sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr, mblk_t *timer_mp) { sctp_stack_t *sctps = sctp->sctp_sctps; ASSERT(fp->sf_ixa != NULL); bcopy(addr, &fp->sf_faddr, sizeof (*addr)); if (IN6_IS_ADDR_V4MAPPED(addr)) { fp->sf_isv4 = 1; /* Make sure that sf_pmss is a multiple of SCTP_ALIGN. */ fp->sf_pmss = (sctps->sctps_initial_mtu - sctp->sctp_hdr_len) & ~(SCTP_ALIGN - 1); fp->sf_ixa->ixa_flags |= IXAF_IS_IPV4; } else { fp->sf_isv4 = 0; fp->sf_pmss = (sctps->sctps_initial_mtu - sctp->sctp_hdr6_len) & ~(SCTP_ALIGN - 1); fp->sf_ixa->ixa_flags &= ~IXAF_IS_IPV4; } fp->sf_cwnd = sctps->sctps_slow_start_initial * fp->sf_pmss; fp->sf_rto = MIN(sctp->sctp_rto_initial, sctp->sctp_rto_max_init); SCTP_MAX_RTO(sctp, fp); fp->sf_srtt = -1; fp->sf_rtt_updates = 0; fp->sf_strikes = 0; fp->sf_max_retr = sctp->sctp_pp_max_rxt; /* Mark it as not confirmed. 
*/ fp->sf_state = SCTP_FADDRS_UNCONFIRMED; fp->sf_hb_interval = sctp->sctp_hb_interval; fp->sf_ssthresh = sctps->sctps_initial_ssthresh; fp->sf_suna = 0; fp->sf_pba = 0; fp->sf_acked = 0; fp->sf_lastactive = fp->sf_hb_expiry = ddi_get_lbolt64(); fp->sf_timer_mp = timer_mp; fp->sf_hb_pending = B_FALSE; fp->sf_hb_enabled = B_TRUE; fp->sf_df = 1; fp->sf_pmtu_discovered = 0; fp->sf_next = NULL; fp->sf_T3expire = 0; (void) random_get_pseudo_bytes((uint8_t *)&fp->sf_hb_secret, sizeof (fp->sf_hb_secret)); fp->sf_rxt_unacked = 0; sctp_get_dest(sctp, fp); } /*ARGSUSED*/ static int faddr_constructor(void *buf, void *arg, int flags) { sctp_faddr_t *fp = buf; fp->sf_timer_mp = NULL; fp->sf_timer_running = 0; fp->sf_rc_timer_mp = NULL; fp->sf_rc_timer_running = 0; return (0); } /*ARGSUSED*/ static void faddr_destructor(void *buf, void *arg) { sctp_faddr_t *fp = buf; ASSERT(fp->sf_timer_mp == NULL); ASSERT(fp->sf_timer_running == 0); ASSERT(fp->sf_rc_timer_mp == NULL); ASSERT(fp->sf_rc_timer_running == 0); } void sctp_faddr_init(void) { sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache", sizeof (sctp_faddr_t), 0, faddr_constructor, faddr_destructor, NULL, NULL, NULL, 0); } void sctp_faddr_fini(void) { kmem_cache_destroy(sctp_kmem_faddr_cache); }