/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #define _SUN_TPI_VERSION 2 #include #include #include #include #include #include /* swilly code in sys/socketvar.h turns off DEBUG */ #ifdef __lint #define DEBUG #endif #include #include #include #include #include #include /* * PR-SCTP comments. * * A message can expire before it gets to the transmit list (i.e. it is still * in the unsent list - unchunked), after it gets to the transmit list, but * before transmission has actually started, or after transmission has begun. * Accordingly, we check for the status of a message in sctp_chunkify() when * the message is being transferred from the unsent list to the transmit list; * in sctp_get_msg_to_send(), when we get the next chunk from the transmit * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted. * When we nuke a message in sctp_chunkify(), all we need to do is take it * out of the unsent list and update sctp_unsent; when a message is deemed * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit * list, update sctp_unsent IFF transmission for the message has not yet begun * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the * message has started, then we cannot just take it out of the list, we need * to send Forward TSN chunk to the peer so that the peer can clear its * fragment list for this message. However, we cannot just send the Forward * TSN in sctp_get_msg_to_send() because there might be unacked chunks for * messages preceeding this abandoned message. So, we send a Forward TSN * IFF all messages prior to this abandoned message has been SACKd, if not * we defer sending the Forward TSN to sctp_cumack(), which will check for * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In * sctp_rexmit() when we check for retransmissions, we need to determine if * the advanced peer ack point can be moved ahead, and if so, send a Forward * TSN to the peer instead of retransmitting the chunk. Note that when * we send a Forward TSN for a message, there may be yet unsent chunks for * this message; we need to mark all such chunks as abandoned, so that * sctp_cumack() can take the message out of the transmit list, additionally * sctp_unsent need to be adjusted. Whenever sctp_unsent is updated (i.e. * decremented when a message/chunk is deemed abandoned), sockfs needs to * be notified so that it can adjust its idea of the queued message. */ #include "sctp_impl.h" static struct kmem_cache *sctp_kmem_ftsn_set_cache; /* Padding mblk for SCTP chunks. */ mblk_t *sctp_pad_mp; #ifdef DEBUG static boolean_t sctp_verify_chain(mblk_t *, mblk_t *); #endif /* * Called to allocate a header mblk when sending data to SCTP. * Data will follow in b_cont of this mblk. */ mblk_t * sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen, int flags) { mblk_t *mp; struct T_unitdata_req *tudr; size_t size; int error; size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen; size = MAX(size, sizeof (sctp_msg_hdr_t)); if (flags & SCTP_CAN_BLOCK) { mp = allocb_wait(size, BPRI_MED, 0, &error); } else { mp = allocb(size, BPRI_MED); } if (mp) { tudr = (struct T_unitdata_req *)mp->b_rptr; tudr->PRIM_type = T_UNITDATA_REQ; tudr->DEST_length = nlen; tudr->DEST_offset = sizeof (*tudr); tudr->OPT_length = clen; tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen)); if (nlen > 0) bcopy(name, tudr + 1, nlen); if (clen > 0) bcopy(control, (char *)tudr + tudr->OPT_offset, clen); mp->b_wptr += (tudr ->OPT_offset + clen); mp->b_datap->db_type = M_PROTO; } return (mp); } /*ARGSUSED2*/ int sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags) { sctp_faddr_t *fp = NULL; struct T_unitdata_req *tudr; int error = 0; mblk_t *mproto = mp; in6_addr_t *addr; in6_addr_t tmpaddr; uint16_t sid = sctp->sctp_def_stream; uint32_t ppid = sctp->sctp_def_ppid; uint32_t context = sctp->sctp_def_context; uint16_t msg_flags = sctp->sctp_def_flags; sctp_msg_hdr_t *sctp_msg_hdr; uint32_t msg_len = 0; uint32_t timetolive = sctp->sctp_def_timetolive; ASSERT(DB_TYPE(mproto) == M_PROTO); mp = mp->b_cont; ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA); tudr = (struct T_unitdata_req *)mproto->b_rptr; ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); /* Get destination address, if specified */ if (tudr->DEST_length > 0) { sin_t *sin; sin6_t *sin6; sin = (struct sockaddr_in *) (mproto->b_rptr + tudr->DEST_offset); switch (sin->sin_family) { case AF_INET: if (tudr->DEST_length < sizeof (*sin)) { return (EINVAL); } IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr); addr = &tmpaddr; break; case AF_INET6: if (tudr->DEST_length < sizeof (*sin6)) { return (EINVAL); } sin6 = (struct sockaddr_in6 *) (mproto->b_rptr + tudr->DEST_offset); addr = &sin6->sin6_addr; break; default: return (EAFNOSUPPORT); } fp = sctp_lookup_faddr(sctp, addr); if (fp == NULL) { return (EINVAL); } } /* Ancillary Data? */ if (tudr->OPT_length > 0) { struct cmsghdr *cmsg; char *cend; struct sctp_sndrcvinfo *sndrcv; cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset); cend = ((char *)cmsg + tudr->OPT_length); ASSERT(cend <= (char *)mproto->b_wptr); for (;;) { if ((char *)(cmsg + 1) > cend || ((char *)cmsg + cmsg->cmsg_len) > cend) { break; } if ((cmsg->cmsg_level == IPPROTO_SCTP) && (cmsg->cmsg_type == SCTP_SNDRCV)) { if (cmsg->cmsg_len < (sizeof (*sndrcv) + sizeof (*cmsg))) { return (EINVAL); } sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1); sid = sndrcv->sinfo_stream; msg_flags = sndrcv->sinfo_flags; ppid = sndrcv->sinfo_ppid; context = sndrcv->sinfo_context; timetolive = sndrcv->sinfo_timetolive; break; } if (cmsg->cmsg_len > 0) cmsg = CMSG_NEXT(cmsg); else break; } } if (msg_flags & MSG_ABORT) { if (mp && mp->b_cont) { mblk_t *pump = msgpullup(mp, -1); if (!pump) { return (ENOMEM); } freemsg(mp); mp = pump; mproto->b_cont = mp; } RUN_SCTP(sctp); sctp_user_abort(sctp, mp, B_TRUE); sctp_clean_death(sctp, ECONNRESET); freemsg(mproto); goto process_sendq; } if (mp == NULL) goto done; RUN_SCTP(sctp); /* Reject any new data requests if we are shutting down */ if (sctp->sctp_state > SCTPS_ESTABLISHED) { error = EPIPE; goto unlock_done; } /* Re-use the mproto to store relevant info. */ ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr)); mproto->b_rptr = mproto->b_datap->db_base; mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr); sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr; bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr)); sctp_msg_hdr->smh_context = context; sctp_msg_hdr->smh_sid = sid; sctp_msg_hdr->smh_ppid = ppid; sctp_msg_hdr->smh_flags = msg_flags; sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive); sctp_msg_hdr->smh_tob = lbolt64; for (; mp != NULL; mp = mp->b_cont) msg_len += MBLKL(mp); sctp_msg_hdr->smh_msglen = msg_len; /* User requested specific destination */ SCTP_SET_CHUNK_DEST(mproto, fp); if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED && sid >= sctp->sctp_num_ostr) { /* Send sendfail event */ sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID, B_FALSE); error = EINVAL; goto unlock_done; } /* no data */ if (msg_len == 0) { sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_NO_USR_DATA, B_FALSE); error = EINVAL; goto unlock_done; } /* Add it to the unsent list */ if (sctp->sctp_xmit_unsent == NULL) { sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto; } else { sctp->sctp_xmit_unsent_tail->b_next = mproto; sctp->sctp_xmit_unsent_tail = mproto; } sctp->sctp_unsent += msg_len; BUMP_LOCAL(sctp->sctp_msgcount); if (sctp->sctp_state == SCTPS_ESTABLISHED) sctp_output(sctp); process_sendq: WAKE_SCTP(sctp); sctp_process_sendq(sctp); return (0); unlock_done: WAKE_SCTP(sctp); done: return (error); } void sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send) { mblk_t *mp; mblk_t *chunk_mp; mblk_t *chunk_head; mblk_t *chunk_hdr; mblk_t *chunk_tail = NULL; int count; int chunksize; sctp_data_hdr_t *sdc; mblk_t *mdblk = sctp->sctp_xmit_unsent; sctp_faddr_t *fp; sctp_faddr_t *fp1; size_t xtralen; sctp_msg_hdr_t *msg_hdr; fp = SCTP_CHUNK_DEST(mdblk); if (fp == NULL) fp = sctp->sctp_current; if (fp->isv4) xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra + sizeof (*sdc); else xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra + sizeof (*sdc); count = chunksize = first_len - sizeof (*sdc); nextmsg: chunk_mp = mdblk->b_cont; /* * If this partially chunked, we ignore the first_len for now * and use the one already present. For the unchunked bits, we * use the length of the last chunk. */ if (SCTP_IS_MSG_CHUNKED(mdblk)) { int chunk_len; ASSERT(chunk_mp->b_next != NULL); mdblk->b_cont = chunk_mp->b_next; chunk_mp->b_next = NULL; SCTP_MSG_CLEAR_CHUNKED(mdblk); mp = mdblk->b_cont; while (mp->b_next != NULL) mp = mp->b_next; chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len); if (fp->sfa_pmss - chunk_len > sizeof (*sdc)) count = chunksize = fp->sfa_pmss - chunk_len; else count = chunksize = fp->sfa_pmss; count = chunksize = count - sizeof (*sdc); } else { msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) { sctp->sctp_xmit_unsent = mdblk->b_next; if (sctp->sctp_xmit_unsent == NULL) sctp->sctp_xmit_unsent_tail = NULL; ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); sctp->sctp_unsent -= msg_hdr->smh_msglen; mdblk->b_next = NULL; BUMP_LOCAL(sctp->sctp_prsctpdrop); /* * Update ULP the amount of queued data, which is * sent-unack'ed + unsent. */ if (!SCTP_IS_DETACHED(sctp)) { sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, sctp->sctp_unacked + sctp->sctp_unsent); } sctp_sendfail_event(sctp, mdblk, 0, B_FALSE); goto try_next; } mdblk->b_cont = NULL; } msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; nextchunk: chunk_head = chunk_mp; chunk_tail = NULL; /* Skip as many mblk's as we need */ while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) { count -= MBLKL(chunk_mp); chunk_tail = chunk_mp; chunk_mp = chunk_mp->b_cont; } /* Split the chain, if needed */ if (chunk_mp != NULL) { if (count > 0) { mblk_t *split_mp = dupb(chunk_mp); if (split_mp == NULL) { if (mdblk->b_cont == NULL) { mdblk->b_cont = chunk_head; } else { SCTP_MSG_SET_CHUNKED(mdblk); ASSERT(chunk_head->b_next == NULL); chunk_head->b_next = mdblk->b_cont; mdblk->b_cont = chunk_head; } return; } if (chunk_tail != NULL) { chunk_tail->b_cont = split_mp; chunk_tail = chunk_tail->b_cont; } else { chunk_head = chunk_tail = split_mp; } chunk_tail->b_wptr = chunk_tail->b_rptr + count; chunk_mp->b_rptr = chunk_tail->b_wptr; count = 0; } else if (chunk_tail == NULL) { goto next; } else { chunk_tail->b_cont = NULL; } } /* Alloc chunk hdr, if needed */ if (DB_REF(chunk_head) > 1 || ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) || MBLKHEAD(chunk_head) < sizeof (*sdc)) { if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) { if (mdblk->b_cont == NULL) { if (chunk_mp != NULL) linkb(chunk_head, chunk_mp); mdblk->b_cont = chunk_head; } else { SCTP_MSG_SET_CHUNKED(mdblk); if (chunk_mp != NULL) linkb(chunk_head, chunk_mp); ASSERT(chunk_head->b_next == NULL); chunk_head->b_next = mdblk->b_cont; mdblk->b_cont = chunk_head; } return; } chunk_hdr->b_rptr += xtralen - sizeof (*sdc); chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc); chunk_hdr->b_cont = chunk_head; } else { chunk_hdr = chunk_head; chunk_hdr->b_rptr -= sizeof (*sdc); } ASSERT(chunk_hdr->b_datap->db_ref == 1); sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr; sdc->sdh_id = CHUNK_DATA; sdc->sdh_flags = 0; sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count); ASSERT(sdc->sdh_len); sdc->sdh_sid = htons(msg_hdr->smh_sid); /* * We defer assigning the SSN just before sending the chunk, else * if we drop the chunk in sctp_get_msg_to_send(), we would need * to send a Forward TSN to let the peer know. Some more comments * about this in sctp_impl.h for SCTP_CHUNK_SENT. */ sdc->sdh_payload_id = msg_hdr->smh_ppid; if (mdblk->b_cont == NULL) { mdblk->b_cont = chunk_hdr; SCTP_DATA_SET_BBIT(sdc); } else { mp = mdblk->b_cont; while (mp->b_next != NULL) mp = mp->b_next; mp->b_next = chunk_hdr; } bytes_to_send -= (chunksize - count); if (chunk_mp != NULL) { next: count = chunksize = fp->sfa_pmss - sizeof (*sdc); goto nextchunk; } SCTP_DATA_SET_EBIT(sdc); sctp->sctp_xmit_unsent = mdblk->b_next; if (mdblk->b_next == NULL) { sctp->sctp_xmit_unsent_tail = NULL; } mdblk->b_next = NULL; if (sctp->sctp_xmit_tail == NULL) { sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk; } else { mp = sctp->sctp_xmit_tail; while (mp->b_next != NULL) mp = mp->b_next; mp->b_next = mdblk; mdblk->b_prev = mp; } try_next: if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) { mdblk = sctp->sctp_xmit_unsent; fp1 = SCTP_CHUNK_DEST(mdblk); if (fp1 == NULL) fp1 = sctp->sctp_current; if (fp == fp1) { size_t len = MBLKL(mdblk->b_cont); if ((count > 0) && ((len > fp->sfa_pmss - sizeof (*sdc)) || (len <= count))) { count -= sizeof (*sdc); count = chunksize = count - (count & 0x3); } else { count = chunksize = fp->sfa_pmss - sizeof (*sdc); } } else { if (fp1->isv4) xtralen = sctp->sctp_hdr_len; else xtralen = sctp->sctp_hdr6_len; xtralen += sctp_wroff_xtra + sizeof (*sdc); count = chunksize = fp1->sfa_pmss - sizeof (*sdc); fp = fp1; } goto nextmsg; } } void sctp_free_msg(mblk_t *ump) { mblk_t *mp, *nmp; for (mp = ump->b_cont; mp; mp = nmp) { nmp = mp->b_next; mp->b_next = mp->b_prev = NULL; freemsg(mp); } ASSERT(!ump->b_prev); ump->b_next = NULL; freeb(ump); } mblk_t * sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, int *error) { int hdrlen; char *hdr; int isv4 = fp->isv4; if (error != NULL) *error = 0; if (isv4) { hdrlen = sctp->sctp_hdr_len; hdr = sctp->sctp_iphc; } else { hdrlen = sctp->sctp_hdr6_len; hdr = sctp->sctp_iphc6; } /* * A null fp->ire could mean that the address is 'down'. Similarly, * it is possible that the address went down, we tried to send an * heartbeat and ended up setting fp->saddr as unspec because we * didn't have any usable source address. In either case * sctp_get_ire() will try find an IRE, if available, and set * the source address, if needed. If we still don't have any * usable source address, fp->state will be SCTP_FADDRS_UNREACH and * we return EHOSTUNREACH. */ if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) { sctp_get_ire(sctp, fp); if (fp->state == SCTP_FADDRS_UNREACH) { if (error != NULL) *error = EHOSTUNREACH; return (NULL); } } /* Copy in IP header. */ if ((mp->b_rptr - mp->b_datap->db_base) < (sctp_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 || !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) { mblk_t *nmp; /* * This can happen if IP headers are adjusted after * data was moved into chunks, or during retransmission, * or things like snoop is running. */ nmp = allocb_cred(sctp_wroff_xtra + hdrlen + sacklen, CONN_CRED(sctp->sctp_connp)); if (nmp == NULL) { if (error != NULL) *error = ENOMEM; return (NULL); } nmp->b_rptr += sctp_wroff_xtra; nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen; nmp->b_cont = mp; mp = nmp; } else { mp->b_rptr -= (hdrlen + sacklen); mblk_setcred(mp, CONN_CRED(sctp->sctp_connp)); } bcopy(hdr, mp->b_rptr, hdrlen); if (sacklen) { sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen); } if (fp != sctp->sctp_current) { /* change addresses in header */ if (isv4) { ipha_t *iph = (ipha_t *)mp->b_rptr; IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst); if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) { IN6_V4MAPPED_TO_IPADDR(&fp->saddr, iph->ipha_src); } else if (sctp->sctp_bound_to_all) { iph->ipha_src = INADDR_ANY; } } else { ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr; if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { ((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr; } else if (sctp->sctp_bound_to_all) { V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src); } } } /* * IP will not free this IRE if it is condemned. SCTP needs to * free it. */ if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) { IRE_REFRELE_NOTR(fp->ire); fp->ire = NULL; } /* Stash the conn and ire ptr info for IP */ SCTP_STASH_IPINFO(mp, fp->ire); return (mp); } /* * SCTP requires every chunk to be padded so that the total length * is a multiple of SCTP_ALIGN. This function returns a mblk with * the specified pad length. */ static mblk_t * sctp_get_padding(int pad) { mblk_t *fill; ASSERT(pad < SCTP_ALIGN); if ((fill = dupb(sctp_pad_mp)) != NULL) { fill->b_wptr += pad; return (fill); } /* * The memory saving path of reusing the sctp_pad_mp * fails may be because it has been dupb() too * many times (DBLK_REFMAX). Use the memory consuming * path of allocating the pad mblk. */ if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) { /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */ *(int32_t *)fill->b_rptr = 0; fill->b_wptr += pad; } return (fill); } static mblk_t * sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) { mblk_t *meta; mblk_t *start_mp = NULL; mblk_t *end_mp = NULL; mblk_t *mp, *nmp; mblk_t *fill; sctp_data_hdr_t *sdh; int msglen; int extra; sctp_msg_hdr_t *msg_hdr; sctp_faddr_t *old_fp = NULL; sctp_faddr_t *chunk_fp; for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; if (SCTP_IS_MSG_ABANDONED(meta) || SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { continue; } for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { if (SCTP_CHUNK_WANT_REXMIT(mp)) { /* * Use the same peer address to do fast * retransmission. If the original peer * address is dead, switch to the current * one. Record the old one so that we * will pick the chunks sent to the old * one for fast retransmission. */ chunk_fp = SCTP_CHUNK_DEST(mp); if (*fp == NULL) { *fp = chunk_fp; if ((*fp)->state != SCTP_FADDRS_ALIVE) { old_fp = *fp; *fp = sctp->sctp_current; } } else if (old_fp == NULL && *fp != chunk_fp) { continue; } else if (old_fp != NULL && old_fp != chunk_fp) { continue; } sdh = (sctp_data_hdr_t *)mp->b_rptr; msglen = ntohs(sdh->sdh_len); if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) { extra = SCTP_ALIGN - extra; } /* * We still return at least the first message * even if that message cannot fit in as * PMTU may have changed. */ if (*total + msglen + extra > (*fp)->sfa_pmss && start_mp != NULL) { return (start_mp); } if ((nmp = dupmsg(mp)) == NULL) return (start_mp); if (extra > 0) { fill = sctp_get_padding(extra); if (fill != NULL) { linkb(nmp, fill); } else { return (start_mp); } } BUMP_MIB(&sctp_mib, sctpOutFastRetrans); BUMP_LOCAL(sctp->sctp_rxtchunks); SCTP_CHUNK_CLEAR_REXMIT(mp); if (start_mp == NULL) { start_mp = nmp; } else { linkb(end_mp, nmp); } end_mp = nmp; *total += msglen + extra; dprint(2, ("sctp_find_fast_rexmit_mblks: " "tsn %x\n", sdh->sdh_tsn)); } } } /* Clear the flag as there is no more message to be fast rexmitted. */ sctp->sctp_chk_fast_rexmit = B_FALSE; return (start_mp); } /* A debug function just to make sure that a mblk chain is not broken */ #ifdef DEBUG static boolean_t sctp_verify_chain(mblk_t *head, mblk_t *tail) { mblk_t *mp = head; if (head == NULL || tail == NULL) return (B_TRUE); while (mp != NULL) { if (mp == tail) return (B_TRUE); mp = mp->b_next; } return (B_FALSE); } #endif /* * Gets the next unsent chunk to transmit. Messages that are abandoned are * skipped. A message can be abandoned if it has a non-zero timetolive and * transmission has not yet started or if it is a partially reliable * message and its time is up (assuming we are PR-SCTP aware). * 'cansend' is used to determine if need to try and chunkify messages from * the unsent list, if any, and also as an input to sctp_chunkify() if so. * When called from sctp_rexmit(), we don't want to chunkify, so 'cansend' * will be set to 0. */ mblk_t * sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error, int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp) { mblk_t *mp1; sctp_msg_hdr_t *msg_hdr; mblk_t *tmp_meta; sctp_faddr_t *fp1; ASSERT(error != NULL && mp != NULL); *error = 0; ASSERT(sctp->sctp_current != NULL); chunkified: while (meta != NULL) { tmp_meta = meta->b_next; msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; mp1 = meta->b_cont; if (SCTP_IS_MSG_ABANDONED(meta)) goto next_msg; if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { while (mp1 != NULL) { if (SCTP_CHUNK_CANSEND(mp1)) { *mp = mp1; #ifdef DEBUG ASSERT(sctp_verify_chain( sctp->sctp_xmit_head, meta)); #endif return (meta); } mp1 = mp1->b_next; } goto next_msg; } /* * If we come here and the first chunk is sent, then we * we are PR-SCTP aware, in which case if the cumulative * TSN has moved upto or beyond the first chunk (which * means all the previous messages have been cumulative * SACK'd), then we send a Forward TSN with the last * chunk that was sent in this message. If we can't send * a Forward TSN because previous non-abandoned messages * have not been acked then we will defer the Forward TSN * to sctp_rexmit() or sctp_cumack(). */ if (SCTP_CHUNK_ISSENT(mp1)) { *error = sctp_check_abandoned_msg(sctp, meta); if (*error != 0) { #ifdef DEBUG ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail)); #endif return (NULL); } goto next_msg; } BUMP_LOCAL(sctp->sctp_prsctpdrop); ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); if (meta->b_prev == NULL) { ASSERT(sctp->sctp_xmit_head == meta); sctp->sctp_xmit_head = tmp_meta; if (sctp->sctp_xmit_tail == meta) sctp->sctp_xmit_tail = tmp_meta; meta->b_next = NULL; if (tmp_meta != NULL) tmp_meta->b_prev = NULL; } else if (meta->b_next == NULL) { if (sctp->sctp_xmit_tail == meta) sctp->sctp_xmit_tail = meta->b_prev; meta->b_prev->b_next = NULL; meta->b_prev = NULL; } else { meta->b_prev->b_next = tmp_meta; tmp_meta->b_prev = meta->b_prev; if (sctp->sctp_xmit_tail == meta) sctp->sctp_xmit_tail = tmp_meta; meta->b_prev = NULL; meta->b_next = NULL; } sctp->sctp_unsent -= msg_hdr->smh_msglen; /* * Update ULP the amount of queued data, which is * sent-unack'ed + unsent. */ if (!SCTP_IS_DETACHED(sctp)) { sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, sctp->sctp_unacked + sctp->sctp_unsent); } sctp_sendfail_event(sctp, meta, 0, B_TRUE); next_msg: meta = tmp_meta; } /* chunkify, if needed */ if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) { ASSERT(sctp->sctp_unsent > 0); if (fp == NULL) { fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent); if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) fp = sctp->sctp_current; } else { /* * If user specified destination, try to honor that. */ fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent); if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE && fp1 != fp) { goto chunk_done; } } sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend); if ((meta = sctp->sctp_xmit_tail) == NULL) goto chunk_done; /* * sctp_chunkify() won't advance sctp_xmit_tail if it adds * new chunk(s) to the tail, so we need to skip the * sctp_xmit_tail, which would have already been processed. * This could happen when there is unacked chunks, but * nothing new to send. * When sctp_chunkify() is called when the transmit queue * is empty then we need to start from sctp_xmit_tail. */ if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) { #ifdef DEBUG mp1 = sctp->sctp_xmit_tail->b_cont; while (mp1 != NULL) { ASSERT(!SCTP_CHUNK_CANSEND(mp1)); mp1 = mp1->b_next; } #endif if ((meta = sctp->sctp_xmit_tail->b_next) == NULL) goto chunk_done; } goto chunkified; } chunk_done: #ifdef DEBUG ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail)); #endif return (NULL); } void sctp_fast_rexmit(sctp_t *sctp) { mblk_t *mp, *head; int pktlen = 0; sctp_faddr_t *fp = NULL; ASSERT(sctp->sctp_xmit_head != NULL); mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp); if (mp == NULL) { SCTP_KSTAT(sctp_fr_not_found); return; } if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) { freemsg(mp); SCTP_KSTAT(sctp_fr_add_hdr); return; } if ((pktlen > fp->sfa_pmss) && fp->isv4) { ipha_t *iph = (ipha_t *)head->b_rptr; iph->ipha_fragment_offset_and_flags = 0; } sctp_set_iplen(sctp, head); sctp_add_sendq(sctp, head); sctp->sctp_active = fp->lastactive = lbolt64; } void sctp_output(sctp_t *sctp) { mblk_t *mp = NULL; mblk_t *nmp; mblk_t *head; mblk_t *meta = sctp->sctp_xmit_tail; mblk_t *fill = NULL; uint16_t chunklen; uint32_t cansend; int32_t seglen; int32_t xtralen; int32_t sacklen; int32_t pad = 0; int32_t pathmax; int extra; int64_t now = lbolt64; sctp_faddr_t *fp; sctp_faddr_t *lfp; sctp_data_hdr_t *sdc; int error; boolean_t notsent = B_TRUE; if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { sacklen = 0; } else { /* send a SACK chunk */ sacklen = sizeof (sctp_chunk_hdr_t) + sizeof (sctp_sack_chunk_t) + (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); lfp = sctp->sctp_lastdata; ASSERT(lfp != NULL); if (lfp->state != SCTP_FADDRS_ALIVE) lfp = sctp->sctp_current; } cansend = sctp->sctp_frwnd; if (sctp->sctp_unsent < cansend) cansend = sctp->sctp_unsent; if ((cansend < sctp->sctp_current->sfa_pmss / 2) && sctp->sctp_unacked && (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) && !sctp->sctp_ndelay) { head = NULL; fp = sctp->sctp_current; goto unsent_data; } if (meta != NULL) mp = meta->b_cont; while (cansend > 0) { pad = 0; /* * Find first segment eligible for transmit. */ while (mp != NULL) { if (SCTP_CHUNK_CANSEND(mp)) break; mp = mp->b_next; } if (mp == NULL) { meta = sctp_get_msg_to_send(sctp, &mp, meta == NULL ? NULL : meta->b_next, &error, sacklen, cansend, NULL); if (error != 0 || meta == NULL) { head = NULL; fp = sctp->sctp_current; goto unsent_data; } sctp->sctp_xmit_tail = meta; } sdc = (sctp_data_hdr_t *)mp->b_rptr; seglen = ntohs(sdc->sdh_len); xtralen = sizeof (*sdc); chunklen = seglen - xtralen; /* * Check rwnd. */ if (chunklen > cansend) { head = NULL; fp = SCTP_CHUNK_DEST(meta); if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) fp = sctp->sctp_current; goto unsent_data; } if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) extra = SCTP_ALIGN - extra; /* * Pick destination address, and check cwnd. */ if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) && (seglen + sacklen + extra <= lfp->sfa_pmss)) { /* * Only include SACK chunk if it can be bundled * with a data chunk, and sent to sctp_lastdata. */ pathmax = lfp->cwnd - lfp->suna; fp = lfp; if ((nmp = dupmsg(mp)) == NULL) { head = NULL; goto unsent_data; } SCTP_CHUNK_CLEAR_FLAGS(nmp); head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, &error); if (head == NULL) { /* * If none of the source addresses are * available (i.e error == EHOSTUNREACH), * pretend we have sent the data. We will * eventually time out trying to retramsmit * the data if the interface never comes up. * If we have already sent some stuff (i.e., * notsent is B_FALSE) then we are fine, else * just mark this packet as sent. */ if (notsent && error == EHOSTUNREACH) { SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); } freemsg(nmp); SCTP_KSTAT(sctp_output_failed); goto unsent_data; } seglen += sacklen; xtralen += sacklen; sacklen = 0; } else { fp = SCTP_CHUNK_DEST(meta); if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) fp = sctp->sctp_current; /* * If we haven't sent data to this destination for * a while, do slow start again. */ if (now - fp->lastactive > fp->rto) { fp->cwnd = sctp_slow_start_after_idle * fp->sfa_pmss; } pathmax = fp->cwnd - fp->suna; if (seglen + extra > pathmax) { head = NULL; goto unsent_data; } if ((nmp = dupmsg(mp)) == NULL) { head = NULL; goto unsent_data; } SCTP_CHUNK_CLEAR_FLAGS(nmp); head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error); if (head == NULL) { /* * If none of the source addresses are * available (i.e error == EHOSTUNREACH), * pretend we have sent the data. We will * eventually time out trying to retramsmit * the data if the interface never comes up. * If we have already sent some stuff (i.e., * notsent is B_FALSE) then we are fine, else * just mark this packet as sent. */ if (notsent && error == EHOSTUNREACH) { SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); } freemsg(nmp); SCTP_KSTAT(sctp_output_failed); goto unsent_data; } } fp->lastactive = now; if (pathmax > fp->sfa_pmss) pathmax = fp->sfa_pmss; SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); mp = mp->b_next; /* Use this chunk to measure RTT? */ if (sctp->sctp_out_time == 0) { sctp->sctp_out_time = now; sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1; ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); } if (extra > 0) { fill = sctp_get_padding(extra); if (fill != NULL) { linkb(head, fill); pad = extra; seglen += extra; } else { goto unsent_data; } } /* See if we can bundle more. */ while (seglen < pathmax) { int32_t new_len; int32_t new_xtralen; while (mp != NULL) { if (SCTP_CHUNK_CANSEND(mp)) break; mp = mp->b_next; } if (mp == NULL) { meta = sctp_get_msg_to_send(sctp, &mp, meta->b_next, &error, seglen, (seglen - xtralen) >= cansend ? 0 : cansend - seglen, fp); if (error != 0 || meta == NULL) break; sctp->sctp_xmit_tail = meta; } ASSERT(mp != NULL); if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) && fp != SCTP_CHUNK_DEST(meta)) { break; } sdc = (sctp_data_hdr_t *)mp->b_rptr; chunklen = ntohs(sdc->sdh_len); if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0) extra = SCTP_ALIGN - extra; new_len = seglen + chunklen; new_xtralen = xtralen + sizeof (*sdc); chunklen -= sizeof (*sdc); if (new_len - new_xtralen > cansend || new_len + extra > pathmax) { break; } if ((nmp = dupmsg(mp)) == NULL) break; if (extra > 0) { fill = sctp_get_padding(extra); if (fill != NULL) { pad += extra; new_len += extra; linkb(nmp, fill); } else { freemsg(nmp); break; } } seglen = new_len; xtralen = new_xtralen; SCTP_CHUNK_CLEAR_FLAGS(nmp); SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); linkb(head, nmp); mp = mp->b_next; } if ((seglen > fp->sfa_pmss) && fp->isv4) { ipha_t *iph = (ipha_t *)head->b_rptr; /* * Path MTU is different from what we thought it would * be when we created chunks, or IP headers have grown. * Need to clear the DF bit. */ iph->ipha_fragment_offset_and_flags = 0; } /* xmit segment */ ASSERT(cansend >= seglen - pad - xtralen); cansend -= (seglen - pad - xtralen); dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x " "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n", seglen - xtralen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, cansend, sctp->sctp_lastack_rxd)); sctp_set_iplen(sctp, head); sctp_add_sendq(sctp, head); /* arm rto timer (if not set) */ if (!fp->timer_running) SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); notsent = B_FALSE; } sctp->sctp_active = now; return; unsent_data: /* arm persist timer (if rto timer not set) */ if (!fp->timer_running) SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); if (head != NULL) freemsg(head); } /* * The following two functions initialize and destroy the cache * associated with the sets used for PR-SCTP. */ void sctp_ftsn_sets_init(void) { sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache", sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL, NULL, 0); } void sctp_ftsn_sets_fini(void) { kmem_cache_destroy(sctp_kmem_ftsn_set_cache); } /* Free PR-SCTP sets */ void sctp_free_ftsn_set(sctp_ftsn_set_t *s) { sctp_ftsn_set_t *p; while (s != NULL) { p = s->next; s->next = NULL; kmem_cache_free(sctp_kmem_ftsn_set_cache, s); s = p; } } /* * Given a message meta block, meta, this routine creates or modifies * the set that will be used to generate a Forward TSN chunk. If the * entry for stream id, sid, for this message already exists, the * sequence number, ssn, is updated if it is greater than the existing * one. If an entry for this sid does not exist, one is created if * the size does not exceed fp->sfa_pmss. We return false in case * or an error. */ boolean_t sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta, uint_t *nsets, uint32_t *slen) { sctp_ftsn_set_t *p; sctp_msg_hdr_t *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; uint16_t sid = htons(msg_hdr->smh_sid); /* msg_hdr->smh_ssn is already in NBO */ uint16_t ssn = msg_hdr->smh_ssn; ASSERT(s != NULL && nsets != NULL); ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL)); if (*s == NULL) { ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss); *s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP); if (*s == NULL) return (B_FALSE); (*s)->ftsn_entries.ftsn_sid = sid; (*s)->ftsn_entries.ftsn_ssn = ssn; (*s)->next = NULL; *nsets = 1; *slen += sizeof (uint32_t); return (B_TRUE); } for (p = *s; p->next != NULL; p = p->next) { if (p->ftsn_entries.ftsn_sid == sid) { if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) p->ftsn_entries.ftsn_ssn = ssn; return (B_TRUE); } } /* the last one */ if (p->ftsn_entries.ftsn_sid == sid) { if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) p->ftsn_entries.ftsn_ssn = ssn; } else { if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss) return (B_FALSE); p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP); if (p->next == NULL) return (B_FALSE); p = p->next; p->ftsn_entries.ftsn_sid = sid; p->ftsn_entries.ftsn_ssn = ssn; p->next = NULL; (*nsets)++; *slen += sizeof (uint32_t); } return (B_TRUE); } /* * Given a set of stream id - sequence number pairs, this routing creates * a Forward TSN chunk. The cumulative TSN (advanced peer ack point) * for the chunk is obtained from sctp->sctp_adv_pap. The caller * will add the IP/SCTP header. */ mblk_t * sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets, uint_t nsets, uint32_t seglen) { mblk_t *ftsn_mp; sctp_chunk_hdr_t *ch_hdr; uint32_t *advtsn; uint16_t schlen; size_t xtralen; ftsn_entry_t *ftsn_entry; seglen += sizeof (sctp_chunk_hdr_t); if (fp->isv4) xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra; else xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra; ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp)); if (ftsn_mp == NULL) return (NULL); ftsn_mp->b_rptr += xtralen; ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen; ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr; ch_hdr->sch_id = CHUNK_FORWARD_TSN; ch_hdr->sch_flags = 0; /* * The cast here should not be an issue since seglen is * the length of the Forward TSN chunk. */ schlen = (uint16_t)seglen; U16_TO_ABE16(schlen, &(ch_hdr->sch_len)); advtsn = (uint32_t *)(ch_hdr + 1); U32_TO_ABE32(sctp->sctp_adv_pap, advtsn); ftsn_entry = (ftsn_entry_t *)(advtsn + 1); while (nsets > 0) { ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr); ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid; ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn; ftsn_entry++; sets = sets->next; nsets--; } return (ftsn_mp); } /* * Given a starting message, the routine steps through all the * messages whose TSN is less than sctp->sctp_adv_pap and creates * ftsn sets. The ftsn sets is then used to create an Forward TSN * chunk. All the messages, that have chunks that are included in the * ftsn sets, are flagged abandonded. If a message is partially sent * and is deemed abandoned, all remaining unsent chunks are marked * abandoned and are deducted from sctp_unsent. */ void sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp, sctp_faddr_t *fp, uint32_t *seglen) { mblk_t *mp1 = mp; mblk_t *mp_head = mp; mblk_t *meta_head = meta; mblk_t *head; sctp_ftsn_set_t *sets = NULL; uint_t nsets = 0; uint16_t clen; sctp_data_hdr_t *sdc; uint32_t sacklen; uint32_t adv_pap = sctp->sctp_adv_pap; uint32_t unsent = 0; boolean_t ubit; *seglen = sizeof (uint32_t); sdc = (sctp_data_hdr_t *)mp1->b_rptr; while (meta != NULL && SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { /* * Skip adding FTSN sets for un-ordered messages as they do * not have SSNs. */ ubit = SCTP_DATA_GET_UBIT(sdc); if (!ubit && !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) { meta = NULL; sctp->sctp_adv_pap = adv_pap; goto ftsn_done; } while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { sdc = (sctp_data_hdr_t *)mp1->b_rptr; adv_pap = ntohl(sdc->sdh_tsn); mp1 = mp1->b_next; } meta = meta->b_next; if (meta != NULL) { mp1 = meta->b_cont; if (!SCTP_CHUNK_ISSENT(mp1)) break; sdc = (sctp_data_hdr_t *)mp1->b_rptr; } } ftsn_done: /* * Can't compare with sets == NULL, since we don't add any * sets for un-ordered messages. */ if (meta == meta_head) return; *nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen); sctp_free_ftsn_set(sets); if (*nmp == NULL) return; if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { sacklen = 0; } else { sacklen = sizeof (sctp_chunk_hdr_t) + sizeof (sctp_sack_chunk_t) + (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { /* piggybacked SACK doesn't fit */ sacklen = 0; } else { fp = sctp->sctp_lastdata; } } head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL); if (head == NULL) { freemsg(*nmp); *nmp = NULL; SCTP_KSTAT(sctp_send_ftsn_failed); return; } *seglen += sacklen; *nmp = head; /* * XXXNeed to optimise this, the reason it is done here is so * that we don't have to undo in case of failure. */ mp1 = mp_head; sdc = (sctp_data_hdr_t *)mp1->b_rptr; while (meta_head != NULL && SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { if (!SCTP_IS_MSG_ABANDONED(meta_head)) SCTP_MSG_SET_ABANDONED(meta_head); while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { sdc = (sctp_data_hdr_t *)mp1->b_rptr; if (!SCTP_CHUNK_ISACKED(mp1)) { clen = ntohs(sdc->sdh_len) - sizeof (*sdc); SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen, meta_head); } mp1 = mp1->b_next; } while (mp1 != NULL) { sdc = (sctp_data_hdr_t *)mp1->b_rptr; if (!SCTP_CHUNK_ABANDONED(mp1)) { ASSERT(!SCTP_CHUNK_ISSENT(mp1)); unsent += ntohs(sdc->sdh_len) - sizeof (*sdc); SCTP_ABANDON_CHUNK(mp1); } mp1 = mp1->b_next; } meta_head = meta_head->b_next; if (meta_head != NULL) { mp1 = meta_head->b_cont; if (!SCTP_CHUNK_ISSENT(mp1)) break; sdc = (sctp_data_hdr_t *)mp1->b_rptr; } } if (unsent > 0) { ASSERT(sctp->sctp_unsent >= unsent); sctp->sctp_unsent -= unsent; /* * Update ULP the amount of queued data, which is * sent-unack'ed + unsent. */ if (!SCTP_IS_DETACHED(sctp)) { sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, sctp->sctp_unacked + sctp->sctp_unsent); } } } /* * This function steps through messages starting at meta and checks if * the message is abandoned. It stops when it hits an unsent chunk or * a message that has all its chunk acked. This is the only place * where the sctp_adv_pap is moved forward to indicated abandoned * messages. */ void sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp) { uint32_t tsn = sctp->sctp_adv_pap; sctp_data_hdr_t *sdc; sctp_msg_hdr_t *msg_hdr; ASSERT(mp != NULL); sdc = (sctp_data_hdr_t *)mp->b_rptr; ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd)); msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; if (!SCTP_IS_MSG_ABANDONED(meta) && !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { return; } while (meta != NULL) { while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) { sdc = (sctp_data_hdr_t *)mp->b_rptr; tsn = ntohl(sdc->sdh_tsn); mp = mp->b_next; } if (mp != NULL) break; /* * We continue checking for successive messages only if there * is a chunk marked for retransmission. Else, we might * end up sending FTSN prematurely for chunks that have been * sent, but not yet acked. */ if ((meta = meta->b_next) != NULL) { msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; if (!SCTP_IS_MSG_ABANDONED(meta) && !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { break; } for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { if (!SCTP_CHUNK_ISSENT(mp)) { sctp->sctp_adv_pap = tsn; return; } if (SCTP_CHUNK_WANT_REXMIT(mp)) break; } if (mp == NULL) break; } } sctp->sctp_adv_pap = tsn; } /* * Determine if we should bundle a data chunk with the chunk being * retransmitted. We bundle if * * - the chunk is sent to the same destination and unack'ed. * * OR * * - the chunk is unsent, i.e. new data. */ #define SCTP_CHUNK_RX_CANBUNDLE(mp, fp) \ (!SCTP_CHUNK_ABANDONED((mp)) && \ ((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) && \ !SCTP_CHUNK_ISACKED(mp))) || \ (((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \ SCTP_CHUNK_FLAG_SENT))) /* * Retransmit first segment which hasn't been acked with cumtsn or send * a Forward TSN chunk, if appropriate. */ void sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) { mblk_t *mp; mblk_t *nmp = NULL; mblk_t *head; mblk_t *meta = sctp->sctp_xmit_head; mblk_t *fill; uint32_t seglen = 0; uint32_t sacklen; uint16_t chunklen; int extra; sctp_data_hdr_t *sdc; sctp_faddr_t *fp; uint32_t adv_pap = sctp->sctp_adv_pap; boolean_t do_ftsn = B_FALSE; boolean_t ftsn_check = B_TRUE; uint32_t first_ua_tsn; sctp_msg_hdr_t *mhdr; uint32_t tot_wnd; while (meta != NULL) { for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { uint32_t tsn; if (!SCTP_CHUNK_ISSENT(mp)) goto window_probe; /* * We break in the following cases - * * if the advanced peer ack point includes the next * chunk to be retransmited - possibly the Forward * TSN was lost. * * if we are PRSCTP aware and the next chunk to be * retransmitted is now abandoned * * if the next chunk to be retransmitted is for * the dest on which the timer went off. (this * message is not abandoned). * * We check for Forward TSN only for the first * eligible chunk to be retransmitted. The reason * being if the first eligible chunk is skipped (say * it was sent to a destination other than oldfp) * then we cannot advance the cum TSN via Forward * TSN chunk. * * Also, ftsn_check is B_TRUE only for the first * eligible chunk, it will be B_FALSE for all * subsequent candidate messages for retransmission. */ sdc = (sctp_data_hdr_t *)mp->b_rptr; tsn = ntohl(sdc->sdh_tsn); if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) { if (sctp->sctp_prsctp_aware && ftsn_check) { if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) { ASSERT(sctp->sctp_prsctp_aware); do_ftsn = B_TRUE; goto out; } else { sctp_check_adv_ack_pt(sctp, meta, mp); if (SEQ_GT(sctp->sctp_adv_pap, adv_pap)) { do_ftsn = B_TRUE; goto out; } } ftsn_check = B_FALSE; } if (SCTP_CHUNK_DEST(mp) == oldfp) goto out; } } meta = meta->b_next; if (meta != NULL && sctp->sctp_prsctp_aware) { mhdr = (sctp_msg_hdr_t *)meta->b_rptr; while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { meta = meta->b_next; } } } window_probe: /* * Retransmit fired for a destination which didn't have * any unacked data pending. */ if (!sctp->sctp_unacked && sctp->sctp_unsent) { /* * Send a window probe. Inflate frwnd to allow * sending one segment. */ if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) { sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc); } BUMP_MIB(&sctp_mib, sctpOutWinProbe); sctp_output(sctp); } return; out: /* * Enter slowstart for this destination */ oldfp->ssthresh = oldfp->cwnd / 2; if (oldfp->ssthresh < 2 * oldfp->sfa_pmss) oldfp->ssthresh = 2 * oldfp->sfa_pmss; oldfp->cwnd = oldfp->sfa_pmss; oldfp->pba = 0; fp = sctp_rotate_faddr(sctp, oldfp); ASSERT(fp != NULL); sdc = (sctp_data_hdr_t *)mp->b_rptr; first_ua_tsn = ntohl(sdc->sdh_tsn); if (do_ftsn) { sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); if (nmp == NULL) { sctp->sctp_adv_pap = adv_pap; goto restart_timer; } head = nmp; /* * Move to the next unabandoned chunk. XXXCheck if meta will * always be marked abandoned. */ while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) meta = meta->b_next; if (meta != NULL) mp = mp->b_cont; else mp = NULL; goto try_bundle; } seglen = ntohs(sdc->sdh_len); chunklen = seglen - sizeof (*sdc); if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) extra = SCTP_ALIGN - extra; /* Find out if we need to piggyback SACK. */ if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { sacklen = 0; } else { sacklen = sizeof (sctp_chunk_hdr_t) + sizeof (sctp_sack_chunk_t) + (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { /* piggybacked SACK doesn't fit */ sacklen = 0; } else { /* * OK, we have room to send SACK back. But we * should send it back to the last fp where we * receive data from, unless sctp_lastdata equals * oldfp, then we should probably not send it * back to that fp. Also we should check that * the fp is alive. */ if (sctp->sctp_lastdata != oldfp && sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { fp = sctp->sctp_lastdata; } } } /* * Cancel RTT measurement if the retransmitted TSN is before the * TSN used for timimg. */ if (sctp->sctp_out_time != 0 && SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { sctp->sctp_out_time = 0; } /* Clear the counter as the RTT calculation may be off. */ fp->rtt_updates = 0; oldfp->rtt_updates = 0; /* * After a timeout, we should change the current faddr so that * new chunks will be sent to the alternate address. */ sctp_set_faddr_current(sctp, fp); nmp = dupmsg(mp); if (nmp == NULL) goto restart_timer; if (extra > 0) { fill = sctp_get_padding(extra); if (fill != NULL) { linkb(nmp, fill); seglen += extra; } else { freemsg(nmp); goto restart_timer; } } SCTP_CHUNK_CLEAR_FLAGS(nmp); head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); if (head == NULL) { freemsg(nmp); SCTP_KSTAT(sctp_rexmit_failed); goto restart_timer; } seglen += sacklen; SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); mp = mp->b_next; /* Check how much more we can send. */ tot_wnd = MIN(fp->cwnd, sctp->sctp_frwnd); /* * If the number of outstanding bytes is more than what we are * allowed to send, stop. */ if (tot_wnd <= chunklen || tot_wnd < fp->suna + chunklen) goto done_bundle; else tot_wnd -= chunklen; try_bundle: while (seglen < fp->sfa_pmss) { int32_t new_len; /* Go through the list to find more chunks to be bundled. */ while (mp != NULL) { /* Check if the chunk can be bundled. */ if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) break; mp = mp->b_next; } /* Go to the next message. */ if (mp == NULL) { for (meta = meta->b_next; meta != NULL; meta = meta->b_next) { mhdr = (sctp_msg_hdr_t *)meta->b_rptr; if (SCTP_IS_MSG_ABANDONED(meta) || SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) { continue; } mp = meta->b_cont; goto try_bundle; } /* No more chunk to be bundled. */ break; } sdc = (sctp_data_hdr_t *)mp->b_rptr; new_len = ntohs(sdc->sdh_len); chunklen = new_len - sizeof (*sdc); if (chunklen > tot_wnd) break; if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) extra = SCTP_ALIGN - extra; if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) break; if ((nmp = dupmsg(mp)) == NULL) break; if (extra > 0) { fill = sctp_get_padding(extra); if (fill != NULL) { linkb(nmp, fill); } else { freemsg(nmp); break; } } linkb(head, nmp); SCTP_CHUNK_CLEAR_FLAGS(nmp); SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); seglen = new_len; tot_wnd -= chunklen; mp = mp->b_next; } done_bundle: if ((seglen > fp->sfa_pmss) && fp->isv4) { ipha_t *iph = (ipha_t *)head->b_rptr; /* * Path MTU is different from path we thought it would * be when we created chunks, or IP headers have grown. * Need to clear the DF bit. */ iph->ipha_fragment_offset_and_flags = 0; } dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x " "ssn %d to %p (rwnd %d, lastack_rxd %x)\n", seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); sctp->sctp_rexmitting = B_TRUE; sctp->sctp_rxt_nxttsn = first_ua_tsn; sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; sctp_set_iplen(sctp, head); sctp_add_sendq(sctp, head); /* * Restart the oldfp timer with exponential backoff and * the new fp timer for the retransmitted chunks. */ restart_timer: oldfp->strikes++; sctp->sctp_strikes++; SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); if (oldfp->suna != 0) SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); sctp->sctp_active = lbolt64; /* * Should we restart the timer of the new fp? If there is * outstanding data to the new fp, the timer should be * running already. So restarting it means that the timer * will fire later for those outstanding data. But if * we don't restart it, the timer will fire too early for the * just retransmitted chunks to the new fp. The reason is that we * don't keep a timestamp on when a chunk is retransmitted. * So when the timer fires, it will just search for the * chunk with the earliest TSN sent to new fp. This probably * is the chunk we just retransmitted. So for now, let's * be conservative and restart the timer of the new fp. */ SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); } /* * The SCTP write put procedure called from IP. */ void sctp_wput(queue_t *q, mblk_t *mp) { uchar_t *rptr; t_scalar_t type; switch (mp->b_datap->db_type) { case M_IOCTL: sctp_wput_ioctl(q, mp); break; case M_DATA: /* Should be handled in sctp_output() */ ASSERT(0); freemsg(mp); break; case M_PROTO: case M_PCPROTO: rptr = mp->b_rptr; if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) { type = ((union T_primitives *)rptr)->type; /* * There is no "standard" way on how to respond * to T_CAPABILITY_REQ if a module does not * understand it. And the current TI mod * has problems handling an error ack. So we * catch the request here and reply with a response * which the TI mod knows how to respond to. */ switch (type) { case T_CAPABILITY_REQ: (void) putnextctl1(RD(q), M_ERROR, EPROTO); break; default: if ((mp = mi_tpi_err_ack_alloc(mp, TNOTSUPPORT, 0)) != NULL) { qreply(q, mp); return; } } } /* FALLTHRU */ default: freemsg(mp); return; } } /* * This function is called by sctp_ss_rexmit() to create a packet * to be retransmitted to the given fp. The given meta and mp * parameters are respectively the sctp_msg_hdr_t and the mblk of the * first chunk to be retransmitted. */ static mblk_t * sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, uint_t *packet_len) { uint32_t seglen = 0; uint16_t chunklen; int extra; mblk_t *nmp; mblk_t *head; mblk_t *fill; sctp_data_hdr_t *sdc; sctp_msg_hdr_t *mhdr; sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; seglen = ntohs(sdc->sdh_len); chunklen = seglen - sizeof (*sdc); if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) extra = SCTP_ALIGN - extra; nmp = dupmsg(*mp); if (nmp == NULL) return (NULL); if (extra > 0) { fill = sctp_get_padding(extra); if (fill != NULL) { linkb(nmp, fill); seglen += extra; } else { freemsg(nmp); return (NULL); } } SCTP_CHUNK_CLEAR_FLAGS(nmp); head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL); if (head == NULL) { freemsg(nmp); return (NULL); } SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); *mp = (*mp)->b_next; try_bundle: while (seglen < fp->sfa_pmss) { int32_t new_len; /* * Go through the list to find more chunks to be bundled. * We should only retransmit sent by unack'ed chunks. Since * they were sent before, the peer's receive window should * be able to receive them. */ while (*mp != NULL) { /* Check if the chunk can be bundled. */ if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp)) break; *mp = (*mp)->b_next; } /* Go to the next message. */ if (*mp == NULL) { for (*meta = (*meta)->b_next; *meta != NULL; *meta = (*meta)->b_next) { mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr; if (SCTP_IS_MSG_ABANDONED(*meta) || SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr, sctp)) { continue; } *mp = (*meta)->b_cont; goto try_bundle; } /* No more chunk to be bundled. */ break; } sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; /* Don't bundle chunks beyond sctp_rxt_maxtsn. */ if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn)) break; new_len = ntohs(sdc->sdh_len); chunklen = new_len - sizeof (*sdc); if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) extra = SCTP_ALIGN - extra; if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) break; if ((nmp = dupmsg(*mp)) == NULL) break; if (extra > 0) { fill = sctp_get_padding(extra); if (fill != NULL) { linkb(nmp, fill); } else { freemsg(nmp); break; } } linkb(head, nmp); SCTP_CHUNK_CLEAR_FLAGS(nmp); SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); seglen = new_len; *mp = (*mp)->b_next; } *packet_len = seglen; return (head); } /* * sctp_ss_rexmit() is called when we get a SACK after a timeout which * advances the cum_tsn but the cum_tsn is still less than what we have sent * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial" * SACK. We retransmit unacked chunks without having to wait for another * timeout. The rationale is that the SACK should not be "partial" if all the * lost chunks have been retransmitted. Since the SACK is "partial," * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still * be missing. It is better for us to retransmit them now instead * of waiting for a timeout. */ void sctp_ss_rexmit(sctp_t *sctp) { mblk_t *meta; mblk_t *mp; mblk_t *pkt; sctp_faddr_t *fp; uint_t pkt_len; uint32_t tot_wnd; sctp_data_hdr_t *sdc; int burst; /* * If the last cum ack is smaller than what we have just * retransmitted, simply return. */ if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn)) sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1; else return; ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn)); /* * After a timer fires, sctp_current should be set to the new * fp where the retransmitted chunks are sent. */ fp = sctp->sctp_current; /* * Since we are retransmitting, we can only use cwnd to determine * how much we can send as we were allowed to send those chunks * previously. */ tot_wnd = fp->cwnd; /* So we have sent more than we can, just return. */ if (tot_wnd < fp->suna || tot_wnd - fp->suna < fp->sfa_pmss) return; else tot_wnd -= fp->suna; /* Find the first unack'ed chunk */ for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr; if (SCTP_IS_MSG_ABANDONED(meta) || SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) { continue; } for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { /* Again, this may not be possible */ if (!SCTP_CHUNK_ISSENT(mp)) return; sdc = (sctp_data_hdr_t *)mp->b_rptr; if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn) goto found_msg; } } /* Everything is abandoned... */ return; found_msg: if (!fp->timer_running) SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); if (pkt == NULL) { SCTP_KSTAT(sctp_ss_rexmit_failed); return; } if ((pkt_len > fp->sfa_pmss) && fp->isv4) { ipha_t *iph = (ipha_t *)pkt->b_rptr; /* * Path MTU is different from path we thought it would * be when we created chunks, or IP headers have grown. * Need to clear the DF bit. */ iph->ipha_fragment_offset_and_flags = 0; } sctp_set_iplen(sctp, pkt); sctp_add_sendq(sctp, pkt); /* Check and see if there is more chunk to be retransmitted. */ if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss || meta == NULL) return; if (mp == NULL) meta = meta->b_next; if (meta == NULL) return; /* Retransmit another packet if the window allows. */ for (tot_wnd -= pkt_len, burst = sctp_maxburst - 1; meta != NULL && burst > 0; meta = meta->b_next, burst--) { if (mp == NULL) mp = meta->b_cont; for (; mp != NULL; mp = mp->b_next) { /* Again, this may not be possible */ if (!SCTP_CHUNK_ISSENT(mp)) return; if (!SCTP_CHUNK_ISACKED(mp)) goto found_msg; } } }