1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/systm.h> 29 #include <sys/stream.h> 30 #include <sys/cmn_err.h> 31 #define _SUN_TPI_VERSION 2 32 #include <sys/tihdr.h> 33 #include <sys/socket.h> 34 #include <sys/stropts.h> 35 #include <sys/strsun.h> 36 #include <sys/strsubr.h> 37 #include <sys/socketvar.h> 38 #include <inet/common.h> 39 #include <inet/mi.h> 40 #include <inet/ip.h> 41 #include <inet/ip_ire.h> 42 #include <inet/ip6.h> 43 #include <inet/sctp_ip.h> 44 #include <inet/ipclassifier.h> 45 46 /* 47 * PR-SCTP comments. 48 * 49 * A message can expire before it gets to the transmit list (i.e. it is still 50 * in the unsent list - unchunked), after it gets to the transmit list, but 51 * before transmission has actually started, or after transmission has begun. 
52 * Accordingly, we check for the status of a message in sctp_chunkify() when 53 * the message is being transferred from the unsent list to the transmit list; 54 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit 55 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted. 56 * When we nuke a message in sctp_chunkify(), all we need to do is take it 57 * out of the unsent list and update sctp_unsent; when a message is deemed 58 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit 59 * list, update sctp_unsent IFF transmission for the message has not yet begun 60 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the 61 * message has started, then we cannot just take it out of the list, we need 62 * to send a Forward TSN chunk to the peer so that the peer can clear its 63 * fragment list for this message. However, we cannot just send the Forward 64 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for 65 * messages preceding this abandoned message. So, we send a Forward TSN 66 * IFF all messages prior to this abandoned message have been SACKed; if not, 67 * we defer sending the Forward TSN to sctp_cumack(), which will check for 68 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In 69 * sctp_rexmit() when we check for retransmissions, we need to determine if 70 * the advanced peer ack point can be moved ahead, and if so, send a Forward 71 * TSN to the peer instead of retransmitting the chunk. Note that when 72 * we send a Forward TSN for a message, there may be yet unsent chunks for 73 * this message; we need to mark all such chunks as abandoned, so that 74 * sctp_cumack() can take the message out of the transmit list; additionally, 75 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e. 
76 * decremented when a message/chunk is deemed abandoned), sockfs needs to 77 * be notified so that it can adjust its idea of the queued message. 78 */ 79 80 #include "sctp_impl.h" 81 82 static struct kmem_cache *sctp_kmem_ftsn_set_cache; 83 static mblk_t *sctp_chunkify(sctp_t *, int, int, int); 84 85 #ifdef DEBUG 86 static boolean_t sctp_verify_chain(mblk_t *, mblk_t *); 87 #endif 88 89 /* 90 * Called to allocate a header mblk when sending data to SCTP. 91 * Data will follow in b_cont of this mblk. 92 */ 93 mblk_t * 94 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen, 95 int flags) 96 { 97 mblk_t *mp; 98 struct T_unitdata_req *tudr; 99 size_t size; 100 int error; 101 102 size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen; 103 size = MAX(size, sizeof (sctp_msg_hdr_t)); 104 if (flags & SCTP_CAN_BLOCK) { 105 mp = allocb_wait(size, BPRI_MED, 0, &error); 106 } else { 107 mp = allocb(size, BPRI_MED); 108 } 109 if (mp) { 110 tudr = (struct T_unitdata_req *)mp->b_rptr; 111 tudr->PRIM_type = T_UNITDATA_REQ; 112 tudr->DEST_length = nlen; 113 tudr->DEST_offset = sizeof (*tudr); 114 tudr->OPT_length = clen; 115 tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) + 116 _TPI_ALIGN_TOPT(nlen)); 117 if (nlen > 0) 118 bcopy(name, tudr + 1, nlen); 119 if (clen > 0) 120 bcopy(control, (char *)tudr + tudr->OPT_offset, clen); 121 mp->b_wptr += (tudr ->OPT_offset + clen); 122 mp->b_datap->db_type = M_PROTO; 123 } 124 return (mp); 125 } 126 127 /*ARGSUSED2*/ 128 int 129 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags) 130 { 131 sctp_faddr_t *fp = NULL; 132 struct T_unitdata_req *tudr; 133 int error = 0; 134 mblk_t *mproto = mp; 135 in6_addr_t *addr; 136 in6_addr_t tmpaddr; 137 uint16_t sid = sctp->sctp_def_stream; 138 uint32_t ppid = sctp->sctp_def_ppid; 139 uint32_t context = sctp->sctp_def_context; 140 uint16_t msg_flags = sctp->sctp_def_flags; 141 sctp_msg_hdr_t *sctp_msg_hdr; 142 uint32_t msg_len = 0; 143 uint32_t timetolive = sctp->sctp_def_timetolive; 144 
conn_t *connp = sctp->sctp_connp; 145 146 ASSERT(DB_TYPE(mproto) == M_PROTO); 147 148 mp = mp->b_cont; 149 ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA); 150 151 tudr = (struct T_unitdata_req *)mproto->b_rptr; 152 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 153 154 /* Get destination address, if specified */ 155 if (tudr->DEST_length > 0) { 156 sin_t *sin; 157 sin6_t *sin6; 158 159 sin = (struct sockaddr_in *) 160 (mproto->b_rptr + tudr->DEST_offset); 161 switch (sin->sin_family) { 162 case AF_INET: 163 if (tudr->DEST_length < sizeof (*sin)) { 164 return (EINVAL); 165 } 166 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr); 167 addr = &tmpaddr; 168 break; 169 case AF_INET6: 170 if (tudr->DEST_length < sizeof (*sin6)) { 171 return (EINVAL); 172 } 173 sin6 = (struct sockaddr_in6 *) 174 (mproto->b_rptr + tudr->DEST_offset); 175 addr = &sin6->sin6_addr; 176 break; 177 default: 178 return (EAFNOSUPPORT); 179 } 180 fp = sctp_lookup_faddr(sctp, addr); 181 if (fp == NULL) { 182 return (EINVAL); 183 } 184 } 185 /* Ancillary Data? 
*/ 186 if (tudr->OPT_length > 0) { 187 struct cmsghdr *cmsg; 188 char *cend; 189 struct sctp_sndrcvinfo *sndrcv; 190 191 cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset); 192 cend = ((char *)cmsg + tudr->OPT_length); 193 ASSERT(cend <= (char *)mproto->b_wptr); 194 195 for (;;) { 196 if ((char *)(cmsg + 1) > cend || 197 ((char *)cmsg + cmsg->cmsg_len) > cend) { 198 break; 199 } 200 if ((cmsg->cmsg_level == IPPROTO_SCTP) && 201 (cmsg->cmsg_type == SCTP_SNDRCV)) { 202 if (cmsg->cmsg_len < 203 (sizeof (*sndrcv) + sizeof (*cmsg))) { 204 return (EINVAL); 205 } 206 sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1); 207 sid = sndrcv->sinfo_stream; 208 msg_flags = sndrcv->sinfo_flags; 209 ppid = sndrcv->sinfo_ppid; 210 context = sndrcv->sinfo_context; 211 timetolive = sndrcv->sinfo_timetolive; 212 break; 213 } 214 if (cmsg->cmsg_len > 0) 215 cmsg = CMSG_NEXT(cmsg); 216 else 217 break; 218 } 219 } 220 if (msg_flags & MSG_ABORT) { 221 if (mp && mp->b_cont) { 222 mblk_t *pump = msgpullup(mp, -1); 223 if (!pump) { 224 return (ENOMEM); 225 } 226 freemsg(mp); 227 mp = pump; 228 mproto->b_cont = mp; 229 } 230 RUN_SCTP(sctp); 231 sctp_user_abort(sctp, mp); 232 freemsg(mproto); 233 goto done2; 234 } 235 if (mp == NULL) 236 goto done; 237 238 RUN_SCTP(sctp); 239 240 /* Reject any new data requests if we are shutting down */ 241 if (sctp->sctp_state > SCTPS_ESTABLISHED || 242 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) { 243 error = EPIPE; 244 goto unlock_done; 245 } 246 247 /* Re-use the mproto to store relevant info. 
*/ 248 ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr)); 249 250 mproto->b_rptr = mproto->b_datap->db_base; 251 mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr); 252 253 sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr; 254 bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr)); 255 sctp_msg_hdr->smh_context = context; 256 sctp_msg_hdr->smh_sid = sid; 257 sctp_msg_hdr->smh_ppid = ppid; 258 sctp_msg_hdr->smh_flags = msg_flags; 259 sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive); 260 sctp_msg_hdr->smh_tob = lbolt64; 261 for (; mp != NULL; mp = mp->b_cont) 262 msg_len += MBLKL(mp); 263 sctp_msg_hdr->smh_msglen = msg_len; 264 265 /* User requested specific destination */ 266 SCTP_SET_CHUNK_DEST(mproto, fp); 267 268 if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED && 269 sid >= sctp->sctp_num_ostr) { 270 /* Send sendfail event */ 271 sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID, 272 B_FALSE); 273 error = EINVAL; 274 goto unlock_done; 275 } 276 277 /* no data */ 278 if (msg_len == 0) { 279 sctp_sendfail_event(sctp, dupmsg(mproto), 280 SCTP_ERR_NO_USR_DATA, B_FALSE); 281 error = EINVAL; 282 goto unlock_done; 283 } 284 285 /* Add it to the unsent list */ 286 if (sctp->sctp_xmit_unsent == NULL) { 287 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto; 288 } else { 289 sctp->sctp_xmit_unsent_tail->b_next = mproto; 290 sctp->sctp_xmit_unsent_tail = mproto; 291 } 292 sctp->sctp_unsent += msg_len; 293 BUMP_LOCAL(sctp->sctp_msgcount); 294 /* 295 * Notify sockfs if the tx queue is full. 296 */ 297 if (SCTP_TXQ_LEN(sctp) >= connp->conn_sndbuf) { 298 sctp->sctp_txq_full = 1; 299 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, B_TRUE); 300 } 301 if (sctp->sctp_state == SCTPS_ESTABLISHED) 302 sctp_output(sctp, UINT_MAX); 303 done2: 304 WAKE_SCTP(sctp); 305 return (0); 306 unlock_done: 307 WAKE_SCTP(sctp); 308 done: 309 return (error); 310 } 311 312 /* 313 * While there are messages on sctp_xmit_unsent, detach each one. 
/*
 * While there are messages on sctp_xmit_unsent, detach each one. For each:
 * allocate space for the chunk header, fill in the data chunk, and fill in
 * the chunk header. Then append it to sctp_xmit_tail.
 * Return after appending as many bytes as required (bytes_to_send).
 * We also return if we've appended one or more chunks, and find a subsequent
 * unsent message is too big to fit in the segment.
 *
 * mss - firstseg_len is the room available for the first chunk (including
 * its data chunk header); every later chunk is sized from the destination's
 * sfa_pmss.
 *
 * Returns NULL when firstseg_len is non-zero and the message at the head of
 * the unsent list does not fit in that remaining room; the message is then
 * left on the unsent list.  Otherwise returns sctp->sctp_xmit_tail, which
 * this function only sets when it was NULL.
 *
 * If dupb()/allocb() fails part way through a message, the data that has
 * not been chunked yet is put back at the front of mdblk->b_cont and, when
 * finished chunks already exist, the message is flagged with
 * SCTP_MSG_SET_CHUNKED() so that a later call resumes where this one
 * stopped.
 */
mblk_t *
sctp_chunkify(sctp_t *sctp, int mss, int firstseg_len, int bytes_to_send)
{
	mblk_t			*mp;
	mblk_t			*chunk_mp;
	mblk_t			*chunk_head;
	mblk_t			*chunk_hdr;
	mblk_t			*chunk_tail = NULL;
	int			count;
	int			chunksize;
	sctp_data_hdr_t		*sdc;
	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
	sctp_faddr_t		*fp;
	sctp_faddr_t		*fp1;
	size_t			xtralen;
	sctp_msg_hdr_t		*msg_hdr;
	sctp_stack_t		*sctps = sctp->sctp_sctps;
	sctp_msg_hdr_t		*next_msg_hdr;
	size_t			nextlen;
	int			remaining_len = mss - firstseg_len;

	ASSERT(remaining_len >= 0);

	/* Destination of the first message; default to the current peer. */
	fp = SCTP_CHUNK_DEST(mdblk);
	if (fp == NULL)
		fp = sctp->sctp_current;
	/* Size used when a separate mblk must be allocated for a header. */
	if (fp->isv4)
		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
		    sizeof (*sdc);
	else
		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
		    sizeof (*sdc);
	/* count: payload room left in this chunk; chunksize: its capacity. */
	count = chunksize = remaining_len - sizeof (*sdc);
nextmsg:
	next_msg_hdr = (sctp_msg_hdr_t *)sctp->sctp_xmit_unsent->b_rptr;
	nextlen = next_msg_hdr->smh_msglen;
	/*
	 * Will the entire next message fit in the current packet ?
	 * if not, leave it on the unsent list.
	 */
	if ((firstseg_len != 0) && (nextlen > remaining_len))
		return (NULL);

	chunk_mp = mdblk->b_cont;

	/*
	 * If this partially chunked, we ignore the next one for now and
	 * use the one already present. For the unchunked bits, we use the
	 * length of the last chunk.
	 */
	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
		int	chunk_len;

		/*
		 * The first entry on b_cont is the still unchunked data
		 * (parked there by the allocation-failure paths below);
		 * pop it off so that b_cont lists finished chunks only.
		 */
		ASSERT(chunk_mp->b_next != NULL);
		mdblk->b_cont = chunk_mp->b_next;
		chunk_mp->b_next = NULL;
		SCTP_MSG_CLEAR_CHUNKED(mdblk);
		/* Find the last finished chunk to learn its length. */
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
			count = chunksize = fp->sfa_pmss - chunk_len;
		else
			count = chunksize = fp->sfa_pmss;
		count = chunksize = count - sizeof (*sdc);
	} else {
		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
			/*
			 * The message is to be abandoned before any of it
			 * was chunked: unlink it from the unsent list,
			 * fix up the accounting and report the failure.
			 */
			sctp->sctp_xmit_unsent = mdblk->b_next;
			if (sctp->sctp_xmit_unsent == NULL)
				sctp->sctp_xmit_unsent_tail = NULL;
			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
			sctp->sctp_unsent -= msg_hdr->smh_msglen;
			mdblk->b_next = NULL;
			BUMP_LOCAL(sctp->sctp_prsctpdrop);
			/*
			 * Update ULP the amount of queued data, which is
			 * sent-unack'ed + unsent.
			 */
			if (!SCTP_IS_DETACHED(sctp))
				SCTP_TXQ_UPDATE(sctp);
			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
			goto try_next;
		}
		/* b_cont is rebuilt below as the list of finished chunks. */
		mdblk->b_cont = NULL;
	}
	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
nextchunk:
	chunk_head = chunk_mp;
	chunk_tail = NULL;

	/* Skip as many mblk's as we need */
	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
		count -= MBLKL(chunk_mp);
		chunk_tail = chunk_mp;
		chunk_mp = chunk_mp->b_cont;
	}
	/* Split the chain, if needed */
	if (chunk_mp != NULL) {
		if (count > 0) {
			/*
			 * Only part of chunk_mp fits: dupb() it so that the
			 * first count bytes end this chunk and the rest
			 * begins the next one.
			 */
			mblk_t	*split_mp = dupb(chunk_mp);

			if (split_mp == NULL) {
				/* No memory: put the unchunked data back. */
				if (mdblk->b_cont == NULL) {
					mdblk->b_cont = chunk_head;
				} else  {
					SCTP_MSG_SET_CHUNKED(mdblk);
					ASSERT(chunk_head->b_next == NULL);
					chunk_head->b_next = mdblk->b_cont;
					mdblk->b_cont = chunk_head;
				}
				return (sctp->sctp_xmit_tail);
			}
			if (chunk_tail != NULL) {
				chunk_tail->b_cont = split_mp;
				chunk_tail = chunk_tail->b_cont;
			} else {
				chunk_head = chunk_tail = split_mp;
			}
			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
			chunk_mp->b_rptr = chunk_tail->b_wptr;
			count = 0;
		} else if (chunk_tail == NULL) {
			/* No room and nothing gathered: retry at full size. */
			goto next;
		} else {
			/* Chunk ends exactly on an mblk boundary. */
			chunk_tail->b_cont = NULL;
		}
	}
	/* Alloc chunk hdr, if needed */
	if (DB_REF(chunk_head) > 1 ||
	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/* No memory: re-join the data and put it back. */
			if (mdblk->b_cont == NULL) {
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				mdblk->b_cont = chunk_head;
			} else {
				SCTP_MSG_SET_CHUNKED(mdblk);
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				ASSERT(chunk_head->b_next == NULL);
				chunk_head->b_next = mdblk->b_cont;
				mdblk->b_cont = chunk_head;
			}
			return (sctp->sctp_xmit_tail);
		}
		/* Put the data header at the very end of the new mblk. */
		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
		chunk_hdr->b_cont = chunk_head;
	} else {
		/* Enough private, aligned headroom: prepend in place. */
		chunk_hdr = chunk_head;
		chunk_hdr->b_rptr -= sizeof (*sdc);
	}
	ASSERT(chunk_hdr->b_datap->db_ref == 1);
	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
	sdc->sdh_id = CHUNK_DATA;
	sdc->sdh_flags = 0;
	/* chunksize - count is the payload actually placed in the chunk. */
	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
	ASSERT(sdc->sdh_len);
	sdc->sdh_sid = htons(msg_hdr->smh_sid);
	/*
	 * We defer assigning the SSN just before sending the chunk, else
	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
	 * to send a Forward TSN to let the peer know. Some more comments
	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
	 */
	sdc->sdh_payload_id = msg_hdr->smh_ppid;

	/* Chunks of one message are chained through b_next off b_cont. */
	if (mdblk->b_cont == NULL) {
		mdblk->b_cont = chunk_hdr;
		SCTP_DATA_SET_BBIT(sdc);
	} else {
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = chunk_hdr;
	}

	bytes_to_send -= (chunksize - count);
	if (chunk_mp != NULL) {
next:
		/* More data in this message: next chunk gets a full pmss. */
		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
		goto nextchunk;
	}
	/* Message fully chunked: mark its last chunk and move it over. */
	SCTP_DATA_SET_EBIT(sdc);
	sctp->sctp_xmit_unsent = mdblk->b_next;
	if (mdblk->b_next == NULL) {
		sctp->sctp_xmit_unsent_tail = NULL;
	}
	mdblk->b_next = NULL;

	if (sctp->sctp_xmit_tail == NULL) {
		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
	} else {
		/* Append after the last message reachable from xmit_tail. */
		mp = sctp->sctp_xmit_tail;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = mdblk;
		mdblk->b_prev = mp;
	}
try_next:
	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
		mdblk = sctp->sctp_xmit_unsent;
		fp1 = SCTP_CHUNK_DEST(mdblk);
		if (fp1 == NULL)
			fp1 = sctp->sctp_current;
		if (fp == fp1) {
			size_t len = MBLKL(mdblk->b_cont);
			/*
			 * Same destination: keep using the room left over
			 * from the previous chunk (less a data header,
			 * rounded down to a multiple of 4) when the first
			 * mblk of the next message is larger than a full
			 * chunk or fits in the leftover; otherwise start
			 * with a full-sized chunk.
			 */
			if ((count > 0) &&
			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
			    (len <= count))) {
				count -= sizeof (*sdc);
				count = chunksize = count - (count & 0x3);
			} else {
				count = chunksize = fp->sfa_pmss -
				    sizeof (*sdc);
			}
		} else {
			/* Different destination: recompute the sizes. */
			if (fp1->isv4)
				xtralen = sctp->sctp_hdr_len;
			else
				xtralen = sctp->sctp_hdr6_len;
			xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
			fp = fp1;
		}
		goto nextmsg;
	}
	return (sctp->sctp_xmit_tail);
}

/*
 * Free a whole message: every chunk hanging off ump->b_cont (the chunks
 * are chained through b_next, each one freed with freemsg()), and then
 * the meta mblk ump itself.  ump must already have a NULL b_prev.
 */
void
sctp_free_msg(mblk_t *ump)
{
	mblk_t *mp, *nmp;

	for (mp = ump->b_cont; mp; mp = nmp) {
		/* Grab the successor before the links are cleared. */
		nmp = mp->b_next;
		mp->b_next = mp->b_prev = NULL;
		freemsg(mp);
	}
	ASSERT(!ump->b_prev);
	ump->b_next = NULL;
	freeb(ump);
}

/*
 * Prepend the association's header template (sctp_iphc or sctp_iphc6,
 * chosen by fp->isv4), followed by sacklen bytes filled in by
 * sctp_fill_sack() when sacklen is non-zero, to the chunk chain mp.
 *
 * The headers are written into mp's own headroom when there is enough of
 * it and the data block is not referenced more than twice; otherwise a
 * new mblk is allocated and mp is linked behind it.  When fp is not
 * sctp_current the destination (and, where known, the source) address in
 * the copied header is rewritten for fp.
 *
 * Returns the mblk that now heads the packet, or NULL on failure.  When
 * error is non-NULL, *error is set to 0 on entry and to EHOSTUNREACH or
 * ENOMEM on failure.  mp is not freed on failure.
 */
mblk_t *
sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
    int *error)
{
	int hdrlen;
	uchar_t *hdr;
	int isv4 = fp->isv4;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	if (error != NULL)
		*error = 0;

	if (isv4) {
		hdrlen = sctp->sctp_hdr_len;
		hdr = sctp->sctp_iphc;
	} else {
		hdrlen = sctp->sctp_hdr6_len;
		hdr = sctp->sctp_iphc6;
	}
	/*
	 * A reject|blackhole could mean that the address is 'down'. Similarly,
	 * it is possible that the address went down, we tried to send an
	 * heartbeat and ended up setting fp->saddr as unspec because we
	 * didn't have any usable source address.  In either case
	 * sctp_get_dest() will try find an IRE, if available, and set
	 * the source address, if needed.  If we still don't have any
	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
	 * we return EHOSTUNREACH.
	 */
	ASSERT(fp->ixa->ixa_ire != NULL);
	if ((fp->ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
	    SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
		sctp_get_dest(sctp, fp);
		if (fp->state == SCTP_FADDRS_UNREACH) {
			if (error != NULL)
				*error = EHOSTUNREACH;
			return (NULL);
		}
	}
	/* Copy in IP header. */
	if ((mp->b_rptr - mp->b_datap->db_base) <
	    (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) {
		mblk_t *nmp;

		/*
		 * This can happen if IP headers are adjusted after
		 * data was moved into chunks, or during retransmission,
		 * or things like snoop is running.
		 */
		nmp = allocb(sctps->sctps_wroff_xtra + hdrlen + sacklen,
		    BPRI_MED);
		if (nmp == NULL) {
			if (error !=  NULL)
				*error = ENOMEM;
			return (NULL);
		}
		/* Leave sctps_wroff_xtra bytes of headroom in front. */
		nmp->b_rptr += sctps->sctps_wroff_xtra;
		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
		nmp->b_cont = mp;
		mp = nmp;
	} else {
		/* Enough headroom: grow the first mblk backwards. */
		mp->b_rptr -= (hdrlen + sacklen);
	}
	bcopy(hdr, mp->b_rptr, hdrlen);
	if (sacklen) {
		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
	}
	if (fp != sctp->sctp_current) {
		/* change addresses in header */
		if (isv4) {
			ipha_t *iph = (ipha_t *)mp->b_rptr;

			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
				    iph->ipha_src);
			} else if (sctp->sctp_bound_to_all) {
				iph->ipha_src = INADDR_ANY;
			}
		} else {
			ip6_t *ip6h = (ip6_t *)mp->b_rptr;

			ip6h->ip6_dst = fp->faddr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
				ip6h->ip6_src = fp->saddr;
			} else if (sctp->sctp_bound_to_all) {
				ip6h->ip6_src = ipv6_all_zeros;
			}
		}
	}
	return (mp);
}
665 */ 666 static mblk_t * 667 sctp_get_padding(sctp_t *sctp, int pad) 668 { 669 mblk_t *fill; 670 671 ASSERT(pad < SCTP_ALIGN); 672 ASSERT(sctp->sctp_pad_mp != NULL); 673 if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) { 674 fill->b_wptr += pad; 675 return (fill); 676 } 677 678 /* 679 * The memory saving path of reusing the sctp_pad_mp 680 * fails may be because it has been dupb() too 681 * many times (DBLK_REFMAX). Use the memory consuming 682 * path of allocating the pad mblk. 683 */ 684 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) { 685 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */ 686 *(int32_t *)fill->b_rptr = 0; 687 fill->b_wptr += pad; 688 } 689 return (fill); 690 } 691 692 static mblk_t * 693 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) 694 { 695 mblk_t *meta; 696 mblk_t *start_mp = NULL; 697 mblk_t *end_mp = NULL; 698 mblk_t *mp, *nmp; 699 mblk_t *fill; 700 sctp_data_hdr_t *sdh; 701 int msglen; 702 int extra; 703 sctp_msg_hdr_t *msg_hdr; 704 sctp_faddr_t *old_fp = NULL; 705 sctp_faddr_t *chunk_fp; 706 sctp_stack_t *sctps = sctp->sctp_sctps; 707 708 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 709 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 710 if (SCTP_IS_MSG_ABANDONED(meta) || 711 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 712 continue; 713 } 714 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 715 if (SCTP_CHUNK_WANT_REXMIT(mp)) { 716 /* 717 * Use the same peer address to do fast 718 * retransmission. If the original peer 719 * address is dead, switch to the current 720 * one. Record the old one so that we 721 * will pick the chunks sent to the old 722 * one for fast retransmission. 
723 */ 724 chunk_fp = SCTP_CHUNK_DEST(mp); 725 if (*fp == NULL) { 726 *fp = chunk_fp; 727 if ((*fp)->state != SCTP_FADDRS_ALIVE) { 728 old_fp = *fp; 729 *fp = sctp->sctp_current; 730 } 731 } else if (old_fp == NULL && *fp != chunk_fp) { 732 continue; 733 } else if (old_fp != NULL && 734 old_fp != chunk_fp) { 735 continue; 736 } 737 738 sdh = (sctp_data_hdr_t *)mp->b_rptr; 739 msglen = ntohs(sdh->sdh_len); 740 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) { 741 extra = SCTP_ALIGN - extra; 742 } 743 744 /* 745 * We still return at least the first message 746 * even if that message cannot fit in as 747 * PMTU may have changed. 748 */ 749 if (*total + msglen + extra > 750 (*fp)->sfa_pmss && start_mp != NULL) { 751 return (start_mp); 752 } 753 if ((nmp = dupmsg(mp)) == NULL) 754 return (start_mp); 755 if (extra > 0) { 756 fill = sctp_get_padding(sctp, extra); 757 if (fill != NULL) { 758 linkb(nmp, fill); 759 } else { 760 return (start_mp); 761 } 762 } 763 BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans); 764 BUMP_LOCAL(sctp->sctp_rxtchunks); 765 SCTP_CHUNK_CLEAR_REXMIT(mp); 766 if (start_mp == NULL) { 767 start_mp = nmp; 768 } else { 769 linkb(end_mp, nmp); 770 } 771 end_mp = nmp; 772 *total += msglen + extra; 773 dprint(2, ("sctp_find_fast_rexmit_mblks: " 774 "tsn %x\n", sdh->sdh_tsn)); 775 } 776 } 777 } 778 /* Clear the flag as there is no more message to be fast rexmitted. */ 779 sctp->sctp_chk_fast_rexmit = B_FALSE; 780 return (start_mp); 781 } 782 783 /* A debug function just to make sure that a mblk chain is not broken */ 784 #ifdef DEBUG 785 static boolean_t 786 sctp_verify_chain(mblk_t *head, mblk_t *tail) 787 { 788 mblk_t *mp = head; 789 790 if (head == NULL || tail == NULL) 791 return (B_TRUE); 792 while (mp != NULL) { 793 if (mp == tail) 794 return (B_TRUE); 795 mp = mp->b_next; 796 } 797 return (B_FALSE); 798 } 799 #endif 800 801 /* 802 * Gets the next unsent chunk to transmit. Messages that are abandoned are 803 * skipped. 
/*
 * Gets the next unsent chunk to transmit. Messages that are abandoned are
 * skipped. A message can be abandoned if it has a non-zero timetolive and
 * transmission has not yet started or if it is a partially reliable
 * message and its time is up (assuming we are PR-SCTP aware).
 * We only return a chunk if it will fit entirely in the current packet.
 * 'cansend' is used to determine if need to try and chunkify messages from
 * the unsent list, if any, and also as an input to sctp_chunkify() if so.
 *
 * firstseg_len indicates the space already used, cansend represents remaining
 * space in the window, ((sfa_pmss - firstseg_len) can therefore reasonably
 * be used to compute the cansend arg).
 *
 * The search starts at 'meta' and follows b_next.  On success the meta mblk
 * of the message is returned and *mp points at the chunk to send.  NULL is
 * returned when there is nothing to send, or when
 * sctp_check_abandoned_msg() fails, in which case *error holds its return
 * value; *error is 0 otherwise.  'fp', when non-NULL, is the destination
 * the caller wants to send to and restricts what is pulled off the unsent
 * list.
 */
mblk_t *
sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
    int32_t firstseg_len, uint32_t cansend, sctp_faddr_t *fp)
{
	mblk_t		*mp1;
	sctp_msg_hdr_t	*msg_hdr;
	mblk_t		*tmp_meta;
	sctp_faddr_t	*fp1;

	ASSERT(error != NULL && mp != NULL);
	*error = 0;

	ASSERT(sctp->sctp_current != NULL);

chunkified:
	while (meta != NULL) {
		/* Remember the successor; meta may get unlinked below. */
		tmp_meta = meta->b_next;
		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
		mp1 = meta->b_cont;
		if (SCTP_IS_MSG_ABANDONED(meta))
			goto next_msg;
		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			/* Live message: hand back its first sendable chunk. */
			while (mp1 != NULL) {
				if (SCTP_CHUNK_CANSEND(mp1)) {
					*mp = mp1;
#ifdef	DEBUG
					ASSERT(sctp_verify_chain(
					    sctp->sctp_xmit_head, meta));
#endif
					return (meta);
				}
				mp1 = mp1->b_next;
			}
			goto next_msg;
		}
		/*
		 * If we come here and the first chunk is sent, then we
		 * are PR-SCTP aware, in which case if the cumulative
		 * TSN has moved up to or beyond the first chunk (which
		 * means all the previous messages have been cumulative
		 * SACK'd), then we send a Forward TSN with the last
		 * chunk that was sent in this message. If we can't send
		 * a Forward TSN because previous non-abandoned messages
		 * have not been acked then we will defer the Forward TSN
		 * to sctp_rexmit() or sctp_cumack().
		 */
		if (SCTP_CHUNK_ISSENT(mp1)) {
			*error = sctp_check_abandoned_msg(sctp, meta);
			if (*error != 0) {
#ifdef	DEBUG
				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
				    sctp->sctp_xmit_tail));
#endif
				return (NULL);
			}
			goto next_msg;
		}
		/*
		 * Nothing of this message has been sent yet, so it can
		 * simply be taken off the transmit list.  The three cases
		 * below are: meta is the head, meta is the last entry,
		 * and meta is in the middle.
		 */
		BUMP_LOCAL(sctp->sctp_prsctpdrop);
		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
		if (meta->b_prev == NULL) {
			ASSERT(sctp->sctp_xmit_head == meta);
			sctp->sctp_xmit_head = tmp_meta;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_next = NULL;
			if (tmp_meta != NULL)
				tmp_meta->b_prev = NULL;
		} else if (meta->b_next == NULL) {
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = meta->b_prev;
			meta->b_prev->b_next = NULL;
			meta->b_prev = NULL;
		} else {
			meta->b_prev->b_next = tmp_meta;
			tmp_meta->b_prev = meta->b_prev;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_prev = NULL;
			meta->b_next = NULL;
		}
		sctp->sctp_unsent -= msg_hdr->smh_msglen;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);
		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
next_msg:
		meta = tmp_meta;
	}
	/* chunkify, if needed */
	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
		ASSERT(sctp->sctp_unsent > 0);
		if (fp == NULL) {
			/* No preference: use the message's own destination. */
			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
				fp = sctp->sctp_current;
		} else {
			/*
			 * If user specified destination, try to honor that.
			 */
			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
			    fp1 != fp) {
				goto chunk_done;
			}
		}
		meta = sctp_chunkify(sctp, fp->sfa_pmss, firstseg_len, cansend);
		if (meta == NULL)
			goto chunk_done;
		/*
		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
		 * new chunk(s) to the tail, so we need to skip the
		 * sctp_xmit_tail, which would have already been processed.
		 * This could happen when there are unacked chunks, but
		 * nothing new to send.
		 * When sctp_chunkify() is called when the transmit queue
		 * is empty then we need to start from sctp_xmit_tail.
		 */
		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
#ifdef	DEBUG
			mp1 = sctp->sctp_xmit_tail->b_cont;
			while (mp1 != NULL) {
				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
				mp1 = mp1->b_next;
			}
#endif
			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
				goto chunk_done;
		}
		/* Rescan, starting at the newly chunked message(s). */
		goto chunkified;
	}
chunk_done:
#ifdef	DEBUG
	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
#endif
	return (NULL);
}

/*
 * Fast retransmit: gather the chunks marked for fast retransmission with
 * sctp_find_fast_rexmit_mblks(), prepend the protocol headers and send
 * them as one packet to the destination that helper selected.  Nothing is
 * sent (and a kstat is bumped) if no chunk is found or the headers cannot
 * be added.
 */
void
sctp_fast_rexmit(sctp_t *sctp)
{
	mblk_t		*mp, *head;
	int		pktlen = 0;
	sctp_faddr_t	*fp = NULL;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	ASSERT(sctp->sctp_xmit_head != NULL);
	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
	if (mp == NULL) {
		SCTP_KSTAT(sctps, sctp_fr_not_found);
		return;
	}
	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
		/* The gathered copies are still ours to free. */
		freemsg(mp);
		SCTP_KSTAT(sctps, sctp_fr_add_hdr);
		return;
	}
	/*
	 * The gathered data is larger than the path MSS: clear the IPv4
	 * fragment offset/flags word (presumably so that IP is allowed to
	 * fragment the packet -- confirm against the header template).
	 */
	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
		ipha_t *iph = (ipha_t *)head->b_rptr;

		iph->ipha_fragment_offset_and_flags = 0;
	}

	sctp_set_iplen(sctp, head, fp->ixa);
	(void) conn_ip_output(head, fp->ixa);
	BUMP_LOCAL(sctp->sctp_opkts);
	sctp->sctp_active = fp->lastactive = lbolt64;
}

void
sctp_output(sctp_t *sctp, uint_t num_pkt) 987 { 988 mblk_t *mp = NULL; 989 mblk_t *nmp; 990 mblk_t *head; 991 mblk_t *meta = sctp->sctp_xmit_tail; 992 mblk_t *fill = NULL; 993 uint16_t chunklen; 994 uint32_t cansend; 995 int32_t seglen; 996 int32_t xtralen; 997 int32_t sacklen; 998 int32_t pad = 0; 999 int32_t pathmax; 1000 int extra; 1001 int64_t now = lbolt64; 1002 sctp_faddr_t *fp; 1003 sctp_faddr_t *lfp; 1004 sctp_data_hdr_t *sdc; 1005 int error; 1006 boolean_t notsent = B_TRUE; 1007 sctp_stack_t *sctps = sctp->sctp_sctps; 1008 1009 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1010 sacklen = 0; 1011 } else { 1012 /* send a SACK chunk */ 1013 sacklen = sizeof (sctp_chunk_hdr_t) + 1014 sizeof (sctp_sack_chunk_t) + 1015 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1016 lfp = sctp->sctp_lastdata; 1017 ASSERT(lfp != NULL); 1018 if (lfp->state != SCTP_FADDRS_ALIVE) 1019 lfp = sctp->sctp_current; 1020 } 1021 1022 cansend = sctp->sctp_frwnd; 1023 if (sctp->sctp_unsent < cansend) 1024 cansend = sctp->sctp_unsent; 1025 1026 /* 1027 * Start persist timer if unable to send or when 1028 * trying to send into a zero window. This timer 1029 * ensures the blocked send attempt is retried. 1030 */ 1031 if ((cansend < sctp->sctp_current->sfa_pmss / 2) && 1032 (sctp->sctp_unacked != 0) && 1033 (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) && 1034 !sctp->sctp_ndelay || 1035 (cansend == 0 && sctp->sctp_unacked == 0 && 1036 sctp->sctp_unsent != 0)) { 1037 head = NULL; 1038 fp = sctp->sctp_current; 1039 goto unsent_data; 1040 } 1041 if (meta != NULL) 1042 mp = meta->b_cont; 1043 while (cansend > 0 && num_pkt-- != 0) { 1044 pad = 0; 1045 1046 /* 1047 * Find first segment eligible for transmit. 1048 */ 1049 while (mp != NULL) { 1050 if (SCTP_CHUNK_CANSEND(mp)) 1051 break; 1052 mp = mp->b_next; 1053 } 1054 if (mp == NULL) { 1055 meta = sctp_get_msg_to_send(sctp, &mp, 1056 meta == NULL ? 
NULL : meta->b_next, &error, sacklen, 1057 cansend, NULL); 1058 if (error != 0 || meta == NULL) { 1059 head = NULL; 1060 fp = sctp->sctp_current; 1061 goto unsent_data; 1062 } 1063 sctp->sctp_xmit_tail = meta; 1064 } 1065 1066 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1067 seglen = ntohs(sdc->sdh_len); 1068 xtralen = sizeof (*sdc); 1069 chunklen = seglen - xtralen; 1070 1071 /* 1072 * Check rwnd. 1073 */ 1074 if (chunklen > cansend) { 1075 head = NULL; 1076 fp = SCTP_CHUNK_DEST(meta); 1077 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1078 fp = sctp->sctp_current; 1079 goto unsent_data; 1080 } 1081 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1082 extra = SCTP_ALIGN - extra; 1083 1084 /* 1085 * Pick destination address, and check cwnd. 1086 */ 1087 if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) && 1088 (seglen + sacklen + extra <= lfp->sfa_pmss)) { 1089 /* 1090 * Only include SACK chunk if it can be bundled 1091 * with a data chunk, and sent to sctp_lastdata. 1092 */ 1093 pathmax = lfp->cwnd - lfp->suna; 1094 1095 fp = lfp; 1096 if ((nmp = dupmsg(mp)) == NULL) { 1097 head = NULL; 1098 goto unsent_data; 1099 } 1100 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1101 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, 1102 &error); 1103 if (head == NULL) { 1104 /* 1105 * If none of the source addresses are 1106 * available (i.e error == EHOSTUNREACH), 1107 * pretend we have sent the data. We will 1108 * eventually time out trying to retramsmit 1109 * the data if the interface never comes up. 1110 * If we have already sent some stuff (i.e., 1111 * notsent is B_FALSE) then we are fine, else 1112 * just mark this packet as sent. 
1113 */ 1114 if (notsent && error == EHOSTUNREACH) { 1115 SCTP_CHUNK_SENT(sctp, mp, sdc, 1116 fp, chunklen, meta); 1117 } 1118 freemsg(nmp); 1119 SCTP_KSTAT(sctps, sctp_output_failed); 1120 goto unsent_data; 1121 } 1122 seglen += sacklen; 1123 xtralen += sacklen; 1124 sacklen = 0; 1125 } else { 1126 fp = SCTP_CHUNK_DEST(meta); 1127 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1128 fp = sctp->sctp_current; 1129 /* 1130 * If we haven't sent data to this destination for 1131 * a while, do slow start again. 1132 */ 1133 if (now - fp->lastactive > fp->rto) { 1134 SET_CWND(fp, fp->sfa_pmss, 1135 sctps->sctps_slow_start_after_idle); 1136 } 1137 1138 pathmax = fp->cwnd - fp->suna; 1139 if (seglen + extra > pathmax) { 1140 head = NULL; 1141 goto unsent_data; 1142 } 1143 if ((nmp = dupmsg(mp)) == NULL) { 1144 head = NULL; 1145 goto unsent_data; 1146 } 1147 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1148 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error); 1149 if (head == NULL) { 1150 /* 1151 * If none of the source addresses are 1152 * available (i.e error == EHOSTUNREACH), 1153 * pretend we have sent the data. We will 1154 * eventually time out trying to retramsmit 1155 * the data if the interface never comes up. 1156 * If we have already sent some stuff (i.e., 1157 * notsent is B_FALSE) then we are fine, else 1158 * just mark this packet as sent. 1159 */ 1160 if (notsent && error == EHOSTUNREACH) { 1161 SCTP_CHUNK_SENT(sctp, mp, sdc, 1162 fp, chunklen, meta); 1163 } 1164 freemsg(nmp); 1165 SCTP_KSTAT(sctps, sctp_output_failed); 1166 goto unsent_data; 1167 } 1168 } 1169 fp->lastactive = now; 1170 if (pathmax > fp->sfa_pmss) 1171 pathmax = fp->sfa_pmss; 1172 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1173 mp = mp->b_next; 1174 1175 /* Use this chunk to measure RTT? 
*/ 1176 if (sctp->sctp_out_time == 0) { 1177 sctp->sctp_out_time = now; 1178 sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1; 1179 ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); 1180 } 1181 if (extra > 0) { 1182 fill = sctp_get_padding(sctp, extra); 1183 if (fill != NULL) { 1184 linkb(head, fill); 1185 pad = extra; 1186 seglen += extra; 1187 } else { 1188 goto unsent_data; 1189 } 1190 } 1191 /* 1192 * Bundle chunks. We linkb() the chunks together to send 1193 * downstream in a single packet. 1194 * Partial chunks MUST NOT be bundled with full chunks, so we 1195 * rely on sctp_get_msg_to_send() to only return messages that 1196 * will fit entirely in the current packet. 1197 */ 1198 while (seglen < pathmax) { 1199 int32_t new_len; 1200 int32_t new_xtralen; 1201 1202 while (mp != NULL) { 1203 if (SCTP_CHUNK_CANSEND(mp)) 1204 break; 1205 mp = mp->b_next; 1206 } 1207 if (mp == NULL) { 1208 meta = sctp_get_msg_to_send(sctp, &mp, 1209 meta->b_next, &error, seglen, 1210 (seglen - xtralen) >= cansend ? 
0 : 1211 cansend - seglen, fp); 1212 if (error != 0) 1213 break; 1214 /* If no more eligible chunks, cease bundling */ 1215 if (meta == NULL) 1216 break; 1217 sctp->sctp_xmit_tail = meta; 1218 } 1219 ASSERT(mp != NULL); 1220 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) && 1221 fp != SCTP_CHUNK_DEST(meta)) { 1222 break; 1223 } 1224 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1225 chunklen = ntohs(sdc->sdh_len); 1226 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0) 1227 extra = SCTP_ALIGN - extra; 1228 1229 new_len = seglen + chunklen; 1230 new_xtralen = xtralen + sizeof (*sdc); 1231 chunklen -= sizeof (*sdc); 1232 1233 if (new_len - new_xtralen > cansend || 1234 new_len + extra > pathmax) { 1235 break; 1236 } 1237 if ((nmp = dupmsg(mp)) == NULL) 1238 break; 1239 if (extra > 0) { 1240 fill = sctp_get_padding(sctp, extra); 1241 if (fill != NULL) { 1242 pad += extra; 1243 new_len += extra; 1244 linkb(nmp, fill); 1245 } else { 1246 freemsg(nmp); 1247 break; 1248 } 1249 } 1250 seglen = new_len; 1251 xtralen = new_xtralen; 1252 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1253 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1254 linkb(head, nmp); 1255 mp = mp->b_next; 1256 } 1257 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1258 ipha_t *iph = (ipha_t *)head->b_rptr; 1259 1260 /* 1261 * Path MTU is different from what we thought it would 1262 * be when we created chunks, or IP headers have grown. 1263 * Need to clear the DF bit. 
1264 */ 1265 iph->ipha_fragment_offset_and_flags = 0; 1266 } 1267 /* xmit segment */ 1268 ASSERT(cansend >= seglen - pad - xtralen); 1269 cansend -= (seglen - pad - xtralen); 1270 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x " 1271 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n", 1272 seglen - xtralen, ntohl(sdc->sdh_tsn), 1273 ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, 1274 cansend, sctp->sctp_lastack_rxd)); 1275 sctp_set_iplen(sctp, head, fp->ixa); 1276 (void) conn_ip_output(head, fp->ixa); 1277 BUMP_LOCAL(sctp->sctp_opkts); 1278 /* arm rto timer (if not set) */ 1279 if (!fp->timer_running) 1280 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1281 notsent = B_FALSE; 1282 } 1283 sctp->sctp_active = now; 1284 return; 1285 unsent_data: 1286 /* arm persist timer (if rto timer not set) */ 1287 if (!fp->timer_running) 1288 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1289 if (head != NULL) 1290 freemsg(head); 1291 } 1292 1293 /* 1294 * The following two functions initialize and destroy the cache 1295 * associated with the sets used for PR-SCTP. 1296 */ 1297 void 1298 sctp_ftsn_sets_init(void) 1299 { 1300 sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache", 1301 sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL, 1302 NULL, 0); 1303 } 1304 1305 void 1306 sctp_ftsn_sets_fini(void) 1307 { 1308 kmem_cache_destroy(sctp_kmem_ftsn_set_cache); 1309 } 1310 1311 1312 /* Free PR-SCTP sets */ 1313 void 1314 sctp_free_ftsn_set(sctp_ftsn_set_t *s) 1315 { 1316 sctp_ftsn_set_t *p; 1317 1318 while (s != NULL) { 1319 p = s->next; 1320 s->next = NULL; 1321 kmem_cache_free(sctp_kmem_ftsn_set_cache, s); 1322 s = p; 1323 } 1324 } 1325 1326 /* 1327 * Given a message meta block, meta, this routine creates or modifies 1328 * the set that will be used to generate a Forward TSN chunk. If the 1329 * entry for stream id, sid, for this message already exists, the 1330 * sequence number, ssn, is updated if it is greater than the existing 1331 * one. 
If an entry for this sid does not exist, one is created if
 * the size does not exceed fp->sfa_pmss. We return false in case
 * of an error.
 */
boolean_t
sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
    uint_t *nsets, uint32_t *slen)
{
	sctp_msg_hdr_t	*hdr = (sctp_msg_hdr_t *)meta->b_rptr;
	sctp_ftsn_set_t	*cur;
	sctp_ftsn_set_t	*tail = NULL;
	sctp_ftsn_set_t	*entry;
	uint16_t	sid = htons(hdr->smh_sid);
	/* hdr->smh_ssn is already in NBO */
	uint16_t	ssn = hdr->smh_ssn;

	ASSERT(s != NULL && nsets != NULL);
	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));

	/*
	 * Look for an existing entry for this stream; remember the last
	 * node visited so a new entry can be appended without a second walk.
	 */
	for (cur = *s; cur != NULL; cur = cur->next) {
		if (cur->ftsn_entries.ftsn_sid == sid) {
			/* Only ever move the recorded ssn forward. */
			if (SSN_GT(ssn, cur->ftsn_entries.ftsn_ssn))
				cur->ftsn_entries.ftsn_ssn = ssn;
			return (B_TRUE);
		}
		tail = cur;
	}

	/*
	 * No entry for this sid yet.  The very first entry is expected to
	 * fit (asserted only); later ones are refused once the running
	 * length would exceed the path MSS.
	 */
	if (tail == NULL) {
		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
	} else if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss) {
		return (B_FALSE);
	}

	entry = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
	if (entry == NULL)
		return (B_FALSE);
	entry->ftsn_entries.ftsn_sid = sid;
	entry->ftsn_entries.ftsn_ssn = ssn;
	entry->next = NULL;

	if (tail == NULL) {
		*s = entry;
		*nsets = 1;
	} else {
		tail->next = entry;
		(*nsets)++;
	}
	*slen += sizeof (uint32_t);
	return (B_TRUE);
}

/*
 * Given a set of stream id - sequence number pairs, this routine creates
 * a Forward TSN chunk.
The cumulative TSN (advanced peer ack point)
 * for the chunk is obtained from sctp->sctp_adv_pap. The caller
 * will add the IP/SCTP header.
 */
mblk_t *
sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
    uint_t nsets, uint32_t seglen)
{
	sctp_stack_t		*sctps = sctp->sctp_sctps;
	sctp_chunk_hdr_t	*chunk;
	ftsn_entry_t		*entry;
	uint32_t		*cum_tsn;
	mblk_t			*bp;
	size_t			headroom;
	uint16_t		wire_len;

	/* seglen as passed in does not yet count the chunk header. */
	seglen += sizeof (sctp_chunk_hdr_t);

	/* Leave space in front of the chunk for the headers added later. */
	if (fp->isv4)
		headroom = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra;
	else
		headroom = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra;

	bp = allocb(headroom + seglen, BPRI_MED);
	if (bp == NULL)
		return (NULL);
	bp->b_rptr += headroom;
	bp->b_wptr = bp->b_rptr + seglen;

	/* Chunk header. */
	chunk = (sctp_chunk_hdr_t *)bp->b_rptr;
	chunk->sch_id = CHUNK_FORWARD_TSN;
	chunk->sch_flags = 0;
	/*
	 * Narrowing to 16 bits is fine: seglen is the length of the
	 * Forward TSN chunk.
	 */
	wire_len = (uint16_t)seglen;
	U16_TO_ABE16(wire_len, &(chunk->sch_len));

	/* New cumulative TSN, followed by one sid/ssn pair per set. */
	cum_tsn = (uint32_t *)(chunk + 1);
	U32_TO_ABE32(sctp->sctp_adv_pap, cum_tsn);

	entry = (ftsn_entry_t *)(cum_tsn + 1);
	for (; nsets > 0; nsets--, sets = sets->next, entry++) {
		ASSERT((uchar_t *)&entry[1] <= bp->b_wptr);
		entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
		entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
	}
	return (bp);
}

/*
 * Given a starting message, the routine steps through all the
 * messages whose TSN is less than sctp->sctp_adv_pap and creates
 * ftsn sets. The ftsn sets are then used to create a Forward TSN
 * chunk. All the messages, that have chunks that are included in the
 * ftsn sets, are flagged abandoned.
If a message is partially sent
 * and is deemed abandoned, all remaining unsent chunks are marked
 * abandoned and are deducted from sctp_unsent.
 *
 * meta/mp are the first message and chunk to consider.  On return *nmp
 * holds the packet returned by sctp_add_proto_hdr() for the Forward TSN
 * chunk, or NULL if the chunk or its header could not be built; *nmp is
 * not written at all when no message qualified.  *seglen is set to the
 * size of the cumulative TSN plus one 32-bit entry per sid/ssn set and,
 * on success, the length reserved for a piggybacked SACK.
 */
void
sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
    sctp_faddr_t *fp, uint32_t *seglen)
{
	mblk_t		*mp1 = mp;
	mblk_t		*mp_head = mp;
	mblk_t		*meta_head = meta;
	mblk_t		*head;
	sctp_ftsn_set_t	*sets = NULL;
	uint_t		nsets = 0;
	uint16_t	clen;
	sctp_data_hdr_t	*sdc;
	uint32_t	sacklen;
	uint32_t	adv_pap = sctp->sctp_adv_pap;
	uint32_t	unsent = 0;
	boolean_t	ubit;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/* The chunk body always starts with the 32-bit cumulative TSN. */
	*seglen = sizeof (uint32_t);

	/*
	 * Pass 1: walk the messages covered by sctp_adv_pap and collect the
	 * sid/ssn sets.  Nothing is marked yet, so a failure below needs no
	 * undo.  adv_pap trails the walk, holding the TSN of the last sent
	 * chunk stepped over.
	 */
	sdc = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		/*
		 * Skip adding FTSN sets for un-ordered messages as they do
		 * not have SSNs.
		 */
		ubit = SCTP_DATA_GET_UBIT(sdc);
		if (!ubit &&
		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
			/*
			 * Could not record this message: pull the advanced
			 * peer ack point back to the last TSN walked and go
			 * on with whatever sets were collected so far.
			 */
			meta = NULL;
			sctp->sctp_adv_pap = adv_pap;
			goto ftsn_done;
		}
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			adv_pap = ntohl(sdc->sdh_tsn);
			mp1 = mp1->b_next;
		}
		meta = meta->b_next;
		if (meta != NULL) {
			mp1 = meta->b_cont;
			/* Stop at the first message not yet transmitted. */
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
ftsn_done:
	/*
	 * Can't compare with sets == NULL, since we don't add any
	 * sets for un-ordered messages.
	 */
	if (meta == meta_head)
		return;
	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
	sctp_free_ftsn_set(sets);
	if (*nmp == NULL)
		return;
	/* Work out whether a SACK is due and fits alongside the FTSN. */
	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
		sacklen = 0;
	} else {
		sacklen = sizeof (sctp_chunk_hdr_t) +
		    sizeof (sctp_sack_chunk_t) +
		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
			/* piggybacked SACK doesn't fit */
			sacklen = 0;
		} else {
			/* The header is then built for sctp_lastdata. */
			fp = sctp->sctp_lastdata;
		}
	}
	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
	if (head == NULL) {
		freemsg(*nmp);
		*nmp = NULL;
		SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
		return;
	}
	*seglen += sacklen;
	*nmp = head;

	/*
	 * Pass 2: the packet exists, so now flag the covered messages as
	 * abandoned and abandon their unsent chunks.
	 *
	 * XXXNeed to optimise this, the reason it is done here is so
	 * that we don't have to undo in case of failure.
	 */
	mp1 = mp_head;
	sdc = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta_head != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		if (!SCTP_IS_MSG_ABANDONED(meta_head))
			SCTP_MSG_SET_ABANDONED(meta_head);
		/* Chunks already sent: run the sent accounting if unacked. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ISACKED(mp1)) {
				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
				    meta_head);
			}
			mp1 = mp1->b_next;
		}
		/* Remaining chunks were never sent: abandon and tally them. */
		while (mp1 != NULL) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ABANDONED(mp1)) {
				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_ABANDON_CHUNK(mp1);
			}
			mp1 = mp1->b_next;
		}
		meta_head = meta_head->b_next;
		if (meta_head != NULL) {
			mp1 = meta_head->b_cont;
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
	if (unsent > 0) {
		ASSERT(sctp->sctp_unsent >= unsent);
		sctp->sctp_unsent -= unsent;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);
	}
}

/*
 * This function steps through messages starting at meta and checks if
 * the message is abandoned. It stops when it hits an unsent chunk or
 * a message that has all its chunks acked. This is the only place
 * where the sctp_adv_pap is moved forward to indicate abandoned
 * messages.
 *
 * mp must be a chunk of meta whose TSN is beyond sctp_lastack_rxd (both
 * conditions are asserted).  The result is stored in sctp->sctp_adv_pap;
 * the value is unchanged when the first message is not abandoned.
 */
void
sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
{
	/* Candidate ack point: TSN of the last sent chunk walked so far. */
	uint32_t	tsn = sctp->sctp_adv_pap;
	sctp_data_hdr_t	*sdc;
	sctp_msg_hdr_t	*msg_hdr;

	ASSERT(mp != NULL);
	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
	/* Nothing to advance over if the first message is still live. */
	if (!SCTP_IS_MSG_ABANDONED(meta) &&
	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
		return;
	}
	while (meta != NULL) {
		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			tsn = ntohl(sdc->sdh_tsn);
			mp = mp->b_next;
		}
		/* Reached an unsent chunk in this message: stop here. */
		if (mp != NULL)
			break;
		/*
		 * We continue checking for successive messages only if there
		 * is a chunk marked for retransmission. Else, we might
		 * end up sending FTSN prematurely for chunks that have been
		 * sent, but not yet acked.
		 */
		if ((meta = meta->b_next) != NULL) {
			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
			if (!SCTP_IS_MSG_ABANDONED(meta) &&
			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
				break;
			}
			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
				if (!SCTP_CHUNK_ISSENT(mp)) {
					sctp->sctp_adv_pap = tsn;
					return;
				}
				if (SCTP_CHUNK_WANT_REXMIT(mp))
					break;
			}
			/* No chunk of this message awaits retransmission. */
			if (mp == NULL)
				break;
		}
	}
	sctp->sctp_adv_pap = tsn;
}


/*
 * Determine if we should bundle a data chunk with the chunk being
 * retransmitted. We bundle if
 *
 * - the chunk is sent to the same destination and unack'ed.
 *
 * OR
 *
 * - the chunk is unsent, i.e. new data.
 *
 * Abandoned chunks are never bundled.  The last clause is true whenever
 * the SENT/REXMIT flag pair is anything other than "SENT only".
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED((mp)) &&					\
	((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) &&	\
	!SCTP_CHUNK_ISACKED(mp))) ||					\
	(((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \
	SCTP_CHUNK_FLAG_SENT)))

/*
 * Retransmit first segment which hasn't been acked with cumtsn or send
 * a Forward TSN chunk, if appropriate.
1657 */ 1658 void 1659 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) 1660 { 1661 mblk_t *mp; 1662 mblk_t *nmp = NULL; 1663 mblk_t *head; 1664 mblk_t *meta = sctp->sctp_xmit_head; 1665 mblk_t *fill; 1666 uint32_t seglen = 0; 1667 uint32_t sacklen; 1668 uint16_t chunklen; 1669 int extra; 1670 sctp_data_hdr_t *sdc; 1671 sctp_faddr_t *fp; 1672 uint32_t adv_pap = sctp->sctp_adv_pap; 1673 boolean_t do_ftsn = B_FALSE; 1674 boolean_t ftsn_check = B_TRUE; 1675 uint32_t first_ua_tsn; 1676 sctp_msg_hdr_t *mhdr; 1677 sctp_stack_t *sctps = sctp->sctp_sctps; 1678 int error; 1679 1680 while (meta != NULL) { 1681 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1682 uint32_t tsn; 1683 1684 if (!SCTP_CHUNK_ISSENT(mp)) 1685 goto window_probe; 1686 /* 1687 * We break in the following cases - 1688 * 1689 * if the advanced peer ack point includes the next 1690 * chunk to be retransmited - possibly the Forward 1691 * TSN was lost. 1692 * 1693 * if we are PRSCTP aware and the next chunk to be 1694 * retransmitted is now abandoned 1695 * 1696 * if the next chunk to be retransmitted is for 1697 * the dest on which the timer went off. (this 1698 * message is not abandoned). 1699 * 1700 * We check for Forward TSN only for the first 1701 * eligible chunk to be retransmitted. The reason 1702 * being if the first eligible chunk is skipped (say 1703 * it was sent to a destination other than oldfp) 1704 * then we cannot advance the cum TSN via Forward 1705 * TSN chunk. 1706 * 1707 * Also, ftsn_check is B_TRUE only for the first 1708 * eligible chunk, it will be B_FALSE for all 1709 * subsequent candidate messages for retransmission. 
1710 */ 1711 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1712 tsn = ntohl(sdc->sdh_tsn); 1713 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) { 1714 if (sctp->sctp_prsctp_aware && ftsn_check) { 1715 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) { 1716 ASSERT(sctp->sctp_prsctp_aware); 1717 do_ftsn = B_TRUE; 1718 goto out; 1719 } else { 1720 sctp_check_adv_ack_pt(sctp, 1721 meta, mp); 1722 if (SEQ_GT(sctp->sctp_adv_pap, 1723 adv_pap)) { 1724 do_ftsn = B_TRUE; 1725 goto out; 1726 } 1727 } 1728 ftsn_check = B_FALSE; 1729 } 1730 if (SCTP_CHUNK_DEST(mp) == oldfp) 1731 goto out; 1732 } 1733 } 1734 meta = meta->b_next; 1735 if (meta != NULL && sctp->sctp_prsctp_aware) { 1736 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1737 1738 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || 1739 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { 1740 meta = meta->b_next; 1741 } 1742 } 1743 } 1744 window_probe: 1745 /* 1746 * Retransmit fired for a destination which didn't have 1747 * any unacked data pending. 1748 */ 1749 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) { 1750 /* 1751 * Send a window probe. Inflate frwnd to allow 1752 * sending one segment. 1753 */ 1754 if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) 1755 sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc); 1756 1757 /* next TSN to send */ 1758 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn; 1759 1760 /* 1761 * The above sctp_frwnd adjustment is coarse. The "changed" 1762 * sctp_frwnd may allow us to send more than 1 packet. So 1763 * tell sctp_output() to send only 1 packet. 1764 */ 1765 sctp_output(sctp, 1); 1766 1767 /* Last sent TSN */ 1768 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1769 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); 1770 sctp->sctp_zero_win_probe = B_TRUE; 1771 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1772 } 1773 return; 1774 out: 1775 /* 1776 * After a time out, assume that everything has left the network. So 1777 * we can clear rxt_unacked for the original peer address. 
1778 */ 1779 oldfp->rxt_unacked = 0; 1780 1781 /* 1782 * If we were probing for zero window, don't adjust retransmission 1783 * variables, but the timer is still backed off. 1784 */ 1785 if (sctp->sctp_zero_win_probe) { 1786 mblk_t *pkt; 1787 uint_t pkt_len; 1788 1789 /* 1790 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn 1791 * and sctp_rxt_maxtsn will specify the ZWP packet. 1792 */ 1793 fp = oldfp; 1794 if (oldfp->state != SCTP_FADDRS_ALIVE) 1795 fp = sctp_rotate_faddr(sctp, oldfp); 1796 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 1797 if (pkt != NULL) { 1798 ASSERT(pkt_len <= fp->sfa_pmss); 1799 sctp_set_iplen(sctp, pkt, fp->ixa); 1800 (void) conn_ip_output(pkt, fp->ixa); 1801 BUMP_LOCAL(sctp->sctp_opkts); 1802 } else { 1803 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 1804 } 1805 1806 /* 1807 * The strikes will be clear by sctp_faddr_alive() when the 1808 * other side sends us an ack. 1809 */ 1810 oldfp->strikes++; 1811 sctp->sctp_strikes++; 1812 1813 SCTP_CALC_RXT(sctp, oldfp); 1814 if (oldfp != fp && oldfp->suna != 0) 1815 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); 1816 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1817 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1818 return; 1819 } 1820 1821 /* 1822 * Enter slowstart for this destination 1823 */ 1824 oldfp->ssthresh = oldfp->cwnd / 2; 1825 if (oldfp->ssthresh < 2 * oldfp->sfa_pmss) 1826 oldfp->ssthresh = 2 * oldfp->sfa_pmss; 1827 oldfp->cwnd = oldfp->sfa_pmss; 1828 oldfp->pba = 0; 1829 fp = sctp_rotate_faddr(sctp, oldfp); 1830 ASSERT(fp != NULL); 1831 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1832 1833 first_ua_tsn = ntohl(sdc->sdh_tsn); 1834 if (do_ftsn) { 1835 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); 1836 if (nmp == NULL) { 1837 sctp->sctp_adv_pap = adv_pap; 1838 goto restart_timer; 1839 } 1840 head = nmp; 1841 /* 1842 * Move to the next unabandoned chunk. XXXCheck if meta will 1843 * always be marked abandoned. 
1844 */ 1845 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) 1846 meta = meta->b_next; 1847 if (meta != NULL) 1848 mp = mp->b_cont; 1849 else 1850 mp = NULL; 1851 goto try_bundle; 1852 } 1853 seglen = ntohs(sdc->sdh_len); 1854 chunklen = seglen - sizeof (*sdc); 1855 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1856 extra = SCTP_ALIGN - extra; 1857 1858 /* Find out if we need to piggyback SACK. */ 1859 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1860 sacklen = 0; 1861 } else { 1862 sacklen = sizeof (sctp_chunk_hdr_t) + 1863 sizeof (sctp_sack_chunk_t) + 1864 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1865 if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1866 /* piggybacked SACK doesn't fit */ 1867 sacklen = 0; 1868 } else { 1869 /* 1870 * OK, we have room to send SACK back. But we 1871 * should send it back to the last fp where we 1872 * receive data from, unless sctp_lastdata equals 1873 * oldfp, then we should probably not send it 1874 * back to that fp. Also we should check that 1875 * the fp is alive. 1876 */ 1877 if (sctp->sctp_lastdata != oldfp && 1878 sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { 1879 fp = sctp->sctp_lastdata; 1880 } 1881 } 1882 } 1883 1884 /* 1885 * Cancel RTT measurement if the retransmitted TSN is before the 1886 * TSN used for timimg. 1887 */ 1888 if (sctp->sctp_out_time != 0 && 1889 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { 1890 sctp->sctp_out_time = 0; 1891 } 1892 /* Clear the counter as the RTT calculation may be off. */ 1893 fp->rtt_updates = 0; 1894 oldfp->rtt_updates = 0; 1895 1896 /* 1897 * After a timeout, we should change the current faddr so that 1898 * new chunks will be sent to the alternate address. 
1899 */ 1900 sctp_set_faddr_current(sctp, fp); 1901 1902 nmp = dupmsg(mp); 1903 if (nmp == NULL) 1904 goto restart_timer; 1905 if (extra > 0) { 1906 fill = sctp_get_padding(sctp, extra); 1907 if (fill != NULL) { 1908 linkb(nmp, fill); 1909 seglen += extra; 1910 } else { 1911 freemsg(nmp); 1912 goto restart_timer; 1913 } 1914 } 1915 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1916 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); 1917 if (head == NULL) { 1918 freemsg(nmp); 1919 SCTP_KSTAT(sctps, sctp_rexmit_failed); 1920 goto restart_timer; 1921 } 1922 seglen += sacklen; 1923 1924 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1925 1926 mp = mp->b_next; 1927 1928 try_bundle: 1929 /* We can at least and at most send 1 packet at timeout. */ 1930 while (seglen < fp->sfa_pmss) { 1931 int32_t new_len; 1932 1933 /* Go through the list to find more chunks to be bundled. */ 1934 while (mp != NULL) { 1935 /* Check if the chunk can be bundled. */ 1936 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) 1937 break; 1938 mp = mp->b_next; 1939 } 1940 /* Go to the next message. */ 1941 if (mp == NULL) { 1942 for (meta = meta->b_next; meta != NULL; 1943 meta = meta->b_next) { 1944 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1945 1946 if (SCTP_IS_MSG_ABANDONED(meta) || 1947 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, 1948 sctp)) { 1949 continue; 1950 } 1951 1952 mp = meta->b_cont; 1953 goto try_bundle; 1954 } 1955 /* 1956 * Check if there is a new message which potentially 1957 * could be bundled with this retransmission. 1958 */ 1959 meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error, 1960 seglen, fp->sfa_pmss - seglen, NULL); 1961 if (error != 0 || meta == NULL) { 1962 /* No more chunk to be bundled. 
*/ 1963 break; 1964 } else { 1965 goto try_bundle; 1966 } 1967 } 1968 1969 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1970 new_len = ntohs(sdc->sdh_len); 1971 chunklen = new_len - sizeof (*sdc); 1972 1973 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 1974 extra = SCTP_ALIGN - extra; 1975 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 1976 break; 1977 if ((nmp = dupmsg(mp)) == NULL) 1978 break; 1979 1980 if (extra > 0) { 1981 fill = sctp_get_padding(sctp, extra); 1982 if (fill != NULL) { 1983 linkb(nmp, fill); 1984 } else { 1985 freemsg(nmp); 1986 break; 1987 } 1988 } 1989 linkb(head, nmp); 1990 1991 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1992 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1993 1994 seglen = new_len; 1995 mp = mp->b_next; 1996 } 1997 done_bundle: 1998 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1999 ipha_t *iph = (ipha_t *)head->b_rptr; 2000 2001 /* 2002 * Path MTU is different from path we thought it would 2003 * be when we created chunks, or IP headers have grown. 2004 * Need to clear the DF bit. 2005 */ 2006 iph->ipha_fragment_offset_and_flags = 0; 2007 } 2008 fp->rxt_unacked += seglen; 2009 2010 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x " 2011 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n", 2012 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), 2013 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); 2014 2015 sctp->sctp_rexmitting = B_TRUE; 2016 sctp->sctp_rxt_nxttsn = first_ua_tsn; 2017 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 2018 sctp_set_iplen(sctp, head, fp->ixa); 2019 (void) conn_ip_output(head, fp->ixa); 2020 BUMP_LOCAL(sctp->sctp_opkts); 2021 2022 /* 2023 * Restart the oldfp timer with exponential backoff and 2024 * the new fp timer for the retransmitted chunks. 2025 */ 2026 restart_timer: 2027 oldfp->strikes++; 2028 sctp->sctp_strikes++; 2029 SCTP_CALC_RXT(sctp, oldfp); 2030 /* 2031 * If there is still some data in the oldfp, restart the 2032 * retransmission timer. 
If there is no data, the heartbeat will 2033 * continue to run so it will do its job in checking the reachability 2034 * of the oldfp. 2035 */ 2036 if (oldfp != fp && oldfp->suna != 0) 2037 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); 2038 2039 /* 2040 * Should we restart the timer of the new fp? If there is 2041 * outstanding data to the new fp, the timer should be 2042 * running already. So restarting it means that the timer 2043 * will fire later for those outstanding data. But if 2044 * we don't restart it, the timer will fire too early for the 2045 * just retransmitted chunks to the new fp. The reason is that we 2046 * don't keep a timestamp on when a chunk is retransmitted. 2047 * So when the timer fires, it will just search for the 2048 * chunk with the earliest TSN sent to new fp. This probably 2049 * is the chunk we just retransmitted. So for now, let's 2050 * be conservative and restart the timer of the new fp. 2051 */ 2052 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2053 2054 sctp->sctp_active = lbolt64; 2055 } 2056 2057 /* 2058 * This function is called by sctp_ss_rexmit() to create a packet 2059 * to be retransmitted to the given fp. The given meta and mp 2060 * parameters are respectively the sctp_msg_hdr_t and the mblk of the 2061 * first chunk to be retransmitted. This is also called when we want 2062 * to retransmit a zero window probe from sctp_rexmit() or when we 2063 * want to retransmit the zero window probe after the window has 2064 * opened from sctp_got_sack(). 
 *
 * *meta and *mp are cursors: they are advanced as chunks are consumed, so
 * on return they are positioned after the last chunk examined.
 * *packet_len receives the total length of the chunks placed in the
 * packet, including alignment padding; the same amount is added to
 * fp->rxt_unacked.  Returns the packet, or NULL if the first chunk could
 * not be duplicated, padded or given a protocol header; *packet_len is
 * not written in that case.
 */
mblk_t *
sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
    uint_t *packet_len)
{
	uint32_t	seglen = 0;
	uint16_t	chunklen;
	int		extra;
	mblk_t		*nmp;
	mblk_t		*head;
	mblk_t		*fill;
	sctp_data_hdr_t	*sdc;
	sctp_msg_hdr_t	*mhdr;

	/* The first chunk is mandatory; any failure on it aborts the packet. */
	sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
	seglen = ntohs(sdc->sdh_len);
	chunklen = seglen - sizeof (*sdc);
	/* extra is the padding needed to round the chunk up to SCTP_ALIGN. */
	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
		extra = SCTP_ALIGN - extra;

	nmp = dupmsg(*mp);
	if (nmp == NULL)
		return (NULL);
	if (extra > 0) {
		fill = sctp_get_padding(sctp, extra);
		if (fill != NULL) {
			linkb(nmp, fill);
			seglen += extra;
		} else {
			freemsg(nmp);
			return (NULL);
		}
	}
	SCTP_CHUNK_CLEAR_FLAGS(nmp);
	/* No SACK is piggybacked on this packet (sacklen argument is 0). */
	head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
	if (head == NULL) {
		freemsg(nmp);
		return (NULL);
	}
	SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
	/*
	 * Don't update the TSN if we are doing a Zero Win Probe.
	 */
	if (!sctp->sctp_zero_win_probe)
		sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
	*mp = (*mp)->b_next;

try_bundle:
	/* Additional chunks are best effort: any failure just ends bundling. */
	while (seglen < fp->sfa_pmss) {
		int32_t new_len;

		/*
		 * Go through the list to find more chunks to be bundled.
		 * We should only retransmit sent but unack'ed chunks.  Since
		 * they were sent before, the peer's receive window should
		 * be able to receive them.
		 */
		while (*mp != NULL) {
			/* Check if the chunk can be bundled. */
			if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
				break;
			*mp = (*mp)->b_next;
		}
		/* Go to the next message. */
		if (*mp == NULL) {
			for (*meta = (*meta)->b_next; *meta != NULL;
			    *meta = (*meta)->b_next) {
				mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;

				if (SCTP_IS_MSG_ABANDONED(*meta) ||
				    SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
				    sctp)) {
					continue;
				}

				*mp = (*meta)->b_cont;
				goto try_bundle;
			}
			/* No more chunk to be bundled. */
			break;
		}

		sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
		/* Don't bundle chunks beyond sctp_rxt_maxtsn. */
		if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
			break;
		new_len = ntohs(sdc->sdh_len);
		chunklen = new_len - sizeof (*sdc);

		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
			extra = SCTP_ALIGN - extra;
		/* new_len becomes the packet length if this chunk is added. */
		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
			break;
		if ((nmp = dupmsg(*mp)) == NULL)
			break;

		if (extra > 0) {
			fill = sctp_get_padding(sctp, extra);
			if (fill != NULL) {
				linkb(nmp, fill);
			} else {
				freemsg(nmp);
				break;
			}
		}
		linkb(head, nmp);

		SCTP_CHUNK_CLEAR_FLAGS(nmp);
		SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
		/*
		 * Don't update the TSN if we are doing a Zero Win Probe.
		 */
		if (!sctp->sctp_zero_win_probe)
			sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);

		seglen = new_len;
		*mp = (*mp)->b_next;
	}
	*packet_len = seglen;
	fp->rxt_unacked += seglen;
	return (head);
}

/*
 * sctp_ss_rexmit() is called when we get a SACK after a timeout which
 * advances the cum_tsn but the cum_tsn is still less than what we have sent
 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial"
 * SACK. We retransmit unacked chunks without having to wait for another
 * timeout. The rationale is that the SACK should not be "partial" if all the
 * lost chunks have been retransmitted.
Since the SACK is "partial,"
 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
 * be missing.  It is better for us to retransmit them now instead
 * of waiting for a timeout.
 */
void
sctp_ss_rexmit(sctp_t *sctp)
{
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	sctp_faddr_t	*fp;
	sctp_data_hdr_t	*sdc;
	mblk_t		*meta;
	mblk_t		*mp;
	mblk_t		*rxt_pkt;
	uint_t		rxt_len;
	uint32_t	wnd_left;
	int		burst;

	ASSERT(!sctp->sctp_zero_win_probe);

	/*
	 * Continue only if the last cum ack received has reached the TSN
	 * we retransmitted last; retransmission then resumes with the TSN
	 * right after that cum ack.  Otherwise simply return.
	 */
	if (!SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
		return;
	sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));

	/*
	 * After a timer fires, sctp_current should be set to the new
	 * fp where the retransmitted chunks are sent.
	 */
	fp = sctp->sctp_current;

	/*
	 * Only cwnd limits how much we may retransmit: the peer's receive
	 * window already admitted these chunks when they were first sent.
	 * With rxt_unacked recording what has been retransmitted but not
	 * yet acknowledged, the amount we can send now is cwnd minus
	 * rxt_unacked.
	 *
	 * rxt_unacked is incremented when we retransmit a packet and
	 * decremented when a SACK acknowledges something.  It is reset when
	 * the retransmission timer fires, on the assumption that all
	 * packets have left the network after a timeout.  If that
	 * assumption does not hold, a SACK after the timeout can
	 * acknowledge more than rxt_unacked (which only covers what was
	 * retransmitted when the timer fired), and rxt_unacked, being an
	 * unsigned int, becomes very big instead of negative.  This is the
	 * reason we always send at least 1 MSS bytes.
	 *
	 * The count is not accurate because we only know how many packets
	 * are outstanding (from the TSN numbers), not how many bytes they
	 * contain; an accurate count would need a walk through the send
	 * list.  As an accurate count is not really important during
	 * retransmission, the walk is skipped to save some time.  This
	 * should not make the retransmission too aggressive to cause
	 * congestion.
	 */
	wnd_left = (fp->cwnd <= fp->rxt_unacked) ? fp->sfa_pmss :
	    fp->cwnd - fp->rxt_unacked;

	/* Find the first unack'ed chunk, i.e. the one with sctp_rxt_nxttsn. */
	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;

		if (SCTP_IS_MSG_ABANDONED(meta) ||
		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))
			continue;

		mp = meta->b_cont;
		while (mp != NULL) {
			/* Again, this may not be possible */
			if (!SCTP_CHUNK_ISSENT(mp))
				return;
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
				goto found_msg;
			mp = mp->b_next;
		}
	}

	/* Everything is abandoned... */
	return;

found_msg:
	if (!fp->timer_running) {
		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
	}
	rxt_pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &rxt_len);
	if (rxt_pkt == NULL) {
		SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
		return;
	}
	if (fp->isv4 && (rxt_len > fp->sfa_pmss)) {
		/*
		 * Path MTU is different from path we thought it would
		 * be when we created chunks, or IP headers have grown.
		 * Need to clear the DF bit.
		 */
		((ipha_t *)rxt_pkt->b_rptr)->ipha_fragment_offset_and_flags =
		    0;
	}
	sctp_set_iplen(sctp, rxt_pkt, fp->ixa);
	(void) conn_ip_output(rxt_pkt, fp->ixa);
	BUMP_LOCAL(sctp->sctp_opkts);

	/*
	 * Check and see if there are more chunks to be retransmitted: stop
	 * when the packet used up the window, when less than one path MSS
	 * of window remains, or when the send list is exhausted.
	 */
	if (wnd_left <= rxt_len)
		return;
	wnd_left -= rxt_len;
	if (wnd_left < fp->sfa_pmss || meta == NULL)
		return;
	if (mp == NULL)
		meta = meta->b_next;
	if (meta == NULL)
		return;

	/*
	 * Retransmit another packet if the window allows.
	 *
	 * NOTE(review): burst is set again each time control comes back
	 * through found_msg, so it limits how many messages are scanned
	 * after a packet rather than how many packets are sent -- confirm
	 * that this is intended.
	 */
	burst = sctps->sctps_maxburst - 1;
	while (meta != NULL && burst > 0) {
		if (mp == NULL)
			mp = meta->b_cont;
		for (; mp != NULL; mp = mp->b_next) {
			/* Again, this may not be possible */
			if (!SCTP_CHUNK_ISSENT(mp))
				return;
			if (!SCTP_CHUNK_ISACKED(mp))
				goto found_msg;
		}
		meta = meta->b_next;
		burst--;
	}
}