1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/systm.h> 29 #include <sys/stream.h> 30 #include <sys/cmn_err.h> 31 #define _SUN_TPI_VERSION 2 32 #include <sys/tihdr.h> 33 #include <sys/socket.h> 34 #include <sys/stropts.h> 35 #include <sys/strsun.h> 36 #include <sys/strsubr.h> 37 #include <sys/socketvar.h> 38 #include <inet/common.h> 39 #include <inet/mi.h> 40 #include <inet/ip.h> 41 #include <inet/ip6.h> 42 #include <inet/sctp_ip.h> 43 #include <inet/ipclassifier.h> 44 45 /* 46 * PR-SCTP comments. 47 * 48 * A message can expire before it gets to the transmit list (i.e. it is still 49 * in the unsent list - unchunked), after it gets to the transmit list, but 50 * before transmission has actually started, or after transmission has begun. 
 * Accordingly, we check for the status of a message in sctp_chunkify() when
 * the message is being transferred from the unsent list to the transmit list;
 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
 * When we nuke a message in sctp_chunkify(), all we need to do is take it
 * out of the unsent list and update sctp_unsent; when a message is deemed
 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
 * list, update sctp_unsent IFF transmission for the message has not yet begun
 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
 * message has started, then we cannot just take it out of the list, we need
 * to send Forward TSN chunk to the peer so that the peer can clear its
 * fragment list for this message. However, we cannot just send the Forward
 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
 * messages preceding this abandoned message. So, we send a Forward TSN
 * IFF all messages prior to this abandoned message have been SACKd, if not
 * we defer sending the Forward TSN to sctp_cumack(), which will check for
 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
 * sctp_rexmit() when we check for retransmissions, we need to determine if
 * the advanced peer ack point can be moved ahead, and if so, send a Forward
 * TSN to the peer instead of retransmitting the chunk. Note that when
 * we send a Forward TSN for a message, there may be yet unsent chunks for
 * this message; we need to mark all such chunks as abandoned, so that
 * sctp_cumack() can take the message out of the transmit list, additionally
 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
75 * decremented when a message/chunk is deemed abandoned), sockfs needs to 76 * be notified so that it can adjust its idea of the queued message. 77 */ 78 79 #include "sctp_impl.h" 80 81 static struct kmem_cache *sctp_kmem_ftsn_set_cache; 82 83 #ifdef DEBUG 84 static boolean_t sctp_verify_chain(mblk_t *, mblk_t *); 85 #endif 86 87 /* 88 * Called to allocate a header mblk when sending data to SCTP. 89 * Data will follow in b_cont of this mblk. 90 */ 91 mblk_t * 92 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen, 93 int flags) 94 { 95 mblk_t *mp; 96 struct T_unitdata_req *tudr; 97 size_t size; 98 int error; 99 100 size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen; 101 size = MAX(size, sizeof (sctp_msg_hdr_t)); 102 if (flags & SCTP_CAN_BLOCK) { 103 mp = allocb_wait(size, BPRI_MED, 0, &error); 104 } else { 105 mp = allocb(size, BPRI_MED); 106 } 107 if (mp) { 108 tudr = (struct T_unitdata_req *)mp->b_rptr; 109 tudr->PRIM_type = T_UNITDATA_REQ; 110 tudr->DEST_length = nlen; 111 tudr->DEST_offset = sizeof (*tudr); 112 tudr->OPT_length = clen; 113 tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) + 114 _TPI_ALIGN_TOPT(nlen)); 115 if (nlen > 0) 116 bcopy(name, tudr + 1, nlen); 117 if (clen > 0) 118 bcopy(control, (char *)tudr + tudr->OPT_offset, clen); 119 mp->b_wptr += (tudr ->OPT_offset + clen); 120 mp->b_datap->db_type = M_PROTO; 121 } 122 return (mp); 123 } 124 125 /*ARGSUSED2*/ 126 int 127 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags) 128 { 129 sctp_faddr_t *fp = NULL; 130 struct T_unitdata_req *tudr; 131 int error = 0; 132 mblk_t *mproto = mp; 133 in6_addr_t *addr; 134 in6_addr_t tmpaddr; 135 uint16_t sid = sctp->sctp_def_stream; 136 uint32_t ppid = sctp->sctp_def_ppid; 137 uint32_t context = sctp->sctp_def_context; 138 uint16_t msg_flags = sctp->sctp_def_flags; 139 sctp_msg_hdr_t *sctp_msg_hdr; 140 uint32_t msg_len = 0; 141 uint32_t timetolive = sctp->sctp_def_timetolive; 142 143 ASSERT(DB_TYPE(mproto) == M_PROTO); 144 145 mp = 
mp->b_cont; 146 ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA); 147 148 tudr = (struct T_unitdata_req *)mproto->b_rptr; 149 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 150 151 /* Get destination address, if specified */ 152 if (tudr->DEST_length > 0) { 153 sin_t *sin; 154 sin6_t *sin6; 155 156 sin = (struct sockaddr_in *) 157 (mproto->b_rptr + tudr->DEST_offset); 158 switch (sin->sin_family) { 159 case AF_INET: 160 if (tudr->DEST_length < sizeof (*sin)) { 161 return (EINVAL); 162 } 163 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr); 164 addr = &tmpaddr; 165 break; 166 case AF_INET6: 167 if (tudr->DEST_length < sizeof (*sin6)) { 168 return (EINVAL); 169 } 170 sin6 = (struct sockaddr_in6 *) 171 (mproto->b_rptr + tudr->DEST_offset); 172 addr = &sin6->sin6_addr; 173 break; 174 default: 175 return (EAFNOSUPPORT); 176 } 177 fp = sctp_lookup_faddr(sctp, addr); 178 if (fp == NULL) { 179 return (EINVAL); 180 } 181 } 182 /* Ancillary Data? */ 183 if (tudr->OPT_length > 0) { 184 struct cmsghdr *cmsg; 185 char *cend; 186 struct sctp_sndrcvinfo *sndrcv; 187 188 cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset); 189 cend = ((char *)cmsg + tudr->OPT_length); 190 ASSERT(cend <= (char *)mproto->b_wptr); 191 192 for (;;) { 193 if ((char *)(cmsg + 1) > cend || 194 ((char *)cmsg + cmsg->cmsg_len) > cend) { 195 break; 196 } 197 if ((cmsg->cmsg_level == IPPROTO_SCTP) && 198 (cmsg->cmsg_type == SCTP_SNDRCV)) { 199 if (cmsg->cmsg_len < 200 (sizeof (*sndrcv) + sizeof (*cmsg))) { 201 return (EINVAL); 202 } 203 sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1); 204 sid = sndrcv->sinfo_stream; 205 msg_flags = sndrcv->sinfo_flags; 206 ppid = sndrcv->sinfo_ppid; 207 context = sndrcv->sinfo_context; 208 timetolive = sndrcv->sinfo_timetolive; 209 break; 210 } 211 if (cmsg->cmsg_len > 0) 212 cmsg = CMSG_NEXT(cmsg); 213 else 214 break; 215 } 216 } 217 if (msg_flags & MSG_ABORT) { 218 if (mp && mp->b_cont) { 219 mblk_t *pump = msgpullup(mp, -1); 220 if (!pump) { 221 return (ENOMEM); 222 } 
223 freemsg(mp); 224 mp = pump; 225 mproto->b_cont = mp; 226 } 227 RUN_SCTP(sctp); 228 sctp_user_abort(sctp, mp); 229 freemsg(mproto); 230 goto process_sendq; 231 } 232 if (mp == NULL) 233 goto done; 234 235 RUN_SCTP(sctp); 236 237 /* Reject any new data requests if we are shutting down */ 238 if (sctp->sctp_state > SCTPS_ESTABLISHED || 239 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) { 240 error = EPIPE; 241 goto unlock_done; 242 } 243 244 /* Re-use the mproto to store relevant info. */ 245 ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr)); 246 247 mproto->b_rptr = mproto->b_datap->db_base; 248 mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr); 249 250 sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr; 251 bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr)); 252 sctp_msg_hdr->smh_context = context; 253 sctp_msg_hdr->smh_sid = sid; 254 sctp_msg_hdr->smh_ppid = ppid; 255 sctp_msg_hdr->smh_flags = msg_flags; 256 sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive); 257 sctp_msg_hdr->smh_tob = lbolt64; 258 for (; mp != NULL; mp = mp->b_cont) 259 msg_len += MBLKL(mp); 260 sctp_msg_hdr->smh_msglen = msg_len; 261 262 /* User requested specific destination */ 263 SCTP_SET_CHUNK_DEST(mproto, fp); 264 265 if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED && 266 sid >= sctp->sctp_num_ostr) { 267 /* Send sendfail event */ 268 sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID, 269 B_FALSE); 270 error = EINVAL; 271 goto unlock_done; 272 } 273 274 /* no data */ 275 if (msg_len == 0) { 276 sctp_sendfail_event(sctp, dupmsg(mproto), 277 SCTP_ERR_NO_USR_DATA, B_FALSE); 278 error = EINVAL; 279 goto unlock_done; 280 } 281 282 /* Add it to the unsent list */ 283 if (sctp->sctp_xmit_unsent == NULL) { 284 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto; 285 } else { 286 sctp->sctp_xmit_unsent_tail->b_next = mproto; 287 sctp->sctp_xmit_unsent_tail = mproto; 288 } 289 sctp->sctp_unsent += msg_len; 290 BUMP_LOCAL(sctp->sctp_msgcount); 291 if (sctp->sctp_state == 
SCTPS_ESTABLISHED) 292 sctp_output(sctp, UINT_MAX); 293 process_sendq: 294 WAKE_SCTP(sctp); 295 sctp_process_sendq(sctp); 296 return (0); 297 unlock_done: 298 WAKE_SCTP(sctp); 299 done: 300 return (error); 301 } 302 303 void 304 sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send) 305 { 306 mblk_t *mp; 307 mblk_t *chunk_mp; 308 mblk_t *chunk_head; 309 mblk_t *chunk_hdr; 310 mblk_t *chunk_tail = NULL; 311 int count; 312 int chunksize; 313 sctp_data_hdr_t *sdc; 314 mblk_t *mdblk = sctp->sctp_xmit_unsent; 315 sctp_faddr_t *fp; 316 sctp_faddr_t *fp1; 317 size_t xtralen; 318 sctp_msg_hdr_t *msg_hdr; 319 sctp_stack_t *sctps = sctp->sctp_sctps; 320 321 fp = SCTP_CHUNK_DEST(mdblk); 322 if (fp == NULL) 323 fp = sctp->sctp_current; 324 if (fp->isv4) 325 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra + 326 sizeof (*sdc); 327 else 328 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra + 329 sizeof (*sdc); 330 count = chunksize = first_len - sizeof (*sdc); 331 nextmsg: 332 chunk_mp = mdblk->b_cont; 333 334 /* 335 * If this partially chunked, we ignore the first_len for now 336 * and use the one already present. For the unchunked bits, we 337 * use the length of the last chunk. 
338 */ 339 if (SCTP_IS_MSG_CHUNKED(mdblk)) { 340 int chunk_len; 341 342 ASSERT(chunk_mp->b_next != NULL); 343 mdblk->b_cont = chunk_mp->b_next; 344 chunk_mp->b_next = NULL; 345 SCTP_MSG_CLEAR_CHUNKED(mdblk); 346 mp = mdblk->b_cont; 347 while (mp->b_next != NULL) 348 mp = mp->b_next; 349 chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len); 350 if (fp->sfa_pmss - chunk_len > sizeof (*sdc)) 351 count = chunksize = fp->sfa_pmss - chunk_len; 352 else 353 count = chunksize = fp->sfa_pmss; 354 count = chunksize = count - sizeof (*sdc); 355 } else { 356 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; 357 if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) { 358 sctp->sctp_xmit_unsent = mdblk->b_next; 359 if (sctp->sctp_xmit_unsent == NULL) 360 sctp->sctp_xmit_unsent_tail = NULL; 361 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); 362 sctp->sctp_unsent -= msg_hdr->smh_msglen; 363 mdblk->b_next = NULL; 364 BUMP_LOCAL(sctp->sctp_prsctpdrop); 365 /* 366 * Update ULP the amount of queued data, which is 367 * sent-unack'ed + unsent. 
368 */ 369 if (!SCTP_IS_DETACHED(sctp)) { 370 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, 371 sctp->sctp_unacked + sctp->sctp_unsent); 372 } 373 sctp_sendfail_event(sctp, mdblk, 0, B_FALSE); 374 goto try_next; 375 } 376 mdblk->b_cont = NULL; 377 } 378 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; 379 nextchunk: 380 chunk_head = chunk_mp; 381 chunk_tail = NULL; 382 383 /* Skip as many mblk's as we need */ 384 while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) { 385 count -= MBLKL(chunk_mp); 386 chunk_tail = chunk_mp; 387 chunk_mp = chunk_mp->b_cont; 388 } 389 /* Split the chain, if needed */ 390 if (chunk_mp != NULL) { 391 if (count > 0) { 392 mblk_t *split_mp = dupb(chunk_mp); 393 394 if (split_mp == NULL) { 395 if (mdblk->b_cont == NULL) { 396 mdblk->b_cont = chunk_head; 397 } else { 398 SCTP_MSG_SET_CHUNKED(mdblk); 399 ASSERT(chunk_head->b_next == NULL); 400 chunk_head->b_next = mdblk->b_cont; 401 mdblk->b_cont = chunk_head; 402 } 403 return; 404 } 405 if (chunk_tail != NULL) { 406 chunk_tail->b_cont = split_mp; 407 chunk_tail = chunk_tail->b_cont; 408 } else { 409 chunk_head = chunk_tail = split_mp; 410 } 411 chunk_tail->b_wptr = chunk_tail->b_rptr + count; 412 chunk_mp->b_rptr = chunk_tail->b_wptr; 413 count = 0; 414 } else if (chunk_tail == NULL) { 415 goto next; 416 } else { 417 chunk_tail->b_cont = NULL; 418 } 419 } 420 /* Alloc chunk hdr, if needed */ 421 if (DB_REF(chunk_head) > 1 || 422 ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) || 423 MBLKHEAD(chunk_head) < sizeof (*sdc)) { 424 if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) { 425 if (mdblk->b_cont == NULL) { 426 if (chunk_mp != NULL) 427 linkb(chunk_head, chunk_mp); 428 mdblk->b_cont = chunk_head; 429 } else { 430 SCTP_MSG_SET_CHUNKED(mdblk); 431 if (chunk_mp != NULL) 432 linkb(chunk_head, chunk_mp); 433 ASSERT(chunk_head->b_next == NULL); 434 chunk_head->b_next = mdblk->b_cont; 435 mdblk->b_cont = chunk_head; 436 } 437 return; 438 } 439 chunk_hdr->b_rptr += xtralen - sizeof (*sdc); 440 
chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc); 441 chunk_hdr->b_cont = chunk_head; 442 } else { 443 chunk_hdr = chunk_head; 444 chunk_hdr->b_rptr -= sizeof (*sdc); 445 } 446 ASSERT(chunk_hdr->b_datap->db_ref == 1); 447 sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr; 448 sdc->sdh_id = CHUNK_DATA; 449 sdc->sdh_flags = 0; 450 sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count); 451 ASSERT(sdc->sdh_len); 452 sdc->sdh_sid = htons(msg_hdr->smh_sid); 453 /* 454 * We defer assigning the SSN just before sending the chunk, else 455 * if we drop the chunk in sctp_get_msg_to_send(), we would need 456 * to send a Forward TSN to let the peer know. Some more comments 457 * about this in sctp_impl.h for SCTP_CHUNK_SENT. 458 */ 459 sdc->sdh_payload_id = msg_hdr->smh_ppid; 460 461 if (mdblk->b_cont == NULL) { 462 mdblk->b_cont = chunk_hdr; 463 SCTP_DATA_SET_BBIT(sdc); 464 } else { 465 mp = mdblk->b_cont; 466 while (mp->b_next != NULL) 467 mp = mp->b_next; 468 mp->b_next = chunk_hdr; 469 } 470 471 bytes_to_send -= (chunksize - count); 472 if (chunk_mp != NULL) { 473 next: 474 count = chunksize = fp->sfa_pmss - sizeof (*sdc); 475 goto nextchunk; 476 } 477 SCTP_DATA_SET_EBIT(sdc); 478 sctp->sctp_xmit_unsent = mdblk->b_next; 479 if (mdblk->b_next == NULL) { 480 sctp->sctp_xmit_unsent_tail = NULL; 481 } 482 mdblk->b_next = NULL; 483 484 if (sctp->sctp_xmit_tail == NULL) { 485 sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk; 486 } else { 487 mp = sctp->sctp_xmit_tail; 488 while (mp->b_next != NULL) 489 mp = mp->b_next; 490 mp->b_next = mdblk; 491 mdblk->b_prev = mp; 492 } 493 try_next: 494 if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) { 495 mdblk = sctp->sctp_xmit_unsent; 496 fp1 = SCTP_CHUNK_DEST(mdblk); 497 if (fp1 == NULL) 498 fp1 = sctp->sctp_current; 499 if (fp == fp1) { 500 size_t len = MBLKL(mdblk->b_cont); 501 if ((count > 0) && 502 ((len > fp->sfa_pmss - sizeof (*sdc)) || 503 (len <= count))) { 504 count -= sizeof (*sdc); 505 count = chunksize = count - 
(count & 0x3); 506 } else { 507 count = chunksize = fp->sfa_pmss - 508 sizeof (*sdc); 509 } 510 } else { 511 if (fp1->isv4) 512 xtralen = sctp->sctp_hdr_len; 513 else 514 xtralen = sctp->sctp_hdr6_len; 515 xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc); 516 count = chunksize = fp1->sfa_pmss - sizeof (*sdc); 517 fp = fp1; 518 } 519 goto nextmsg; 520 } 521 } 522 523 void 524 sctp_free_msg(mblk_t *ump) 525 { 526 mblk_t *mp, *nmp; 527 528 for (mp = ump->b_cont; mp; mp = nmp) { 529 nmp = mp->b_next; 530 mp->b_next = mp->b_prev = NULL; 531 freemsg(mp); 532 } 533 ASSERT(!ump->b_prev); 534 ump->b_next = NULL; 535 freeb(ump); 536 } 537 538 mblk_t * 539 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, 540 int *error) 541 { 542 int hdrlen; 543 char *hdr; 544 int isv4 = fp->isv4; 545 sctp_stack_t *sctps = sctp->sctp_sctps; 546 547 if (error != NULL) 548 *error = 0; 549 550 if (isv4) { 551 hdrlen = sctp->sctp_hdr_len; 552 hdr = sctp->sctp_iphc; 553 } else { 554 hdrlen = sctp->sctp_hdr6_len; 555 hdr = sctp->sctp_iphc6; 556 } 557 /* 558 * A null fp->ire could mean that the address is 'down'. Similarly, 559 * it is possible that the address went down, we tried to send an 560 * heartbeat and ended up setting fp->saddr as unspec because we 561 * didn't have any usable source address. In either case 562 * sctp_get_ire() will try find an IRE, if available, and set 563 * the source address, if needed. If we still don't have any 564 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and 565 * we return EHOSTUNREACH. 566 */ 567 if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) { 568 sctp_get_ire(sctp, fp); 569 if (fp->state == SCTP_FADDRS_UNREACH) { 570 if (error != NULL) 571 *error = EHOSTUNREACH; 572 return (NULL); 573 } 574 } 575 /* Copy in IP header. 
*/ 576 if ((mp->b_rptr - mp->b_datap->db_base) < 577 (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 || 578 !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) { 579 mblk_t *nmp; 580 581 /* 582 * This can happen if IP headers are adjusted after 583 * data was moved into chunks, or during retransmission, 584 * or things like snoop is running. 585 */ 586 nmp = allocb_cred(sctps->sctps_wroff_xtra + hdrlen + sacklen, 587 CONN_CRED(sctp->sctp_connp)); 588 if (nmp == NULL) { 589 if (error != NULL) 590 *error = ENOMEM; 591 return (NULL); 592 } 593 nmp->b_rptr += sctps->sctps_wroff_xtra; 594 nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen; 595 nmp->b_cont = mp; 596 mp = nmp; 597 } else { 598 mp->b_rptr -= (hdrlen + sacklen); 599 mblk_setcred(mp, CONN_CRED(sctp->sctp_connp)); 600 } 601 bcopy(hdr, mp->b_rptr, hdrlen); 602 if (sacklen) { 603 sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen); 604 } 605 if (fp != sctp->sctp_current) { 606 /* change addresses in header */ 607 if (isv4) { 608 ipha_t *iph = (ipha_t *)mp->b_rptr; 609 610 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst); 611 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) { 612 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 613 iph->ipha_src); 614 } else if (sctp->sctp_bound_to_all) { 615 iph->ipha_src = INADDR_ANY; 616 } 617 } else { 618 ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr; 619 if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 620 ((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr; 621 } else if (sctp->sctp_bound_to_all) { 622 V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src); 623 } 624 } 625 } 626 /* 627 * IP will not free this IRE if it is condemned. SCTP needs to 628 * free it. 
629 */ 630 if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) { 631 IRE_REFRELE_NOTR(fp->ire); 632 fp->ire = NULL; 633 } 634 635 /* Stash the conn and ire ptr info for IP */ 636 SCTP_STASH_IPINFO(mp, fp->ire); 637 638 return (mp); 639 } 640 641 /* 642 * SCTP requires every chunk to be padded so that the total length 643 * is a multiple of SCTP_ALIGN. This function returns a mblk with 644 * the specified pad length. 645 */ 646 static mblk_t * 647 sctp_get_padding(sctp_t *sctp, int pad) 648 { 649 mblk_t *fill; 650 651 ASSERT(pad < SCTP_ALIGN); 652 ASSERT(sctp->sctp_pad_mp != NULL); 653 if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) { 654 fill->b_wptr += pad; 655 return (fill); 656 } 657 658 /* 659 * The memory saving path of reusing the sctp_pad_mp 660 * fails may be because it has been dupb() too 661 * many times (DBLK_REFMAX). Use the memory consuming 662 * path of allocating the pad mblk. 663 */ 664 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) { 665 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */ 666 *(int32_t *)fill->b_rptr = 0; 667 fill->b_wptr += pad; 668 } 669 return (fill); 670 } 671 672 static mblk_t * 673 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) 674 { 675 mblk_t *meta; 676 mblk_t *start_mp = NULL; 677 mblk_t *end_mp = NULL; 678 mblk_t *mp, *nmp; 679 mblk_t *fill; 680 sctp_data_hdr_t *sdh; 681 int msglen; 682 int extra; 683 sctp_msg_hdr_t *msg_hdr; 684 sctp_faddr_t *old_fp = NULL; 685 sctp_faddr_t *chunk_fp; 686 sctp_stack_t *sctps = sctp->sctp_sctps; 687 688 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 689 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 690 if (SCTP_IS_MSG_ABANDONED(meta) || 691 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 692 continue; 693 } 694 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 695 if (SCTP_CHUNK_WANT_REXMIT(mp)) { 696 /* 697 * Use the same peer address to do fast 698 * retransmission. 
If the original peer 699 * address is dead, switch to the current 700 * one. Record the old one so that we 701 * will pick the chunks sent to the old 702 * one for fast retransmission. 703 */ 704 chunk_fp = SCTP_CHUNK_DEST(mp); 705 if (*fp == NULL) { 706 *fp = chunk_fp; 707 if ((*fp)->state != SCTP_FADDRS_ALIVE) { 708 old_fp = *fp; 709 *fp = sctp->sctp_current; 710 } 711 } else if (old_fp == NULL && *fp != chunk_fp) { 712 continue; 713 } else if (old_fp != NULL && 714 old_fp != chunk_fp) { 715 continue; 716 } 717 718 sdh = (sctp_data_hdr_t *)mp->b_rptr; 719 msglen = ntohs(sdh->sdh_len); 720 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) { 721 extra = SCTP_ALIGN - extra; 722 } 723 724 /* 725 * We still return at least the first message 726 * even if that message cannot fit in as 727 * PMTU may have changed. 728 */ 729 if (*total + msglen + extra > 730 (*fp)->sfa_pmss && start_mp != NULL) { 731 return (start_mp); 732 } 733 if ((nmp = dupmsg(mp)) == NULL) 734 return (start_mp); 735 if (extra > 0) { 736 fill = sctp_get_padding(sctp, extra); 737 if (fill != NULL) { 738 linkb(nmp, fill); 739 } else { 740 return (start_mp); 741 } 742 } 743 BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans); 744 BUMP_LOCAL(sctp->sctp_rxtchunks); 745 SCTP_CHUNK_CLEAR_REXMIT(mp); 746 if (start_mp == NULL) { 747 start_mp = nmp; 748 } else { 749 linkb(end_mp, nmp); 750 } 751 end_mp = nmp; 752 *total += msglen + extra; 753 dprint(2, ("sctp_find_fast_rexmit_mblks: " 754 "tsn %x\n", sdh->sdh_tsn)); 755 } 756 } 757 } 758 /* Clear the flag as there is no more message to be fast rexmitted. 
*/ 759 sctp->sctp_chk_fast_rexmit = B_FALSE; 760 return (start_mp); 761 } 762 763 /* A debug function just to make sure that a mblk chain is not broken */ 764 #ifdef DEBUG 765 static boolean_t 766 sctp_verify_chain(mblk_t *head, mblk_t *tail) 767 { 768 mblk_t *mp = head; 769 770 if (head == NULL || tail == NULL) 771 return (B_TRUE); 772 while (mp != NULL) { 773 if (mp == tail) 774 return (B_TRUE); 775 mp = mp->b_next; 776 } 777 return (B_FALSE); 778 } 779 #endif 780 781 /* 782 * Gets the next unsent chunk to transmit. Messages that are abandoned are 783 * skipped. A message can be abandoned if it has a non-zero timetolive and 784 * transmission has not yet started or if it is a partially reliable 785 * message and its time is up (assuming we are PR-SCTP aware). 786 * 'cansend' is used to determine if need to try and chunkify messages from 787 * the unsent list, if any, and also as an input to sctp_chunkify() if so. 788 * 789 * firstseg indicates the space already used, cansend represents remaining 790 * space in the window, ((sfa_pmss - firstseg) can therefore reasonably 791 * be used to compute the cansend arg). 
792 */ 793 mblk_t * 794 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error, 795 int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp) 796 { 797 mblk_t *mp1; 798 sctp_msg_hdr_t *msg_hdr; 799 mblk_t *tmp_meta; 800 sctp_faddr_t *fp1; 801 802 ASSERT(error != NULL && mp != NULL); 803 *error = 0; 804 805 ASSERT(sctp->sctp_current != NULL); 806 807 chunkified: 808 while (meta != NULL) { 809 tmp_meta = meta->b_next; 810 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 811 mp1 = meta->b_cont; 812 if (SCTP_IS_MSG_ABANDONED(meta)) 813 goto next_msg; 814 if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 815 while (mp1 != NULL) { 816 if (SCTP_CHUNK_CANSEND(mp1)) { 817 *mp = mp1; 818 #ifdef DEBUG 819 ASSERT(sctp_verify_chain( 820 sctp->sctp_xmit_head, meta)); 821 #endif 822 return (meta); 823 } 824 mp1 = mp1->b_next; 825 } 826 goto next_msg; 827 } 828 /* 829 * If we come here and the first chunk is sent, then we 830 * we are PR-SCTP aware, in which case if the cumulative 831 * TSN has moved upto or beyond the first chunk (which 832 * means all the previous messages have been cumulative 833 * SACK'd), then we send a Forward TSN with the last 834 * chunk that was sent in this message. If we can't send 835 * a Forward TSN because previous non-abandoned messages 836 * have not been acked then we will defer the Forward TSN 837 * to sctp_rexmit() or sctp_cumack(). 
838 */ 839 if (SCTP_CHUNK_ISSENT(mp1)) { 840 *error = sctp_check_abandoned_msg(sctp, meta); 841 if (*error != 0) { 842 #ifdef DEBUG 843 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, 844 sctp->sctp_xmit_tail)); 845 #endif 846 return (NULL); 847 } 848 goto next_msg; 849 } 850 BUMP_LOCAL(sctp->sctp_prsctpdrop); 851 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); 852 if (meta->b_prev == NULL) { 853 ASSERT(sctp->sctp_xmit_head == meta); 854 sctp->sctp_xmit_head = tmp_meta; 855 if (sctp->sctp_xmit_tail == meta) 856 sctp->sctp_xmit_tail = tmp_meta; 857 meta->b_next = NULL; 858 if (tmp_meta != NULL) 859 tmp_meta->b_prev = NULL; 860 } else if (meta->b_next == NULL) { 861 if (sctp->sctp_xmit_tail == meta) 862 sctp->sctp_xmit_tail = meta->b_prev; 863 meta->b_prev->b_next = NULL; 864 meta->b_prev = NULL; 865 } else { 866 meta->b_prev->b_next = tmp_meta; 867 tmp_meta->b_prev = meta->b_prev; 868 if (sctp->sctp_xmit_tail == meta) 869 sctp->sctp_xmit_tail = tmp_meta; 870 meta->b_prev = NULL; 871 meta->b_next = NULL; 872 } 873 sctp->sctp_unsent -= msg_hdr->smh_msglen; 874 /* 875 * Update ULP the amount of queued data, which is 876 * sent-unack'ed + unsent. 877 */ 878 if (!SCTP_IS_DETACHED(sctp)) { 879 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, 880 sctp->sctp_unacked + sctp->sctp_unsent); 881 } 882 sctp_sendfail_event(sctp, meta, 0, B_TRUE); 883 next_msg: 884 meta = tmp_meta; 885 } 886 /* chunkify, if needed */ 887 if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) { 888 ASSERT(sctp->sctp_unsent > 0); 889 if (fp == NULL) { 890 fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent); 891 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 892 fp = sctp->sctp_current; 893 } else { 894 /* 895 * If user specified destination, try to honor that. 
896 */ 897 fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent); 898 if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE && 899 fp1 != fp) { 900 goto chunk_done; 901 } 902 } 903 sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend); 904 if ((meta = sctp->sctp_xmit_tail) == NULL) 905 goto chunk_done; 906 /* 907 * sctp_chunkify() won't advance sctp_xmit_tail if it adds 908 * new chunk(s) to the tail, so we need to skip the 909 * sctp_xmit_tail, which would have already been processed. 910 * This could happen when there is unacked chunks, but 911 * nothing new to send. 912 * When sctp_chunkify() is called when the transmit queue 913 * is empty then we need to start from sctp_xmit_tail. 914 */ 915 if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) { 916 #ifdef DEBUG 917 mp1 = sctp->sctp_xmit_tail->b_cont; 918 while (mp1 != NULL) { 919 ASSERT(!SCTP_CHUNK_CANSEND(mp1)); 920 mp1 = mp1->b_next; 921 } 922 #endif 923 if ((meta = sctp->sctp_xmit_tail->b_next) == NULL) 924 goto chunk_done; 925 } 926 goto chunkified; 927 } 928 chunk_done: 929 #ifdef DEBUG 930 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail)); 931 #endif 932 return (NULL); 933 } 934 935 void 936 sctp_fast_rexmit(sctp_t *sctp) 937 { 938 mblk_t *mp, *head; 939 int pktlen = 0; 940 sctp_faddr_t *fp = NULL; 941 sctp_stack_t *sctps = sctp->sctp_sctps; 942 943 ASSERT(sctp->sctp_xmit_head != NULL); 944 mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp); 945 if (mp == NULL) { 946 SCTP_KSTAT(sctps, sctp_fr_not_found); 947 return; 948 } 949 if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) { 950 freemsg(mp); 951 SCTP_KSTAT(sctps, sctp_fr_add_hdr); 952 return; 953 } 954 if ((pktlen > fp->sfa_pmss) && fp->isv4) { 955 ipha_t *iph = (ipha_t *)head->b_rptr; 956 957 iph->ipha_fragment_offset_and_flags = 0; 958 } 959 960 sctp_set_iplen(sctp, head); 961 sctp_add_sendq(sctp, head); 962 sctp->sctp_active = fp->lastactive = lbolt64; 963 } 964 965 void 966 sctp_output(sctp_t *sctp, uint_t num_pkt) 967 { 
968 mblk_t *mp = NULL; 969 mblk_t *nmp; 970 mblk_t *head; 971 mblk_t *meta = sctp->sctp_xmit_tail; 972 mblk_t *fill = NULL; 973 uint16_t chunklen; 974 uint32_t cansend; 975 int32_t seglen; 976 int32_t xtralen; 977 int32_t sacklen; 978 int32_t pad = 0; 979 int32_t pathmax; 980 int extra; 981 int64_t now = lbolt64; 982 sctp_faddr_t *fp; 983 sctp_faddr_t *lfp; 984 sctp_data_hdr_t *sdc; 985 int error; 986 boolean_t notsent = B_TRUE; 987 sctp_stack_t *sctps = sctp->sctp_sctps; 988 989 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 990 sacklen = 0; 991 } else { 992 /* send a SACK chunk */ 993 sacklen = sizeof (sctp_chunk_hdr_t) + 994 sizeof (sctp_sack_chunk_t) + 995 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 996 lfp = sctp->sctp_lastdata; 997 ASSERT(lfp != NULL); 998 if (lfp->state != SCTP_FADDRS_ALIVE) 999 lfp = sctp->sctp_current; 1000 } 1001 1002 cansend = sctp->sctp_frwnd; 1003 if (sctp->sctp_unsent < cansend) 1004 cansend = sctp->sctp_unsent; 1005 1006 /* 1007 * Start persist timer if unable to send or when 1008 * trying to send into a zero window. This timer 1009 * ensures the blocked send attempt is retried. 1010 */ 1011 if ((cansend < sctp->sctp_current->sfa_pmss / 2) && 1012 (sctp->sctp_unacked != 0) && 1013 (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) && 1014 !sctp->sctp_ndelay || 1015 (cansend == 0 && sctp->sctp_unacked == 0 && 1016 sctp->sctp_unsent != 0)) { 1017 head = NULL; 1018 fp = sctp->sctp_current; 1019 goto unsent_data; 1020 } 1021 if (meta != NULL) 1022 mp = meta->b_cont; 1023 while (cansend > 0 && num_pkt-- != 0) { 1024 pad = 0; 1025 1026 /* 1027 * Find first segment eligible for transmit. 1028 */ 1029 while (mp != NULL) { 1030 if (SCTP_CHUNK_CANSEND(mp)) 1031 break; 1032 mp = mp->b_next; 1033 } 1034 if (mp == NULL) { 1035 meta = sctp_get_msg_to_send(sctp, &mp, 1036 meta == NULL ? 
NULL : meta->b_next, &error, sacklen, 1037 cansend, NULL); 1038 if (error != 0 || meta == NULL) { 1039 head = NULL; 1040 fp = sctp->sctp_current; 1041 goto unsent_data; 1042 } 1043 sctp->sctp_xmit_tail = meta; 1044 } 1045 1046 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1047 seglen = ntohs(sdc->sdh_len); 1048 xtralen = sizeof (*sdc); 1049 chunklen = seglen - xtralen; 1050 1051 /* 1052 * Check rwnd. 1053 */ 1054 if (chunklen > cansend) { 1055 head = NULL; 1056 fp = SCTP_CHUNK_DEST(meta); 1057 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1058 fp = sctp->sctp_current; 1059 goto unsent_data; 1060 } 1061 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1062 extra = SCTP_ALIGN - extra; 1063 1064 /* 1065 * Pick destination address, and check cwnd. 1066 */ 1067 if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) && 1068 (seglen + sacklen + extra <= lfp->sfa_pmss)) { 1069 /* 1070 * Only include SACK chunk if it can be bundled 1071 * with a data chunk, and sent to sctp_lastdata. 1072 */ 1073 pathmax = lfp->cwnd - lfp->suna; 1074 1075 fp = lfp; 1076 if ((nmp = dupmsg(mp)) == NULL) { 1077 head = NULL; 1078 goto unsent_data; 1079 } 1080 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1081 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, 1082 &error); 1083 if (head == NULL) { 1084 /* 1085 * If none of the source addresses are 1086 * available (i.e error == EHOSTUNREACH), 1087 * pretend we have sent the data. We will 1088 * eventually time out trying to retramsmit 1089 * the data if the interface never comes up. 1090 * If we have already sent some stuff (i.e., 1091 * notsent is B_FALSE) then we are fine, else 1092 * just mark this packet as sent. 
1093 */ 1094 if (notsent && error == EHOSTUNREACH) { 1095 SCTP_CHUNK_SENT(sctp, mp, sdc, 1096 fp, chunklen, meta); 1097 } 1098 freemsg(nmp); 1099 SCTP_KSTAT(sctps, sctp_output_failed); 1100 goto unsent_data; 1101 } 1102 seglen += sacklen; 1103 xtralen += sacklen; 1104 sacklen = 0; 1105 } else { 1106 fp = SCTP_CHUNK_DEST(meta); 1107 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1108 fp = sctp->sctp_current; 1109 /* 1110 * If we haven't sent data to this destination for 1111 * a while, do slow start again. 1112 */ 1113 if (now - fp->lastactive > fp->rto) { 1114 SET_CWND(fp, fp->sfa_pmss, 1115 sctps->sctps_slow_start_after_idle); 1116 } 1117 1118 pathmax = fp->cwnd - fp->suna; 1119 if (seglen + extra > pathmax) { 1120 head = NULL; 1121 goto unsent_data; 1122 } 1123 if ((nmp = dupmsg(mp)) == NULL) { 1124 head = NULL; 1125 goto unsent_data; 1126 } 1127 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1128 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error); 1129 if (head == NULL) { 1130 /* 1131 * If none of the source addresses are 1132 * available (i.e error == EHOSTUNREACH), 1133 * pretend we have sent the data. We will 1134 * eventually time out trying to retramsmit 1135 * the data if the interface never comes up. 1136 * If we have already sent some stuff (i.e., 1137 * notsent is B_FALSE) then we are fine, else 1138 * just mark this packet as sent. 1139 */ 1140 if (notsent && error == EHOSTUNREACH) { 1141 SCTP_CHUNK_SENT(sctp, mp, sdc, 1142 fp, chunklen, meta); 1143 } 1144 freemsg(nmp); 1145 SCTP_KSTAT(sctps, sctp_output_failed); 1146 goto unsent_data; 1147 } 1148 } 1149 fp->lastactive = now; 1150 if (pathmax > fp->sfa_pmss) 1151 pathmax = fp->sfa_pmss; 1152 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1153 mp = mp->b_next; 1154 1155 /* Use this chunk to measure RTT? 
*/ 1156 if (sctp->sctp_out_time == 0) { 1157 sctp->sctp_out_time = now; 1158 sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1; 1159 ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); 1160 } 1161 if (extra > 0) { 1162 fill = sctp_get_padding(sctp, extra); 1163 if (fill != NULL) { 1164 linkb(head, fill); 1165 pad = extra; 1166 seglen += extra; 1167 } else { 1168 goto unsent_data; 1169 } 1170 } 1171 /* See if we can bundle more. */ 1172 while (seglen < pathmax) { 1173 int32_t new_len; 1174 int32_t new_xtralen; 1175 1176 while (mp != NULL) { 1177 if (SCTP_CHUNK_CANSEND(mp)) 1178 break; 1179 mp = mp->b_next; 1180 } 1181 if (mp == NULL) { 1182 meta = sctp_get_msg_to_send(sctp, &mp, 1183 meta->b_next, &error, seglen, 1184 (seglen - xtralen) >= cansend ? 0 : 1185 cansend - seglen, fp); 1186 if (error != 0 || meta == NULL) 1187 break; 1188 sctp->sctp_xmit_tail = meta; 1189 } 1190 ASSERT(mp != NULL); 1191 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) && 1192 fp != SCTP_CHUNK_DEST(meta)) { 1193 break; 1194 } 1195 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1196 chunklen = ntohs(sdc->sdh_len); 1197 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0) 1198 extra = SCTP_ALIGN - extra; 1199 1200 new_len = seglen + chunklen; 1201 new_xtralen = xtralen + sizeof (*sdc); 1202 chunklen -= sizeof (*sdc); 1203 1204 if (new_len - new_xtralen > cansend || 1205 new_len + extra > pathmax) { 1206 break; 1207 } 1208 if ((nmp = dupmsg(mp)) == NULL) 1209 break; 1210 if (extra > 0) { 1211 fill = sctp_get_padding(sctp, extra); 1212 if (fill != NULL) { 1213 pad += extra; 1214 new_len += extra; 1215 linkb(nmp, fill); 1216 } else { 1217 freemsg(nmp); 1218 break; 1219 } 1220 } 1221 seglen = new_len; 1222 xtralen = new_xtralen; 1223 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1224 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1225 linkb(head, nmp); 1226 mp = mp->b_next; 1227 } 1228 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1229 ipha_t *iph = (ipha_t *)head->b_rptr; 1230 1231 /* 1232 * Path MTU is different from what we 
thought it would
			 * be when we created chunks, or IP headers have grown.
			 * Need to clear the DF bit.
			 */
			iph->ipha_fragment_offset_and_flags = 0;
		}
		/* xmit segment */
		ASSERT(cansend >= seglen - pad - xtralen);
		cansend -= (seglen - pad - xtralen);
		dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
		    "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
		    seglen - xtralen, ntohl(sdc->sdh_tsn),
		    ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
		    cansend, sctp->sctp_lastack_rxd));
		sctp_set_iplen(sctp, head);
		sctp_add_sendq(sctp, head);
		/* arm rto timer (if not set) */
		if (!fp->timer_running)
			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
		notsent = B_FALSE;
	}
	sctp->sctp_active = now;
	return;
unsent_data:
	/* arm persist timer (if rto timer not set) */
	if (!fp->timer_running)
		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
	if (head != NULL)
		freemsg(head);
}

/*
 * The following two functions initialize and destroy the cache
 * associated with the sets used for PR-SCTP.
 *
 * sctp_ftsn_sets_init() creates the kmem cache named
 * "sctp_ftsn_set_cache", whose objects are sizeof (sctp_ftsn_set_t)
 * bytes, and stores the handle in sctp_kmem_ftsn_set_cache.  Every
 * optional argument to kmem_cache_create() is passed as NULL or 0.
 * The return value is not checked here.
 */
void
sctp_ftsn_sets_init(void)
{
	sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
	    sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
	    NULL, 0);
}

/*
 * Destroy the cache created by sctp_ftsn_sets_init().
 */
void
sctp_ftsn_sets_fini(void)
{
	kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
}


/*
 * Free PR-SCTP sets.
 *
 * Walks the singly linked list starting at s and returns every element
 * to sctp_kmem_ftsn_set_cache.  Passing NULL is a no-op.
 */
void
sctp_free_ftsn_set(sctp_ftsn_set_t *s)
{
	sctp_ftsn_set_t	*p;

	while (s != NULL) {
		/* Remember the successor before the element is released. */
		p = s->next;
		s->next = NULL;
		kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
		s = p;
	}
}

/*
 * Given a message meta block, meta, this routine creates or modifies
 * the set that will be used to generate a Forward TSN chunk.
If the 1299 * entry for stream id, sid, for this message already exists, the 1300 * sequence number, ssn, is updated if it is greater than the existing 1301 * one. If an entry for this sid does not exist, one is created if 1302 * the size does not exceed fp->sfa_pmss. We return false in case 1303 * or an error. 1304 */ 1305 boolean_t 1306 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta, 1307 uint_t *nsets, uint32_t *slen) 1308 { 1309 sctp_ftsn_set_t *p; 1310 sctp_msg_hdr_t *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1311 uint16_t sid = htons(msg_hdr->smh_sid); 1312 /* msg_hdr->smh_ssn is already in NBO */ 1313 uint16_t ssn = msg_hdr->smh_ssn; 1314 1315 ASSERT(s != NULL && nsets != NULL); 1316 ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL)); 1317 1318 if (*s == NULL) { 1319 ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss); 1320 *s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP); 1321 if (*s == NULL) 1322 return (B_FALSE); 1323 (*s)->ftsn_entries.ftsn_sid = sid; 1324 (*s)->ftsn_entries.ftsn_ssn = ssn; 1325 (*s)->next = NULL; 1326 *nsets = 1; 1327 *slen += sizeof (uint32_t); 1328 return (B_TRUE); 1329 } 1330 for (p = *s; p->next != NULL; p = p->next) { 1331 if (p->ftsn_entries.ftsn_sid == sid) { 1332 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1333 p->ftsn_entries.ftsn_ssn = ssn; 1334 return (B_TRUE); 1335 } 1336 } 1337 /* the last one */ 1338 if (p->ftsn_entries.ftsn_sid == sid) { 1339 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1340 p->ftsn_entries.ftsn_ssn = ssn; 1341 } else { 1342 if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss) 1343 return (B_FALSE); 1344 p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, 1345 KM_NOSLEEP); 1346 if (p->next == NULL) 1347 return (B_FALSE); 1348 p = p->next; 1349 p->ftsn_entries.ftsn_sid = sid; 1350 p->ftsn_entries.ftsn_ssn = ssn; 1351 p->next = NULL; 1352 (*nsets)++; 1353 *slen += sizeof (uint32_t); 1354 } 1355 return (B_TRUE); 1356 } 1357 1358 /* 1359 * Given a set of stream id - 
sequence number pairs, this routing creates 1360 * a Forward TSN chunk. The cumulative TSN (advanced peer ack point) 1361 * for the chunk is obtained from sctp->sctp_adv_pap. The caller 1362 * will add the IP/SCTP header. 1363 */ 1364 mblk_t * 1365 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets, 1366 uint_t nsets, uint32_t seglen) 1367 { 1368 mblk_t *ftsn_mp; 1369 sctp_chunk_hdr_t *ch_hdr; 1370 uint32_t *advtsn; 1371 uint16_t schlen; 1372 size_t xtralen; 1373 ftsn_entry_t *ftsn_entry; 1374 sctp_stack_t *sctps = sctp->sctp_sctps; 1375 1376 seglen += sizeof (sctp_chunk_hdr_t); 1377 if (fp->isv4) 1378 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra; 1379 else 1380 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra; 1381 ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp)); 1382 if (ftsn_mp == NULL) 1383 return (NULL); 1384 ftsn_mp->b_rptr += xtralen; 1385 ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen; 1386 1387 ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr; 1388 ch_hdr->sch_id = CHUNK_FORWARD_TSN; 1389 ch_hdr->sch_flags = 0; 1390 /* 1391 * The cast here should not be an issue since seglen is 1392 * the length of the Forward TSN chunk. 1393 */ 1394 schlen = (uint16_t)seglen; 1395 U16_TO_ABE16(schlen, &(ch_hdr->sch_len)); 1396 1397 advtsn = (uint32_t *)(ch_hdr + 1); 1398 U32_TO_ABE32(sctp->sctp_adv_pap, advtsn); 1399 ftsn_entry = (ftsn_entry_t *)(advtsn + 1); 1400 while (nsets > 0) { 1401 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr); 1402 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid; 1403 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn; 1404 ftsn_entry++; 1405 sets = sets->next; 1406 nsets--; 1407 } 1408 return (ftsn_mp); 1409 } 1410 1411 /* 1412 * Given a starting message, the routine steps through all the 1413 * messages whose TSN is less than sctp->sctp_adv_pap and creates 1414 * ftsn sets. The ftsn sets is then used to create an Forward TSN 1415 * chunk. 
All the messages that have chunks included in the
 * ftsn sets are flagged abandoned.  If a message is partially sent
 * and is deemed abandoned, all remaining unsent chunks are marked
 * abandoned and are deducted from sctp_unsent.
 *
 * Arguments:
 *	meta, mp  - message (and its chunk) at which the walk starts.
 *	nmp	  - out: set to the packet to send (the Forward TSN chunk
 *		    with protocol header prepended), or to NULL if building
 *		    it failed.  It is not written at all when the first
 *		    loop does not advance past the starting message.
 *	fp	  - destination used for the chunk; replaced locally by
 *		    sctp_lastdata when a SACK is piggybacked.
 *	seglen	  - out: chunk body length, plus the SACK length when one
 *		    is piggybacked.
 */
void
sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
    sctp_faddr_t *fp, uint32_t *seglen)
{
	mblk_t		*mp1 = mp;
	mblk_t		*mp_head = mp;
	mblk_t		*meta_head = meta;
	mblk_t		*head;
	sctp_ftsn_set_t	*sets = NULL;
	uint_t		nsets = 0;
	uint16_t	clen;
	sctp_data_hdr_t	*sdc;
	uint32_t	sacklen;
	uint32_t	adv_pap = sctp->sctp_adv_pap;
	uint32_t	unsent = 0;
	boolean_t	ubit;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/* Room for the new cumulative TSN that starts the chunk body. */
	*seglen = sizeof (uint32_t);

	/*
	 * First pass: collect a sid/ssn set for every ordered message
	 * covered by sctp_adv_pap.  Nothing is marked yet, so a failure
	 * further down needs no undo.
	 */
	sdc = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		/*
		 * Skip adding FTSN sets for un-ordered messages as they do
		 * not have SSNs.
		 */
		ubit = SCTP_DATA_GET_UBIT(sdc);
		if (!ubit &&
		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
			/*
			 * Could not add this message: pull the advanced
			 * peer ack point back to the last TSN walked so far
			 * and send what has been collected.
			 */
			meta = NULL;
			sctp->sctp_adv_pap = adv_pap;
			goto ftsn_done;
		}
		/* Track the highest sent TSN of this message. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			adv_pap = ntohl(sdc->sdh_tsn);
			mp1 = mp1->b_next;
		}
		meta = meta->b_next;
		if (meta != NULL) {
			mp1 = meta->b_cont;
			/* Stop at the first message not yet transmitted. */
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
ftsn_done:
	/*
	 * Can't compare with sets == NULL, since we don't add any
	 * sets for un-ordered messages.
	 */
	if (meta == meta_head)
		return;
	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
	/* The sets are only needed to build the chunk. */
	sctp_free_ftsn_set(sets);
	if (*nmp == NULL)
		return;
	/* Work out whether a SACK should be piggybacked. */
	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
		sacklen = 0;
	} else {
		sacklen = sizeof (sctp_chunk_hdr_t) +
		    sizeof (sctp_sack_chunk_t) +
		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
			/* piggybacked SACK doesn't fit */
			sacklen = 0;
		} else {
			/* The SACK goes to where data was last received. */
			fp = sctp->sctp_lastdata;
		}
	}
	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
	if (head == NULL) {
		freemsg(*nmp);
		*nmp = NULL;
		SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
		return;
	}
	*seglen += sacklen;
	*nmp = head;

	/*
	 * XXXNeed to optimise this, the reason it is done here is so
	 * that we don't have to undo in case of failure.
	 *
	 * Second pass over the same messages: flag them abandoned now that
	 * the packet has been built successfully.
	 */
	mp1 = mp_head;
	sdc = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta_head != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		if (!SCTP_IS_MSG_ABANDONED(meta_head))
			SCTP_MSG_SET_ABANDONED(meta_head);
		/* Sent but un-acked chunks are re-marked sent towards fp. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ISACKED(mp1)) {
				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
				    meta_head);
			}
			mp1 = mp1->b_next;
		}
		/* The remaining chunks were never sent: abandon and count. */
		while (mp1 != NULL) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ABANDONED(mp1)) {
				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_ABANDON_CHUNK(mp1);
			}
			mp1 = mp1->b_next;
		}
		meta_head = meta_head->b_next;
		if (meta_head != NULL) {
			mp1 = meta_head->b_cont;
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
	if (unsent > 0) {
		ASSERT(sctp->sctp_unsent >= unsent);
		sctp->sctp_unsent -= unsent;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp)) {
			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
			    sctp->sctp_unacked + sctp->sctp_unsent);
		}
	}
}

/*
 * This function steps through messages starting at meta and checks if
 * the message is abandoned.  It stops when it hits an unsent chunk or
 * a message that has all its chunks acked.  This is the only place
 * where the sctp_adv_pap is moved forward to indicate abandoned
 * messages.
 *
 * mp must be a chunk of meta whose TSN is beyond sctp_lastack_rxd
 * (asserted below).  If meta is neither abandoned nor due to be
 * abandoned, sctp_adv_pap is left untouched.
 */
void
sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
{
	uint32_t	tsn = sctp->sctp_adv_pap;
	sctp_data_hdr_t	*sdc;
	sctp_msg_hdr_t	*msg_hdr;

	ASSERT(mp != NULL);
	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
	if (!SCTP_IS_MSG_ABANDONED(meta) &&
	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
		return;
	}
	while (meta != NULL) {
		/* tsn follows the last sent chunk of the current message. */
		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			tsn = ntohl(sdc->sdh_tsn);
			mp = mp->b_next;
		}
		/* An unsent chunk ends the walk. */
		if (mp != NULL)
			break;
		/*
		 * We continue checking for successive messages only if there
		 * is a chunk marked for retransmission. Else, we might
		 * end up sending FTSN prematurely for chunks that have been
		 * sent, but not yet acked.
		 */
		if ((meta = meta->b_next) != NULL) {
			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
			if (!SCTP_IS_MSG_ABANDONED(meta) &&
			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
				break;
			}
			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
				if (!SCTP_CHUNK_ISSENT(mp)) {
					sctp->sctp_adv_pap = tsn;
					return;
				}
				if (SCTP_CHUNK_WANT_REXMIT(mp))
					break;
			}
			/* No chunk of this message wants retransmission. */
			if (mp == NULL)
				break;
		}
	}
	sctp->sctp_adv_pap = tsn;
}


/*
 * Determine if we should bundle a data chunk with the chunk being
 * retransmitted.  We bundle if
 *
 * - the chunk is sent to the same destination and unack'ed.
 *
 * OR
 *
 * - the chunk is unsent, i.e. new data.
 *
 * Abandoned chunks are never bundled.  Note that the flag test in the
 * second clause is also true for a chunk that has both the SENT and
 * the REXMIT flags set.
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED((mp)) &&					\
	((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) &&	\
	!SCTP_CHUNK_ISACKED(mp))) ||					\
	(((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \
	SCTP_CHUNK_FLAG_SENT)))

/*
 * Retransmit first segment which hasn't been acked with cumtsn or send
 * a Forward TSN chunk, if appropriate.
1629 */ 1630 void 1631 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) 1632 { 1633 mblk_t *mp; 1634 mblk_t *nmp = NULL; 1635 mblk_t *head; 1636 mblk_t *meta = sctp->sctp_xmit_head; 1637 mblk_t *fill; 1638 uint32_t seglen = 0; 1639 uint32_t sacklen; 1640 uint16_t chunklen; 1641 int extra; 1642 sctp_data_hdr_t *sdc; 1643 sctp_faddr_t *fp; 1644 uint32_t adv_pap = sctp->sctp_adv_pap; 1645 boolean_t do_ftsn = B_FALSE; 1646 boolean_t ftsn_check = B_TRUE; 1647 uint32_t first_ua_tsn; 1648 sctp_msg_hdr_t *mhdr; 1649 sctp_stack_t *sctps = sctp->sctp_sctps; 1650 int error; 1651 1652 while (meta != NULL) { 1653 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1654 uint32_t tsn; 1655 1656 if (!SCTP_CHUNK_ISSENT(mp)) 1657 goto window_probe; 1658 /* 1659 * We break in the following cases - 1660 * 1661 * if the advanced peer ack point includes the next 1662 * chunk to be retransmited - possibly the Forward 1663 * TSN was lost. 1664 * 1665 * if we are PRSCTP aware and the next chunk to be 1666 * retransmitted is now abandoned 1667 * 1668 * if the next chunk to be retransmitted is for 1669 * the dest on which the timer went off. (this 1670 * message is not abandoned). 1671 * 1672 * We check for Forward TSN only for the first 1673 * eligible chunk to be retransmitted. The reason 1674 * being if the first eligible chunk is skipped (say 1675 * it was sent to a destination other than oldfp) 1676 * then we cannot advance the cum TSN via Forward 1677 * TSN chunk. 1678 * 1679 * Also, ftsn_check is B_TRUE only for the first 1680 * eligible chunk, it will be B_FALSE for all 1681 * subsequent candidate messages for retransmission. 
1682 */ 1683 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1684 tsn = ntohl(sdc->sdh_tsn); 1685 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) { 1686 if (sctp->sctp_prsctp_aware && ftsn_check) { 1687 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) { 1688 ASSERT(sctp->sctp_prsctp_aware); 1689 do_ftsn = B_TRUE; 1690 goto out; 1691 } else { 1692 sctp_check_adv_ack_pt(sctp, 1693 meta, mp); 1694 if (SEQ_GT(sctp->sctp_adv_pap, 1695 adv_pap)) { 1696 do_ftsn = B_TRUE; 1697 goto out; 1698 } 1699 } 1700 ftsn_check = B_FALSE; 1701 } 1702 if (SCTP_CHUNK_DEST(mp) == oldfp) 1703 goto out; 1704 } 1705 } 1706 meta = meta->b_next; 1707 if (meta != NULL && sctp->sctp_prsctp_aware) { 1708 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1709 1710 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || 1711 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { 1712 meta = meta->b_next; 1713 } 1714 } 1715 } 1716 window_probe: 1717 /* 1718 * Retransmit fired for a destination which didn't have 1719 * any unacked data pending. 1720 */ 1721 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) { 1722 /* 1723 * Send a window probe. Inflate frwnd to allow 1724 * sending one segment. 1725 */ 1726 if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) 1727 sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc); 1728 1729 /* next TSN to send */ 1730 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn; 1731 1732 /* 1733 * The above sctp_frwnd adjustment is coarse. The "changed" 1734 * sctp_frwnd may allow us to send more than 1 packet. So 1735 * tell sctp_output() to send only 1 packet. 1736 */ 1737 sctp_output(sctp, 1); 1738 1739 /* Last sent TSN */ 1740 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1741 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); 1742 sctp->sctp_zero_win_probe = B_TRUE; 1743 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1744 } 1745 return; 1746 out: 1747 /* 1748 * After a time out, assume that everything has left the network. So 1749 * we can clear rxt_unacked for the original peer address. 
1750 */ 1751 oldfp->rxt_unacked = 0; 1752 1753 /* 1754 * If we were probing for zero window, don't adjust retransmission 1755 * variables, but the timer is still backed off. 1756 */ 1757 if (sctp->sctp_zero_win_probe) { 1758 mblk_t *pkt; 1759 uint_t pkt_len; 1760 1761 /* 1762 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn 1763 * and sctp_rxt_maxtsn will specify the ZWP packet. 1764 */ 1765 fp = oldfp; 1766 if (oldfp->state != SCTP_FADDRS_ALIVE) 1767 fp = sctp_rotate_faddr(sctp, oldfp); 1768 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 1769 if (pkt != NULL) { 1770 ASSERT(pkt_len <= fp->sfa_pmss); 1771 sctp_set_iplen(sctp, pkt); 1772 sctp_add_sendq(sctp, pkt); 1773 } else { 1774 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 1775 } 1776 1777 /* 1778 * The strikes will be clear by sctp_faddr_alive() when the 1779 * other side sends us an ack. 1780 */ 1781 oldfp->strikes++; 1782 sctp->sctp_strikes++; 1783 1784 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1785 if (oldfp != fp && oldfp->suna != 0) 1786 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); 1787 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1788 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1789 return; 1790 } 1791 1792 /* 1793 * Enter slowstart for this destination 1794 */ 1795 oldfp->ssthresh = oldfp->cwnd / 2; 1796 if (oldfp->ssthresh < 2 * oldfp->sfa_pmss) 1797 oldfp->ssthresh = 2 * oldfp->sfa_pmss; 1798 oldfp->cwnd = oldfp->sfa_pmss; 1799 oldfp->pba = 0; 1800 fp = sctp_rotate_faddr(sctp, oldfp); 1801 ASSERT(fp != NULL); 1802 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1803 1804 first_ua_tsn = ntohl(sdc->sdh_tsn); 1805 if (do_ftsn) { 1806 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); 1807 if (nmp == NULL) { 1808 sctp->sctp_adv_pap = adv_pap; 1809 goto restart_timer; 1810 } 1811 head = nmp; 1812 /* 1813 * Move to the next unabandoned chunk. XXXCheck if meta will 1814 * always be marked abandoned. 
1815 */ 1816 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) 1817 meta = meta->b_next; 1818 if (meta != NULL) 1819 mp = mp->b_cont; 1820 else 1821 mp = NULL; 1822 goto try_bundle; 1823 } 1824 seglen = ntohs(sdc->sdh_len); 1825 chunklen = seglen - sizeof (*sdc); 1826 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1827 extra = SCTP_ALIGN - extra; 1828 1829 /* Find out if we need to piggyback SACK. */ 1830 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1831 sacklen = 0; 1832 } else { 1833 sacklen = sizeof (sctp_chunk_hdr_t) + 1834 sizeof (sctp_sack_chunk_t) + 1835 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1836 if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1837 /* piggybacked SACK doesn't fit */ 1838 sacklen = 0; 1839 } else { 1840 /* 1841 * OK, we have room to send SACK back. But we 1842 * should send it back to the last fp where we 1843 * receive data from, unless sctp_lastdata equals 1844 * oldfp, then we should probably not send it 1845 * back to that fp. Also we should check that 1846 * the fp is alive. 1847 */ 1848 if (sctp->sctp_lastdata != oldfp && 1849 sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { 1850 fp = sctp->sctp_lastdata; 1851 } 1852 } 1853 } 1854 1855 /* 1856 * Cancel RTT measurement if the retransmitted TSN is before the 1857 * TSN used for timimg. 1858 */ 1859 if (sctp->sctp_out_time != 0 && 1860 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { 1861 sctp->sctp_out_time = 0; 1862 } 1863 /* Clear the counter as the RTT calculation may be off. */ 1864 fp->rtt_updates = 0; 1865 oldfp->rtt_updates = 0; 1866 1867 /* 1868 * After a timeout, we should change the current faddr so that 1869 * new chunks will be sent to the alternate address. 
1870 */ 1871 sctp_set_faddr_current(sctp, fp); 1872 1873 nmp = dupmsg(mp); 1874 if (nmp == NULL) 1875 goto restart_timer; 1876 if (extra > 0) { 1877 fill = sctp_get_padding(sctp, extra); 1878 if (fill != NULL) { 1879 linkb(nmp, fill); 1880 seglen += extra; 1881 } else { 1882 freemsg(nmp); 1883 goto restart_timer; 1884 } 1885 } 1886 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1887 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); 1888 if (head == NULL) { 1889 freemsg(nmp); 1890 SCTP_KSTAT(sctps, sctp_rexmit_failed); 1891 goto restart_timer; 1892 } 1893 seglen += sacklen; 1894 1895 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1896 1897 mp = mp->b_next; 1898 1899 try_bundle: 1900 /* We can at least and at most send 1 packet at timeout. */ 1901 while (seglen < fp->sfa_pmss) { 1902 int32_t new_len; 1903 1904 /* Go through the list to find more chunks to be bundled. */ 1905 while (mp != NULL) { 1906 /* Check if the chunk can be bundled. */ 1907 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) 1908 break; 1909 mp = mp->b_next; 1910 } 1911 /* Go to the next message. */ 1912 if (mp == NULL) { 1913 for (meta = meta->b_next; meta != NULL; 1914 meta = meta->b_next) { 1915 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1916 1917 if (SCTP_IS_MSG_ABANDONED(meta) || 1918 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, 1919 sctp)) { 1920 continue; 1921 } 1922 1923 mp = meta->b_cont; 1924 goto try_bundle; 1925 } 1926 /* 1927 * Check if there is a new message which potentially 1928 * could be bundled with this retransmission. 1929 */ 1930 meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error, 1931 seglen, fp->sfa_pmss - seglen, NULL); 1932 if (error != 0 || meta == NULL) { 1933 /* No more chunk to be bundled. 
*/ 1934 break; 1935 } else { 1936 goto try_bundle; 1937 } 1938 } 1939 1940 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1941 new_len = ntohs(sdc->sdh_len); 1942 chunklen = new_len - sizeof (*sdc); 1943 1944 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 1945 extra = SCTP_ALIGN - extra; 1946 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 1947 break; 1948 if ((nmp = dupmsg(mp)) == NULL) 1949 break; 1950 1951 if (extra > 0) { 1952 fill = sctp_get_padding(sctp, extra); 1953 if (fill != NULL) { 1954 linkb(nmp, fill); 1955 } else { 1956 freemsg(nmp); 1957 break; 1958 } 1959 } 1960 linkb(head, nmp); 1961 1962 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1963 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1964 1965 seglen = new_len; 1966 mp = mp->b_next; 1967 } 1968 done_bundle: 1969 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1970 ipha_t *iph = (ipha_t *)head->b_rptr; 1971 1972 /* 1973 * Path MTU is different from path we thought it would 1974 * be when we created chunks, or IP headers have grown. 1975 * Need to clear the DF bit. 1976 */ 1977 iph->ipha_fragment_offset_and_flags = 0; 1978 } 1979 fp->rxt_unacked += seglen; 1980 1981 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x " 1982 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n", 1983 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), 1984 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); 1985 1986 sctp->sctp_rexmitting = B_TRUE; 1987 sctp->sctp_rxt_nxttsn = first_ua_tsn; 1988 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1989 sctp_set_iplen(sctp, head); 1990 sctp_add_sendq(sctp, head); 1991 1992 /* 1993 * Restart the oldfp timer with exponential backoff and 1994 * the new fp timer for the retransmitted chunks. 1995 */ 1996 restart_timer: 1997 oldfp->strikes++; 1998 sctp->sctp_strikes++; 1999 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 2000 /* 2001 * If there is still some data in the oldfp, restart the 2002 * retransmission timer. 
If there is no data, the heartbeat will 2003 * continue to run so it will do its job in checking the reachability 2004 * of the oldfp. 2005 */ 2006 if (oldfp != fp && oldfp->suna != 0) 2007 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); 2008 2009 /* 2010 * Should we restart the timer of the new fp? If there is 2011 * outstanding data to the new fp, the timer should be 2012 * running already. So restarting it means that the timer 2013 * will fire later for those outstanding data. But if 2014 * we don't restart it, the timer will fire too early for the 2015 * just retransmitted chunks to the new fp. The reason is that we 2016 * don't keep a timestamp on when a chunk is retransmitted. 2017 * So when the timer fires, it will just search for the 2018 * chunk with the earliest TSN sent to new fp. This probably 2019 * is the chunk we just retransmitted. So for now, let's 2020 * be conservative and restart the timer of the new fp. 2021 */ 2022 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2023 2024 sctp->sctp_active = lbolt64; 2025 } 2026 2027 /* 2028 * This function is called by sctp_ss_rexmit() to create a packet 2029 * to be retransmitted to the given fp. The given meta and mp 2030 * parameters are respectively the sctp_msg_hdr_t and the mblk of the 2031 * first chunk to be retransmitted. This is also called when we want 2032 * to retransmit a zero window probe from sctp_rexmit() or when we 2033 * want to retransmit the zero window probe after the window has 2034 * opened from sctp_got_sack(). 
2035 */ 2036 mblk_t * 2037 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, 2038 uint_t *packet_len) 2039 { 2040 uint32_t seglen = 0; 2041 uint16_t chunklen; 2042 int extra; 2043 mblk_t *nmp; 2044 mblk_t *head; 2045 mblk_t *fill; 2046 sctp_data_hdr_t *sdc; 2047 sctp_msg_hdr_t *mhdr; 2048 2049 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2050 seglen = ntohs(sdc->sdh_len); 2051 chunklen = seglen - sizeof (*sdc); 2052 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 2053 extra = SCTP_ALIGN - extra; 2054 2055 nmp = dupmsg(*mp); 2056 if (nmp == NULL) 2057 return (NULL); 2058 if (extra > 0) { 2059 fill = sctp_get_padding(sctp, extra); 2060 if (fill != NULL) { 2061 linkb(nmp, fill); 2062 seglen += extra; 2063 } else { 2064 freemsg(nmp); 2065 return (NULL); 2066 } 2067 } 2068 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2069 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL); 2070 if (head == NULL) { 2071 freemsg(nmp); 2072 return (NULL); 2073 } 2074 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2075 /* 2076 * Don't update the TSN if we are doing a Zero Win Probe. 2077 */ 2078 if (!sctp->sctp_zero_win_probe) 2079 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2080 *mp = (*mp)->b_next; 2081 2082 try_bundle: 2083 while (seglen < fp->sfa_pmss) { 2084 int32_t new_len; 2085 2086 /* 2087 * Go through the list to find more chunks to be bundled. 2088 * We should only retransmit sent by unack'ed chunks. Since 2089 * they were sent before, the peer's receive window should 2090 * be able to receive them. 2091 */ 2092 while (*mp != NULL) { 2093 /* Check if the chunk can be bundled. */ 2094 if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp)) 2095 break; 2096 *mp = (*mp)->b_next; 2097 } 2098 /* Go to the next message. 
*/ 2099 if (*mp == NULL) { 2100 for (*meta = (*meta)->b_next; *meta != NULL; 2101 *meta = (*meta)->b_next) { 2102 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr; 2103 2104 if (SCTP_IS_MSG_ABANDONED(*meta) || 2105 SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr, 2106 sctp)) { 2107 continue; 2108 } 2109 2110 *mp = (*meta)->b_cont; 2111 goto try_bundle; 2112 } 2113 /* No more chunk to be bundled. */ 2114 break; 2115 } 2116 2117 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2118 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */ 2119 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn)) 2120 break; 2121 new_len = ntohs(sdc->sdh_len); 2122 chunklen = new_len - sizeof (*sdc); 2123 2124 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 2125 extra = SCTP_ALIGN - extra; 2126 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 2127 break; 2128 if ((nmp = dupmsg(*mp)) == NULL) 2129 break; 2130 2131 if (extra > 0) { 2132 fill = sctp_get_padding(sctp, extra); 2133 if (fill != NULL) { 2134 linkb(nmp, fill); 2135 } else { 2136 freemsg(nmp); 2137 break; 2138 } 2139 } 2140 linkb(head, nmp); 2141 2142 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2143 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2144 /* 2145 * Don't update the TSN if we are doing a Zero Win Probe. 2146 */ 2147 if (!sctp->sctp_zero_win_probe) 2148 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2149 2150 seglen = new_len; 2151 *mp = (*mp)->b_next; 2152 } 2153 *packet_len = seglen; 2154 fp->rxt_unacked += seglen; 2155 return (head); 2156 } 2157 2158 /* 2159 * sctp_ss_rexmit() is called when we get a SACK after a timeout which 2160 * advances the cum_tsn but the cum_tsn is still less than what we have sent 2161 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial" 2162 * SACK. We retransmit unacked chunks without having to wait for another 2163 * timeout. The rationale is that the SACK should not be "partial" if all the 2164 * lost chunks have been retransmitted. 
Since the SACK is "partial," 2165 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still 2166 * be missing. It is better for us to retransmit them now instead 2167 * of waiting for a timeout. 2168 */ 2169 void 2170 sctp_ss_rexmit(sctp_t *sctp) 2171 { 2172 mblk_t *meta; 2173 mblk_t *mp; 2174 mblk_t *pkt; 2175 sctp_faddr_t *fp; 2176 uint_t pkt_len; 2177 uint32_t tot_wnd; 2178 sctp_data_hdr_t *sdc; 2179 int burst; 2180 sctp_stack_t *sctps = sctp->sctp_sctps; 2181 2182 ASSERT(!sctp->sctp_zero_win_probe); 2183 2184 /* 2185 * If the last cum ack is smaller than what we have just 2186 * retransmitted, simply return. 2187 */ 2188 if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn)) 2189 sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1; 2190 else 2191 return; 2192 ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn)); 2193 2194 /* 2195 * After a timer fires, sctp_current should be set to the new 2196 * fp where the retransmitted chunks are sent. 2197 */ 2198 fp = sctp->sctp_current; 2199 2200 /* 2201 * Since we are retransmitting, we only need to use cwnd to determine 2202 * how much we can send as we were allowed (by peer's receive window) 2203 * to send those retransmitted chunks previously when they are first 2204 * sent. If we record how much we have retransmitted but 2205 * unacknowledged using rxt_unacked, then the amount we can now send 2206 * is equal to cwnd minus rxt_unacked. 2207 * 2208 * The field rxt_unacked is incremented when we retransmit a packet 2209 * and decremented when we got a SACK acknowledging something. And 2210 * it is reset when the retransmission timer fires as we assume that 2211 * all packets have left the network after a timeout. If this 2212 * assumption is not true, it means that after a timeout, we can 2213 * get a SACK acknowledging more than rxt_unacked (its value only 2214 * contains what is retransmitted when the timer fires). 
So 2215 * rxt_unacked will become very big (it is an unsiged int so going 2216 * negative means that the value is huge). This is the reason we 2217 * always send at least 1 MSS bytes. 2218 * 2219 * The reason why we do not have an accurate count is that we 2220 * only know how many packets are outstanding (using the TSN numbers). 2221 * But we do not know how many bytes those packets contain. To 2222 * have an accurate count, we need to walk through the send list. 2223 * As it is not really important to have an accurate count during 2224 * retransmission, we skip this walk to save some time. This should 2225 * not make the retransmission too aggressive to cause congestion. 2226 */ 2227 if (fp->cwnd <= fp->rxt_unacked) 2228 tot_wnd = fp->sfa_pmss; 2229 else 2230 tot_wnd = fp->cwnd - fp->rxt_unacked; 2231 2232 /* Find the first unack'ed chunk */ 2233 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 2234 sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 2235 2236 if (SCTP_IS_MSG_ABANDONED(meta) || 2237 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) { 2238 continue; 2239 } 2240 2241 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 2242 /* Again, this may not be possible */ 2243 if (!SCTP_CHUNK_ISSENT(mp)) 2244 return; 2245 sdc = (sctp_data_hdr_t *)mp->b_rptr; 2246 if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn) 2247 goto found_msg; 2248 } 2249 } 2250 2251 /* Everything is abandoned... */ 2252 return; 2253 2254 found_msg: 2255 if (!fp->timer_running) 2256 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2257 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 2258 if (pkt == NULL) { 2259 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 2260 return; 2261 } 2262 if ((pkt_len > fp->sfa_pmss) && fp->isv4) { 2263 ipha_t *iph = (ipha_t *)pkt->b_rptr; 2264 2265 /* 2266 * Path MTU is different from path we thought it would 2267 * be when we created chunks, or IP headers have grown. 2268 * Need to clear the DF bit. 
2269 */ 2270 iph->ipha_fragment_offset_and_flags = 0; 2271 } 2272 sctp_set_iplen(sctp, pkt); 2273 sctp_add_sendq(sctp, pkt); 2274 2275 /* Check and see if there is more chunk to be retransmitted. */ 2276 if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss || 2277 meta == NULL) 2278 return; 2279 if (mp == NULL) 2280 meta = meta->b_next; 2281 if (meta == NULL) 2282 return; 2283 2284 /* Retransmit another packet if the window allows. */ 2285 for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1; 2286 meta != NULL && burst > 0; meta = meta->b_next, burst--) { 2287 if (mp == NULL) 2288 mp = meta->b_cont; 2289 for (; mp != NULL; mp = mp->b_next) { 2290 /* Again, this may not be possible */ 2291 if (!SCTP_CHUNK_ISSENT(mp)) 2292 return; 2293 if (!SCTP_CHUNK_ISACKED(mp)) 2294 goto found_msg; 2295 } 2296 } 2297 } 2298