1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/systm.h> 29 #include <sys/stream.h> 30 #include <sys/cmn_err.h> 31 #define _SUN_TPI_VERSION 2 32 #include <sys/tihdr.h> 33 #include <sys/socket.h> 34 #include <sys/stropts.h> 35 #include <sys/strsun.h> 36 #include <sys/strsubr.h> 37 #include <sys/socketvar.h> 38 #include <inet/common.h> 39 #include <inet/mi.h> 40 #include <inet/ip.h> 41 #include <inet/ip6.h> 42 #include <inet/sctp_ip.h> 43 #include <inet/ipclassifier.h> 44 45 /* 46 * PR-SCTP comments. 47 * 48 * A message can expire before it gets to the transmit list (i.e. it is still 49 * in the unsent list - unchunked), after it gets to the transmit list, but 50 * before transmission has actually started, or after transmission has begun. 
51 * Accordingly, we check for the status of a message in sctp_chunkify() when 52 * the message is being transferred from the unsent list to the transmit list; 53 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit 54 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted. 55 * When we nuke a message in sctp_chunkify(), all we need to do is take it 56 * out of the unsent list and update sctp_unsent; when a message is deemed 57 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit 58 * list, update sctp_unsent IFF transmission for the message has not yet begun 59 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the 60 * message has started, then we cannot just take it out of the list, we need 61 * to send a Forward TSN chunk to the peer so that the peer can clear its 62 * fragment list for this message. However, we cannot just send the Forward 63 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for 64 * messages preceding this abandoned message. So, we send a Forward TSN 65 * IFF all messages prior to this abandoned message have been SACKed, if not 66 * we defer sending the Forward TSN to sctp_cumack(), which will check for 67 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In 68 * sctp_rexmit() when we check for retransmissions, we need to determine if 69 * the advanced peer ack point can be moved ahead, and if so, send a Forward 70 * TSN to the peer instead of retransmitting the chunk. Note that when 71 * we send a Forward TSN for a message, there may be yet unsent chunks for 72 * this message; we need to mark all such chunks as abandoned, so that 73 * sctp_cumack() can take the message out of the transmit list, additionally 74 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e. 
* decremented when a message/chunk is deemed abandoned), sockfs needs to
 * be notified so that it can adjust its idea of the queued message.
 */

#include "sctp_impl.h"

/*
 * NOTE(review): presumably the kmem cache used for PR-SCTP Forward TSN
 * sets; nothing in the visible part of this file references it - confirm.
 */
static struct kmem_cache *sctp_kmem_ftsn_set_cache;

#ifdef DEBUG
/* Defined further down; only called from inside ASSERT()s. */
static boolean_t sctp_verify_chain(mblk_t *, mblk_t *);
#endif

/*
 * Called to allocate a header mblk when sending data to SCTP.
 * Data will follow in b_cont of this mblk.
 *
 * The mblk is laid out as a T_UNITDATA_REQ: the T_unitdata_req structure,
 * then nlen bytes of destination address copied from name, then (at the
 * _TPI_ALIGN_TOPT()-aligned offset) clen bytes of ancillary data copied
 * from control.  The buffer is never smaller than a sctp_msg_hdr_t,
 * because sctp_sendmsg() later rewrites this same mblk as the per-message
 * header.
 *
 * If SCTP_CAN_BLOCK is set in flags the allocation is done with
 * allocb_wait(), otherwise with allocb().  Returns the M_PROTO mblk, or
 * NULL if the allocation failed.
 */
mblk_t *
sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
    int flags)
{
	mblk_t *mp;
	struct T_unitdata_req *tudr;
	size_t size;
	int error;

	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
	/* Leave room for sctp_sendmsg() to reuse the mblk as a msg header. */
	size = MAX(size, sizeof (sctp_msg_hdr_t));
	if (flags & SCTP_CAN_BLOCK) {
		mp = allocb_wait(size, BPRI_MED, 0, &error);
	} else {
		mp = allocb(size, BPRI_MED);
	}
	if (mp) {
		tudr = (struct T_unitdata_req *)mp->b_rptr;
		tudr->PRIM_type = T_UNITDATA_REQ;
		tudr->DEST_length = nlen;
		tudr->DEST_offset = sizeof (*tudr);
		tudr->OPT_length = clen;
		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
		    _TPI_ALIGN_TOPT(nlen));
		/* The address goes directly behind the fixed header. */
		if (nlen > 0)
			bcopy(name, tudr + 1, nlen);
		if (clen > 0)
			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
		mp->b_wptr += (tudr ->OPT_offset + clen);
		mp->b_datap->db_type = M_PROTO;
	}
	return (mp);
}

/*
 * Queue one user message on the association for transmission.
 *
 * mp is an M_PROTO T_UNITDATA_REQ header (such as the one built by
 * sctp_alloc_hdr()) with the user data, if any, chained on b_cont.  An
 * optional destination address in the header selects the peer address (it
 * must be one that sctp_lookup_faddr() finds), and an optional SCTP_SNDRCV
 * ancillary object overrides the association defaults for stream id,
 * flags, ppid, context and time-to-live.
 *
 * With MSG_ABORT set in the message flags the association is aborted via
 * sctp_user_abort() and the message is freed.  Otherwise the header mblk
 * is rewritten in place as a sctp_msg_hdr_t, appended to the unsent list
 * and, if the association is in SCTPS_ESTABLISHED, sctp_output() is
 * called.
 *
 * Returns 0 on success, and also when there is no data mblk at all (in
 * which case nothing is queued or freed).  Otherwise returns EINVAL (short
 * or unknown address, short SCTP_SNDRCV, bad stream id, zero-length data),
 * EAFNOSUPPORT, ENOMEM or EPIPE.  None of the error paths frees mp.
 * NOTE(review): presumably the caller keeps ownership of the message on
 * error - confirm against the caller.
 */
/*ARGSUSED2*/
int
sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
{
	sctp_faddr_t *fp = NULL;
	struct T_unitdata_req *tudr;
	int error = 0;
	mblk_t *mproto = mp;
	in6_addr_t *addr;
	in6_addr_t tmpaddr;
	uint16_t sid = sctp->sctp_def_stream;
	uint32_t ppid = sctp->sctp_def_ppid;
	uint32_t context = sctp->sctp_def_context;
	uint16_t msg_flags = sctp->sctp_def_flags;
	sctp_msg_hdr_t *sctp_msg_hdr;
	uint32_t msg_len = 0;
	uint32_t timetolive = sctp->sctp_def_timetolive;

	ASSERT(DB_TYPE(mproto) == M_PROTO);

	/* From here on mp is the data chain; mproto keeps the header. */
	mp = mp->b_cont;
	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);

	tudr = (struct T_unitdata_req *)mproto->b_rptr;
	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);

	/* Get destination address, if specified */
	if (tudr->DEST_length > 0) {
		sin_t *sin;
		sin6_t *sin6;

		sin = (struct sockaddr_in *)
		    (mproto->b_rptr + tudr->DEST_offset);
		switch (sin->sin_family) {
		case AF_INET:
			if (tudr->DEST_length < sizeof (*sin)) {
				return (EINVAL);
			}
			/* Peer addresses are looked up in v4-mapped form. */
			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
			addr = &tmpaddr;
			break;
		case AF_INET6:
			if (tudr->DEST_length < sizeof (*sin6)) {
				return (EINVAL);
			}
			sin6 = (struct sockaddr_in6 *)
			    (mproto->b_rptr + tudr->DEST_offset);
			addr = &sin6->sin6_addr;
			break;
		default:
			return (EAFNOSUPPORT);
		}
		fp = sctp_lookup_faddr(sctp, addr);
		if (fp == NULL) {
			return (EINVAL);
		}
	}
	/* Ancillary Data? */
	if (tudr->OPT_length > 0) {
		struct cmsghdr *cmsg;
		char *cend;
		struct sctp_sndrcvinfo *sndrcv;

		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
		cend = ((char *)cmsg + tudr->OPT_length);
		ASSERT(cend <= (char *)mproto->b_wptr);

		/*
		 * Walk the control objects until the first SCTP_SNDRCV one is
		 * found, or until the next header or its payload would run
		 * past the end of the option area.
		 */
		for (;;) {
			if ((char *)(cmsg + 1) > cend ||
			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
				break;
			}
			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
				if (cmsg->cmsg_len <
				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
					return (EINVAL);
				}
				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
				sid = sndrcv->sinfo_stream;
				msg_flags = sndrcv->sinfo_flags;
				ppid = sndrcv->sinfo_ppid;
				context = sndrcv->sinfo_context;
				timetolive = sndrcv->sinfo_timetolive;
				break;
			}
			/* A zero cmsg_len would never advance; stop there. */
			if (cmsg->cmsg_len > 0)
				cmsg = CMSG_NEXT(cmsg);
			else
				break;
		}
	}
	if (msg_flags & MSG_ABORT) {
		/*
		 * sctp_user_abort() is handed a single mblk, so a data chain
		 * of more than one mblk is first copied into one with
		 * msgpullup().
		 */
		if (mp && mp->b_cont) {
			mblk_t *pump = msgpullup(mp, -1);
			if (!pump) {
				return (ENOMEM);
			}
freemsg(mp);
			mp = pump;
			mproto->b_cont = mp;
		}
		RUN_SCTP(sctp);
		sctp_user_abort(sctp, mp);
		/* Frees the header and, through b_cont, the data as well. */
		freemsg(mproto);
		goto process_sendq;
	}
	/* Nothing to send: return with error still 0. */
	if (mp == NULL)
		goto done;

	RUN_SCTP(sctp);

	/* Reject any new data requests if we are shutting down */
	if (sctp->sctp_state > SCTPS_ESTABLISHED ||
	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
		error = EPIPE;
		goto unlock_done;
	}

	/* Re-use the mproto to store relevant info. */
	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));

	mproto->b_rptr = mproto->b_datap->db_base;
	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);

	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
	sctp_msg_hdr->smh_context = context;
	sctp_msg_hdr->smh_sid = sid;
	sctp_msg_hdr->smh_ppid = ppid;
	sctp_msg_hdr->smh_flags = msg_flags;
	/* timetolive is converted from milliseconds to clock ticks. */
	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
	sctp_msg_hdr->smh_tob = lbolt64;
	/* This walk leaves mp == NULL; the data stays on mproto->b_cont. */
	for (; mp != NULL; mp = mp->b_cont)
		msg_len += MBLKL(mp);
	sctp_msg_hdr->smh_msglen = msg_len;

	/* User requested specific destination */
	SCTP_SET_CHUNK_DEST(mproto, fp);

	/* The stream id is only checked from SCTPS_COOKIE_ECHOED onwards. */
	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
	    sid >= sctp->sctp_num_ostr) {
		/* Send sendfail event */
		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
		    B_FALSE);
		error = EINVAL;
		goto unlock_done;
	}

	/* no data */
	if (msg_len == 0) {
		sctp_sendfail_event(sctp, dupmsg(mproto),
		    SCTP_ERR_NO_USR_DATA, B_FALSE);
		error = EINVAL;
		goto unlock_done;
	}

	/* Add it to the unsent list */
	if (sctp->sctp_xmit_unsent == NULL) {
		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
	} else {
		sctp->sctp_xmit_unsent_tail->b_next = mproto;
		sctp->sctp_xmit_unsent_tail = mproto;
	}
	sctp->sctp_unsent += msg_len;
	BUMP_LOCAL(sctp->sctp_msgcount);
	/*
	 * Notify sockfs if the tx queue is full.
	 */
	if (SCTP_TXQ_LEN(sctp) >= sctp->sctp_xmit_hiwater) {
		sctp->sctp_txq_full = 1;
		sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, B_TRUE);
	}
	/* In any other state the message just stays on the unsent list. */
	if (sctp->sctp_state == SCTPS_ESTABLISHED)
		sctp_output(sctp, UINT_MAX);
process_sendq:
	WAKE_SCTP(sctp);
	sctp_process_sendq(sctp);
	return (0);
unlock_done:
	WAKE_SCTP(sctp);
done:
	return (error);
}

/*
 * Break messages on the unsent list into DATA chunks and move them to the
 * transmit list.
 *
 * Starting with sctp_xmit_unsent (which must be non-NULL, it is
 * dereferenced unconditionally), each message's data is cut into pieces;
 * every piece gets a sctp_data_hdr_t in front of it and the pieces are
 * linked through b_next off the message header's b_cont.  Unless the
 * message is already partially chunked, the first chunk built is limited
 * to first_len - sizeof (sctp_data_hdr_t) bytes of data; later chunks use
 * the destination's sfa_pmss - sizeof (sctp_data_hdr_t).  Processing
 * continues with the next unsent message while bytes_to_send is still
 * positive.
 *
 * A message that SCTP_MSG_TO_BE_ABANDONED() reports as expired is dropped
 * from the unsent list and reported through sctp_sendfail_event() instead.
 * If dupb()/allocb() fails part way through a message, the data not yet
 * chunked is put back at the front of b_cont, the message is flagged with
 * SCTP_MSG_SET_CHUNKED() when some chunks already exist, and the function
 * returns; a later call resumes from there.
 */
void
sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send)
{
	mblk_t *mp;
	mblk_t *chunk_mp;
	mblk_t *chunk_head;
	mblk_t *chunk_hdr;
	mblk_t *chunk_tail = NULL;
	int count;
	int chunksize;
	sctp_data_hdr_t *sdc;
	mblk_t *mdblk = sctp->sctp_xmit_unsent;
	sctp_faddr_t *fp;
	sctp_faddr_t *fp1;
	size_t xtralen;
	sctp_msg_hdr_t *msg_hdr;
	sctp_stack_t *sctps = sctp->sctp_sctps;

	fp = SCTP_CHUNK_DEST(mdblk);
	if (fp == NULL)
		fp = sctp->sctp_current;
	/* xtralen: size of a separately allocated chunk-header mblk. */
	if (fp->isv4)
		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
		    sizeof (*sdc);
	else
		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
		    sizeof (*sdc);
	/*
	 * chunksize is the data capacity of the chunk being built and
	 * count is the part of it that is still unused.
	 */
	count = chunksize = first_len - sizeof (*sdc);
nextmsg:
	chunk_mp = mdblk->b_cont;

	/*
	 * If this message is partially chunked, we ignore the first_len for
	 * now and use the one already present. For the unchunked bits, we
	 * use the length of the last chunk.
*/
	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
		int chunk_len;

		/*
		 * An earlier pass gave up after building some chunks and
		 * parked the unchunked data in front of them (see the
		 * allocation failure paths below).  Detach that data again
		 * and size the next chunk from the last finished one.
		 */
		ASSERT(chunk_mp->b_next != NULL);
		mdblk->b_cont = chunk_mp->b_next;
		chunk_mp->b_next = NULL;
		SCTP_MSG_CLEAR_CHUNKED(mdblk);
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
			count = chunksize = fp->sfa_pmss - chunk_len;
		else
			count = chunksize = fp->sfa_pmss;
		count = chunksize = count - sizeof (*sdc);
	} else {
		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
			/* Expired before chunking: unlink, account, report. */
			sctp->sctp_xmit_unsent = mdblk->b_next;
			if (sctp->sctp_xmit_unsent == NULL)
				sctp->sctp_xmit_unsent_tail = NULL;
			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
			sctp->sctp_unsent -= msg_hdr->smh_msglen;
			mdblk->b_next = NULL;
			BUMP_LOCAL(sctp->sctp_prsctpdrop);
			/*
			 * Update ULP the amount of queued data, which is
			 * sent-unack'ed + unsent.
			 */
			if (!SCTP_IS_DETACHED(sctp))
				SCTP_TXQ_UPDATE(sctp);
			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
			goto try_next;
		}
		/* b_cont is rebuilt below as the b_next list of chunks. */
		mdblk->b_cont = NULL;
	}
	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
nextchunk:
	chunk_head = chunk_mp;
	chunk_tail = NULL;

	/* Skip as many mblk's as we need */
	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
		count -= MBLKL(chunk_mp);
		chunk_tail = chunk_mp;
		chunk_mp = chunk_mp->b_cont;
	}
	/* Split the chain, if needed */
	if (chunk_mp != NULL) {
		if (count > 0) {
			/*
			 * chunk_mp straddles the chunk boundary.  dupb() gives
			 * a second mblk on the same data; the copy is trimmed
			 * to the first count bytes and chunk_mp is advanced
			 * past them.
			 */
			mblk_t *split_mp = dupb(chunk_mp);

			if (split_mp == NULL) {
				/* Put the unchunked data back and give up. */
				if (mdblk->b_cont == NULL) {
					mdblk->b_cont = chunk_head;
				} else {
					SCTP_MSG_SET_CHUNKED(mdblk);
					ASSERT(chunk_head->b_next == NULL);
					chunk_head->b_next = mdblk->b_cont;
					mdblk->b_cont = chunk_head;
				}
				return;
			}
			if (chunk_tail != NULL) {
				chunk_tail->b_cont = split_mp;
				chunk_tail = chunk_tail->b_cont;
			} else {
				chunk_head = chunk_tail = split_mp;
			}
			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
			chunk_mp->b_rptr = chunk_tail->b_wptr;
			count = 0;
		} else if (chunk_tail == NULL) {
			/* No room at all in this chunk; start a fresh one. */
			goto next;
		} else {
			/* The boundary falls exactly between two mblks. */
			chunk_tail->b_cont = NULL;
		}
	}
	/* Alloc chunk hdr, if needed */
	/*
	 * The data header is written in place only when the first mblk is
	 * unshared, SCTP_ALIGN-aligned and has sizeof (*sdc) of headroom.
	 */
	if (DB_REF(chunk_head) > 1 ||
	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/* Re-join what was split off and park it again. */
			if (mdblk->b_cont == NULL) {
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				mdblk->b_cont = chunk_head;
			} else {
				SCTP_MSG_SET_CHUNKED(mdblk);
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				ASSERT(chunk_head->b_next == NULL);
				chunk_head->b_next = mdblk->b_cont;
				mdblk->b_cont = chunk_head;
			}
			return;
		}
		/* Place the data header at the very end of the new mblk. */
		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
		chunk_hdr->b_cont = chunk_head;
	} else {
		chunk_hdr = chunk_head;
		chunk_hdr->b_rptr -= sizeof (*sdc);
	}
	ASSERT(chunk_hdr->b_datap->db_ref == 1);
	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
	sdc->sdh_id = CHUNK_DATA;
	sdc->sdh_flags = 0;
	/* chunksize - count is the amount of data actually taken. */
	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
	ASSERT(sdc->sdh_len);
	sdc->sdh_sid = htons(msg_hdr->smh_sid);
	/*
	 * We defer assigning the SSN just before sending the chunk, else
	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
	 * to send a Forward TSN to let the peer know. Some more comments
	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
	 */
	sdc->sdh_payload_id = msg_hdr->smh_ppid;

	if (mdblk->b_cont == NULL) {
		/* First chunk of the message carries the B bit. */
		mdblk->b_cont = chunk_hdr;
		SCTP_DATA_SET_BBIT(sdc);
	} else {
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = chunk_hdr;
	}

	bytes_to_send -= (chunksize - count);
	if (chunk_mp != NULL) {
next:
		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
		goto nextchunk;
	}
	/* No data left: this was the last chunk of the message. */
	SCTP_DATA_SET_EBIT(sdc);
	sctp->sctp_xmit_unsent = mdblk->b_next;
	if (mdblk->b_next == NULL) {
		sctp->sctp_xmit_unsent_tail = NULL;
	}
	mdblk->b_next = NULL;

	/*
	 * Append the message to the transmit list.  sctp_xmit_tail is not
	 * advanced here, so the real end is found by walking b_next.
	 */
	if (sctp->sctp_xmit_tail == NULL) {
		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
	} else {
		mp = sctp->sctp_xmit_tail;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = mdblk;
		mdblk->b_prev = mp;
	}
try_next:
	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
		mdblk = sctp->sctp_xmit_unsent;
		fp1 = SCTP_CHUNK_DEST(mdblk);
		if (fp1 == NULL)
			fp1 = sctp->sctp_current;
		if (fp == fp1) {
			size_t len = MBLKL(mdblk->b_cont);
			/*
			 * Same destination: the space left over in the last
			 * chunk (less a data header, rounded down to a
			 * multiple of 4) may be used for the first chunk of
			 * the next message; otherwise start with a full
			 * sized chunk.
			 */
			if ((count > 0) &&
			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
			    (len <= count))) {
				count -= sizeof (*sdc);
				count = chunksize = count - (count & 0x3);
			} else {
				count = chunksize = fp->sfa_pmss -
				    sizeof (*sdc);
			}
		} else {
			/* New destination: recompute header room and size. */
			if (fp1->isv4)
				xtralen = sctp->sctp_hdr_len;
			else
				xtralen = sctp->sctp_hdr6_len;
			xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
			fp = fp1;
		}
		goto nextmsg;
	}
}

/*
 * Free a whole message: every chunk hanging off ump->b_cont (the chunks
 * are linked to each other through b_next) and then the message header
 * mblk ump itself.  ump must not have a b_prev link any more.
 */
void
sctp_free_msg(mblk_t *ump)
{
	mblk_t *mp, *nmp;

	for (mp = ump->b_cont; mp; mp = nmp) {
		nmp = mp->b_next;
		mp->b_next = mp->b_prev = NULL;
		freemsg(mp);
	}
	ASSERT(!ump->b_prev);
	ump->b_next = NULL;
	freeb(ump);
}

/*
 * Put the association's header template (sctp_iphc for an IPv4 peer
 * address, sctp_iphc6 otherwise) in front of the chunk chain mp, followed
 * by sacklen bytes that sctp_fill_sack() fills in when sacklen is
 * non-zero.  If fp is not sctp_current the addresses in the copied header
 * are rewritten from fp.
 *
 * The header is written into mp's own headroom when there is enough of
 * it, the data block is referenced at most twice and its base is suitably
 * aligned; otherwise a new mblk is allocated and linked in front of mp.
 *
 * Returns the mblk that now heads the packet, or NULL on failure, in
 * which case *error (if error is non-NULL) is EHOSTUNREACH or ENOMEM.
 * mp is not freed on failure.
 */
mblk_t *
sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
    int *error)
{
	int hdrlen;
	char *hdr;
	int isv4 = fp->isv4;
	sctp_stack_t *sctps = sctp->sctp_sctps;

	if (error != NULL)
		*error = 0;

	if (isv4) {
		hdrlen = sctp->sctp_hdr_len;
		hdr = sctp->sctp_iphc;
	} else {
		hdrlen = sctp->sctp_hdr6_len;
		hdr = sctp->sctp_iphc6;
	}
	/*
	 * A null fp->ire could mean that the address is 'down'. Similarly,
	 * it is possible that the address went down, we tried to send a
	 * heartbeat and ended up setting fp->saddr as unspec because we
	 * didn't have any usable source address.  In either case
	 * sctp_get_ire() will try to find an IRE, if available, and set
	 * the source address, if needed.  If we still don't have any
	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
	 * we return EHOSTUNREACH.
	 */
	if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
		sctp_get_ire(sctp, fp);
		if (fp->state == SCTP_FADDRS_UNREACH) {
			if (error != NULL)
				*error = EHOSTUNREACH;
			return (NULL);
		}
	}
	/* Copy in IP header. */
	if ((mp->b_rptr - mp->b_datap->db_base) <
	    (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 ||
	    !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) {
		mblk_t *nmp;

		/*
		 * This can happen if IP headers are adjusted after
		 * data was moved into chunks, or during retransmission,
		 * or when things like snoop are running.
		 */
		nmp = allocb_cred(sctps->sctps_wroff_xtra + hdrlen + sacklen,
		    CONN_CRED(sctp->sctp_connp), sctp->sctp_cpid);
		if (nmp == NULL) {
			if (error != NULL)
				*error = ENOMEM;
			return (NULL);
		}
		nmp->b_rptr += sctps->sctps_wroff_xtra;
		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
		nmp->b_cont = mp;
		mp = nmp;
	} else {
		mp->b_rptr -= (hdrlen + sacklen);
		mblk_setcred(mp, CONN_CRED(sctp->sctp_connp), sctp->sctp_cpid);
	}
	bcopy(hdr, mp->b_rptr, hdrlen);
	if (sacklen) {
		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
	}
	/* The template is not used as is for any other peer address. */
	if (fp != sctp->sctp_current) {
		/* change addresses in header */
		if (isv4) {
			ipha_t *iph = (ipha_t *)mp->b_rptr;

			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
				    iph->ipha_src);
			} else if (sctp->sctp_bound_to_all) {
				iph->ipha_src = INADDR_ANY;
			}
		} else {
			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
			} else if (sctp->sctp_bound_to_all) {
				V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src);
			}
		}
	}
	/*
	 * IP will not free this IRE if it is condemned.  SCTP needs to
	 * free it.
634 */ 635 if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) { 636 IRE_REFRELE_NOTR(fp->ire); 637 fp->ire = NULL; 638 } 639 640 /* Stash the conn and ire ptr info for IP */ 641 SCTP_STASH_IPINFO(mp, fp->ire); 642 643 return (mp); 644 } 645 646 /* 647 * SCTP requires every chunk to be padded so that the total length 648 * is a multiple of SCTP_ALIGN. This function returns a mblk with 649 * the specified pad length. 650 */ 651 static mblk_t * 652 sctp_get_padding(sctp_t *sctp, int pad) 653 { 654 mblk_t *fill; 655 656 ASSERT(pad < SCTP_ALIGN); 657 ASSERT(sctp->sctp_pad_mp != NULL); 658 if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) { 659 fill->b_wptr += pad; 660 return (fill); 661 } 662 663 /* 664 * The memory saving path of reusing the sctp_pad_mp 665 * fails may be because it has been dupb() too 666 * many times (DBLK_REFMAX). Use the memory consuming 667 * path of allocating the pad mblk. 668 */ 669 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) { 670 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */ 671 *(int32_t *)fill->b_rptr = 0; 672 fill->b_wptr += pad; 673 } 674 return (fill); 675 } 676 677 static mblk_t * 678 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) 679 { 680 mblk_t *meta; 681 mblk_t *start_mp = NULL; 682 mblk_t *end_mp = NULL; 683 mblk_t *mp, *nmp; 684 mblk_t *fill; 685 sctp_data_hdr_t *sdh; 686 int msglen; 687 int extra; 688 sctp_msg_hdr_t *msg_hdr; 689 sctp_faddr_t *old_fp = NULL; 690 sctp_faddr_t *chunk_fp; 691 sctp_stack_t *sctps = sctp->sctp_sctps; 692 693 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 694 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 695 if (SCTP_IS_MSG_ABANDONED(meta) || 696 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 697 continue; 698 } 699 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 700 if (SCTP_CHUNK_WANT_REXMIT(mp)) { 701 /* 702 * Use the same peer address to do fast 703 * retransmission. 
If the original peer 704 * address is dead, switch to the current 705 * one. Record the old one so that we 706 * will pick the chunks sent to the old 707 * one for fast retransmission. 708 */ 709 chunk_fp = SCTP_CHUNK_DEST(mp); 710 if (*fp == NULL) { 711 *fp = chunk_fp; 712 if ((*fp)->state != SCTP_FADDRS_ALIVE) { 713 old_fp = *fp; 714 *fp = sctp->sctp_current; 715 } 716 } else if (old_fp == NULL && *fp != chunk_fp) { 717 continue; 718 } else if (old_fp != NULL && 719 old_fp != chunk_fp) { 720 continue; 721 } 722 723 sdh = (sctp_data_hdr_t *)mp->b_rptr; 724 msglen = ntohs(sdh->sdh_len); 725 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) { 726 extra = SCTP_ALIGN - extra; 727 } 728 729 /* 730 * We still return at least the first message 731 * even if that message cannot fit in as 732 * PMTU may have changed. 733 */ 734 if (*total + msglen + extra > 735 (*fp)->sfa_pmss && start_mp != NULL) { 736 return (start_mp); 737 } 738 if ((nmp = dupmsg(mp)) == NULL) 739 return (start_mp); 740 if (extra > 0) { 741 fill = sctp_get_padding(sctp, extra); 742 if (fill != NULL) { 743 linkb(nmp, fill); 744 } else { 745 return (start_mp); 746 } 747 } 748 BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans); 749 BUMP_LOCAL(sctp->sctp_rxtchunks); 750 SCTP_CHUNK_CLEAR_REXMIT(mp); 751 if (start_mp == NULL) { 752 start_mp = nmp; 753 } else { 754 linkb(end_mp, nmp); 755 } 756 end_mp = nmp; 757 *total += msglen + extra; 758 dprint(2, ("sctp_find_fast_rexmit_mblks: " 759 "tsn %x\n", sdh->sdh_tsn)); 760 } 761 } 762 } 763 /* Clear the flag as there is no more message to be fast rexmitted. 
*/ 764 sctp->sctp_chk_fast_rexmit = B_FALSE; 765 return (start_mp); 766 } 767 768 /* A debug function just to make sure that a mblk chain is not broken */ 769 #ifdef DEBUG 770 static boolean_t 771 sctp_verify_chain(mblk_t *head, mblk_t *tail) 772 { 773 mblk_t *mp = head; 774 775 if (head == NULL || tail == NULL) 776 return (B_TRUE); 777 while (mp != NULL) { 778 if (mp == tail) 779 return (B_TRUE); 780 mp = mp->b_next; 781 } 782 return (B_FALSE); 783 } 784 #endif 785 786 /* 787 * Gets the next unsent chunk to transmit. Messages that are abandoned are 788 * skipped. A message can be abandoned if it has a non-zero timetolive and 789 * transmission has not yet started or if it is a partially reliable 790 * message and its time is up (assuming we are PR-SCTP aware). 791 * 'cansend' is used to determine if need to try and chunkify messages from 792 * the unsent list, if any, and also as an input to sctp_chunkify() if so. 793 * 794 * firstseg indicates the space already used, cansend represents remaining 795 * space in the window, ((sfa_pmss - firstseg) can therefore reasonably 796 * be used to compute the cansend arg). 
*/
mblk_t *
sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error,
    int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp)
{
	mblk_t *mp1;
	sctp_msg_hdr_t *msg_hdr;
	mblk_t *tmp_meta;
	sctp_faddr_t *fp1;

	ASSERT(error != NULL && mp != NULL);
	*error = 0;

	ASSERT(sctp->sctp_current != NULL);

chunkified:
	while (meta != NULL) {
		/* Saved up front: meta may be unlinked further down. */
		tmp_meta = meta->b_next;
		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
		mp1 = meta->b_cont;
		if (SCTP_IS_MSG_ABANDONED(meta))
			goto next_msg;
		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			/* Live message: hand back its first sendable chunk. */
			while (mp1 != NULL) {
				if (SCTP_CHUNK_CANSEND(mp1)) {
					*mp = mp1;
#ifdef DEBUG
					ASSERT(sctp_verify_chain(
					    sctp->sctp_xmit_head, meta));
#endif
					return (meta);
				}
				mp1 = mp1->b_next;
			}
			goto next_msg;
		}
		/*
		 * If we come here and the first chunk is sent, then we
		 * are PR-SCTP aware, in which case if the cumulative
		 * TSN has moved up to or beyond the first chunk (which
		 * means all the previous messages have been cumulative
		 * SACK'd), then we send a Forward TSN with the last
		 * chunk that was sent in this message. If we can't send
		 * a Forward TSN because previous non-abandoned messages
		 * have not been acked then we will defer the Forward TSN
		 * to sctp_rexmit() or sctp_cumack().
		 */
		if (SCTP_CHUNK_ISSENT(mp1)) {
			*error = sctp_check_abandoned_msg(sctp, meta);
			if (*error != 0) {
#ifdef DEBUG
				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
				    sctp->sctp_xmit_tail));
#endif
				return (NULL);
			}
			goto next_msg;
		}
		/*
		 * Nothing of this message has been sent: take it off the
		 * transmit list (head, tail and middle cases), fix up
		 * sctp_xmit_tail if it pointed here, and report the failure.
		 */
		BUMP_LOCAL(sctp->sctp_prsctpdrop);
		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
		if (meta->b_prev == NULL) {
			ASSERT(sctp->sctp_xmit_head == meta);
			sctp->sctp_xmit_head = tmp_meta;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_next = NULL;
			if (tmp_meta != NULL)
				tmp_meta->b_prev = NULL;
		} else if (meta->b_next == NULL) {
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = meta->b_prev;
			meta->b_prev->b_next = NULL;
			meta->b_prev = NULL;
		} else {
			meta->b_prev->b_next = tmp_meta;
			tmp_meta->b_prev = meta->b_prev;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_prev = NULL;
			meta->b_next = NULL;
		}
		sctp->sctp_unsent -= msg_hdr->smh_msglen;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);
		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
next_msg:
		meta = tmp_meta;
	}
	/* chunkify, if needed */
	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
		ASSERT(sctp->sctp_unsent > 0);
		if (fp == NULL) {
			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
				fp = sctp->sctp_current;
		} else {
			/*
			 * If user specified destination, try to honor that.
			 */
			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
			    fp1 != fp) {
				goto chunk_done;
			}
		}
		sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend);
		if ((meta = sctp->sctp_xmit_tail) == NULL)
			goto chunk_done;
		/*
		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
		 * new chunk(s) to the tail, so we need to skip the
		 * sctp_xmit_tail, which would have already been processed.
		 * This could happen when there are unacked chunks, but
		 * nothing new to send.
		 * When sctp_chunkify() is called when the transmit queue
		 * is empty then we need to start from sctp_xmit_tail.
		 */
		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
#ifdef DEBUG
			mp1 = sctp->sctp_xmit_tail->b_cont;
			while (mp1 != NULL) {
				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
				mp1 = mp1->b_next;
			}
#endif
			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
				goto chunk_done;
		}
		/* Rescan, now starting at the freshly chunked message(s). */
		goto chunkified;
	}
chunk_done:
#ifdef DEBUG
	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
#endif
	return (NULL);
}

/*
 * Fast retransmit: gather copies of the chunks flagged for retransmission
 * with sctp_find_fast_rexmit_mblks(), put the protocol headers in front of
 * them and queue the packet with sctp_add_sendq().  Nothing is sent (only a
 * kstat is bumped) if no chunk is found or the header cannot be added.
 * For an IPv4 destination, if the gathered data exceeds the path's
 * sfa_pmss, the IP header's fragment offset and flags field is zeroed.
 * NOTE(review): presumably this clears the don't-fragment setting so that
 * IP may fragment the oversized packet - confirm.
 */
void
sctp_fast_rexmit(sctp_t *sctp)
{
	mblk_t *mp, *head;
	int pktlen = 0;
	sctp_faddr_t *fp = NULL;
	sctp_stack_t *sctps = sctp->sctp_sctps;

	ASSERT(sctp->sctp_xmit_head != NULL);
	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
	if (mp == NULL) {
		SCTP_KSTAT(sctps, sctp_fr_not_found);
		return;
	}
	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
		/* sctp_add_proto_hdr() does not free mp on failure. */
		freemsg(mp);
		SCTP_KSTAT(sctps, sctp_fr_add_hdr);
		return;
	}
	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
		ipha_t *iph = (ipha_t *)head->b_rptr;

		iph->ipha_fragment_offset_and_flags = 0;
	}

	sctp_set_iplen(sctp, head);
	sctp_add_sendq(sctp, head);
	sctp->sctp_active = fp->lastactive = lbolt64;
}

void
sctp_output(sctp_t *sctp, uint_t num_pkt)
{
971 mblk_t *mp = NULL; 972 mblk_t *nmp; 973 mblk_t *head; 974 mblk_t *meta = sctp->sctp_xmit_tail; 975 mblk_t *fill = NULL; 976 uint16_t chunklen; 977 uint32_t cansend; 978 int32_t seglen; 979 int32_t xtralen; 980 int32_t sacklen; 981 int32_t pad = 0; 982 int32_t pathmax; 983 int extra; 984 int64_t now = lbolt64; 985 sctp_faddr_t *fp; 986 sctp_faddr_t *lfp; 987 sctp_data_hdr_t *sdc; 988 int error; 989 boolean_t notsent = B_TRUE; 990 sctp_stack_t *sctps = sctp->sctp_sctps; 991 992 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 993 sacklen = 0; 994 } else { 995 /* send a SACK chunk */ 996 sacklen = sizeof (sctp_chunk_hdr_t) + 997 sizeof (sctp_sack_chunk_t) + 998 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 999 lfp = sctp->sctp_lastdata; 1000 ASSERT(lfp != NULL); 1001 if (lfp->state != SCTP_FADDRS_ALIVE) 1002 lfp = sctp->sctp_current; 1003 } 1004 1005 cansend = sctp->sctp_frwnd; 1006 if (sctp->sctp_unsent < cansend) 1007 cansend = sctp->sctp_unsent; 1008 1009 /* 1010 * Start persist timer if unable to send or when 1011 * trying to send into a zero window. This timer 1012 * ensures the blocked send attempt is retried. 1013 */ 1014 if ((cansend < sctp->sctp_current->sfa_pmss / 2) && 1015 (sctp->sctp_unacked != 0) && 1016 (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) && 1017 !sctp->sctp_ndelay || 1018 (cansend == 0 && sctp->sctp_unacked == 0 && 1019 sctp->sctp_unsent != 0)) { 1020 head = NULL; 1021 fp = sctp->sctp_current; 1022 goto unsent_data; 1023 } 1024 if (meta != NULL) 1025 mp = meta->b_cont; 1026 while (cansend > 0 && num_pkt-- != 0) { 1027 pad = 0; 1028 1029 /* 1030 * Find first segment eligible for transmit. 1031 */ 1032 while (mp != NULL) { 1033 if (SCTP_CHUNK_CANSEND(mp)) 1034 break; 1035 mp = mp->b_next; 1036 } 1037 if (mp == NULL) { 1038 meta = sctp_get_msg_to_send(sctp, &mp, 1039 meta == NULL ? 
NULL : meta->b_next, &error, sacklen, 1040 cansend, NULL); 1041 if (error != 0 || meta == NULL) { 1042 head = NULL; 1043 fp = sctp->sctp_current; 1044 goto unsent_data; 1045 } 1046 sctp->sctp_xmit_tail = meta; 1047 } 1048 1049 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1050 seglen = ntohs(sdc->sdh_len); 1051 xtralen = sizeof (*sdc); 1052 chunklen = seglen - xtralen; 1053 1054 /* 1055 * Check rwnd. 1056 */ 1057 if (chunklen > cansend) { 1058 head = NULL; 1059 fp = SCTP_CHUNK_DEST(meta); 1060 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1061 fp = sctp->sctp_current; 1062 goto unsent_data; 1063 } 1064 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1065 extra = SCTP_ALIGN - extra; 1066 1067 /* 1068 * Pick destination address, and check cwnd. 1069 */ 1070 if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) && 1071 (seglen + sacklen + extra <= lfp->sfa_pmss)) { 1072 /* 1073 * Only include SACK chunk if it can be bundled 1074 * with a data chunk, and sent to sctp_lastdata. 1075 */ 1076 pathmax = lfp->cwnd - lfp->suna; 1077 1078 fp = lfp; 1079 if ((nmp = dupmsg(mp)) == NULL) { 1080 head = NULL; 1081 goto unsent_data; 1082 } 1083 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1084 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, 1085 &error); 1086 if (head == NULL) { 1087 /* 1088 * If none of the source addresses are 1089 * available (i.e error == EHOSTUNREACH), 1090 * pretend we have sent the data. We will 1091 * eventually time out trying to retramsmit 1092 * the data if the interface never comes up. 1093 * If we have already sent some stuff (i.e., 1094 * notsent is B_FALSE) then we are fine, else 1095 * just mark this packet as sent. 
1096 */ 1097 if (notsent && error == EHOSTUNREACH) { 1098 SCTP_CHUNK_SENT(sctp, mp, sdc, 1099 fp, chunklen, meta); 1100 } 1101 freemsg(nmp); 1102 SCTP_KSTAT(sctps, sctp_output_failed); 1103 goto unsent_data; 1104 } 1105 seglen += sacklen; 1106 xtralen += sacklen; 1107 sacklen = 0; 1108 } else { 1109 fp = SCTP_CHUNK_DEST(meta); 1110 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1111 fp = sctp->sctp_current; 1112 /* 1113 * If we haven't sent data to this destination for 1114 * a while, do slow start again. 1115 */ 1116 if (now - fp->lastactive > fp->rto) { 1117 SET_CWND(fp, fp->sfa_pmss, 1118 sctps->sctps_slow_start_after_idle); 1119 } 1120 1121 pathmax = fp->cwnd - fp->suna; 1122 if (seglen + extra > pathmax) { 1123 head = NULL; 1124 goto unsent_data; 1125 } 1126 if ((nmp = dupmsg(mp)) == NULL) { 1127 head = NULL; 1128 goto unsent_data; 1129 } 1130 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1131 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error); 1132 if (head == NULL) { 1133 /* 1134 * If none of the source addresses are 1135 * available (i.e error == EHOSTUNREACH), 1136 * pretend we have sent the data. We will 1137 * eventually time out trying to retramsmit 1138 * the data if the interface never comes up. 1139 * If we have already sent some stuff (i.e., 1140 * notsent is B_FALSE) then we are fine, else 1141 * just mark this packet as sent. 1142 */ 1143 if (notsent && error == EHOSTUNREACH) { 1144 SCTP_CHUNK_SENT(sctp, mp, sdc, 1145 fp, chunklen, meta); 1146 } 1147 freemsg(nmp); 1148 SCTP_KSTAT(sctps, sctp_output_failed); 1149 goto unsent_data; 1150 } 1151 } 1152 fp->lastactive = now; 1153 if (pathmax > fp->sfa_pmss) 1154 pathmax = fp->sfa_pmss; 1155 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1156 mp = mp->b_next; 1157 1158 /* Use this chunk to measure RTT? 
*/ 1159 if (sctp->sctp_out_time == 0) { 1160 sctp->sctp_out_time = now; 1161 sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1; 1162 ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); 1163 } 1164 if (extra > 0) { 1165 fill = sctp_get_padding(sctp, extra); 1166 if (fill != NULL) { 1167 linkb(head, fill); 1168 pad = extra; 1169 seglen += extra; 1170 } else { 1171 goto unsent_data; 1172 } 1173 } 1174 /* See if we can bundle more. */ 1175 while (seglen < pathmax) { 1176 int32_t new_len; 1177 int32_t new_xtralen; 1178 1179 while (mp != NULL) { 1180 if (SCTP_CHUNK_CANSEND(mp)) 1181 break; 1182 mp = mp->b_next; 1183 } 1184 if (mp == NULL) { 1185 meta = sctp_get_msg_to_send(sctp, &mp, 1186 meta->b_next, &error, seglen, 1187 (seglen - xtralen) >= cansend ? 0 : 1188 cansend - seglen, fp); 1189 if (error != 0 || meta == NULL) 1190 break; 1191 sctp->sctp_xmit_tail = meta; 1192 } 1193 ASSERT(mp != NULL); 1194 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) && 1195 fp != SCTP_CHUNK_DEST(meta)) { 1196 break; 1197 } 1198 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1199 chunklen = ntohs(sdc->sdh_len); 1200 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0) 1201 extra = SCTP_ALIGN - extra; 1202 1203 new_len = seglen + chunklen; 1204 new_xtralen = xtralen + sizeof (*sdc); 1205 chunklen -= sizeof (*sdc); 1206 1207 if (new_len - new_xtralen > cansend || 1208 new_len + extra > pathmax) { 1209 break; 1210 } 1211 if ((nmp = dupmsg(mp)) == NULL) 1212 break; 1213 if (extra > 0) { 1214 fill = sctp_get_padding(sctp, extra); 1215 if (fill != NULL) { 1216 pad += extra; 1217 new_len += extra; 1218 linkb(nmp, fill); 1219 } else { 1220 freemsg(nmp); 1221 break; 1222 } 1223 } 1224 seglen = new_len; 1225 xtralen = new_xtralen; 1226 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1227 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1228 linkb(head, nmp); 1229 mp = mp->b_next; 1230 } 1231 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1232 ipha_t *iph = (ipha_t *)head->b_rptr; 1233 1234 /* 1235 * Path MTU is different from what we 
thought it would 1236 * be when we created chunks, or IP headers have grown. 1237 * Need to clear the DF bit. 1238 */ 1239 iph->ipha_fragment_offset_and_flags = 0; 1240 } 1241 /* xmit segment */ 1242 ASSERT(cansend >= seglen - pad - xtralen); 1243 cansend -= (seglen - pad - xtralen); 1244 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x " 1245 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n", 1246 seglen - xtralen, ntohl(sdc->sdh_tsn), 1247 ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, 1248 cansend, sctp->sctp_lastack_rxd)); 1249 sctp_set_iplen(sctp, head); 1250 sctp_add_sendq(sctp, head); 1251 /* arm rto timer (if not set) */ 1252 if (!fp->timer_running) 1253 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1254 notsent = B_FALSE; 1255 } 1256 sctp->sctp_active = now; 1257 return; 1258 unsent_data: 1259 /* arm persist timer (if rto timer not set) */ 1260 if (!fp->timer_running) 1261 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1262 if (head != NULL) 1263 freemsg(head); 1264 } 1265 1266 /* 1267 * The following two functions initialize and destroy the cache 1268 * associated with the sets used for PR-SCTP. 1269 */ 1270 void 1271 sctp_ftsn_sets_init(void) 1272 { 1273 sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache", 1274 sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL, 1275 NULL, 0); 1276 } 1277 1278 void 1279 sctp_ftsn_sets_fini(void) 1280 { 1281 kmem_cache_destroy(sctp_kmem_ftsn_set_cache); 1282 } 1283 1284 1285 /* Free PR-SCTP sets */ 1286 void 1287 sctp_free_ftsn_set(sctp_ftsn_set_t *s) 1288 { 1289 sctp_ftsn_set_t *p; 1290 1291 while (s != NULL) { 1292 p = s->next; 1293 s->next = NULL; 1294 kmem_cache_free(sctp_kmem_ftsn_set_cache, s); 1295 s = p; 1296 } 1297 } 1298 1299 /* 1300 * Given a message meta block, meta, this routine creates or modifies 1301 * the set that will be used to generate a Forward TSN chunk. 
If the 1302 * entry for stream id, sid, for this message already exists, the 1303 * sequence number, ssn, is updated if it is greater than the existing 1304 * one. If an entry for this sid does not exist, one is created if 1305 * the size does not exceed fp->sfa_pmss. We return false in case 1306 * or an error. 1307 */ 1308 boolean_t 1309 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta, 1310 uint_t *nsets, uint32_t *slen) 1311 { 1312 sctp_ftsn_set_t *p; 1313 sctp_msg_hdr_t *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1314 uint16_t sid = htons(msg_hdr->smh_sid); 1315 /* msg_hdr->smh_ssn is already in NBO */ 1316 uint16_t ssn = msg_hdr->smh_ssn; 1317 1318 ASSERT(s != NULL && nsets != NULL); 1319 ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL)); 1320 1321 if (*s == NULL) { 1322 ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss); 1323 *s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP); 1324 if (*s == NULL) 1325 return (B_FALSE); 1326 (*s)->ftsn_entries.ftsn_sid = sid; 1327 (*s)->ftsn_entries.ftsn_ssn = ssn; 1328 (*s)->next = NULL; 1329 *nsets = 1; 1330 *slen += sizeof (uint32_t); 1331 return (B_TRUE); 1332 } 1333 for (p = *s; p->next != NULL; p = p->next) { 1334 if (p->ftsn_entries.ftsn_sid == sid) { 1335 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1336 p->ftsn_entries.ftsn_ssn = ssn; 1337 return (B_TRUE); 1338 } 1339 } 1340 /* the last one */ 1341 if (p->ftsn_entries.ftsn_sid == sid) { 1342 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1343 p->ftsn_entries.ftsn_ssn = ssn; 1344 } else { 1345 if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss) 1346 return (B_FALSE); 1347 p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, 1348 KM_NOSLEEP); 1349 if (p->next == NULL) 1350 return (B_FALSE); 1351 p = p->next; 1352 p->ftsn_entries.ftsn_sid = sid; 1353 p->ftsn_entries.ftsn_ssn = ssn; 1354 p->next = NULL; 1355 (*nsets)++; 1356 *slen += sizeof (uint32_t); 1357 } 1358 return (B_TRUE); 1359 } 1360 1361 /* 1362 * Given a set of stream id - 
sequence number pairs, this routing creates 1363 * a Forward TSN chunk. The cumulative TSN (advanced peer ack point) 1364 * for the chunk is obtained from sctp->sctp_adv_pap. The caller 1365 * will add the IP/SCTP header. 1366 */ 1367 mblk_t * 1368 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets, 1369 uint_t nsets, uint32_t seglen) 1370 { 1371 mblk_t *ftsn_mp; 1372 sctp_chunk_hdr_t *ch_hdr; 1373 uint32_t *advtsn; 1374 uint16_t schlen; 1375 size_t xtralen; 1376 ftsn_entry_t *ftsn_entry; 1377 sctp_stack_t *sctps = sctp->sctp_sctps; 1378 1379 seglen += sizeof (sctp_chunk_hdr_t); 1380 if (fp->isv4) 1381 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra; 1382 else 1383 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra; 1384 ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp), 1385 sctp->sctp_cpid); 1386 if (ftsn_mp == NULL) 1387 return (NULL); 1388 ftsn_mp->b_rptr += xtralen; 1389 ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen; 1390 1391 ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr; 1392 ch_hdr->sch_id = CHUNK_FORWARD_TSN; 1393 ch_hdr->sch_flags = 0; 1394 /* 1395 * The cast here should not be an issue since seglen is 1396 * the length of the Forward TSN chunk. 1397 */ 1398 schlen = (uint16_t)seglen; 1399 U16_TO_ABE16(schlen, &(ch_hdr->sch_len)); 1400 1401 advtsn = (uint32_t *)(ch_hdr + 1); 1402 U32_TO_ABE32(sctp->sctp_adv_pap, advtsn); 1403 ftsn_entry = (ftsn_entry_t *)(advtsn + 1); 1404 while (nsets > 0) { 1405 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr); 1406 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid; 1407 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn; 1408 ftsn_entry++; 1409 sets = sets->next; 1410 nsets--; 1411 } 1412 return (ftsn_mp); 1413 } 1414 1415 /* 1416 * Given a starting message, the routine steps through all the 1417 * messages whose TSN is less than sctp->sctp_adv_pap and creates 1418 * ftsn sets. The ftsn sets is then used to create an Forward TSN 1419 * chunk. 
All the messages, that have chunks that are included in the 1420 * ftsn sets, are flagged abandonded. If a message is partially sent 1421 * and is deemed abandoned, all remaining unsent chunks are marked 1422 * abandoned and are deducted from sctp_unsent. 1423 */ 1424 void 1425 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp, 1426 sctp_faddr_t *fp, uint32_t *seglen) 1427 { 1428 mblk_t *mp1 = mp; 1429 mblk_t *mp_head = mp; 1430 mblk_t *meta_head = meta; 1431 mblk_t *head; 1432 sctp_ftsn_set_t *sets = NULL; 1433 uint_t nsets = 0; 1434 uint16_t clen; 1435 sctp_data_hdr_t *sdc; 1436 uint32_t sacklen; 1437 uint32_t adv_pap = sctp->sctp_adv_pap; 1438 uint32_t unsent = 0; 1439 boolean_t ubit; 1440 sctp_stack_t *sctps = sctp->sctp_sctps; 1441 1442 *seglen = sizeof (uint32_t); 1443 1444 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1445 while (meta != NULL && 1446 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { 1447 /* 1448 * Skip adding FTSN sets for un-ordered messages as they do 1449 * not have SSNs. 1450 */ 1451 ubit = SCTP_DATA_GET_UBIT(sdc); 1452 if (!ubit && 1453 !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) { 1454 meta = NULL; 1455 sctp->sctp_adv_pap = adv_pap; 1456 goto ftsn_done; 1457 } 1458 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { 1459 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1460 adv_pap = ntohl(sdc->sdh_tsn); 1461 mp1 = mp1->b_next; 1462 } 1463 meta = meta->b_next; 1464 if (meta != NULL) { 1465 mp1 = meta->b_cont; 1466 if (!SCTP_CHUNK_ISSENT(mp1)) 1467 break; 1468 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1469 } 1470 } 1471 ftsn_done: 1472 /* 1473 * Can't compare with sets == NULL, since we don't add any 1474 * sets for un-ordered messages. 
1475 */ 1476 if (meta == meta_head) 1477 return; 1478 *nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen); 1479 sctp_free_ftsn_set(sets); 1480 if (*nmp == NULL) 1481 return; 1482 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1483 sacklen = 0; 1484 } else { 1485 sacklen = sizeof (sctp_chunk_hdr_t) + 1486 sizeof (sctp_sack_chunk_t) + 1487 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1488 if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1489 /* piggybacked SACK doesn't fit */ 1490 sacklen = 0; 1491 } else { 1492 fp = sctp->sctp_lastdata; 1493 } 1494 } 1495 head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL); 1496 if (head == NULL) { 1497 freemsg(*nmp); 1498 *nmp = NULL; 1499 SCTP_KSTAT(sctps, sctp_send_ftsn_failed); 1500 return; 1501 } 1502 *seglen += sacklen; 1503 *nmp = head; 1504 1505 /* 1506 * XXXNeed to optimise this, the reason it is done here is so 1507 * that we don't have to undo in case of failure. 1508 */ 1509 mp1 = mp_head; 1510 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1511 while (meta_head != NULL && 1512 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { 1513 if (!SCTP_IS_MSG_ABANDONED(meta_head)) 1514 SCTP_MSG_SET_ABANDONED(meta_head); 1515 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { 1516 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1517 if (!SCTP_CHUNK_ISACKED(mp1)) { 1518 clen = ntohs(sdc->sdh_len) - sizeof (*sdc); 1519 SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen, 1520 meta_head); 1521 } 1522 mp1 = mp1->b_next; 1523 } 1524 while (mp1 != NULL) { 1525 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1526 if (!SCTP_CHUNK_ABANDONED(mp1)) { 1527 ASSERT(!SCTP_CHUNK_ISSENT(mp1)); 1528 unsent += ntohs(sdc->sdh_len) - sizeof (*sdc); 1529 SCTP_ABANDON_CHUNK(mp1); 1530 } 1531 mp1 = mp1->b_next; 1532 } 1533 meta_head = meta_head->b_next; 1534 if (meta_head != NULL) { 1535 mp1 = meta_head->b_cont; 1536 if (!SCTP_CHUNK_ISSENT(mp1)) 1537 break; 1538 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1539 } 1540 } 1541 if (unsent > 0) { 1542 ASSERT(sctp->sctp_unsent 
>= unsent); 1543 sctp->sctp_unsent -= unsent; 1544 /* 1545 * Update ULP the amount of queued data, which is 1546 * sent-unack'ed + unsent. 1547 */ 1548 if (!SCTP_IS_DETACHED(sctp)) 1549 SCTP_TXQ_UPDATE(sctp); 1550 } 1551 } 1552 1553 /* 1554 * This function steps through messages starting at meta and checks if 1555 * the message is abandoned. It stops when it hits an unsent chunk or 1556 * a message that has all its chunk acked. This is the only place 1557 * where the sctp_adv_pap is moved forward to indicated abandoned 1558 * messages. 1559 */ 1560 void 1561 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp) 1562 { 1563 uint32_t tsn = sctp->sctp_adv_pap; 1564 sctp_data_hdr_t *sdc; 1565 sctp_msg_hdr_t *msg_hdr; 1566 1567 ASSERT(mp != NULL); 1568 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1569 ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd)); 1570 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1571 if (!SCTP_IS_MSG_ABANDONED(meta) && 1572 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 1573 return; 1574 } 1575 while (meta != NULL) { 1576 while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) { 1577 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1578 tsn = ntohl(sdc->sdh_tsn); 1579 mp = mp->b_next; 1580 } 1581 if (mp != NULL) 1582 break; 1583 /* 1584 * We continue checking for successive messages only if there 1585 * is a chunk marked for retransmission. Else, we might 1586 * end up sending FTSN prematurely for chunks that have been 1587 * sent, but not yet acked. 
1588 */ 1589 if ((meta = meta->b_next) != NULL) { 1590 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1591 if (!SCTP_IS_MSG_ABANDONED(meta) && 1592 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 1593 break; 1594 } 1595 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1596 if (!SCTP_CHUNK_ISSENT(mp)) { 1597 sctp->sctp_adv_pap = tsn; 1598 return; 1599 } 1600 if (SCTP_CHUNK_WANT_REXMIT(mp)) 1601 break; 1602 } 1603 if (mp == NULL) 1604 break; 1605 } 1606 } 1607 sctp->sctp_adv_pap = tsn; 1608 } 1609 1610 1611 /* 1612 * Determine if we should bundle a data chunk with the chunk being 1613 * retransmitted. We bundle if 1614 * 1615 * - the chunk is sent to the same destination and unack'ed. 1616 * 1617 * OR 1618 * 1619 * - the chunk is unsent, i.e. new data. 1620 */ 1621 #define SCTP_CHUNK_RX_CANBUNDLE(mp, fp) \ 1622 (!SCTP_CHUNK_ABANDONED((mp)) && \ 1623 ((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) && \ 1624 !SCTP_CHUNK_ISACKED(mp))) || \ 1625 (((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \ 1626 SCTP_CHUNK_FLAG_SENT))) 1627 1628 /* 1629 * Retransmit first segment which hasn't been acked with cumtsn or send 1630 * a Forward TSN chunk, if appropriate. 
1631 */ 1632 void 1633 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) 1634 { 1635 mblk_t *mp; 1636 mblk_t *nmp = NULL; 1637 mblk_t *head; 1638 mblk_t *meta = sctp->sctp_xmit_head; 1639 mblk_t *fill; 1640 uint32_t seglen = 0; 1641 uint32_t sacklen; 1642 uint16_t chunklen; 1643 int extra; 1644 sctp_data_hdr_t *sdc; 1645 sctp_faddr_t *fp; 1646 uint32_t adv_pap = sctp->sctp_adv_pap; 1647 boolean_t do_ftsn = B_FALSE; 1648 boolean_t ftsn_check = B_TRUE; 1649 uint32_t first_ua_tsn; 1650 sctp_msg_hdr_t *mhdr; 1651 sctp_stack_t *sctps = sctp->sctp_sctps; 1652 int error; 1653 1654 while (meta != NULL) { 1655 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1656 uint32_t tsn; 1657 1658 if (!SCTP_CHUNK_ISSENT(mp)) 1659 goto window_probe; 1660 /* 1661 * We break in the following cases - 1662 * 1663 * if the advanced peer ack point includes the next 1664 * chunk to be retransmited - possibly the Forward 1665 * TSN was lost. 1666 * 1667 * if we are PRSCTP aware and the next chunk to be 1668 * retransmitted is now abandoned 1669 * 1670 * if the next chunk to be retransmitted is for 1671 * the dest on which the timer went off. (this 1672 * message is not abandoned). 1673 * 1674 * We check for Forward TSN only for the first 1675 * eligible chunk to be retransmitted. The reason 1676 * being if the first eligible chunk is skipped (say 1677 * it was sent to a destination other than oldfp) 1678 * then we cannot advance the cum TSN via Forward 1679 * TSN chunk. 1680 * 1681 * Also, ftsn_check is B_TRUE only for the first 1682 * eligible chunk, it will be B_FALSE for all 1683 * subsequent candidate messages for retransmission. 
1684 */ 1685 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1686 tsn = ntohl(sdc->sdh_tsn); 1687 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) { 1688 if (sctp->sctp_prsctp_aware && ftsn_check) { 1689 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) { 1690 ASSERT(sctp->sctp_prsctp_aware); 1691 do_ftsn = B_TRUE; 1692 goto out; 1693 } else { 1694 sctp_check_adv_ack_pt(sctp, 1695 meta, mp); 1696 if (SEQ_GT(sctp->sctp_adv_pap, 1697 adv_pap)) { 1698 do_ftsn = B_TRUE; 1699 goto out; 1700 } 1701 } 1702 ftsn_check = B_FALSE; 1703 } 1704 if (SCTP_CHUNK_DEST(mp) == oldfp) 1705 goto out; 1706 } 1707 } 1708 meta = meta->b_next; 1709 if (meta != NULL && sctp->sctp_prsctp_aware) { 1710 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1711 1712 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || 1713 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { 1714 meta = meta->b_next; 1715 } 1716 } 1717 } 1718 window_probe: 1719 /* 1720 * Retransmit fired for a destination which didn't have 1721 * any unacked data pending. 1722 */ 1723 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) { 1724 /* 1725 * Send a window probe. Inflate frwnd to allow 1726 * sending one segment. 1727 */ 1728 if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) 1729 sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc); 1730 1731 /* next TSN to send */ 1732 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn; 1733 1734 /* 1735 * The above sctp_frwnd adjustment is coarse. The "changed" 1736 * sctp_frwnd may allow us to send more than 1 packet. So 1737 * tell sctp_output() to send only 1 packet. 1738 */ 1739 sctp_output(sctp, 1); 1740 1741 /* Last sent TSN */ 1742 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1743 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); 1744 sctp->sctp_zero_win_probe = B_TRUE; 1745 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1746 } 1747 return; 1748 out: 1749 /* 1750 * After a time out, assume that everything has left the network. So 1751 * we can clear rxt_unacked for the original peer address. 
1752 */ 1753 oldfp->rxt_unacked = 0; 1754 1755 /* 1756 * If we were probing for zero window, don't adjust retransmission 1757 * variables, but the timer is still backed off. 1758 */ 1759 if (sctp->sctp_zero_win_probe) { 1760 mblk_t *pkt; 1761 uint_t pkt_len; 1762 1763 /* 1764 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn 1765 * and sctp_rxt_maxtsn will specify the ZWP packet. 1766 */ 1767 fp = oldfp; 1768 if (oldfp->state != SCTP_FADDRS_ALIVE) 1769 fp = sctp_rotate_faddr(sctp, oldfp); 1770 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 1771 if (pkt != NULL) { 1772 ASSERT(pkt_len <= fp->sfa_pmss); 1773 sctp_set_iplen(sctp, pkt); 1774 sctp_add_sendq(sctp, pkt); 1775 } else { 1776 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 1777 } 1778 1779 /* 1780 * The strikes will be clear by sctp_faddr_alive() when the 1781 * other side sends us an ack. 1782 */ 1783 oldfp->strikes++; 1784 sctp->sctp_strikes++; 1785 1786 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1787 if (oldfp != fp && oldfp->suna != 0) 1788 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); 1789 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1790 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1791 return; 1792 } 1793 1794 /* 1795 * Enter slowstart for this destination 1796 */ 1797 oldfp->ssthresh = oldfp->cwnd / 2; 1798 if (oldfp->ssthresh < 2 * oldfp->sfa_pmss) 1799 oldfp->ssthresh = 2 * oldfp->sfa_pmss; 1800 oldfp->cwnd = oldfp->sfa_pmss; 1801 oldfp->pba = 0; 1802 fp = sctp_rotate_faddr(sctp, oldfp); 1803 ASSERT(fp != NULL); 1804 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1805 1806 first_ua_tsn = ntohl(sdc->sdh_tsn); 1807 if (do_ftsn) { 1808 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); 1809 if (nmp == NULL) { 1810 sctp->sctp_adv_pap = adv_pap; 1811 goto restart_timer; 1812 } 1813 head = nmp; 1814 /* 1815 * Move to the next unabandoned chunk. XXXCheck if meta will 1816 * always be marked abandoned. 
1817 */ 1818 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) 1819 meta = meta->b_next; 1820 if (meta != NULL) 1821 mp = mp->b_cont; 1822 else 1823 mp = NULL; 1824 goto try_bundle; 1825 } 1826 seglen = ntohs(sdc->sdh_len); 1827 chunklen = seglen - sizeof (*sdc); 1828 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1829 extra = SCTP_ALIGN - extra; 1830 1831 /* Find out if we need to piggyback SACK. */ 1832 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1833 sacklen = 0; 1834 } else { 1835 sacklen = sizeof (sctp_chunk_hdr_t) + 1836 sizeof (sctp_sack_chunk_t) + 1837 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1838 if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1839 /* piggybacked SACK doesn't fit */ 1840 sacklen = 0; 1841 } else { 1842 /* 1843 * OK, we have room to send SACK back. But we 1844 * should send it back to the last fp where we 1845 * receive data from, unless sctp_lastdata equals 1846 * oldfp, then we should probably not send it 1847 * back to that fp. Also we should check that 1848 * the fp is alive. 1849 */ 1850 if (sctp->sctp_lastdata != oldfp && 1851 sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { 1852 fp = sctp->sctp_lastdata; 1853 } 1854 } 1855 } 1856 1857 /* 1858 * Cancel RTT measurement if the retransmitted TSN is before the 1859 * TSN used for timimg. 1860 */ 1861 if (sctp->sctp_out_time != 0 && 1862 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { 1863 sctp->sctp_out_time = 0; 1864 } 1865 /* Clear the counter as the RTT calculation may be off. */ 1866 fp->rtt_updates = 0; 1867 oldfp->rtt_updates = 0; 1868 1869 /* 1870 * After a timeout, we should change the current faddr so that 1871 * new chunks will be sent to the alternate address. 
1872 */ 1873 sctp_set_faddr_current(sctp, fp); 1874 1875 nmp = dupmsg(mp); 1876 if (nmp == NULL) 1877 goto restart_timer; 1878 if (extra > 0) { 1879 fill = sctp_get_padding(sctp, extra); 1880 if (fill != NULL) { 1881 linkb(nmp, fill); 1882 seglen += extra; 1883 } else { 1884 freemsg(nmp); 1885 goto restart_timer; 1886 } 1887 } 1888 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1889 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); 1890 if (head == NULL) { 1891 freemsg(nmp); 1892 SCTP_KSTAT(sctps, sctp_rexmit_failed); 1893 goto restart_timer; 1894 } 1895 seglen += sacklen; 1896 1897 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1898 1899 mp = mp->b_next; 1900 1901 try_bundle: 1902 /* We can at least and at most send 1 packet at timeout. */ 1903 while (seglen < fp->sfa_pmss) { 1904 int32_t new_len; 1905 1906 /* Go through the list to find more chunks to be bundled. */ 1907 while (mp != NULL) { 1908 /* Check if the chunk can be bundled. */ 1909 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) 1910 break; 1911 mp = mp->b_next; 1912 } 1913 /* Go to the next message. */ 1914 if (mp == NULL) { 1915 for (meta = meta->b_next; meta != NULL; 1916 meta = meta->b_next) { 1917 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1918 1919 if (SCTP_IS_MSG_ABANDONED(meta) || 1920 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, 1921 sctp)) { 1922 continue; 1923 } 1924 1925 mp = meta->b_cont; 1926 goto try_bundle; 1927 } 1928 /* 1929 * Check if there is a new message which potentially 1930 * could be bundled with this retransmission. 1931 */ 1932 meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error, 1933 seglen, fp->sfa_pmss - seglen, NULL); 1934 if (error != 0 || meta == NULL) { 1935 /* No more chunk to be bundled. 
*/ 1936 break; 1937 } else { 1938 goto try_bundle; 1939 } 1940 } 1941 1942 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1943 new_len = ntohs(sdc->sdh_len); 1944 chunklen = new_len - sizeof (*sdc); 1945 1946 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 1947 extra = SCTP_ALIGN - extra; 1948 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 1949 break; 1950 if ((nmp = dupmsg(mp)) == NULL) 1951 break; 1952 1953 if (extra > 0) { 1954 fill = sctp_get_padding(sctp, extra); 1955 if (fill != NULL) { 1956 linkb(nmp, fill); 1957 } else { 1958 freemsg(nmp); 1959 break; 1960 } 1961 } 1962 linkb(head, nmp); 1963 1964 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1965 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1966 1967 seglen = new_len; 1968 mp = mp->b_next; 1969 } 1970 done_bundle: 1971 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1972 ipha_t *iph = (ipha_t *)head->b_rptr; 1973 1974 /* 1975 * Path MTU is different from path we thought it would 1976 * be when we created chunks, or IP headers have grown. 1977 * Need to clear the DF bit. 1978 */ 1979 iph->ipha_fragment_offset_and_flags = 0; 1980 } 1981 fp->rxt_unacked += seglen; 1982 1983 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x " 1984 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n", 1985 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), 1986 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); 1987 1988 sctp->sctp_rexmitting = B_TRUE; 1989 sctp->sctp_rxt_nxttsn = first_ua_tsn; 1990 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1991 sctp_set_iplen(sctp, head); 1992 sctp_add_sendq(sctp, head); 1993 1994 /* 1995 * Restart the oldfp timer with exponential backoff and 1996 * the new fp timer for the retransmitted chunks. 1997 */ 1998 restart_timer: 1999 oldfp->strikes++; 2000 sctp->sctp_strikes++; 2001 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 2002 /* 2003 * If there is still some data in the oldfp, restart the 2004 * retransmission timer. 
If there is no data, the heartbeat will 2005 * continue to run so it will do its job in checking the reachability 2006 * of the oldfp. 2007 */ 2008 if (oldfp != fp && oldfp->suna != 0) 2009 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); 2010 2011 /* 2012 * Should we restart the timer of the new fp? If there is 2013 * outstanding data to the new fp, the timer should be 2014 * running already. So restarting it means that the timer 2015 * will fire later for those outstanding data. But if 2016 * we don't restart it, the timer will fire too early for the 2017 * just retransmitted chunks to the new fp. The reason is that we 2018 * don't keep a timestamp on when a chunk is retransmitted. 2019 * So when the timer fires, it will just search for the 2020 * chunk with the earliest TSN sent to new fp. This probably 2021 * is the chunk we just retransmitted. So for now, let's 2022 * be conservative and restart the timer of the new fp. 2023 */ 2024 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2025 2026 sctp->sctp_active = lbolt64; 2027 } 2028 2029 /* 2030 * This function is called by sctp_ss_rexmit() to create a packet 2031 * to be retransmitted to the given fp. The given meta and mp 2032 * parameters are respectively the sctp_msg_hdr_t and the mblk of the 2033 * first chunk to be retransmitted. This is also called when we want 2034 * to retransmit a zero window probe from sctp_rexmit() or when we 2035 * want to retransmit the zero window probe after the window has 2036 * opened from sctp_got_sack(). 
2037 */ 2038 mblk_t * 2039 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, 2040 uint_t *packet_len) 2041 { 2042 uint32_t seglen = 0; 2043 uint16_t chunklen; 2044 int extra; 2045 mblk_t *nmp; 2046 mblk_t *head; 2047 mblk_t *fill; 2048 sctp_data_hdr_t *sdc; 2049 sctp_msg_hdr_t *mhdr; 2050 2051 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2052 seglen = ntohs(sdc->sdh_len); 2053 chunklen = seglen - sizeof (*sdc); 2054 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 2055 extra = SCTP_ALIGN - extra; 2056 2057 nmp = dupmsg(*mp); 2058 if (nmp == NULL) 2059 return (NULL); 2060 if (extra > 0) { 2061 fill = sctp_get_padding(sctp, extra); 2062 if (fill != NULL) { 2063 linkb(nmp, fill); 2064 seglen += extra; 2065 } else { 2066 freemsg(nmp); 2067 return (NULL); 2068 } 2069 } 2070 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2071 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL); 2072 if (head == NULL) { 2073 freemsg(nmp); 2074 return (NULL); 2075 } 2076 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2077 /* 2078 * Don't update the TSN if we are doing a Zero Win Probe. 2079 */ 2080 if (!sctp->sctp_zero_win_probe) 2081 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2082 *mp = (*mp)->b_next; 2083 2084 try_bundle: 2085 while (seglen < fp->sfa_pmss) { 2086 int32_t new_len; 2087 2088 /* 2089 * Go through the list to find more chunks to be bundled. 2090 * We should only retransmit sent by unack'ed chunks. Since 2091 * they were sent before, the peer's receive window should 2092 * be able to receive them. 2093 */ 2094 while (*mp != NULL) { 2095 /* Check if the chunk can be bundled. */ 2096 if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp)) 2097 break; 2098 *mp = (*mp)->b_next; 2099 } 2100 /* Go to the next message. 
*/ 2101 if (*mp == NULL) { 2102 for (*meta = (*meta)->b_next; *meta != NULL; 2103 *meta = (*meta)->b_next) { 2104 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr; 2105 2106 if (SCTP_IS_MSG_ABANDONED(*meta) || 2107 SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr, 2108 sctp)) { 2109 continue; 2110 } 2111 2112 *mp = (*meta)->b_cont; 2113 goto try_bundle; 2114 } 2115 /* No more chunk to be bundled. */ 2116 break; 2117 } 2118 2119 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2120 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */ 2121 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn)) 2122 break; 2123 new_len = ntohs(sdc->sdh_len); 2124 chunklen = new_len - sizeof (*sdc); 2125 2126 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 2127 extra = SCTP_ALIGN - extra; 2128 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 2129 break; 2130 if ((nmp = dupmsg(*mp)) == NULL) 2131 break; 2132 2133 if (extra > 0) { 2134 fill = sctp_get_padding(sctp, extra); 2135 if (fill != NULL) { 2136 linkb(nmp, fill); 2137 } else { 2138 freemsg(nmp); 2139 break; 2140 } 2141 } 2142 linkb(head, nmp); 2143 2144 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2145 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2146 /* 2147 * Don't update the TSN if we are doing a Zero Win Probe. 2148 */ 2149 if (!sctp->sctp_zero_win_probe) 2150 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2151 2152 seglen = new_len; 2153 *mp = (*mp)->b_next; 2154 } 2155 *packet_len = seglen; 2156 fp->rxt_unacked += seglen; 2157 return (head); 2158 } 2159 2160 /* 2161 * sctp_ss_rexmit() is called when we get a SACK after a timeout which 2162 * advances the cum_tsn but the cum_tsn is still less than what we have sent 2163 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial" 2164 * SACK. We retransmit unacked chunks without having to wait for another 2165 * timeout. The rationale is that the SACK should not be "partial" if all the 2166 * lost chunks have been retransmitted. 
Since the SACK is "partial,"
 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
 * be missing.  It is better for us to retransmit them now instead
 * of waiting for a timeout.
 */
void
sctp_ss_rexmit(sctp_t *sctp)
{
	mblk_t		*meta;
	mblk_t		*mp;
	mblk_t		*pkt;
	sctp_faddr_t	*fp;
	uint_t		sent_len;
	uint32_t	avail;
	sctp_data_hdr_t	*sdc;
	int		burst;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	ASSERT(!sctp->sctp_zero_win_probe);

	/*
	 * If the last cum ack is smaller than what we have just
	 * retransmitted, there is nothing to do.  Otherwise resume right
	 * after the last cum ack.
	 */
	if (!SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
		return;
	sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));

	/*
	 * After a timer fires, sctp_current should be set to the new
	 * fp where the retransmitted chunks are sent.
	 */
	fp = sctp->sctp_current;

	/*
	 * Only cwnd limits how much we may retransmit: the peer's receive
	 * window already admitted these chunks when they were first sent.
	 * With rxt_unacked recording what has been retransmitted but not
	 * yet acknowledged, the amount we can send now is cwnd minus
	 * rxt_unacked.
	 *
	 * rxt_unacked grows when a packet is retransmitted and shrinks when
	 * a SACK acknowledges something.  It is reset when the
	 * retransmission timer fires, on the assumption that every packet
	 * has left the network by then.  When that assumption is wrong, a
	 * SACK after the timeout can acknowledge more than rxt_unacked
	 * holds (it only counts what was retransmitted since the timer
	 * fired), and rxt_unacked then becomes very big (it is an unsigned
	 * int, so going negative means the value is huge).  That is why we
	 * always allow at least 1 MSS bytes.
	 *
	 * The count is inexact because we only know how many packets are
	 * outstanding (from the TSNs), not how many bytes they carry.  An
	 * exact figure would need a walk through the send list.  As
	 * accuracy matters little during retransmission, we skip the walk
	 * to save time.  This should not make the retransmission so
	 * aggressive as to cause congestion.
	 */
	if (fp->cwnd > fp->rxt_unacked)
		avail = fp->cwnd - fp->rxt_unacked;
	else
		avail = fp->sfa_pmss;

	/* Locate the chunk whose TSN is sctp_rxt_nxttsn. */
	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;

		/* Skip messages that are, or are about to be, abandoned. */
		if (SCTP_IS_MSG_ABANDONED(meta) ||
		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
			continue;
		}

		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
			/* Again, this may not be possible */
			if (!SCTP_CHUNK_ISSENT(mp))
				return;
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
				goto found_msg;
		}
	}

	/* Everything is abandoned... */
	return;

found_msg:
	if (!fp->timer_running) {
		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
	}

	pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &sent_len);
	if (pkt == NULL) {
		SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
		return;
	}

	if ((sent_len > fp->sfa_pmss) && fp->isv4) {
		/*
		 * The path MTU differs from what we assumed when the chunks
		 * were created, or the IP headers have grown, so the DF bit
		 * has to be cleared.
		 */
		((ipha_t *)pkt->b_rptr)->ipha_fragment_offset_and_flags = 0;
	}
	sctp_set_iplen(sctp, pkt);
	sctp_add_sendq(sctp, pkt);

	/*
	 * Stop unless, after this packet, the window still has room for a
	 * full MSS and there is something left to look at.
	 */
	if (avail <= sent_len || avail - sent_len < fp->sfa_pmss ||
	    meta == NULL) {
		return;
	}
	if (mp == NULL)
		meta = meta->b_next;
	if (meta == NULL)
		return;

	/*
	 * Retransmit another packet if the window allows.
	 *
	 * NOTE(review): the window charge and the burst counter both sit in
	 * the for-initialiser, so each return here via found_msg restarts
	 * burst at sctps_maxburst - 1; it is only decremented per message
	 * stepped over, not per packet sent.  Confirm this is the intended
	 * meaning of the burst limit.
	 */
	for (avail -= sent_len, burst = sctps->sctps_maxburst - 1;
	    meta != NULL && burst > 0; meta = meta->b_next, burst--) {
		if (mp == NULL)
			mp = meta->b_cont;
		for (; mp != NULL; mp = mp->b_next) {
			/* Again, this may not be possible */
			if (!SCTP_CHUNK_ISSENT(mp))
				return;
			if (!SCTP_CHUNK_ISACKED(mp))
				goto found_msg;
		}
	}
}