1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/systm.h> 28 #include <sys/stream.h> 29 #include <sys/cmn_err.h> 30 #define _SUN_TPI_VERSION 2 31 #include <sys/tihdr.h> 32 #include <sys/socket.h> 33 #include <sys/stropts.h> 34 #include <sys/strsun.h> 35 #include <sys/strsubr.h> 36 #include <sys/socketvar.h> 37 #include <inet/common.h> 38 #include <inet/mi.h> 39 #include <inet/ip.h> 40 #include <inet/ip_ire.h> 41 #include <inet/ip6.h> 42 #include <inet/sctp_ip.h> 43 #include <inet/ipclassifier.h> 44 45 /* 46 * PR-SCTP comments. 47 * 48 * A message can expire before it gets to the transmit list (i.e. it is still 49 * in the unsent list - unchunked), after it gets to the transmit list, but 50 * before transmission has actually started, or after transmission has begun. 
 * Accordingly, we check for the status of a message in sctp_chunkify() when
 * the message is being transferred from the unsent list to the transmit list;
 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
 * When we nuke a message in sctp_chunkify(), all we need to do is take it
 * out of the unsent list and update sctp_unsent; when a message is deemed
 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
 * list, update sctp_unsent IFF transmission for the message has not yet begun
 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
 * message has started, then we cannot just take it out of the list, we need
 * to send Forward TSN chunk to the peer so that the peer can clear its
 * fragment list for this message. However, we cannot just send the Forward
 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
 * messages preceding this abandoned message. So, we send a Forward TSN
 * IFF all messages prior to this abandoned message have been SACKed; if not,
 * we defer sending the Forward TSN to sctp_cumack(), which will check for
 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
 * sctp_rexmit() when we check for retransmissions, we need to determine if
 * the advanced peer ack point can be moved ahead, and if so, send a Forward
 * TSN to the peer instead of retransmitting the chunk. Note that when
 * we send a Forward TSN for a message, there may be yet unsent chunks for
 * this message; we need to mark all such chunks as abandoned, so that
 * sctp_cumack() can take the message out of the transmit list; additionally
 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
 * decremented when a message/chunk is deemed abandoned), sockfs needs to
 * be notified so that it can adjust its idea of the queued message.
 */

#include "sctp_impl.h"

static struct kmem_cache	*sctp_kmem_ftsn_set_cache;
/* sctp_chunkify() is defined later in this file; this gives it file scope. */
static mblk_t			*sctp_chunkify(sctp_t *, int, int, int);

#ifdef	DEBUG
static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
#endif

/*
 * Called to allocate a header mblk when sending data to SCTP.
 * Data will follow in b_cont of this mblk.
 *
 * The mblk is laid out as a T_unitdata_req, immediately followed by the
 * destination name (nlen bytes), followed at a _TPI_ALIGN_TOPT()-aligned
 * offset by the control (option) data (clen bytes).  The allocation is
 * never smaller than an sctp_msg_hdr_t, because sctp_sendmsg() later
 * rewrites this same mblk in place as an sctp_msg_hdr_t.
 *
 * If SCTP_CAN_BLOCK is set in flags the allocation uses allocb_wait(),
 * otherwise allocb().  The error value filled in by allocb_wait() is not
 * passed back to the caller.
 *
 * Returns the initialized M_PROTO mblk, or NULL if no mblk was obtained.
 */
mblk_t *
sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
    int flags)
{
	mblk_t *mp;
	struct T_unitdata_req *tudr;
	size_t size;
	int error;

	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
	/* Leave room for the sctp_msg_hdr_t that replaces the TPI header. */
	size = MAX(size, sizeof (sctp_msg_hdr_t));
	if (flags & SCTP_CAN_BLOCK) {
		mp = allocb_wait(size, BPRI_MED, 0, &error);
	} else {
		mp = allocb(size, BPRI_MED);
	}
	if (mp) {
		tudr = (struct T_unitdata_req *)mp->b_rptr;
		tudr->PRIM_type = T_UNITDATA_REQ;
		tudr->DEST_length = nlen;
		tudr->DEST_offset = sizeof (*tudr);
		tudr->OPT_length = clen;
		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
		    _TPI_ALIGN_TOPT(nlen));
		/* The name goes directly after the fixed TPI header. */
		if (nlen > 0)
			bcopy(name, tudr + 1, nlen);
		if (clen > 0)
			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
		mp->b_wptr += (tudr ->OPT_offset + clen);
		mp->b_datap->db_type = M_PROTO;
	}
	return (mp);
}

/*
 * Queue one user message on the association for transmission.
 *
 * 'mp' is the M_PROTO header mblk carrying a T_UNITDATA_REQ (the layout
 * built by sctp_alloc_hdr()); the user data, if any, is the M_DATA chain
 * on its b_cont.  An optional destination address and an optional
 * SCTP_SNDRCV ancillary item override the association defaults for
 * stream id, flags, ppid, context and time-to-live.  'flags' is unused.
 *
 * On the normal path the header mblk is rewritten in place as an
 * sctp_msg_hdr_t, appended to the sctp_xmit_unsent list, and sctp_output()
 * is called if the association is in SCTPS_ESTABLISHED.  With MSG_ABORT
 * the data is handed to sctp_user_abort() and the header mblk is freed.
 *
 * Returns 0 on success, otherwise EINVAL, EAFNOSUPPORT, ENOMEM or EPIPE.
 * On a non-zero return the header mblk has been neither queued nor freed
 * by this function (presumably the caller disposes of it -- TODO confirm).
 *
 * NOTE(review): when there is no data mblk and MSG_ABORT is not set, this
 * returns 0 without queueing or freeing the header mblk -- confirm that
 * the caller's ownership rules cover that case.
 */
/*ARGSUSED2*/
int
sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
{
	sctp_faddr_t	*fp = NULL;
	struct T_unitdata_req	*tudr;
	int		error = 0;
	mblk_t		*mproto = mp;
	in6_addr_t	*addr;
	in6_addr_t	tmpaddr;
	uint16_t	sid = sctp->sctp_def_stream;
	uint32_t	ppid = sctp->sctp_def_ppid;
	uint32_t	context = sctp->sctp_def_context;
	uint16_t	msg_flags = sctp->sctp_def_flags;
	sctp_msg_hdr_t	*sctp_msg_hdr;
	uint32_t	msg_len = 0;
	uint32_t	timetolive = sctp->sctp_def_timetolive;
	conn_t		*connp = sctp->sctp_connp;

	ASSERT(DB_TYPE(mproto) == M_PROTO);

	/* From here on 'mp' is the data chain; 'mproto' is the header. */
	mp = mp->b_cont;
	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);

	tudr = (struct T_unitdata_req *)mproto->b_rptr;
	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);

	/* Get destination address, if specified */
	if (tudr->DEST_length > 0) {
		sin_t *sin;
		sin6_t *sin6;

		/*
		 * The family is read before DEST_length is checked against
		 * the size of the matching sockaddr structure.
		 */
		sin = (struct sockaddr_in *)
		    (mproto->b_rptr + tudr->DEST_offset);
		switch (sin->sin_family) {
		case AF_INET:
			if (tudr->DEST_length < sizeof (*sin)) {
				return (EINVAL);
			}
			/* Peer addresses are looked up as in6_addr_t. */
			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
			addr = &tmpaddr;
			break;
		case AF_INET6:
			if (tudr->DEST_length < sizeof (*sin6)) {
				return (EINVAL);
			}
			sin6 = (struct sockaddr_in6 *)
			    (mproto->b_rptr + tudr->DEST_offset);
			addr = &sin6->sin6_addr;
			break;
		default:
			return (EAFNOSUPPORT);
		}
		/* The address must be one of this association's peers. */
		fp = sctp_lookup_faddr(sctp, addr);
		if (fp == NULL) {
			return (EINVAL);
		}
	}
	/* Ancillary Data? */
	if (tudr->OPT_length > 0) {
		struct cmsghdr		*cmsg;
		char			*cend;
		struct sctp_sndrcvinfo	*sndrcv;

		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
		cend = ((char *)cmsg + tudr->OPT_length);
		ASSERT(cend <= (char *)mproto->b_wptr);

		/*
		 * Walk the cmsg list until the first SCTP_SNDRCV item is
		 * found, an item would run past the option area, or an item
		 * with a zero length is seen.
		 */
		for (;;) {
			if ((char *)(cmsg + 1) > cend ||
			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
				break;
			}
			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
				if (cmsg->cmsg_len <
				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
					return (EINVAL);
				}
				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
				sid = sndrcv->sinfo_stream;
				msg_flags = sndrcv->sinfo_flags;
				ppid = sndrcv->sinfo_ppid;
				context = sndrcv->sinfo_context;
				timetolive = sndrcv->sinfo_timetolive;
				break;
			}
			/* A zero cmsg_len would never advance; stop instead. */
			if (cmsg->cmsg_len > 0)
				cmsg = CMSG_NEXT(cmsg);
			else
				break;
		}
	}
	if (msg_flags & MSG_ABORT) {
		/* Hand sctp_user_abort() a single contiguous data mblk. */
		if (mp && mp->b_cont) {
			mblk_t *pump = msgpullup(mp, -1);
			if (!pump) {
				return (ENOMEM);
			}
			freemsg(mp);
			mp = pump;
			mproto->b_cont = mp;
		}
		RUN_SCTP(sctp);
		sctp_user_abort(sctp, mp);
		freemsg(mproto);
		goto done2;
	}
	/* No data mblk: return 'error', which is still 0 here. */
	if (mp == NULL)
		goto done;

	/*
	 * RUN_SCTP()/WAKE_SCTP() bracket the rest of the function; every
	 * exit below this point goes through a WAKE_SCTP().
	 */
	RUN_SCTP(sctp);

	/* Reject any new data requests if we are shutting down */
	if (sctp->sctp_state > SCTPS_ESTABLISHED ||
	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
		error = EPIPE;
		goto unlock_done;
	}

	/* Re-use the mproto to store relevant info. */
	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));

	/* The TPI header is no longer needed; overwrite it from db_base. */
	mproto->b_rptr = mproto->b_datap->db_base;
	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);

	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
	sctp_msg_hdr->smh_context = context;
	sctp_msg_hdr->smh_sid = sid;
	sctp_msg_hdr->smh_ppid = ppid;
	sctp_msg_hdr->smh_flags = msg_flags;
	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
	sctp_msg_hdr->smh_tob = ddi_get_lbolt64();
	/* Total the data bytes; this leaves mp == NULL afterwards. */
	for (; mp != NULL; mp = mp->b_cont)
		msg_len += MBLKL(mp);
	sctp_msg_hdr->smh_msglen = msg_len;

	/* User requested specific destination */
	SCTP_SET_CHUNK_DEST(mproto, fp);

	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
	    sid >= sctp->sctp_num_ostr) {
		/* Send sendfail event */
		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
		    B_FALSE);
		error = EINVAL;
		goto unlock_done;
	}

	/* no data */
	if (msg_len == 0) {
		sctp_sendfail_event(sctp, dupmsg(mproto),
		    SCTP_ERR_NO_USR_DATA, B_FALSE);
		error = EINVAL;
		goto unlock_done;
	}

	/* Add it to the unsent list */
	if (sctp->sctp_xmit_unsent == NULL) {
		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
	} else {
		sctp->sctp_xmit_unsent_tail->b_next = mproto;
		sctp->sctp_xmit_unsent_tail = mproto;
	}
	sctp->sctp_unsent += msg_len;
	BUMP_LOCAL(sctp->sctp_msgcount);
	/*
	 * Notify sockfs if the tx queue is full.
	 */
	if (SCTP_TXQ_LEN(sctp) >= connp->conn_sndbuf) {
		sctp->sctp_txq_full = 1;
		sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, B_TRUE);
	}
	/* Before ESTABLISHED the message simply stays on the unsent list. */
	if (sctp->sctp_state == SCTPS_ESTABLISHED)
		sctp_output(sctp, UINT_MAX);
done2:
	/* Success exit for the paths that did RUN_SCTP(). */
	WAKE_SCTP(sctp);
	return (0);
unlock_done:
	/* Failure exit for the paths that did RUN_SCTP(). */
	WAKE_SCTP(sctp);
done:
	return (error);
}

/*
 * While there are messages on sctp_xmit_unsent, detach each one.
 * For each:
 * allocate space for the chunk header, fill in the data chunk, and fill in
 * the chunk header. Then append it to sctp_xmit_tail.
 * Return after appending as many bytes as required (bytes_to_send).
 * We also return if we've appended one or more chunks, and find a subsequent
 * unsent message is too big to fit in the segment.
 *
 * The first chunk is sized from (mss - firstseg_len); later chunks are
 * sized from the destination's sfa_pmss.  Each chunk is an mblk chain whose
 * first mblk starts with an sctp_data_hdr_t; the chunks of one message are
 * linked through b_next off the message header's b_cont.
 *
 * Returns NULL if firstseg_len is non-zero and the next unsent message does
 * not fit in (mss - firstseg_len).  Otherwise returns sctp->sctp_xmit_tail.
 * That is also the return value when dupb()/allocb() fails part way through
 * a message; in that case the unprocessed data is put back at the front of
 * the message and, if chunks were already built, the message is marked with
 * SCTP_MSG_SET_CHUNKED() so a later call can resume.
 *
 * The forward declaration near the top of the file is static, so this
 * definition has internal linkage even though it does not repeat the
 * keyword.
 */
mblk_t *
sctp_chunkify(sctp_t *sctp, int mss, int firstseg_len, int bytes_to_send)
{
	mblk_t			*mp;
	mblk_t			*chunk_mp;
	mblk_t			*chunk_head;
	mblk_t			*chunk_hdr;
	mblk_t			*chunk_tail = NULL;
	int			count;
	int			chunksize;
	sctp_data_hdr_t		*sdc;
	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
	sctp_faddr_t		*fp;
	sctp_faddr_t		*fp1;
	size_t			xtralen;
	sctp_msg_hdr_t		*msg_hdr;
	sctp_stack_t		*sctps = sctp->sctp_sctps;
	sctp_msg_hdr_t		*next_msg_hdr;
	size_t			nextlen;
	int			remaining_len = mss - firstseg_len;

	ASSERT(remaining_len >= 0);

	/* Use the per-message destination if one was recorded. */
	fp = SCTP_CHUNK_DEST(mdblk);
	if (fp == NULL)
		fp = sctp->sctp_current;
	/*
	 * xtralen is the size of a separately allocated chunk header mblk:
	 * IP/SCTP header template + write offset + data chunk header.
	 */
	if (fp->isv4)
		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
		    sizeof (*sdc);
	else
		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
		    sizeof (*sdc);
	/*
	 * chunksize is the payload budget of the chunk being built; count
	 * is how much of that budget is still unfilled.
	 */
	count = chunksize = remaining_len - sizeof (*sdc);
nextmsg:
	next_msg_hdr = (sctp_msg_hdr_t *)sctp->sctp_xmit_unsent->b_rptr;
	nextlen = next_msg_hdr->smh_msglen;
	/*
	 * Will the entire next message fit in the current packet ?
	 * if not, leave it on the unsent list.
	 */
	if ((firstseg_len != 0) && (nextlen > remaining_len))
		return (NULL);

	chunk_mp = mdblk->b_cont;

	/*
	 * If this partially chunked, we ignore the next one for now and
	 * use the one already present. For the unchunked bits, we use the
	 * length of the last chunk.
	 */
	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
		int	chunk_len;

		/*
		 * A partially chunked message keeps its unprocessed data
		 * first on b_cont with the finished chunks behind it on
		 * b_next; separate the two again.
		 */
		ASSERT(chunk_mp->b_next != NULL);
		mdblk->b_cont = chunk_mp->b_next;
		chunk_mp->b_next = NULL;
		SCTP_MSG_CLEAR_CHUNKED(mdblk);
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
			count = chunksize = fp->sfa_pmss - chunk_len;
		else
			count = chunksize = fp->sfa_pmss;
		count = chunksize = count - sizeof (*sdc);
	} else {
		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
			/* Drop the message before it is ever chunked. */
			sctp->sctp_xmit_unsent = mdblk->b_next;
			if (sctp->sctp_xmit_unsent == NULL)
				sctp->sctp_xmit_unsent_tail = NULL;
			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
			sctp->sctp_unsent -= msg_hdr->smh_msglen;
			mdblk->b_next = NULL;
			BUMP_LOCAL(sctp->sctp_prsctpdrop);
			/*
			 * Update ULP the amount of queued data, which is
			 * sent-unack'ed + unsent.
			 */
			if (!SCTP_IS_DETACHED(sctp))
				SCTP_TXQ_UPDATE(sctp);
			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
			goto try_next;
		}
		/* b_cont is rebuilt below as the list of finished chunks. */
		mdblk->b_cont = NULL;
	}
	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
nextchunk:
	chunk_head = chunk_mp;
	chunk_tail = NULL;

	/* Skip as many mblk's as we need */
	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
		count -= MBLKL(chunk_mp);
		chunk_tail = chunk_mp;
		chunk_mp = chunk_mp->b_cont;
	}
	/* Split the chain, if needed */
	if (chunk_mp != NULL) {
		if (count > 0) {
			/*
			 * The next mblk only partly fits: dupb() it, let the
			 * duplicate cover the first 'count' bytes and advance
			 * the original past them.
			 */
			mblk_t	*split_mp = dupb(chunk_mp);

			if (split_mp == NULL) {
				/* Put the unprocessed data back and give up. */
				if (mdblk->b_cont == NULL) {
					mdblk->b_cont = chunk_head;
				} else {
					SCTP_MSG_SET_CHUNKED(mdblk);
					ASSERT(chunk_head->b_next == NULL);
					chunk_head->b_next = mdblk->b_cont;
					mdblk->b_cont = chunk_head;
				}
				return (sctp->sctp_xmit_tail);
			}
			if (chunk_tail != NULL) {
				chunk_tail->b_cont = split_mp;
				chunk_tail = chunk_tail->b_cont;
			} else {
				chunk_head = chunk_tail = split_mp;
			}
			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
			chunk_mp->b_rptr = chunk_tail->b_wptr;
			count = 0;
		} else if (chunk_tail == NULL) {
			/*
			 * No budget left and nothing collected for this
			 * chunk: retry with a full-size budget.
			 */
			goto next;
		} else {
			/* The chunk ends exactly on an mblk boundary. */
			chunk_tail->b_cont = NULL;
		}
	}
	/* Alloc chunk hdr, if needed */
	if (DB_REF(chunk_head) > 1 ||
	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
		/*
		 * The data block is shared, misaligned, or has no headroom,
		 * so the chunk header cannot be written in front of the data
		 * in place; put it in its own mblk.
		 */
		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/* Re-join this chunk's data with the rest and bail. */
			if (mdblk->b_cont == NULL) {
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				mdblk->b_cont = chunk_head;
			} else {
				SCTP_MSG_SET_CHUNKED(mdblk);
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				ASSERT(chunk_head->b_next == NULL);
				chunk_head->b_next = mdblk->b_cont;
				mdblk->b_cont = chunk_head;
			}
			return (sctp->sctp_xmit_tail);
		}
		/* Place the chunk header at the very end of the new mblk. */
		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
		chunk_hdr->b_cont = chunk_head;
	} else {
		/* Enough private headroom: prepend the header in place. */
		chunk_hdr = chunk_head;
		chunk_hdr->b_rptr -= sizeof (*sdc);
	}
	ASSERT(chunk_hdr->b_datap->db_ref == 1);
	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
	sdc->sdh_id = CHUNK_DATA;
	sdc->sdh_flags = 0;
	/* (chunksize - count) is the payload actually placed in this chunk. */
	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
	ASSERT(sdc->sdh_len);
	sdc->sdh_sid = htons(msg_hdr->smh_sid);
	/*
	 * We defer assigning the SSN just before sending the chunk, else
	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
	 * to send a Forward TSN to let the peer know. Some more comments
	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
	 */
	sdc->sdh_payload_id = msg_hdr->smh_ppid;

	if (mdblk->b_cont == NULL) {
		/* First chunk of the message. */
		mdblk->b_cont = chunk_hdr;
		SCTP_DATA_SET_BBIT(sdc);
	} else {
		/* Append after the last chunk built so far. */
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = chunk_hdr;
	}

	bytes_to_send -= (chunksize - count);
	if (chunk_mp != NULL) {
		/*
		 * More data remains in this message.  The label sits inside
		 * the if body so the "goto next" above can reuse the reset.
		 */
next:
		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
		goto nextchunk;
	}
	/* That was the last chunk: the message is fully chunked. */
	SCTP_DATA_SET_EBIT(sdc);
	sctp->sctp_xmit_unsent = mdblk->b_next;
	if (mdblk->b_next == NULL) {
		sctp->sctp_xmit_unsent_tail = NULL;
	}
	mdblk->b_next = NULL;

	/*
	 * Append the message to the end of the transmit list.  Note that
	 * sctp_xmit_tail itself is only set here when the list was empty.
	 */
	if (sctp->sctp_xmit_tail == NULL) {
		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
	} else {
		mp = sctp->sctp_xmit_tail;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = mdblk;
		mdblk->b_prev = mp;
	}
try_next:
	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
		mdblk = sctp->sctp_xmit_unsent;
		fp1 = SCTP_CHUNK_DEST(mdblk);
		if (fp1 == NULL)
			fp1 = sctp->sctp_current;
		if (fp == fp1) {
			size_t len = MBLKL(mdblk->b_cont);
			/*
			 * Same destination: either keep the leftover budget
			 * (less a chunk header, rounded down to a multiple
			 * of 4) or start over with a full-size budget.
			 */
			if ((count > 0) &&
			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
			    (len <= count))) {
				count -= sizeof (*sdc);
				count = chunksize = count - (count & 0x3);
			} else {
				count = chunksize = fp->sfa_pmss -
				    sizeof (*sdc);
			}
		} else {
			/* Different destination: recompute header sizes. */
			if (fp1->isv4)
				xtralen = sctp->sctp_hdr_len;
			else
				xtralen = sctp->sctp_hdr6_len;
			xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
			fp = fp1;
		}
		goto nextmsg;
	}
	return (sctp->sctp_xmit_tail);
}

/*
 * Free a whole message: every chunk linked through b_next off the header's
 * b_cont (each chunk itself being an mblk chain), then the header mblk.
 * The header must already be unlinked from any list (b_prev is asserted to
 * be NULL).
 */
void
sctp_free_msg(mblk_t *ump)
{
	mblk_t *mp, *nmp;

	for (mp = ump->b_cont; mp; mp = nmp) {
		/* Save the successor before the links are cleared. */
		nmp = mp->b_next;
		mp->b_next = mp->b_prev = NULL;
		freemsg(mp);
	}
	ASSERT(!ump->b_prev);
	ump->b_next = NULL;
	freeb(ump);
}

/*
 * Prepend the association's header template (sctp_iphc for IPv4,
 * sctp_iphc6 for IPv6) to 'mp', leaving 'sacklen' bytes after it that are
 * filled in by sctp_fill_sack() when sacklen is non-zero.  If 'fp' is not
 * the current peer address the destination (and possibly source) address
 * in the copied header is rewritten for 'fp'.
 *
 * Returns the mblk that now starts the packet: either 'mp' itself with
 * b_rptr moved back, or a newly allocated mblk with 'mp' on its b_cont.
 * Returns NULL on failure, with *error (when 'error' is not NULL) set to
 * EHOSTUNREACH or ENOMEM; 'mp' is not freed in that case.
 */
mblk_t *
sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
    int *error)
{
	int hdrlen;
	uchar_t *hdr;
	int isv4 = fp->isv4;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	if (error != NULL)
		*error = 0;

	if (isv4) {
		hdrlen = sctp->sctp_hdr_len;
		hdr = sctp->sctp_iphc;
	} else {
		hdrlen = sctp->sctp_hdr6_len;
		hdr = sctp->sctp_iphc6;
	}
	/*
	 * A reject|blackhole could mean that the address is 'down'. Similarly,
	 * it is possible that the address went down, we tried to send an
	 * heartbeat and ended up setting fp->saddr as unspec because we
	 * didn't have any usable source address.  In either case
	 * sctp_get_dest() will try find an IRE, if available, and set
	 * the source address, if needed.  If we still don't have any
	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
	 * we return EHOSTUNREACH.
	 */
	ASSERT(fp->ixa->ixa_ire != NULL);
	if ((fp->ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
	    SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
		sctp_get_dest(sctp, fp);
		if (fp->state == SCTP_FADDRS_UNREACH) {
			if (error != NULL)
				*error = EHOSTUNREACH;
			return (NULL);
		}
	}
	/* Copy in IP header. */
	if ((mp->b_rptr - mp->b_datap->db_base) <
	    (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) {
		mblk_t *nmp;

		/*
		 * This can happen if IP headers are adjusted after
		 * data was moved into chunks, or during retransmission,
		 * or things like snoop is running.
		 */
		nmp = allocb(sctps->sctps_wroff_xtra + hdrlen + sacklen,
		    BPRI_MED);
		if (nmp == NULL) {
			if (error != NULL)
				*error = ENOMEM;
			return (NULL);
		}
		nmp->b_rptr += sctps->sctps_wroff_xtra;
		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
		nmp->b_cont = mp;
		mp = nmp;
	} else {
		/* Enough headroom in front of the data: reuse it. */
		mp->b_rptr -= (hdrlen + sacklen);
	}
	bcopy(hdr, mp->b_rptr, hdrlen);
	if (sacklen) {
		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
	}
	if (fp != sctp->sctp_current) {
		/* change addresses in header */
		if (isv4) {
			ipha_t *iph = (ipha_t *)mp->b_rptr;

			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
				    iph->ipha_src);
			} else if (sctp->sctp_bound_to_all) {
				iph->ipha_src = INADDR_ANY;
			}
		} else {
			ip6_t *ip6h = (ip6_t *)mp->b_rptr;

			ip6h->ip6_dst = fp->faddr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
				ip6h->ip6_src = fp->saddr;
			} else if (sctp->sctp_bound_to_all) {
				ip6h->ip6_src = ipv6_all_zeros;
			}
		}
	}
	return (mp);
}

/*
 * SCTP requires every chunk to be padded so that the total length
 * is a multiple of SCTP_ALIGN.  This function returns a mblk with
 * the specified pad length.
664 */ 665 static mblk_t * 666 sctp_get_padding(sctp_t *sctp, int pad) 667 { 668 mblk_t *fill; 669 670 ASSERT(pad < SCTP_ALIGN); 671 ASSERT(sctp->sctp_pad_mp != NULL); 672 if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) { 673 fill->b_wptr += pad; 674 return (fill); 675 } 676 677 /* 678 * The memory saving path of reusing the sctp_pad_mp 679 * fails may be because it has been dupb() too 680 * many times (DBLK_REFMAX). Use the memory consuming 681 * path of allocating the pad mblk. 682 */ 683 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) { 684 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */ 685 *(int32_t *)fill->b_rptr = 0; 686 fill->b_wptr += pad; 687 } 688 return (fill); 689 } 690 691 static mblk_t * 692 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) 693 { 694 mblk_t *meta; 695 mblk_t *start_mp = NULL; 696 mblk_t *end_mp = NULL; 697 mblk_t *mp, *nmp; 698 mblk_t *fill; 699 sctp_data_hdr_t *sdh; 700 int msglen; 701 int extra; 702 sctp_msg_hdr_t *msg_hdr; 703 sctp_faddr_t *old_fp = NULL; 704 sctp_faddr_t *chunk_fp; 705 sctp_stack_t *sctps = sctp->sctp_sctps; 706 707 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 708 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 709 if (SCTP_IS_MSG_ABANDONED(meta) || 710 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 711 continue; 712 } 713 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 714 if (SCTP_CHUNK_WANT_REXMIT(mp)) { 715 /* 716 * Use the same peer address to do fast 717 * retransmission. If the original peer 718 * address is dead, switch to the current 719 * one. Record the old one so that we 720 * will pick the chunks sent to the old 721 * one for fast retransmission. 
722 */ 723 chunk_fp = SCTP_CHUNK_DEST(mp); 724 if (*fp == NULL) { 725 *fp = chunk_fp; 726 if ((*fp)->state != SCTP_FADDRS_ALIVE) { 727 old_fp = *fp; 728 *fp = sctp->sctp_current; 729 } 730 } else if (old_fp == NULL && *fp != chunk_fp) { 731 continue; 732 } else if (old_fp != NULL && 733 old_fp != chunk_fp) { 734 continue; 735 } 736 737 sdh = (sctp_data_hdr_t *)mp->b_rptr; 738 msglen = ntohs(sdh->sdh_len); 739 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) { 740 extra = SCTP_ALIGN - extra; 741 } 742 743 /* 744 * We still return at least the first message 745 * even if that message cannot fit in as 746 * PMTU may have changed. 747 */ 748 if (*total + msglen + extra > 749 (*fp)->sfa_pmss && start_mp != NULL) { 750 return (start_mp); 751 } 752 if ((nmp = dupmsg(mp)) == NULL) 753 return (start_mp); 754 if (extra > 0) { 755 fill = sctp_get_padding(sctp, extra); 756 if (fill != NULL) { 757 linkb(nmp, fill); 758 } else { 759 return (start_mp); 760 } 761 } 762 BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans); 763 BUMP_LOCAL(sctp->sctp_rxtchunks); 764 SCTP_CHUNK_CLEAR_REXMIT(mp); 765 if (start_mp == NULL) { 766 start_mp = nmp; 767 } else { 768 linkb(end_mp, nmp); 769 } 770 end_mp = nmp; 771 *total += msglen + extra; 772 dprint(2, ("sctp_find_fast_rexmit_mblks: " 773 "tsn %x\n", sdh->sdh_tsn)); 774 } 775 } 776 } 777 /* Clear the flag as there is no more message to be fast rexmitted. */ 778 sctp->sctp_chk_fast_rexmit = B_FALSE; 779 return (start_mp); 780 } 781 782 /* A debug function just to make sure that a mblk chain is not broken */ 783 #ifdef DEBUG 784 static boolean_t 785 sctp_verify_chain(mblk_t *head, mblk_t *tail) 786 { 787 mblk_t *mp = head; 788 789 if (head == NULL || tail == NULL) 790 return (B_TRUE); 791 while (mp != NULL) { 792 if (mp == tail) 793 return (B_TRUE); 794 mp = mp->b_next; 795 } 796 return (B_FALSE); 797 } 798 #endif 799 800 /* 801 * Gets the next unsent chunk to transmit. Messages that are abandoned are 802 * skipped. 
 * A message can be abandoned if it has a non-zero timetolive and
 * transmission has not yet started or if it is a partially reliable
 * message and its time is up (assuming we are PR-SCTP aware).
 * We only return a chunk if it will fit entirely in the current packet.
 * 'cansend' is used to determine if need to try and chunkify messages from
 * the unsent list, if any, and also as an input to sctp_chunkify() if so.
 *
 * firstseg_len indicates the space already used, cansend represents remaining
 * space in the window, ((sfa_pmss - firstseg_len) can therefore reasonably
 * be used to compute the cansend arg).
 *
 * The search starts at 'meta' and follows b_next.  On success the message
 * header is returned and *mp is set to the first chunk in it for which
 * SCTP_CHUNK_CANSEND() is true.  NULL is returned when nothing can be sent;
 * *error is then either 0 or the non-zero value returned by
 * sctp_check_abandoned_msg().  A non-NULL 'fp' restricts chunkifying to
 * unsent messages that do not name a different, alive destination.
 */
mblk_t *
sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error,
    int32_t firstseg_len, uint32_t cansend, sctp_faddr_t *fp)
{
	mblk_t		*mp1;
	sctp_msg_hdr_t	*msg_hdr;
	mblk_t		*tmp_meta;
	sctp_faddr_t	*fp1;

	ASSERT(error != NULL && mp != NULL);
	*error = 0;

	ASSERT(sctp->sctp_current != NULL);

chunkified:
	while (meta != NULL) {
		/* Saved first: 'meta' may be unlinked and handed off below. */
		tmp_meta = meta->b_next;
		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
		mp1 = meta->b_cont;
		if (SCTP_IS_MSG_ABANDONED(meta))
			goto next_msg;
		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			while (mp1 != NULL) {
				if (SCTP_CHUNK_CANSEND(mp1)) {
					*mp = mp1;
#ifdef	DEBUG
					ASSERT(sctp_verify_chain(
					    sctp->sctp_xmit_head, meta));
#endif
					return (meta);
				}
				mp1 = mp1->b_next;
			}
			goto next_msg;
		}
		/*
		 * If we come here and the first chunk is sent, then we
		 * are PR-SCTP aware, in which case if the cumulative
		 * TSN has moved up to or beyond the first chunk (which
		 * means all the previous messages have been cumulative
		 * SACK'd), then we send a Forward TSN with the last
		 * chunk that was sent in this message. If we can't send
		 * a Forward TSN because previous non-abandoned messages
		 * have not been acked then we will defer the Forward TSN
		 * to sctp_rexmit() or sctp_cumack().
		 */
		if (SCTP_CHUNK_ISSENT(mp1)) {
			*error = sctp_check_abandoned_msg(sctp, meta);
			if (*error != 0) {
#ifdef	DEBUG
				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
				    sctp->sctp_xmit_tail));
#endif
				return (NULL);
			}
			goto next_msg;
		}
		/*
		 * Nothing of this message has been sent: unlink it from the
		 * doubly linked transmit list (head, tail and middle cases)
		 * and report the failure.
		 */
		BUMP_LOCAL(sctp->sctp_prsctpdrop);
		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
		if (meta->b_prev == NULL) {
			ASSERT(sctp->sctp_xmit_head == meta);
			sctp->sctp_xmit_head = tmp_meta;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_next = NULL;
			if (tmp_meta != NULL)
				tmp_meta->b_prev = NULL;
		} else if (meta->b_next == NULL) {
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = meta->b_prev;
			meta->b_prev->b_next = NULL;
			meta->b_prev = NULL;
		} else {
			meta->b_prev->b_next = tmp_meta;
			tmp_meta->b_prev = meta->b_prev;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_prev = NULL;
			meta->b_next = NULL;
		}
		sctp->sctp_unsent -= msg_hdr->smh_msglen;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);
		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
next_msg:
		meta = tmp_meta;
	}
	/* chunkify, if needed */
	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
		ASSERT(sctp->sctp_unsent > 0);
		if (fp == NULL) {
			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
				fp = sctp->sctp_current;
		} else {
			/*
			 * If user specified destination, try to honor that.
			 */
			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
			    fp1 != fp) {
				goto chunk_done;
			}
		}
		meta = sctp_chunkify(sctp, fp->sfa_pmss, firstseg_len, cansend);
		if (meta == NULL)
			goto chunk_done;
		/*
		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
		 * new chunk(s) to the tail, so we need to skip the
		 * sctp_xmit_tail, which would have already been processed.
		 * This could happen when there is unacked chunks, but
		 * nothing new to send.
		 * When sctp_chunkify() is called when the transmit queue
		 * is empty then we need to start from sctp_xmit_tail.
		 */
		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
#ifdef	DEBUG
			mp1 = sctp->sctp_xmit_tail->b_cont;
			while (mp1 != NULL) {
				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
				mp1 = mp1->b_next;
			}
#endif
			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
				goto chunk_done;
		}
		/* Rescan, starting from the newly chunkified messages. */
		goto chunkified;
	}
chunk_done:
#ifdef	DEBUG
	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
#endif
	return (NULL);
}

/*
 * Send one packet of chunks that are marked for fast retransmission.
 * The chunks are gathered by sctp_find_fast_rexmit_mblks(), a protocol
 * header is added with no SACK, and the packet is passed to
 * conn_ip_output().  If no chunk is found, or the header cannot be added,
 * a kstat is bumped and nothing is sent.
 */
void
sctp_fast_rexmit(sctp_t *sctp)
{
	mblk_t		*mp, *head;
	int		pktlen = 0;
	sctp_faddr_t	*fp = NULL;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	ASSERT(sctp->sctp_xmit_head != NULL);
	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
	if (mp == NULL) {
		SCTP_KSTAT(sctps, sctp_fr_not_found);
		return;
	}
	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
		/* sctp_add_proto_hdr() does not free mp on failure. */
		freemsg(mp);
		SCTP_KSTAT(sctps, sctp_fr_add_hdr);
		return;
	}
	/*
	 * The packet is larger than the path MSS (the finder always returns
	 * at least one chunk), so clear the IPv4 fragment offset/flags word
	 * -- presumably so that DF is not set and IP may fragment; confirm.
	 */
	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
		ipha_t *iph = (ipha_t *)head->b_rptr;

		iph->ipha_fragment_offset_and_flags = 0;
	}

	sctp_set_iplen(sctp, head, fp->ixa);
	(void) conn_ip_output(head, fp->ixa);
	BUMP_LOCAL(sctp->sctp_opkts);
	sctp->sctp_active = fp->lastactive = ddi_get_lbolt64();
}

void
sctp_output(sctp_t *sctp, uint_t num_pkt) 986 { 987 mblk_t *mp = NULL; 988 mblk_t *nmp; 989 mblk_t *head; 990 mblk_t *meta = sctp->sctp_xmit_tail; 991 mblk_t *fill = NULL; 992 uint16_t chunklen; 993 uint32_t cansend; 994 int32_t seglen; 995 int32_t xtralen; 996 int32_t sacklen; 997 int32_t pad = 0; 998 int32_t pathmax; 999 int extra; 1000 int64_t now = ddi_get_lbolt64(); 1001 sctp_faddr_t *fp; 1002 sctp_faddr_t *lfp; 1003 sctp_data_hdr_t *sdc; 1004 int error; 1005 boolean_t notsent = B_TRUE; 1006 sctp_stack_t *sctps = sctp->sctp_sctps; 1007 uint32_t tsn; 1008 1009 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1010 sacklen = 0; 1011 } else { 1012 /* send a SACK chunk */ 1013 sacklen = sizeof (sctp_chunk_hdr_t) + 1014 sizeof (sctp_sack_chunk_t) + 1015 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1016 lfp = sctp->sctp_lastdata; 1017 ASSERT(lfp != NULL); 1018 if (lfp->state != SCTP_FADDRS_ALIVE) 1019 lfp = sctp->sctp_current; 1020 } 1021 1022 cansend = sctp->sctp_frwnd; 1023 if (sctp->sctp_unsent < cansend) 1024 cansend = sctp->sctp_unsent; 1025 1026 /* 1027 * Start persist timer if unable to send or when 1028 * trying to send into a zero window. This timer 1029 * ensures the blocked send attempt is retried. 1030 */ 1031 if ((cansend < sctp->sctp_current->sfa_pmss / 2) && 1032 (sctp->sctp_unacked != 0) && 1033 (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) && 1034 !sctp->sctp_ndelay || 1035 (cansend == 0 && sctp->sctp_unacked == 0 && 1036 sctp->sctp_unsent != 0)) { 1037 head = NULL; 1038 fp = sctp->sctp_current; 1039 goto unsent_data; 1040 } 1041 if (meta != NULL) 1042 mp = meta->b_cont; 1043 while (cansend > 0 && num_pkt-- != 0) { 1044 pad = 0; 1045 1046 /* 1047 * Find first segment eligible for transmit. 1048 */ 1049 while (mp != NULL) { 1050 if (SCTP_CHUNK_CANSEND(mp)) 1051 break; 1052 mp = mp->b_next; 1053 } 1054 if (mp == NULL) { 1055 meta = sctp_get_msg_to_send(sctp, &mp, 1056 meta == NULL ? 
NULL : meta->b_next, &error, sacklen, 1057 cansend, NULL); 1058 if (error != 0 || meta == NULL) { 1059 head = NULL; 1060 fp = sctp->sctp_current; 1061 goto unsent_data; 1062 } 1063 sctp->sctp_xmit_tail = meta; 1064 } 1065 1066 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1067 seglen = ntohs(sdc->sdh_len); 1068 xtralen = sizeof (*sdc); 1069 chunklen = seglen - xtralen; 1070 1071 /* 1072 * Check rwnd. 1073 */ 1074 if (chunklen > cansend) { 1075 head = NULL; 1076 fp = SCTP_CHUNK_DEST(meta); 1077 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1078 fp = sctp->sctp_current; 1079 goto unsent_data; 1080 } 1081 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1082 extra = SCTP_ALIGN - extra; 1083 1084 /* 1085 * Pick destination address, and check cwnd. 1086 */ 1087 if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) && 1088 (seglen + sacklen + extra <= lfp->sfa_pmss)) { 1089 /* 1090 * Only include SACK chunk if it can be bundled 1091 * with a data chunk, and sent to sctp_lastdata. 1092 */ 1093 pathmax = lfp->cwnd - lfp->suna; 1094 1095 fp = lfp; 1096 if ((nmp = dupmsg(mp)) == NULL) { 1097 head = NULL; 1098 goto unsent_data; 1099 } 1100 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1101 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, 1102 &error); 1103 if (head == NULL) { 1104 /* 1105 * If none of the source addresses are 1106 * available (i.e error == EHOSTUNREACH), 1107 * pretend we have sent the data. We will 1108 * eventually time out trying to retramsmit 1109 * the data if the interface never comes up. 1110 * If we have already sent some stuff (i.e., 1111 * notsent is B_FALSE) then we are fine, else 1112 * just mark this packet as sent. 
1113 */ 1114 if (notsent && error == EHOSTUNREACH) { 1115 SCTP_CHUNK_SENT(sctp, mp, sdc, 1116 fp, chunklen, meta); 1117 } 1118 freemsg(nmp); 1119 SCTP_KSTAT(sctps, sctp_output_failed); 1120 goto unsent_data; 1121 } 1122 seglen += sacklen; 1123 xtralen += sacklen; 1124 sacklen = 0; 1125 } else { 1126 fp = SCTP_CHUNK_DEST(meta); 1127 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1128 fp = sctp->sctp_current; 1129 /* 1130 * If we haven't sent data to this destination for 1131 * a while, do slow start again. 1132 */ 1133 if (now - fp->lastactive > fp->rto) { 1134 SET_CWND(fp, fp->sfa_pmss, 1135 sctps->sctps_slow_start_after_idle); 1136 } 1137 1138 pathmax = fp->cwnd - fp->suna; 1139 if (seglen + extra > pathmax) { 1140 head = NULL; 1141 goto unsent_data; 1142 } 1143 if ((nmp = dupmsg(mp)) == NULL) { 1144 head = NULL; 1145 goto unsent_data; 1146 } 1147 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1148 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error); 1149 if (head == NULL) { 1150 /* 1151 * If none of the source addresses are 1152 * available (i.e error == EHOSTUNREACH), 1153 * pretend we have sent the data. We will 1154 * eventually time out trying to retramsmit 1155 * the data if the interface never comes up. 1156 * If we have already sent some stuff (i.e., 1157 * notsent is B_FALSE) then we are fine, else 1158 * just mark this packet as sent. 1159 */ 1160 if (notsent && error == EHOSTUNREACH) { 1161 SCTP_CHUNK_SENT(sctp, mp, sdc, 1162 fp, chunklen, meta); 1163 } 1164 freemsg(nmp); 1165 SCTP_KSTAT(sctps, sctp_output_failed); 1166 goto unsent_data; 1167 } 1168 } 1169 fp->lastactive = now; 1170 if (pathmax > fp->sfa_pmss) 1171 pathmax = fp->sfa_pmss; 1172 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1173 mp = mp->b_next; 1174 1175 /* 1176 * Use this chunk to measure RTT? 1177 * Must not be a retransmision of an earlier chunk, 1178 * ensure the tsn is current. 
1179 */ 1180 tsn = ntohl(sdc->sdh_tsn); 1181 if (sctp->sctp_out_time == 0 && tsn == (sctp->sctp_ltsn - 1)) { 1182 sctp->sctp_out_time = now; 1183 sctp->sctp_rtt_tsn = tsn; 1184 } 1185 if (extra > 0) { 1186 fill = sctp_get_padding(sctp, extra); 1187 if (fill != NULL) { 1188 linkb(head, fill); 1189 pad = extra; 1190 seglen += extra; 1191 } else { 1192 goto unsent_data; 1193 } 1194 } 1195 /* 1196 * Bundle chunks. We linkb() the chunks together to send 1197 * downstream in a single packet. 1198 * Partial chunks MUST NOT be bundled with full chunks, so we 1199 * rely on sctp_get_msg_to_send() to only return messages that 1200 * will fit entirely in the current packet. 1201 */ 1202 while (seglen < pathmax) { 1203 int32_t new_len; 1204 int32_t new_xtralen; 1205 1206 while (mp != NULL) { 1207 if (SCTP_CHUNK_CANSEND(mp)) 1208 break; 1209 mp = mp->b_next; 1210 } 1211 if (mp == NULL) { 1212 meta = sctp_get_msg_to_send(sctp, &mp, 1213 meta->b_next, &error, seglen, 1214 (seglen - xtralen) >= cansend ? 
0 : 1215 cansend - seglen, fp); 1216 if (error != 0) 1217 break; 1218 /* If no more eligible chunks, cease bundling */ 1219 if (meta == NULL) 1220 break; 1221 sctp->sctp_xmit_tail = meta; 1222 } 1223 ASSERT(mp != NULL); 1224 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) && 1225 fp != SCTP_CHUNK_DEST(meta)) { 1226 break; 1227 } 1228 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1229 chunklen = ntohs(sdc->sdh_len); 1230 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0) 1231 extra = SCTP_ALIGN - extra; 1232 1233 new_len = seglen + chunklen; 1234 new_xtralen = xtralen + sizeof (*sdc); 1235 chunklen -= sizeof (*sdc); 1236 1237 if (new_len - new_xtralen > cansend || 1238 new_len + extra > pathmax) { 1239 break; 1240 } 1241 if ((nmp = dupmsg(mp)) == NULL) 1242 break; 1243 if (extra > 0) { 1244 fill = sctp_get_padding(sctp, extra); 1245 if (fill != NULL) { 1246 pad += extra; 1247 new_len += extra; 1248 linkb(nmp, fill); 1249 } else { 1250 freemsg(nmp); 1251 break; 1252 } 1253 } 1254 seglen = new_len; 1255 xtralen = new_xtralen; 1256 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1257 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1258 linkb(head, nmp); 1259 mp = mp->b_next; 1260 } 1261 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1262 ipha_t *iph = (ipha_t *)head->b_rptr; 1263 1264 /* 1265 * Path MTU is different from what we thought it would 1266 * be when we created chunks, or IP headers have grown. 1267 * Need to clear the DF bit. 
1268 */ 1269 iph->ipha_fragment_offset_and_flags = 0; 1270 } 1271 /* xmit segment */ 1272 ASSERT(cansend >= seglen - pad - xtralen); 1273 cansend -= (seglen - pad - xtralen); 1274 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x " 1275 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n", 1276 seglen - xtralen, ntohl(sdc->sdh_tsn), 1277 ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, 1278 cansend, sctp->sctp_lastack_rxd)); 1279 sctp_set_iplen(sctp, head, fp->ixa); 1280 (void) conn_ip_output(head, fp->ixa); 1281 BUMP_LOCAL(sctp->sctp_opkts); 1282 /* arm rto timer (if not set) */ 1283 if (!fp->timer_running) 1284 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1285 notsent = B_FALSE; 1286 } 1287 sctp->sctp_active = now; 1288 return; 1289 unsent_data: 1290 /* arm persist timer (if rto timer not set) */ 1291 if (!fp->timer_running) 1292 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1293 if (head != NULL) 1294 freemsg(head); 1295 } 1296 1297 /* 1298 * The following two functions initialize and destroy the cache 1299 * associated with the sets used for PR-SCTP. 1300 */ 1301 void 1302 sctp_ftsn_sets_init(void) 1303 { 1304 sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache", 1305 sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL, 1306 NULL, 0); 1307 } 1308 1309 void 1310 sctp_ftsn_sets_fini(void) 1311 { 1312 kmem_cache_destroy(sctp_kmem_ftsn_set_cache); 1313 } 1314 1315 1316 /* Free PR-SCTP sets */ 1317 void 1318 sctp_free_ftsn_set(sctp_ftsn_set_t *s) 1319 { 1320 sctp_ftsn_set_t *p; 1321 1322 while (s != NULL) { 1323 p = s->next; 1324 s->next = NULL; 1325 kmem_cache_free(sctp_kmem_ftsn_set_cache, s); 1326 s = p; 1327 } 1328 } 1329 1330 /* 1331 * Given a message meta block, meta, this routine creates or modifies 1332 * the set that will be used to generate a Forward TSN chunk. If the 1333 * entry for stream id, sid, for this message already exists, the 1334 * sequence number, ssn, is updated if it is greater than the existing 1335 * one. 
If an entry for this sid does not exist, one is created if 1336 * the size does not exceed fp->sfa_pmss. We return false in case 1337 * or an error. 1338 */ 1339 boolean_t 1340 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta, 1341 uint_t *nsets, uint32_t *slen) 1342 { 1343 sctp_ftsn_set_t *p; 1344 sctp_msg_hdr_t *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1345 uint16_t sid = htons(msg_hdr->smh_sid); 1346 /* msg_hdr->smh_ssn is already in NBO */ 1347 uint16_t ssn = msg_hdr->smh_ssn; 1348 1349 ASSERT(s != NULL && nsets != NULL); 1350 ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL)); 1351 1352 if (*s == NULL) { 1353 ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss); 1354 *s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP); 1355 if (*s == NULL) 1356 return (B_FALSE); 1357 (*s)->ftsn_entries.ftsn_sid = sid; 1358 (*s)->ftsn_entries.ftsn_ssn = ssn; 1359 (*s)->next = NULL; 1360 *nsets = 1; 1361 *slen += sizeof (uint32_t); 1362 return (B_TRUE); 1363 } 1364 for (p = *s; p->next != NULL; p = p->next) { 1365 if (p->ftsn_entries.ftsn_sid == sid) { 1366 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1367 p->ftsn_entries.ftsn_ssn = ssn; 1368 return (B_TRUE); 1369 } 1370 } 1371 /* the last one */ 1372 if (p->ftsn_entries.ftsn_sid == sid) { 1373 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1374 p->ftsn_entries.ftsn_ssn = ssn; 1375 } else { 1376 if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss) 1377 return (B_FALSE); 1378 p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, 1379 KM_NOSLEEP); 1380 if (p->next == NULL) 1381 return (B_FALSE); 1382 p = p->next; 1383 p->ftsn_entries.ftsn_sid = sid; 1384 p->ftsn_entries.ftsn_ssn = ssn; 1385 p->next = NULL; 1386 (*nsets)++; 1387 *slen += sizeof (uint32_t); 1388 } 1389 return (B_TRUE); 1390 } 1391 1392 /* 1393 * Given a set of stream id - sequence number pairs, this routing creates 1394 * a Forward TSN chunk. 
The cumulative TSN (advanced peer ack point) 1395 * for the chunk is obtained from sctp->sctp_adv_pap. The caller 1396 * will add the IP/SCTP header. 1397 */ 1398 mblk_t * 1399 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets, 1400 uint_t nsets, uint32_t seglen) 1401 { 1402 mblk_t *ftsn_mp; 1403 sctp_chunk_hdr_t *ch_hdr; 1404 uint32_t *advtsn; 1405 uint16_t schlen; 1406 size_t xtralen; 1407 ftsn_entry_t *ftsn_entry; 1408 sctp_stack_t *sctps = sctp->sctp_sctps; 1409 1410 seglen += sizeof (sctp_chunk_hdr_t); 1411 if (fp->isv4) 1412 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra; 1413 else 1414 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra; 1415 ftsn_mp = allocb(xtralen + seglen, BPRI_MED); 1416 if (ftsn_mp == NULL) 1417 return (NULL); 1418 ftsn_mp->b_rptr += xtralen; 1419 ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen; 1420 1421 ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr; 1422 ch_hdr->sch_id = CHUNK_FORWARD_TSN; 1423 ch_hdr->sch_flags = 0; 1424 /* 1425 * The cast here should not be an issue since seglen is 1426 * the length of the Forward TSN chunk. 1427 */ 1428 schlen = (uint16_t)seglen; 1429 U16_TO_ABE16(schlen, &(ch_hdr->sch_len)); 1430 1431 advtsn = (uint32_t *)(ch_hdr + 1); 1432 U32_TO_ABE32(sctp->sctp_adv_pap, advtsn); 1433 ftsn_entry = (ftsn_entry_t *)(advtsn + 1); 1434 while (nsets > 0) { 1435 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr); 1436 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid; 1437 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn; 1438 ftsn_entry++; 1439 sets = sets->next; 1440 nsets--; 1441 } 1442 return (ftsn_mp); 1443 } 1444 1445 /* 1446 * Given a starting message, the routine steps through all the 1447 * messages whose TSN is less than sctp->sctp_adv_pap and creates 1448 * ftsn sets. The ftsn sets is then used to create an Forward TSN 1449 * chunk. All the messages, that have chunks that are included in the 1450 * ftsn sets, are flagged abandonded. 
If a message is partially sent
 * and is deemed abandoned, all remaining unsent chunks are marked
 * abandoned and are deducted from sctp_unsent.
 *
 * meta/mp are the message and chunk to start from.  On success *nmp is
 * the Forward TSN chunk with the protocol header (and possibly a SACK)
 * prepended by sctp_add_proto_hdr().  *nmp is set to NULL if the chunk
 * or the header cannot be built, and is left untouched if no message
 * qualified at all.  *seglen is set to the length of the cumulative TSN
 * field plus the sid/ssn entries, plus the SACK length when a SACK is
 * piggybacked.  fp is the destination; it is switched to
 * sctp->sctp_lastdata when a SACK is piggybacked.
 */
void
sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
    sctp_faddr_t *fp, uint32_t *seglen)
{
	mblk_t		*mp1 = mp;
	mblk_t		*mp_head = mp;
	mblk_t		*meta_head = meta;
	mblk_t		*head;
	sctp_ftsn_set_t	*sets = NULL;
	uint_t		nsets = 0;
	uint16_t	clen;
	sctp_data_hdr_t	*sdc;
	uint32_t	sacklen;
	uint32_t	adv_pap = sctp->sctp_adv_pap;
	uint32_t	unsent = 0;
	boolean_t	ubit;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/* Room for the new cumulative TSN that leads the chunk body. */
	*seglen = sizeof (uint32_t);

	/*
	 * First pass: collect one sid/ssn set per ordered message covered
	 * by sctp_adv_pap.  Nothing is flagged yet, so a failure further
	 * down leaves the transmit list as it was.
	 */
	sdc = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		/*
		 * Skip adding FTSN sets for un-ordered messages as they do
		 * not have SSNs.
		 */
		ubit = SCTP_DATA_GET_UBIT(sdc);
		if (!ubit &&
		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
			/*
			 * Could not add this message.  Pull sctp_adv_pap
			 * back to the last sent TSN seen so far (adv_pap)
			 * and go on with the sets already collected.
			 */
			meta = NULL;
			sctp->sctp_adv_pap = adv_pap;
			goto ftsn_done;
		}
		/* Remember the TSN of the last sent chunk of this message. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			adv_pap = ntohl(sdc->sdh_tsn);
			mp1 = mp1->b_next;
		}
		meta = meta->b_next;
		if (meta != NULL) {
			mp1 = meta->b_cont;
			/* Stop at the first message with nothing sent. */
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
ftsn_done:
	/*
	 * Can't compare with sets == NULL, since we don't add any
	 * sets for un-ordered messages.
	 */
	if (meta == meta_head)
		return;
	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
	/* The sets are only needed to build the chunk. */
	sctp_free_ftsn_set(sets);
	if (*nmp == NULL)
		return;
	/* Piggyback a SACK if one is pending and it fits. */
	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
		sacklen = 0;
	} else {
		sacklen = sizeof (sctp_chunk_hdr_t) +
		    sizeof (sctp_sack_chunk_t) +
		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
			/* piggybacked SACK doesn't fit */
			sacklen = 0;
		} else {
			fp = sctp->sctp_lastdata;
		}
	}
	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
	if (head == NULL) {
		freemsg(*nmp);
		*nmp = NULL;
		SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
		return;
	}
	*seglen += sacklen;
	*nmp = head;

	/*
	 * XXXNeed to optimise this, the reason it is done here is so
	 * that we don't have to undo in case of failure.
	 *
	 * Second pass over the same messages, now that the packet exists:
	 * flag each message abandoned, run SCTP_CHUNK_SENT() on the sent
	 * but unacked chunks and abandon whatever was never sent.
	 */
	mp1 = mp_head;
	sdc = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta_head != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		if (!SCTP_IS_MSG_ABANDONED(meta_head))
			SCTP_MSG_SET_ABANDONED(meta_head);
		/* Chunks that have been sent. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ISACKED(mp1)) {
				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
				    meta_head);
			}
			mp1 = mp1->b_next;
		}
		/* The rest of the message was never sent. */
		while (mp1 != NULL) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ABANDONED(mp1)) {
				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_ABANDON_CHUNK(mp1);
			}
			mp1 = mp1->b_next;
		}
		meta_head = meta_head->b_next;
		if (meta_head != NULL) {
			mp1 = meta_head->b_cont;
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
	if (unsent > 0) {
		ASSERT(sctp->sctp_unsent >= unsent);
		sctp->sctp_unsent -= unsent;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);
	}
}

/*
 * This function steps through messages starting at meta and checks if
 * the message is abandoned. It stops when it hits an unsent chunk or
 * a message that has all its chunk acked. This is the only place
 * where the sctp_adv_pap is moved forward to indicate abandoned
 * messages.
 *
 * mp is the chunk within meta to start from; its TSN must be beyond
 * sctp_lastack_rxd.  sctp_adv_pap is left alone if the starting message
 * is neither abandoned nor due to be abandoned.
 */
void
sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
{
	uint32_t	tsn = sctp->sctp_adv_pap;
	sctp_data_hdr_t	*sdc;
	sctp_msg_hdr_t	*msg_hdr;

	ASSERT(mp != NULL);
	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
	if (!SCTP_IS_MSG_ABANDONED(meta) &&
	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
		return;
	}
	while (meta != NULL) {
		/* tsn follows the last sent chunk we have walked over. */
		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			tsn = ntohl(sdc->sdh_tsn);
			mp = mp->b_next;
		}
		/* Hit an unsent chunk in this message; stop here. */
		if (mp != NULL)
			break;
		/*
		 * We continue checking for successive messages only if there
		 * is a chunk marked for retransmission. Else, we might
		 * end up sending FTSN prematurely for chunks that have been
		 * sent, but not yet acked.
		 */
		if ((meta = meta->b_next) != NULL) {
			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
			if (!SCTP_IS_MSG_ABANDONED(meta) &&
			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
				break;
			}
			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
				if (!SCTP_CHUNK_ISSENT(mp)) {
					sctp->sctp_adv_pap = tsn;
					return;
				}
				if (SCTP_CHUNK_WANT_REXMIT(mp))
					break;
			}
			/* No chunk of this message wants retransmission. */
			if (mp == NULL)
				break;
		}
	}
	sctp->sctp_adv_pap = tsn;
}


/*
 * Determine if we should bundle a data chunk with the chunk being
 * retransmitted.  A chunk that is abandoned is never bundled.
 * Otherwise we bundle if
 *
 * - the chunk is sent to the same destination and unack'ed.
 *
 * OR
 *
 * - the chunk's flags are anything other than "SENT without REXMIT",
 *   i.e. the chunk is unsent (new data) or has both SENT and REXMIT set.
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED((mp)) &&					\
	((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) &&	\
	!SCTP_CHUNK_ISACKED(mp))) ||					\
	(((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \
	SCTP_CHUNK_FLAG_SENT)))

/*
 * Retransmit first segment which hasn't been acked with cumtsn or send
 * a Forward TSN chunk, if appropriate.
1661 */ 1662 void 1663 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) 1664 { 1665 mblk_t *mp; 1666 mblk_t *nmp = NULL; 1667 mblk_t *head; 1668 mblk_t *meta = sctp->sctp_xmit_head; 1669 mblk_t *fill; 1670 uint32_t seglen = 0; 1671 uint32_t sacklen; 1672 uint16_t chunklen; 1673 int extra; 1674 sctp_data_hdr_t *sdc; 1675 sctp_faddr_t *fp; 1676 uint32_t adv_pap = sctp->sctp_adv_pap; 1677 boolean_t do_ftsn = B_FALSE; 1678 boolean_t ftsn_check = B_TRUE; 1679 uint32_t first_ua_tsn; 1680 sctp_msg_hdr_t *mhdr; 1681 sctp_stack_t *sctps = sctp->sctp_sctps; 1682 int error; 1683 1684 while (meta != NULL) { 1685 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1686 uint32_t tsn; 1687 1688 if (!SCTP_CHUNK_ISSENT(mp)) 1689 goto window_probe; 1690 /* 1691 * We break in the following cases - 1692 * 1693 * if the advanced peer ack point includes the next 1694 * chunk to be retransmited - possibly the Forward 1695 * TSN was lost. 1696 * 1697 * if we are PRSCTP aware and the next chunk to be 1698 * retransmitted is now abandoned 1699 * 1700 * if the next chunk to be retransmitted is for 1701 * the dest on which the timer went off. (this 1702 * message is not abandoned). 1703 * 1704 * We check for Forward TSN only for the first 1705 * eligible chunk to be retransmitted. The reason 1706 * being if the first eligible chunk is skipped (say 1707 * it was sent to a destination other than oldfp) 1708 * then we cannot advance the cum TSN via Forward 1709 * TSN chunk. 1710 * 1711 * Also, ftsn_check is B_TRUE only for the first 1712 * eligible chunk, it will be B_FALSE for all 1713 * subsequent candidate messages for retransmission. 
1714 */ 1715 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1716 tsn = ntohl(sdc->sdh_tsn); 1717 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) { 1718 if (sctp->sctp_prsctp_aware && ftsn_check) { 1719 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) { 1720 ASSERT(sctp->sctp_prsctp_aware); 1721 do_ftsn = B_TRUE; 1722 goto out; 1723 } else { 1724 sctp_check_adv_ack_pt(sctp, 1725 meta, mp); 1726 if (SEQ_GT(sctp->sctp_adv_pap, 1727 adv_pap)) { 1728 do_ftsn = B_TRUE; 1729 goto out; 1730 } 1731 } 1732 ftsn_check = B_FALSE; 1733 } 1734 if (SCTP_CHUNK_DEST(mp) == oldfp) 1735 goto out; 1736 } 1737 } 1738 meta = meta->b_next; 1739 if (meta != NULL && sctp->sctp_prsctp_aware) { 1740 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1741 1742 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || 1743 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { 1744 meta = meta->b_next; 1745 } 1746 } 1747 } 1748 window_probe: 1749 /* 1750 * Retransmit fired for a destination which didn't have 1751 * any unacked data pending. 1752 */ 1753 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) { 1754 /* 1755 * Send a window probe. Inflate frwnd to allow 1756 * sending one segment. 1757 */ 1758 if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) 1759 sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc); 1760 1761 /* next TSN to send */ 1762 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn; 1763 1764 /* 1765 * The above sctp_frwnd adjustment is coarse. The "changed" 1766 * sctp_frwnd may allow us to send more than 1 packet. So 1767 * tell sctp_output() to send only 1 packet. 1768 */ 1769 sctp_output(sctp, 1); 1770 1771 /* Last sent TSN */ 1772 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1773 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); 1774 sctp->sctp_zero_win_probe = B_TRUE; 1775 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1776 } 1777 return; 1778 out: 1779 /* 1780 * After a time out, assume that everything has left the network. So 1781 * we can clear rxt_unacked for the original peer address. 
1782 */ 1783 oldfp->rxt_unacked = 0; 1784 1785 /* 1786 * If we were probing for zero window, don't adjust retransmission 1787 * variables, but the timer is still backed off. 1788 */ 1789 if (sctp->sctp_zero_win_probe) { 1790 mblk_t *pkt; 1791 uint_t pkt_len; 1792 1793 /* 1794 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn 1795 * and sctp_rxt_maxtsn will specify the ZWP packet. 1796 */ 1797 fp = oldfp; 1798 if (oldfp->state != SCTP_FADDRS_ALIVE) 1799 fp = sctp_rotate_faddr(sctp, oldfp); 1800 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 1801 if (pkt != NULL) { 1802 ASSERT(pkt_len <= fp->sfa_pmss); 1803 sctp_set_iplen(sctp, pkt, fp->ixa); 1804 (void) conn_ip_output(pkt, fp->ixa); 1805 BUMP_LOCAL(sctp->sctp_opkts); 1806 } else { 1807 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 1808 } 1809 1810 /* 1811 * The strikes will be clear by sctp_faddr_alive() when the 1812 * other side sends us an ack. 1813 */ 1814 oldfp->strikes++; 1815 sctp->sctp_strikes++; 1816 1817 SCTP_CALC_RXT(sctp, oldfp, sctp->sctp_rto_max); 1818 if (oldfp != fp && oldfp->suna != 0) 1819 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); 1820 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1821 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1822 return; 1823 } 1824 1825 /* 1826 * Enter slowstart for this destination 1827 */ 1828 oldfp->ssthresh = oldfp->cwnd / 2; 1829 if (oldfp->ssthresh < 2 * oldfp->sfa_pmss) 1830 oldfp->ssthresh = 2 * oldfp->sfa_pmss; 1831 oldfp->cwnd = oldfp->sfa_pmss; 1832 oldfp->pba = 0; 1833 fp = sctp_rotate_faddr(sctp, oldfp); 1834 ASSERT(fp != NULL); 1835 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1836 1837 first_ua_tsn = ntohl(sdc->sdh_tsn); 1838 if (do_ftsn) { 1839 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); 1840 if (nmp == NULL) { 1841 sctp->sctp_adv_pap = adv_pap; 1842 goto restart_timer; 1843 } 1844 head = nmp; 1845 /* 1846 * Move to the next unabandoned chunk. XXXCheck if meta will 1847 * always be marked abandoned. 
1848 */ 1849 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) 1850 meta = meta->b_next; 1851 if (meta != NULL) 1852 mp = mp->b_cont; 1853 else 1854 mp = NULL; 1855 goto try_bundle; 1856 } 1857 seglen = ntohs(sdc->sdh_len); 1858 chunklen = seglen - sizeof (*sdc); 1859 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1860 extra = SCTP_ALIGN - extra; 1861 1862 /* Find out if we need to piggyback SACK. */ 1863 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1864 sacklen = 0; 1865 } else { 1866 sacklen = sizeof (sctp_chunk_hdr_t) + 1867 sizeof (sctp_sack_chunk_t) + 1868 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1869 if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1870 /* piggybacked SACK doesn't fit */ 1871 sacklen = 0; 1872 } else { 1873 /* 1874 * OK, we have room to send SACK back. But we 1875 * should send it back to the last fp where we 1876 * receive data from, unless sctp_lastdata equals 1877 * oldfp, then we should probably not send it 1878 * back to that fp. Also we should check that 1879 * the fp is alive. 1880 */ 1881 if (sctp->sctp_lastdata != oldfp && 1882 sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { 1883 fp = sctp->sctp_lastdata; 1884 } 1885 } 1886 } 1887 1888 /* 1889 * Cancel RTT measurement if the retransmitted TSN is before the 1890 * TSN used for timimg. 1891 */ 1892 if (sctp->sctp_out_time != 0 && 1893 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { 1894 sctp->sctp_out_time = 0; 1895 } 1896 /* Clear the counter as the RTT calculation may be off. */ 1897 fp->rtt_updates = 0; 1898 oldfp->rtt_updates = 0; 1899 1900 /* 1901 * After a timeout, we should change the current faddr so that 1902 * new chunks will be sent to the alternate address. 
1903 */ 1904 sctp_set_faddr_current(sctp, fp); 1905 1906 nmp = dupmsg(mp); 1907 if (nmp == NULL) 1908 goto restart_timer; 1909 if (extra > 0) { 1910 fill = sctp_get_padding(sctp, extra); 1911 if (fill != NULL) { 1912 linkb(nmp, fill); 1913 seglen += extra; 1914 } else { 1915 freemsg(nmp); 1916 goto restart_timer; 1917 } 1918 } 1919 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1920 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); 1921 if (head == NULL) { 1922 freemsg(nmp); 1923 SCTP_KSTAT(sctps, sctp_rexmit_failed); 1924 goto restart_timer; 1925 } 1926 seglen += sacklen; 1927 1928 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1929 1930 mp = mp->b_next; 1931 1932 try_bundle: 1933 /* We can at least and at most send 1 packet at timeout. */ 1934 while (seglen < fp->sfa_pmss) { 1935 int32_t new_len; 1936 1937 /* Go through the list to find more chunks to be bundled. */ 1938 while (mp != NULL) { 1939 /* Check if the chunk can be bundled. */ 1940 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) 1941 break; 1942 mp = mp->b_next; 1943 } 1944 /* Go to the next message. */ 1945 if (mp == NULL) { 1946 for (meta = meta->b_next; meta != NULL; 1947 meta = meta->b_next) { 1948 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1949 1950 if (SCTP_IS_MSG_ABANDONED(meta) || 1951 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, 1952 sctp)) { 1953 continue; 1954 } 1955 1956 mp = meta->b_cont; 1957 goto try_bundle; 1958 } 1959 /* 1960 * Check if there is a new message which potentially 1961 * could be bundled with this retransmission. 1962 */ 1963 meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error, 1964 seglen, fp->sfa_pmss - seglen, NULL); 1965 if (error != 0 || meta == NULL) { 1966 /* No more chunk to be bundled. 
*/ 1967 break; 1968 } else { 1969 goto try_bundle; 1970 } 1971 } 1972 1973 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1974 new_len = ntohs(sdc->sdh_len); 1975 chunklen = new_len - sizeof (*sdc); 1976 1977 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 1978 extra = SCTP_ALIGN - extra; 1979 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 1980 break; 1981 if ((nmp = dupmsg(mp)) == NULL) 1982 break; 1983 1984 if (extra > 0) { 1985 fill = sctp_get_padding(sctp, extra); 1986 if (fill != NULL) { 1987 linkb(nmp, fill); 1988 } else { 1989 freemsg(nmp); 1990 break; 1991 } 1992 } 1993 linkb(head, nmp); 1994 1995 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1996 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1997 1998 seglen = new_len; 1999 mp = mp->b_next; 2000 } 2001 done_bundle: 2002 if ((seglen > fp->sfa_pmss) && fp->isv4) { 2003 ipha_t *iph = (ipha_t *)head->b_rptr; 2004 2005 /* 2006 * Path MTU is different from path we thought it would 2007 * be when we created chunks, or IP headers have grown. 2008 * Need to clear the DF bit. 2009 */ 2010 iph->ipha_fragment_offset_and_flags = 0; 2011 } 2012 fp->rxt_unacked += seglen; 2013 2014 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x " 2015 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n", 2016 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), 2017 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); 2018 2019 sctp->sctp_rexmitting = B_TRUE; 2020 sctp->sctp_rxt_nxttsn = first_ua_tsn; 2021 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 2022 sctp_set_iplen(sctp, head, fp->ixa); 2023 (void) conn_ip_output(head, fp->ixa); 2024 BUMP_LOCAL(sctp->sctp_opkts); 2025 2026 /* 2027 * Restart the oldfp timer with exponential backoff and 2028 * the new fp timer for the retransmitted chunks. 2029 */ 2030 restart_timer: 2031 oldfp->strikes++; 2032 sctp->sctp_strikes++; 2033 SCTP_CALC_RXT(sctp, oldfp, sctp->sctp_rto_max); 2034 /* 2035 * If there is still some data in the oldfp, restart the 2036 * retransmission timer. 
If there is no data, the heartbeat will 2037 * continue to run so it will do its job in checking the reachability 2038 * of the oldfp. 2039 */ 2040 if (oldfp != fp && oldfp->suna != 0) 2041 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); 2042 2043 /* 2044 * Should we restart the timer of the new fp? If there is 2045 * outstanding data to the new fp, the timer should be 2046 * running already. So restarting it means that the timer 2047 * will fire later for those outstanding data. But if 2048 * we don't restart it, the timer will fire too early for the 2049 * just retransmitted chunks to the new fp. The reason is that we 2050 * don't keep a timestamp on when a chunk is retransmitted. 2051 * So when the timer fires, it will just search for the 2052 * chunk with the earliest TSN sent to new fp. This probably 2053 * is the chunk we just retransmitted. So for now, let's 2054 * be conservative and restart the timer of the new fp. 2055 */ 2056 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2057 2058 sctp->sctp_active = ddi_get_lbolt64(); 2059 } 2060 2061 /* 2062 * This function is called by sctp_ss_rexmit() to create a packet 2063 * to be retransmitted to the given fp. The given meta and mp 2064 * parameters are respectively the sctp_msg_hdr_t and the mblk of the 2065 * first chunk to be retransmitted. This is also called when we want 2066 * to retransmit a zero window probe from sctp_rexmit() or when we 2067 * want to retransmit the zero window probe after the window has 2068 * opened from sctp_got_sack(). 
2069 */ 2070 mblk_t * 2071 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, 2072 uint_t *packet_len) 2073 { 2074 uint32_t seglen = 0; 2075 uint16_t chunklen; 2076 int extra; 2077 mblk_t *nmp; 2078 mblk_t *head; 2079 mblk_t *fill; 2080 sctp_data_hdr_t *sdc; 2081 sctp_msg_hdr_t *mhdr; 2082 2083 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2084 seglen = ntohs(sdc->sdh_len); 2085 chunklen = seglen - sizeof (*sdc); 2086 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 2087 extra = SCTP_ALIGN - extra; 2088 2089 nmp = dupmsg(*mp); 2090 if (nmp == NULL) 2091 return (NULL); 2092 if (extra > 0) { 2093 fill = sctp_get_padding(sctp, extra); 2094 if (fill != NULL) { 2095 linkb(nmp, fill); 2096 seglen += extra; 2097 } else { 2098 freemsg(nmp); 2099 return (NULL); 2100 } 2101 } 2102 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2103 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL); 2104 if (head == NULL) { 2105 freemsg(nmp); 2106 return (NULL); 2107 } 2108 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2109 /* 2110 * Don't update the TSN if we are doing a Zero Win Probe. 2111 */ 2112 if (!sctp->sctp_zero_win_probe) 2113 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2114 *mp = (*mp)->b_next; 2115 2116 try_bundle: 2117 while (seglen < fp->sfa_pmss) { 2118 int32_t new_len; 2119 2120 /* 2121 * Go through the list to find more chunks to be bundled. 2122 * We should only retransmit sent by unack'ed chunks. Since 2123 * they were sent before, the peer's receive window should 2124 * be able to receive them. 2125 */ 2126 while (*mp != NULL) { 2127 /* Check if the chunk can be bundled. */ 2128 if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp)) 2129 break; 2130 *mp = (*mp)->b_next; 2131 } 2132 /* Go to the next message. 
*/ 2133 if (*mp == NULL) { 2134 for (*meta = (*meta)->b_next; *meta != NULL; 2135 *meta = (*meta)->b_next) { 2136 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr; 2137 2138 if (SCTP_IS_MSG_ABANDONED(*meta) || 2139 SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr, 2140 sctp)) { 2141 continue; 2142 } 2143 2144 *mp = (*meta)->b_cont; 2145 goto try_bundle; 2146 } 2147 /* No more chunk to be bundled. */ 2148 break; 2149 } 2150 2151 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2152 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */ 2153 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn)) 2154 break; 2155 new_len = ntohs(sdc->sdh_len); 2156 chunklen = new_len - sizeof (*sdc); 2157 2158 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 2159 extra = SCTP_ALIGN - extra; 2160 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 2161 break; 2162 if ((nmp = dupmsg(*mp)) == NULL) 2163 break; 2164 2165 if (extra > 0) { 2166 fill = sctp_get_padding(sctp, extra); 2167 if (fill != NULL) { 2168 linkb(nmp, fill); 2169 } else { 2170 freemsg(nmp); 2171 break; 2172 } 2173 } 2174 linkb(head, nmp); 2175 2176 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2177 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2178 /* 2179 * Don't update the TSN if we are doing a Zero Win Probe. 2180 */ 2181 if (!sctp->sctp_zero_win_probe) 2182 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2183 2184 seglen = new_len; 2185 *mp = (*mp)->b_next; 2186 } 2187 *packet_len = seglen; 2188 fp->rxt_unacked += seglen; 2189 return (head); 2190 } 2191 2192 /* 2193 * sctp_ss_rexmit() is called when we get a SACK after a timeout which 2194 * advances the cum_tsn but the cum_tsn is still less than what we have sent 2195 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial" 2196 * SACK. We retransmit unacked chunks without having to wait for another 2197 * timeout. The rationale is that the SACK should not be "partial" if all the 2198 * lost chunks have been retransmitted. 
Since the SACK is "partial,"
 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
 * be missing.  It is better for us to retransmit them now instead
 * of waiting for a timeout.
 */
void
sctp_ss_rexmit(sctp_t *sctp)
{
	mblk_t		*meta;
	mblk_t		*mp;
	mblk_t		*out;
	sctp_faddr_t	*fp;
	uint_t		out_len;
	uint32_t	budget;
	int		burst;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	ASSERT(!sctp->sctp_zero_win_probe);

	/*
	 * If the last cum ack is smaller than what we have just
	 * retransmitted, simply return.  Otherwise resume right after it.
	 */
	if (!(SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn)))
		return;
	sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));

	/*
	 * After a timer fires, sctp_current should be set to the new
	 * fp where the retransmitted chunks are sent.
	 */
	fp = sctp->sctp_current;

	/*
	 * Only cwnd limits a retransmission: the peer's receive window
	 * already admitted these chunks when they were first sent.  With
	 * rxt_unacked recording what has been retransmitted but not yet
	 * acknowledged, the amount we may send now is cwnd minus rxt_unacked.
	 *
	 * rxt_unacked is incremented when we retransmit a packet and
	 * decremented when a SACK acknowledges something.  It is reset when
	 * the retransmission timer fires, on the assumption that all packets
	 * have left the network after a timeout.  If that assumption does
	 * not hold, a SACK arriving after the timeout can acknowledge more
	 * than rxt_unacked (which only counts what was retransmitted since
	 * the timer fired).  rxt_unacked is unsigned, so going "negative"
	 * makes it huge.  This is why we always allow at least one MSS.
	 *
	 * The count is inexact because we only know how many packets are
	 * outstanding (from the TSNs), not how many bytes they hold.  An
	 * exact count would need a walk of the send list.  As accuracy is
	 * not really important during retransmission, the walk is skipped
	 * to save time.  This should not make the retransmission aggressive
	 * enough to cause congestion.
	 */
	budget = (fp->cwnd <= fp->rxt_unacked) ?
	    fp->sfa_pmss : fp->cwnd - fp->rxt_unacked;

	/* Find the first unacked chunk, i.e. the one at sctp_rxt_nxttsn. */
	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
		sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr;

		if (SCTP_IS_MSG_ABANDONED(meta) ||
		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
			continue;
		}

		mp = meta->b_cont;
		while (mp != NULL) {
			sctp_data_hdr_t *sdc;

			/* Again, this may not be possible */
			if (!SCTP_CHUNK_ISSENT(mp))
				return;
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
				goto found_msg;
			mp = mp->b_next;
		}
	}

	/* Everything is abandoned... */
	return;

found_msg:
	if (!fp->timer_running)
		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
	out = sctp_rexmit_packet(sctp, &meta, &mp, fp, &out_len);
	if (out == NULL) {
		SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
		return;
	}
	if ((out_len > fp->sfa_pmss) && fp->isv4) {
		/*
		 * Path MTU is different from path we thought it would
		 * be when we created chunks, or IP headers have grown.
		 * Need to clear the DF bit.
		 */
		((ipha_t *)out->b_rptr)->ipha_fragment_offset_and_flags = 0;
	}
	sctp_set_iplen(sctp, out, fp->ixa);
	(void) conn_ip_output(out, fp->ixa);
	BUMP_LOCAL(sctp->sctp_opkts);

	/* Check and see if there is more chunk to be retransmitted. */
	if (meta == NULL)
		return;
	if (budget <= out_len || budget - out_len < fp->sfa_pmss)
		return;
	if (mp == NULL) {
		/* Current message is used up; continue with the next one. */
		meta = meta->b_next;
		if (meta == NULL)
			return;
	}

	/*
	 * Retransmit another packet if the window allows.
	 *
	 * NOTE(review): burst is re-initialized every time control gets
	 * here and is decremented per message scanned rather than per packet
	 * sent, so it does not bound the number of packets sent by one call.
	 * Confirm whether sctps_maxburst is meant to cap that.
	 */
	budget -= out_len;
	burst = sctps->sctps_maxburst - 1;
	while (meta != NULL && burst > 0) {
		if (mp == NULL)
			mp = meta->b_cont;
		while (mp != NULL) {
			/* Again, this may not be possible */
			if (!SCTP_CHUNK_ISSENT(mp))
				return;
			if (!SCTP_CHUNK_ISACKED(mp))
				goto found_msg;
			mp = mp->b_next;
		}
		meta = meta->b_next;
		burst--;
	}
}