1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/systm.h> 29 #include <sys/stream.h> 30 #include <sys/cmn_err.h> 31 #define _SUN_TPI_VERSION 2 32 #include <sys/tihdr.h> 33 #include <sys/socket.h> 34 #include <sys/stropts.h> 35 #include <sys/strsun.h> 36 #include <sys/strsubr.h> 37 #include <sys/socketvar.h> 38 #include <inet/common.h> 39 #include <inet/mi.h> 40 #include <inet/ip.h> 41 #include <inet/ip6.h> 42 #include <inet/sctp_ip.h> 43 #include <inet/ipclassifier.h> 44 45 /* 46 * PR-SCTP comments. 47 * 48 * A message can expire before it gets to the transmit list (i.e. it is still 49 * in the unsent list - unchunked), after it gets to the transmit list, but 50 * before transmission has actually started, or after transmission has begun. 
 * Accordingly, we check for the status of a message in sctp_chunkify() when
 * the message is being transferred from the unsent list to the transmit list;
 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
 * When we nuke a message in sctp_chunkify(), all we need to do is take it
 * out of the unsent list and update sctp_unsent; when a message is deemed
 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
 * list and update sctp_unsent IFF transmission for the message has not yet
 * begun (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for
 * the message has started, then we cannot just take it out of the list; we
 * need to send a Forward TSN chunk to the peer so that the peer can clear its
 * fragment list for this message. However, we cannot just send the Forward
 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
 * messages preceding this abandoned message. So, we send a Forward TSN
 * IFF all messages prior to this abandoned message have been SACKed; if not,
 * we defer sending the Forward TSN to sctp_cumack(), which will check for
 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
 * sctp_rexmit() when we check for retransmissions, we need to determine if
 * the advanced peer ack point can be moved ahead, and if so, send a Forward
 * TSN to the peer instead of retransmitting the chunk. Note that when
 * we send a Forward TSN for a message, there may be yet unsent chunks for
 * this message; we need to mark all such chunks as abandoned, so that
 * sctp_cumack() can take the message out of the transmit list; additionally
 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
75 * decremented when a message/chunk is deemed abandoned), sockfs needs to 76 * be notified so that it can adjust its idea of the queued message. 77 */ 78 79 #include "sctp_impl.h" 80 81 static struct kmem_cache *sctp_kmem_ftsn_set_cache; 82 static mblk_t *sctp_chunkify(sctp_t *, int, int, int); 83 84 #ifdef DEBUG 85 static boolean_t sctp_verify_chain(mblk_t *, mblk_t *); 86 #endif 87 88 /* 89 * Called to allocate a header mblk when sending data to SCTP. 90 * Data will follow in b_cont of this mblk. 91 */ 92 mblk_t * 93 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen, 94 int flags) 95 { 96 mblk_t *mp; 97 struct T_unitdata_req *tudr; 98 size_t size; 99 int error; 100 101 size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen; 102 size = MAX(size, sizeof (sctp_msg_hdr_t)); 103 if (flags & SCTP_CAN_BLOCK) { 104 mp = allocb_wait(size, BPRI_MED, 0, &error); 105 } else { 106 mp = allocb(size, BPRI_MED); 107 } 108 if (mp) { 109 tudr = (struct T_unitdata_req *)mp->b_rptr; 110 tudr->PRIM_type = T_UNITDATA_REQ; 111 tudr->DEST_length = nlen; 112 tudr->DEST_offset = sizeof (*tudr); 113 tudr->OPT_length = clen; 114 tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) + 115 _TPI_ALIGN_TOPT(nlen)); 116 if (nlen > 0) 117 bcopy(name, tudr + 1, nlen); 118 if (clen > 0) 119 bcopy(control, (char *)tudr + tudr->OPT_offset, clen); 120 mp->b_wptr += (tudr ->OPT_offset + clen); 121 mp->b_datap->db_type = M_PROTO; 122 } 123 return (mp); 124 } 125 126 /*ARGSUSED2*/ 127 int 128 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags) 129 { 130 sctp_faddr_t *fp = NULL; 131 struct T_unitdata_req *tudr; 132 int error = 0; 133 mblk_t *mproto = mp; 134 in6_addr_t *addr; 135 in6_addr_t tmpaddr; 136 uint16_t sid = sctp->sctp_def_stream; 137 uint32_t ppid = sctp->sctp_def_ppid; 138 uint32_t context = sctp->sctp_def_context; 139 uint16_t msg_flags = sctp->sctp_def_flags; 140 sctp_msg_hdr_t *sctp_msg_hdr; 141 uint32_t msg_len = 0; 142 uint32_t timetolive = sctp->sctp_def_timetolive; 143 144 
ASSERT(DB_TYPE(mproto) == M_PROTO); 145 146 mp = mp->b_cont; 147 ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA); 148 149 tudr = (struct T_unitdata_req *)mproto->b_rptr; 150 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 151 152 /* Get destination address, if specified */ 153 if (tudr->DEST_length > 0) { 154 sin_t *sin; 155 sin6_t *sin6; 156 157 sin = (struct sockaddr_in *) 158 (mproto->b_rptr + tudr->DEST_offset); 159 switch (sin->sin_family) { 160 case AF_INET: 161 if (tudr->DEST_length < sizeof (*sin)) { 162 return (EINVAL); 163 } 164 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr); 165 addr = &tmpaddr; 166 break; 167 case AF_INET6: 168 if (tudr->DEST_length < sizeof (*sin6)) { 169 return (EINVAL); 170 } 171 sin6 = (struct sockaddr_in6 *) 172 (mproto->b_rptr + tudr->DEST_offset); 173 addr = &sin6->sin6_addr; 174 break; 175 default: 176 return (EAFNOSUPPORT); 177 } 178 fp = sctp_lookup_faddr(sctp, addr); 179 if (fp == NULL) { 180 return (EINVAL); 181 } 182 } 183 /* Ancillary Data? */ 184 if (tudr->OPT_length > 0) { 185 struct cmsghdr *cmsg; 186 char *cend; 187 struct sctp_sndrcvinfo *sndrcv; 188 189 cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset); 190 cend = ((char *)cmsg + tudr->OPT_length); 191 ASSERT(cend <= (char *)mproto->b_wptr); 192 193 for (;;) { 194 if ((char *)(cmsg + 1) > cend || 195 ((char *)cmsg + cmsg->cmsg_len) > cend) { 196 break; 197 } 198 if ((cmsg->cmsg_level == IPPROTO_SCTP) && 199 (cmsg->cmsg_type == SCTP_SNDRCV)) { 200 if (cmsg->cmsg_len < 201 (sizeof (*sndrcv) + sizeof (*cmsg))) { 202 return (EINVAL); 203 } 204 sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1); 205 sid = sndrcv->sinfo_stream; 206 msg_flags = sndrcv->sinfo_flags; 207 ppid = sndrcv->sinfo_ppid; 208 context = sndrcv->sinfo_context; 209 timetolive = sndrcv->sinfo_timetolive; 210 break; 211 } 212 if (cmsg->cmsg_len > 0) 213 cmsg = CMSG_NEXT(cmsg); 214 else 215 break; 216 } 217 } 218 if (msg_flags & MSG_ABORT) { 219 if (mp && mp->b_cont) { 220 mblk_t *pump = msgpullup(mp, 
-1); 221 if (!pump) { 222 return (ENOMEM); 223 } 224 freemsg(mp); 225 mp = pump; 226 mproto->b_cont = mp; 227 } 228 RUN_SCTP(sctp); 229 sctp_user_abort(sctp, mp); 230 freemsg(mproto); 231 goto process_sendq; 232 } 233 if (mp == NULL) 234 goto done; 235 236 RUN_SCTP(sctp); 237 238 /* Reject any new data requests if we are shutting down */ 239 if (sctp->sctp_state > SCTPS_ESTABLISHED || 240 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) { 241 error = EPIPE; 242 goto unlock_done; 243 } 244 245 /* Re-use the mproto to store relevant info. */ 246 ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr)); 247 248 mproto->b_rptr = mproto->b_datap->db_base; 249 mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr); 250 251 sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr; 252 bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr)); 253 sctp_msg_hdr->smh_context = context; 254 sctp_msg_hdr->smh_sid = sid; 255 sctp_msg_hdr->smh_ppid = ppid; 256 sctp_msg_hdr->smh_flags = msg_flags; 257 sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive); 258 sctp_msg_hdr->smh_tob = lbolt64; 259 for (; mp != NULL; mp = mp->b_cont) 260 msg_len += MBLKL(mp); 261 sctp_msg_hdr->smh_msglen = msg_len; 262 263 /* User requested specific destination */ 264 SCTP_SET_CHUNK_DEST(mproto, fp); 265 266 if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED && 267 sid >= sctp->sctp_num_ostr) { 268 /* Send sendfail event */ 269 sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID, 270 B_FALSE); 271 error = EINVAL; 272 goto unlock_done; 273 } 274 275 /* no data */ 276 if (msg_len == 0) { 277 sctp_sendfail_event(sctp, dupmsg(mproto), 278 SCTP_ERR_NO_USR_DATA, B_FALSE); 279 error = EINVAL; 280 goto unlock_done; 281 } 282 283 /* Add it to the unsent list */ 284 if (sctp->sctp_xmit_unsent == NULL) { 285 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto; 286 } else { 287 sctp->sctp_xmit_unsent_tail->b_next = mproto; 288 sctp->sctp_xmit_unsent_tail = mproto; 289 } 290 sctp->sctp_unsent += msg_len; 291 
BUMP_LOCAL(sctp->sctp_msgcount); 292 /* 293 * Notify sockfs if the tx queue is full. 294 */ 295 if (SCTP_TXQ_LEN(sctp) >= sctp->sctp_xmit_hiwater) { 296 sctp->sctp_txq_full = 1; 297 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, B_TRUE); 298 } 299 if (sctp->sctp_state == SCTPS_ESTABLISHED) 300 sctp_output(sctp, UINT_MAX); 301 process_sendq: 302 WAKE_SCTP(sctp); 303 sctp_process_sendq(sctp); 304 return (0); 305 unlock_done: 306 WAKE_SCTP(sctp); 307 done: 308 return (error); 309 } 310 311 /* 312 * While there are messages on sctp_xmit_unsent, detach each one. For each: 313 * allocate space for the chunk header, fill in the data chunk, and fill in 314 * the chunk header. Then append it to sctp_xmit_tail. 315 * Return after appending as many bytes as required (bytes_to_send). 316 * We also return if we've appended one or more chunks, and find a subsequent 317 * unsent message is too big to fit in the segment. 318 */ 319 mblk_t * 320 sctp_chunkify(sctp_t *sctp, int mss, int firstseg_len, int bytes_to_send) 321 { 322 mblk_t *mp; 323 mblk_t *chunk_mp; 324 mblk_t *chunk_head; 325 mblk_t *chunk_hdr; 326 mblk_t *chunk_tail = NULL; 327 int count; 328 int chunksize; 329 sctp_data_hdr_t *sdc; 330 mblk_t *mdblk = sctp->sctp_xmit_unsent; 331 sctp_faddr_t *fp; 332 sctp_faddr_t *fp1; 333 size_t xtralen; 334 sctp_msg_hdr_t *msg_hdr; 335 sctp_stack_t *sctps = sctp->sctp_sctps; 336 sctp_msg_hdr_t *next_msg_hdr; 337 size_t nextlen; 338 int remaining_len = mss - firstseg_len; 339 340 ASSERT(remaining_len >= 0); 341 342 fp = SCTP_CHUNK_DEST(mdblk); 343 if (fp == NULL) 344 fp = sctp->sctp_current; 345 if (fp->isv4) 346 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra + 347 sizeof (*sdc); 348 else 349 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra + 350 sizeof (*sdc); 351 count = chunksize = remaining_len - sizeof (*sdc); 352 nextmsg: 353 next_msg_hdr = (sctp_msg_hdr_t *)sctp->sctp_xmit_unsent->b_rptr; 354 nextlen = next_msg_hdr->smh_msglen; 355 /* 356 * Will the entire next 
message fit in the current packet ? 357 * if not, leave it on the unsent list. 358 */ 359 if ((firstseg_len != 0) && (nextlen > remaining_len)) 360 return (NULL); 361 362 chunk_mp = mdblk->b_cont; 363 364 /* 365 * If this partially chunked, we ignore the next one for now and 366 * use the one already present. For the unchunked bits, we use the 367 * length of the last chunk. 368 */ 369 if (SCTP_IS_MSG_CHUNKED(mdblk)) { 370 int chunk_len; 371 372 ASSERT(chunk_mp->b_next != NULL); 373 mdblk->b_cont = chunk_mp->b_next; 374 chunk_mp->b_next = NULL; 375 SCTP_MSG_CLEAR_CHUNKED(mdblk); 376 mp = mdblk->b_cont; 377 while (mp->b_next != NULL) 378 mp = mp->b_next; 379 chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len); 380 if (fp->sfa_pmss - chunk_len > sizeof (*sdc)) 381 count = chunksize = fp->sfa_pmss - chunk_len; 382 else 383 count = chunksize = fp->sfa_pmss; 384 count = chunksize = count - sizeof (*sdc); 385 } else { 386 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; 387 if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) { 388 sctp->sctp_xmit_unsent = mdblk->b_next; 389 if (sctp->sctp_xmit_unsent == NULL) 390 sctp->sctp_xmit_unsent_tail = NULL; 391 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); 392 sctp->sctp_unsent -= msg_hdr->smh_msglen; 393 mdblk->b_next = NULL; 394 BUMP_LOCAL(sctp->sctp_prsctpdrop); 395 /* 396 * Update ULP the amount of queued data, which is 397 * sent-unack'ed + unsent. 
398 */ 399 if (!SCTP_IS_DETACHED(sctp)) 400 SCTP_TXQ_UPDATE(sctp); 401 sctp_sendfail_event(sctp, mdblk, 0, B_FALSE); 402 goto try_next; 403 } 404 mdblk->b_cont = NULL; 405 } 406 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; 407 nextchunk: 408 chunk_head = chunk_mp; 409 chunk_tail = NULL; 410 411 /* Skip as many mblk's as we need */ 412 while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) { 413 count -= MBLKL(chunk_mp); 414 chunk_tail = chunk_mp; 415 chunk_mp = chunk_mp->b_cont; 416 } 417 /* Split the chain, if needed */ 418 if (chunk_mp != NULL) { 419 if (count > 0) { 420 mblk_t *split_mp = dupb(chunk_mp); 421 422 if (split_mp == NULL) { 423 if (mdblk->b_cont == NULL) { 424 mdblk->b_cont = chunk_head; 425 } else { 426 SCTP_MSG_SET_CHUNKED(mdblk); 427 ASSERT(chunk_head->b_next == NULL); 428 chunk_head->b_next = mdblk->b_cont; 429 mdblk->b_cont = chunk_head; 430 } 431 return (sctp->sctp_xmit_tail); 432 } 433 if (chunk_tail != NULL) { 434 chunk_tail->b_cont = split_mp; 435 chunk_tail = chunk_tail->b_cont; 436 } else { 437 chunk_head = chunk_tail = split_mp; 438 } 439 chunk_tail->b_wptr = chunk_tail->b_rptr + count; 440 chunk_mp->b_rptr = chunk_tail->b_wptr; 441 count = 0; 442 } else if (chunk_tail == NULL) { 443 goto next; 444 } else { 445 chunk_tail->b_cont = NULL; 446 } 447 } 448 /* Alloc chunk hdr, if needed */ 449 if (DB_REF(chunk_head) > 1 || 450 ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) || 451 MBLKHEAD(chunk_head) < sizeof (*sdc)) { 452 if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) { 453 if (mdblk->b_cont == NULL) { 454 if (chunk_mp != NULL) 455 linkb(chunk_head, chunk_mp); 456 mdblk->b_cont = chunk_head; 457 } else { 458 SCTP_MSG_SET_CHUNKED(mdblk); 459 if (chunk_mp != NULL) 460 linkb(chunk_head, chunk_mp); 461 ASSERT(chunk_head->b_next == NULL); 462 chunk_head->b_next = mdblk->b_cont; 463 mdblk->b_cont = chunk_head; 464 } 465 return (sctp->sctp_xmit_tail); 466 } 467 chunk_hdr->b_rptr += xtralen - sizeof (*sdc); 468 chunk_hdr->b_wptr = 
chunk_hdr->b_rptr + sizeof (*sdc); 469 chunk_hdr->b_cont = chunk_head; 470 } else { 471 chunk_hdr = chunk_head; 472 chunk_hdr->b_rptr -= sizeof (*sdc); 473 } 474 ASSERT(chunk_hdr->b_datap->db_ref == 1); 475 sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr; 476 sdc->sdh_id = CHUNK_DATA; 477 sdc->sdh_flags = 0; 478 sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count); 479 ASSERT(sdc->sdh_len); 480 sdc->sdh_sid = htons(msg_hdr->smh_sid); 481 /* 482 * We defer assigning the SSN just before sending the chunk, else 483 * if we drop the chunk in sctp_get_msg_to_send(), we would need 484 * to send a Forward TSN to let the peer know. Some more comments 485 * about this in sctp_impl.h for SCTP_CHUNK_SENT. 486 */ 487 sdc->sdh_payload_id = msg_hdr->smh_ppid; 488 489 if (mdblk->b_cont == NULL) { 490 mdblk->b_cont = chunk_hdr; 491 SCTP_DATA_SET_BBIT(sdc); 492 } else { 493 mp = mdblk->b_cont; 494 while (mp->b_next != NULL) 495 mp = mp->b_next; 496 mp->b_next = chunk_hdr; 497 } 498 499 bytes_to_send -= (chunksize - count); 500 if (chunk_mp != NULL) { 501 next: 502 count = chunksize = fp->sfa_pmss - sizeof (*sdc); 503 goto nextchunk; 504 } 505 SCTP_DATA_SET_EBIT(sdc); 506 sctp->sctp_xmit_unsent = mdblk->b_next; 507 if (mdblk->b_next == NULL) { 508 sctp->sctp_xmit_unsent_tail = NULL; 509 } 510 mdblk->b_next = NULL; 511 512 if (sctp->sctp_xmit_tail == NULL) { 513 sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk; 514 } else { 515 mp = sctp->sctp_xmit_tail; 516 while (mp->b_next != NULL) 517 mp = mp->b_next; 518 mp->b_next = mdblk; 519 mdblk->b_prev = mp; 520 } 521 try_next: 522 if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) { 523 mdblk = sctp->sctp_xmit_unsent; 524 fp1 = SCTP_CHUNK_DEST(mdblk); 525 if (fp1 == NULL) 526 fp1 = sctp->sctp_current; 527 if (fp == fp1) { 528 size_t len = MBLKL(mdblk->b_cont); 529 if ((count > 0) && 530 ((len > fp->sfa_pmss - sizeof (*sdc)) || 531 (len <= count))) { 532 count -= sizeof (*sdc); 533 count = chunksize = count - (count & 0x3); 534 } 
else { 535 count = chunksize = fp->sfa_pmss - 536 sizeof (*sdc); 537 } 538 } else { 539 if (fp1->isv4) 540 xtralen = sctp->sctp_hdr_len; 541 else 542 xtralen = sctp->sctp_hdr6_len; 543 xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc); 544 count = chunksize = fp1->sfa_pmss - sizeof (*sdc); 545 fp = fp1; 546 } 547 goto nextmsg; 548 } 549 return (sctp->sctp_xmit_tail); 550 } 551 552 void 553 sctp_free_msg(mblk_t *ump) 554 { 555 mblk_t *mp, *nmp; 556 557 for (mp = ump->b_cont; mp; mp = nmp) { 558 nmp = mp->b_next; 559 mp->b_next = mp->b_prev = NULL; 560 freemsg(mp); 561 } 562 ASSERT(!ump->b_prev); 563 ump->b_next = NULL; 564 freeb(ump); 565 } 566 567 mblk_t * 568 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, 569 int *error) 570 { 571 int hdrlen; 572 char *hdr; 573 int isv4 = fp->isv4; 574 sctp_stack_t *sctps = sctp->sctp_sctps; 575 576 if (error != NULL) 577 *error = 0; 578 579 if (isv4) { 580 hdrlen = sctp->sctp_hdr_len; 581 hdr = sctp->sctp_iphc; 582 } else { 583 hdrlen = sctp->sctp_hdr6_len; 584 hdr = sctp->sctp_iphc6; 585 } 586 /* 587 * A null fp->ire could mean that the address is 'down'. Similarly, 588 * it is possible that the address went down, we tried to send an 589 * heartbeat and ended up setting fp->saddr as unspec because we 590 * didn't have any usable source address. In either case 591 * sctp_get_ire() will try find an IRE, if available, and set 592 * the source address, if needed. If we still don't have any 593 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and 594 * we return EHOSTUNREACH. 595 */ 596 if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) { 597 sctp_get_ire(sctp, fp); 598 if (fp->state == SCTP_FADDRS_UNREACH) { 599 if (error != NULL) 600 *error = EHOSTUNREACH; 601 return (NULL); 602 } 603 } 604 /* Copy in IP header. 
*/ 605 if ((mp->b_rptr - mp->b_datap->db_base) < 606 (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 || 607 !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) { 608 mblk_t *nmp; 609 610 /* 611 * This can happen if IP headers are adjusted after 612 * data was moved into chunks, or during retransmission, 613 * or things like snoop is running. 614 */ 615 nmp = allocb_cred(sctps->sctps_wroff_xtra + hdrlen + sacklen, 616 CONN_CRED(sctp->sctp_connp), sctp->sctp_cpid); 617 if (nmp == NULL) { 618 if (error != NULL) 619 *error = ENOMEM; 620 return (NULL); 621 } 622 nmp->b_rptr += sctps->sctps_wroff_xtra; 623 nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen; 624 nmp->b_cont = mp; 625 mp = nmp; 626 } else { 627 mp->b_rptr -= (hdrlen + sacklen); 628 mblk_setcred(mp, CONN_CRED(sctp->sctp_connp), sctp->sctp_cpid); 629 } 630 bcopy(hdr, mp->b_rptr, hdrlen); 631 if (sacklen) { 632 sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen); 633 } 634 if (fp != sctp->sctp_current) { 635 /* change addresses in header */ 636 if (isv4) { 637 ipha_t *iph = (ipha_t *)mp->b_rptr; 638 639 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst); 640 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) { 641 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 642 iph->ipha_src); 643 } else if (sctp->sctp_bound_to_all) { 644 iph->ipha_src = INADDR_ANY; 645 } 646 } else { 647 ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr; 648 if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 649 ((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr; 650 } else if (sctp->sctp_bound_to_all) { 651 V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src); 652 } 653 } 654 } 655 /* 656 * IP will not free this IRE if it is condemned. SCTP needs to 657 * free it. 
658 */ 659 if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) { 660 IRE_REFRELE_NOTR(fp->ire); 661 fp->ire = NULL; 662 } 663 664 /* Stash the conn and ire ptr info for IP */ 665 SCTP_STASH_IPINFO(mp, fp->ire); 666 667 return (mp); 668 } 669 670 /* 671 * SCTP requires every chunk to be padded so that the total length 672 * is a multiple of SCTP_ALIGN. This function returns a mblk with 673 * the specified pad length. 674 */ 675 static mblk_t * 676 sctp_get_padding(sctp_t *sctp, int pad) 677 { 678 mblk_t *fill; 679 680 ASSERT(pad < SCTP_ALIGN); 681 ASSERT(sctp->sctp_pad_mp != NULL); 682 if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) { 683 fill->b_wptr += pad; 684 return (fill); 685 } 686 687 /* 688 * The memory saving path of reusing the sctp_pad_mp 689 * fails may be because it has been dupb() too 690 * many times (DBLK_REFMAX). Use the memory consuming 691 * path of allocating the pad mblk. 692 */ 693 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) { 694 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */ 695 *(int32_t *)fill->b_rptr = 0; 696 fill->b_wptr += pad; 697 } 698 return (fill); 699 } 700 701 static mblk_t * 702 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) 703 { 704 mblk_t *meta; 705 mblk_t *start_mp = NULL; 706 mblk_t *end_mp = NULL; 707 mblk_t *mp, *nmp; 708 mblk_t *fill; 709 sctp_data_hdr_t *sdh; 710 int msglen; 711 int extra; 712 sctp_msg_hdr_t *msg_hdr; 713 sctp_faddr_t *old_fp = NULL; 714 sctp_faddr_t *chunk_fp; 715 sctp_stack_t *sctps = sctp->sctp_sctps; 716 717 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 718 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 719 if (SCTP_IS_MSG_ABANDONED(meta) || 720 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 721 continue; 722 } 723 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 724 if (SCTP_CHUNK_WANT_REXMIT(mp)) { 725 /* 726 * Use the same peer address to do fast 727 * retransmission. 
If the original peer 728 * address is dead, switch to the current 729 * one. Record the old one so that we 730 * will pick the chunks sent to the old 731 * one for fast retransmission. 732 */ 733 chunk_fp = SCTP_CHUNK_DEST(mp); 734 if (*fp == NULL) { 735 *fp = chunk_fp; 736 if ((*fp)->state != SCTP_FADDRS_ALIVE) { 737 old_fp = *fp; 738 *fp = sctp->sctp_current; 739 } 740 } else if (old_fp == NULL && *fp != chunk_fp) { 741 continue; 742 } else if (old_fp != NULL && 743 old_fp != chunk_fp) { 744 continue; 745 } 746 747 sdh = (sctp_data_hdr_t *)mp->b_rptr; 748 msglen = ntohs(sdh->sdh_len); 749 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) { 750 extra = SCTP_ALIGN - extra; 751 } 752 753 /* 754 * We still return at least the first message 755 * even if that message cannot fit in as 756 * PMTU may have changed. 757 */ 758 if (*total + msglen + extra > 759 (*fp)->sfa_pmss && start_mp != NULL) { 760 return (start_mp); 761 } 762 if ((nmp = dupmsg(mp)) == NULL) 763 return (start_mp); 764 if (extra > 0) { 765 fill = sctp_get_padding(sctp, extra); 766 if (fill != NULL) { 767 linkb(nmp, fill); 768 } else { 769 return (start_mp); 770 } 771 } 772 BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans); 773 BUMP_LOCAL(sctp->sctp_rxtchunks); 774 SCTP_CHUNK_CLEAR_REXMIT(mp); 775 if (start_mp == NULL) { 776 start_mp = nmp; 777 } else { 778 linkb(end_mp, nmp); 779 } 780 end_mp = nmp; 781 *total += msglen + extra; 782 dprint(2, ("sctp_find_fast_rexmit_mblks: " 783 "tsn %x\n", sdh->sdh_tsn)); 784 } 785 } 786 } 787 /* Clear the flag as there is no more message to be fast rexmitted. 
*/ 788 sctp->sctp_chk_fast_rexmit = B_FALSE; 789 return (start_mp); 790 } 791 792 /* A debug function just to make sure that a mblk chain is not broken */ 793 #ifdef DEBUG 794 static boolean_t 795 sctp_verify_chain(mblk_t *head, mblk_t *tail) 796 { 797 mblk_t *mp = head; 798 799 if (head == NULL || tail == NULL) 800 return (B_TRUE); 801 while (mp != NULL) { 802 if (mp == tail) 803 return (B_TRUE); 804 mp = mp->b_next; 805 } 806 return (B_FALSE); 807 } 808 #endif 809 810 /* 811 * Gets the next unsent chunk to transmit. Messages that are abandoned are 812 * skipped. A message can be abandoned if it has a non-zero timetolive and 813 * transmission has not yet started or if it is a partially reliable 814 * message and its time is up (assuming we are PR-SCTP aware). 815 * We only return a chunk if it will fit entirely in the current packet. 816 * 'cansend' is used to determine if need to try and chunkify messages from 817 * the unsent list, if any, and also as an input to sctp_chunkify() if so. 818 * 819 * firstseg_len indicates the space already used, cansend represents remaining 820 * space in the window, ((sfa_pmss - firstseg_len) can therefore reasonably 821 * be used to compute the cansend arg). 
822 */ 823 mblk_t * 824 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error, 825 int32_t firstseg_len, uint32_t cansend, sctp_faddr_t *fp) 826 { 827 mblk_t *mp1; 828 sctp_msg_hdr_t *msg_hdr; 829 mblk_t *tmp_meta; 830 sctp_faddr_t *fp1; 831 832 ASSERT(error != NULL && mp != NULL); 833 *error = 0; 834 835 ASSERT(sctp->sctp_current != NULL); 836 837 chunkified: 838 while (meta != NULL) { 839 tmp_meta = meta->b_next; 840 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 841 mp1 = meta->b_cont; 842 if (SCTP_IS_MSG_ABANDONED(meta)) 843 goto next_msg; 844 if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 845 while (mp1 != NULL) { 846 if (SCTP_CHUNK_CANSEND(mp1)) { 847 *mp = mp1; 848 #ifdef DEBUG 849 ASSERT(sctp_verify_chain( 850 sctp->sctp_xmit_head, meta)); 851 #endif 852 return (meta); 853 } 854 mp1 = mp1->b_next; 855 } 856 goto next_msg; 857 } 858 /* 859 * If we come here and the first chunk is sent, then we 860 * we are PR-SCTP aware, in which case if the cumulative 861 * TSN has moved upto or beyond the first chunk (which 862 * means all the previous messages have been cumulative 863 * SACK'd), then we send a Forward TSN with the last 864 * chunk that was sent in this message. If we can't send 865 * a Forward TSN because previous non-abandoned messages 866 * have not been acked then we will defer the Forward TSN 867 * to sctp_rexmit() or sctp_cumack(). 
868 */ 869 if (SCTP_CHUNK_ISSENT(mp1)) { 870 *error = sctp_check_abandoned_msg(sctp, meta); 871 if (*error != 0) { 872 #ifdef DEBUG 873 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, 874 sctp->sctp_xmit_tail)); 875 #endif 876 return (NULL); 877 } 878 goto next_msg; 879 } 880 BUMP_LOCAL(sctp->sctp_prsctpdrop); 881 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); 882 if (meta->b_prev == NULL) { 883 ASSERT(sctp->sctp_xmit_head == meta); 884 sctp->sctp_xmit_head = tmp_meta; 885 if (sctp->sctp_xmit_tail == meta) 886 sctp->sctp_xmit_tail = tmp_meta; 887 meta->b_next = NULL; 888 if (tmp_meta != NULL) 889 tmp_meta->b_prev = NULL; 890 } else if (meta->b_next == NULL) { 891 if (sctp->sctp_xmit_tail == meta) 892 sctp->sctp_xmit_tail = meta->b_prev; 893 meta->b_prev->b_next = NULL; 894 meta->b_prev = NULL; 895 } else { 896 meta->b_prev->b_next = tmp_meta; 897 tmp_meta->b_prev = meta->b_prev; 898 if (sctp->sctp_xmit_tail == meta) 899 sctp->sctp_xmit_tail = tmp_meta; 900 meta->b_prev = NULL; 901 meta->b_next = NULL; 902 } 903 sctp->sctp_unsent -= msg_hdr->smh_msglen; 904 /* 905 * Update ULP the amount of queued data, which is 906 * sent-unack'ed + unsent. 907 */ 908 if (!SCTP_IS_DETACHED(sctp)) 909 SCTP_TXQ_UPDATE(sctp); 910 sctp_sendfail_event(sctp, meta, 0, B_TRUE); 911 next_msg: 912 meta = tmp_meta; 913 } 914 /* chunkify, if needed */ 915 if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) { 916 ASSERT(sctp->sctp_unsent > 0); 917 if (fp == NULL) { 918 fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent); 919 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 920 fp = sctp->sctp_current; 921 } else { 922 /* 923 * If user specified destination, try to honor that. 
924 */ 925 fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent); 926 if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE && 927 fp1 != fp) { 928 goto chunk_done; 929 } 930 } 931 meta = sctp_chunkify(sctp, fp->sfa_pmss, firstseg_len, cansend); 932 if (meta == NULL) 933 goto chunk_done; 934 /* 935 * sctp_chunkify() won't advance sctp_xmit_tail if it adds 936 * new chunk(s) to the tail, so we need to skip the 937 * sctp_xmit_tail, which would have already been processed. 938 * This could happen when there is unacked chunks, but 939 * nothing new to send. 940 * When sctp_chunkify() is called when the transmit queue 941 * is empty then we need to start from sctp_xmit_tail. 942 */ 943 if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) { 944 #ifdef DEBUG 945 mp1 = sctp->sctp_xmit_tail->b_cont; 946 while (mp1 != NULL) { 947 ASSERT(!SCTP_CHUNK_CANSEND(mp1)); 948 mp1 = mp1->b_next; 949 } 950 #endif 951 if ((meta = sctp->sctp_xmit_tail->b_next) == NULL) 952 goto chunk_done; 953 } 954 goto chunkified; 955 } 956 chunk_done: 957 #ifdef DEBUG 958 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail)); 959 #endif 960 return (NULL); 961 } 962 963 void 964 sctp_fast_rexmit(sctp_t *sctp) 965 { 966 mblk_t *mp, *head; 967 int pktlen = 0; 968 sctp_faddr_t *fp = NULL; 969 sctp_stack_t *sctps = sctp->sctp_sctps; 970 971 ASSERT(sctp->sctp_xmit_head != NULL); 972 mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp); 973 if (mp == NULL) { 974 SCTP_KSTAT(sctps, sctp_fr_not_found); 975 return; 976 } 977 if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) { 978 freemsg(mp); 979 SCTP_KSTAT(sctps, sctp_fr_add_hdr); 980 return; 981 } 982 if ((pktlen > fp->sfa_pmss) && fp->isv4) { 983 ipha_t *iph = (ipha_t *)head->b_rptr; 984 985 iph->ipha_fragment_offset_and_flags = 0; 986 } 987 988 sctp_set_iplen(sctp, head); 989 sctp_add_sendq(sctp, head); 990 sctp->sctp_active = fp->lastactive = lbolt64; 991 } 992 993 void 994 sctp_output(sctp_t *sctp, uint_t num_pkt) 995 { 996 mblk_t *mp = 
NULL; 997 mblk_t *nmp; 998 mblk_t *head; 999 mblk_t *meta = sctp->sctp_xmit_tail; 1000 mblk_t *fill = NULL; 1001 uint16_t chunklen; 1002 uint32_t cansend; 1003 int32_t seglen; 1004 int32_t xtralen; 1005 int32_t sacklen; 1006 int32_t pad = 0; 1007 int32_t pathmax; 1008 int extra; 1009 int64_t now = lbolt64; 1010 sctp_faddr_t *fp; 1011 sctp_faddr_t *lfp; 1012 sctp_data_hdr_t *sdc; 1013 int error; 1014 boolean_t notsent = B_TRUE; 1015 sctp_stack_t *sctps = sctp->sctp_sctps; 1016 1017 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1018 sacklen = 0; 1019 } else { 1020 /* send a SACK chunk */ 1021 sacklen = sizeof (sctp_chunk_hdr_t) + 1022 sizeof (sctp_sack_chunk_t) + 1023 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1024 lfp = sctp->sctp_lastdata; 1025 ASSERT(lfp != NULL); 1026 if (lfp->state != SCTP_FADDRS_ALIVE) 1027 lfp = sctp->sctp_current; 1028 } 1029 1030 cansend = sctp->sctp_frwnd; 1031 if (sctp->sctp_unsent < cansend) 1032 cansend = sctp->sctp_unsent; 1033 1034 /* 1035 * Start persist timer if unable to send or when 1036 * trying to send into a zero window. This timer 1037 * ensures the blocked send attempt is retried. 1038 */ 1039 if ((cansend < sctp->sctp_current->sfa_pmss / 2) && 1040 (sctp->sctp_unacked != 0) && 1041 (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) && 1042 !sctp->sctp_ndelay || 1043 (cansend == 0 && sctp->sctp_unacked == 0 && 1044 sctp->sctp_unsent != 0)) { 1045 head = NULL; 1046 fp = sctp->sctp_current; 1047 goto unsent_data; 1048 } 1049 if (meta != NULL) 1050 mp = meta->b_cont; 1051 while (cansend > 0 && num_pkt-- != 0) { 1052 pad = 0; 1053 1054 /* 1055 * Find first segment eligible for transmit. 1056 */ 1057 while (mp != NULL) { 1058 if (SCTP_CHUNK_CANSEND(mp)) 1059 break; 1060 mp = mp->b_next; 1061 } 1062 if (mp == NULL) { 1063 meta = sctp_get_msg_to_send(sctp, &mp, 1064 meta == NULL ? 
NULL : meta->b_next, &error, sacklen, 1065 cansend, NULL); 1066 if (error != 0 || meta == NULL) { 1067 head = NULL; 1068 fp = sctp->sctp_current; 1069 goto unsent_data; 1070 } 1071 sctp->sctp_xmit_tail = meta; 1072 } 1073 1074 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1075 seglen = ntohs(sdc->sdh_len); 1076 xtralen = sizeof (*sdc); 1077 chunklen = seglen - xtralen; 1078 1079 /* 1080 * Check rwnd. 1081 */ 1082 if (chunklen > cansend) { 1083 head = NULL; 1084 fp = SCTP_CHUNK_DEST(meta); 1085 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1086 fp = sctp->sctp_current; 1087 goto unsent_data; 1088 } 1089 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1090 extra = SCTP_ALIGN - extra; 1091 1092 /* 1093 * Pick destination address, and check cwnd. 1094 */ 1095 if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) && 1096 (seglen + sacklen + extra <= lfp->sfa_pmss)) { 1097 /* 1098 * Only include SACK chunk if it can be bundled 1099 * with a data chunk, and sent to sctp_lastdata. 1100 */ 1101 pathmax = lfp->cwnd - lfp->suna; 1102 1103 fp = lfp; 1104 if ((nmp = dupmsg(mp)) == NULL) { 1105 head = NULL; 1106 goto unsent_data; 1107 } 1108 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1109 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, 1110 &error); 1111 if (head == NULL) { 1112 /* 1113 * If none of the source addresses are 1114 * available (i.e error == EHOSTUNREACH), 1115 * pretend we have sent the data. We will 1116 * eventually time out trying to retramsmit 1117 * the data if the interface never comes up. 1118 * If we have already sent some stuff (i.e., 1119 * notsent is B_FALSE) then we are fine, else 1120 * just mark this packet as sent. 
1121 */ 1122 if (notsent && error == EHOSTUNREACH) { 1123 SCTP_CHUNK_SENT(sctp, mp, sdc, 1124 fp, chunklen, meta); 1125 } 1126 freemsg(nmp); 1127 SCTP_KSTAT(sctps, sctp_output_failed); 1128 goto unsent_data; 1129 } 1130 seglen += sacklen; 1131 xtralen += sacklen; 1132 sacklen = 0; 1133 } else { 1134 fp = SCTP_CHUNK_DEST(meta); 1135 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1136 fp = sctp->sctp_current; 1137 /* 1138 * If we haven't sent data to this destination for 1139 * a while, do slow start again. 1140 */ 1141 if (now - fp->lastactive > fp->rto) { 1142 SET_CWND(fp, fp->sfa_pmss, 1143 sctps->sctps_slow_start_after_idle); 1144 } 1145 1146 pathmax = fp->cwnd - fp->suna; 1147 if (seglen + extra > pathmax) { 1148 head = NULL; 1149 goto unsent_data; 1150 } 1151 if ((nmp = dupmsg(mp)) == NULL) { 1152 head = NULL; 1153 goto unsent_data; 1154 } 1155 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1156 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error); 1157 if (head == NULL) { 1158 /* 1159 * If none of the source addresses are 1160 * available (i.e error == EHOSTUNREACH), 1161 * pretend we have sent the data. We will 1162 * eventually time out trying to retramsmit 1163 * the data if the interface never comes up. 1164 * If we have already sent some stuff (i.e., 1165 * notsent is B_FALSE) then we are fine, else 1166 * just mark this packet as sent. 1167 */ 1168 if (notsent && error == EHOSTUNREACH) { 1169 SCTP_CHUNK_SENT(sctp, mp, sdc, 1170 fp, chunklen, meta); 1171 } 1172 freemsg(nmp); 1173 SCTP_KSTAT(sctps, sctp_output_failed); 1174 goto unsent_data; 1175 } 1176 } 1177 fp->lastactive = now; 1178 if (pathmax > fp->sfa_pmss) 1179 pathmax = fp->sfa_pmss; 1180 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1181 mp = mp->b_next; 1182 1183 /* Use this chunk to measure RTT? 
*/ 1184 if (sctp->sctp_out_time == 0) { 1185 sctp->sctp_out_time = now; 1186 sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1; 1187 ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); 1188 } 1189 if (extra > 0) { 1190 fill = sctp_get_padding(sctp, extra); 1191 if (fill != NULL) { 1192 linkb(head, fill); 1193 pad = extra; 1194 seglen += extra; 1195 } else { 1196 goto unsent_data; 1197 } 1198 } 1199 /* 1200 * Bundle chunks. We linkb() the chunks together to send 1201 * downstream in a single packet. 1202 * Partial chunks MUST NOT be bundled with full chunks, so we 1203 * rely on sctp_get_msg_to_send() to only return messages that 1204 * will fit entirely in the current packet. 1205 */ 1206 while (seglen < pathmax) { 1207 int32_t new_len; 1208 int32_t new_xtralen; 1209 1210 while (mp != NULL) { 1211 if (SCTP_CHUNK_CANSEND(mp)) 1212 break; 1213 mp = mp->b_next; 1214 } 1215 if (mp == NULL) { 1216 meta = sctp_get_msg_to_send(sctp, &mp, 1217 meta->b_next, &error, seglen, 1218 (seglen - xtralen) >= cansend ? 
0 : 1219 cansend - seglen, fp); 1220 if (error != 0) 1221 break; 1222 /* If no more eligible chunks, cease bundling */ 1223 if (meta == NULL) 1224 break; 1225 sctp->sctp_xmit_tail = meta; 1226 } 1227 ASSERT(mp != NULL); 1228 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) && 1229 fp != SCTP_CHUNK_DEST(meta)) { 1230 break; 1231 } 1232 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1233 chunklen = ntohs(sdc->sdh_len); 1234 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0) 1235 extra = SCTP_ALIGN - extra; 1236 1237 new_len = seglen + chunklen; 1238 new_xtralen = xtralen + sizeof (*sdc); 1239 chunklen -= sizeof (*sdc); 1240 1241 if (new_len - new_xtralen > cansend || 1242 new_len + extra > pathmax) { 1243 break; 1244 } 1245 if ((nmp = dupmsg(mp)) == NULL) 1246 break; 1247 if (extra > 0) { 1248 fill = sctp_get_padding(sctp, extra); 1249 if (fill != NULL) { 1250 pad += extra; 1251 new_len += extra; 1252 linkb(nmp, fill); 1253 } else { 1254 freemsg(nmp); 1255 break; 1256 } 1257 } 1258 seglen = new_len; 1259 xtralen = new_xtralen; 1260 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1261 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1262 linkb(head, nmp); 1263 mp = mp->b_next; 1264 } 1265 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1266 ipha_t *iph = (ipha_t *)head->b_rptr; 1267 1268 /* 1269 * Path MTU is different from what we thought it would 1270 * be when we created chunks, or IP headers have grown. 1271 * Need to clear the DF bit. 
1272 */ 1273 iph->ipha_fragment_offset_and_flags = 0; 1274 } 1275 /* xmit segment */ 1276 ASSERT(cansend >= seglen - pad - xtralen); 1277 cansend -= (seglen - pad - xtralen); 1278 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x " 1279 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n", 1280 seglen - xtralen, ntohl(sdc->sdh_tsn), 1281 ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, 1282 cansend, sctp->sctp_lastack_rxd)); 1283 sctp_set_iplen(sctp, head); 1284 sctp_add_sendq(sctp, head); 1285 /* arm rto timer (if not set) */ 1286 if (!fp->timer_running) 1287 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1288 notsent = B_FALSE; 1289 } 1290 sctp->sctp_active = now; 1291 return; 1292 unsent_data: 1293 /* arm persist timer (if rto timer not set) */ 1294 if (!fp->timer_running) 1295 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1296 if (head != NULL) 1297 freemsg(head); 1298 } 1299 1300 /* 1301 * The following two functions initialize and destroy the cache 1302 * associated with the sets used for PR-SCTP. 1303 */ 1304 void 1305 sctp_ftsn_sets_init(void) 1306 { 1307 sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache", 1308 sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL, 1309 NULL, 0); 1310 } 1311 1312 void 1313 sctp_ftsn_sets_fini(void) 1314 { 1315 kmem_cache_destroy(sctp_kmem_ftsn_set_cache); 1316 } 1317 1318 1319 /* Free PR-SCTP sets */ 1320 void 1321 sctp_free_ftsn_set(sctp_ftsn_set_t *s) 1322 { 1323 sctp_ftsn_set_t *p; 1324 1325 while (s != NULL) { 1326 p = s->next; 1327 s->next = NULL; 1328 kmem_cache_free(sctp_kmem_ftsn_set_cache, s); 1329 s = p; 1330 } 1331 } 1332 1333 /* 1334 * Given a message meta block, meta, this routine creates or modifies 1335 * the set that will be used to generate a Forward TSN chunk. If the 1336 * entry for stream id, sid, for this message already exists, the 1337 * sequence number, ssn, is updated if it is greater than the existing 1338 * one. 
If an entry for this sid does not exist, one is created if
 * the size does not exceed fp->sfa_pmss. We return false in case
 * of an error.
 *
 * On success *nsets and *slen are updated to reflect a newly added entry
 * (each entry accounts for sizeof (uint32_t) bytes of *slen).  B_FALSE is
 * returned when an entry cannot be allocated or, for a non-empty list,
 * when one more entry would push *slen past fp->sfa_pmss.
 */
boolean_t
sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
    uint_t *nsets, uint32_t *slen)
{
	sctp_msg_hdr_t	*msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
	sctp_ftsn_set_t	*cur;
	sctp_ftsn_set_t	*tail = NULL;
	sctp_ftsn_set_t	*entry;
	uint16_t	sid = htons(msg_hdr->smh_sid);
	/* msg_hdr->smh_ssn is already in NBO */
	uint16_t	ssn = msg_hdr->smh_ssn;

	ASSERT(s != NULL && nsets != NULL);
	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));

	/*
	 * Look for an existing entry for this stream, remembering the last
	 * element so that a new entry can be appended after it.
	 */
	for (cur = *s; cur != NULL; cur = cur->next) {
		if (cur->ftsn_entries.ftsn_sid == sid) {
			if (SSN_GT(ssn, cur->ftsn_entries.ftsn_ssn))
				cur->ftsn_entries.ftsn_ssn = ssn;
			return (B_TRUE);
		}
		tail = cur;
	}

	/*
	 * No entry for this stream yet.  The very first entry is only
	 * asserted to fit; later ones are refused when they would not.
	 */
	if (tail == NULL) {
		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
	} else if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss) {
		return (B_FALSE);
	}

	entry = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
	if (entry == NULL)
		return (B_FALSE);
	entry->ftsn_entries.ftsn_sid = sid;
	entry->ftsn_entries.ftsn_ssn = ssn;
	entry->next = NULL;

	if (tail == NULL) {
		*s = entry;
		*nsets = 1;
	} else {
		tail->next = entry;
		(*nsets)++;
	}
	*slen += sizeof (uint32_t);
	return (B_TRUE);
}

/*
 * Given a set of stream id - sequence number pairs, this routine creates
 * a Forward TSN chunk.
The cumulative TSN (advanced peer ack point) 1398 * for the chunk is obtained from sctp->sctp_adv_pap. The caller 1399 * will add the IP/SCTP header. 1400 */ 1401 mblk_t * 1402 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets, 1403 uint_t nsets, uint32_t seglen) 1404 { 1405 mblk_t *ftsn_mp; 1406 sctp_chunk_hdr_t *ch_hdr; 1407 uint32_t *advtsn; 1408 uint16_t schlen; 1409 size_t xtralen; 1410 ftsn_entry_t *ftsn_entry; 1411 sctp_stack_t *sctps = sctp->sctp_sctps; 1412 1413 seglen += sizeof (sctp_chunk_hdr_t); 1414 if (fp->isv4) 1415 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra; 1416 else 1417 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra; 1418 ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp), 1419 sctp->sctp_cpid); 1420 if (ftsn_mp == NULL) 1421 return (NULL); 1422 ftsn_mp->b_rptr += xtralen; 1423 ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen; 1424 1425 ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr; 1426 ch_hdr->sch_id = CHUNK_FORWARD_TSN; 1427 ch_hdr->sch_flags = 0; 1428 /* 1429 * The cast here should not be an issue since seglen is 1430 * the length of the Forward TSN chunk. 1431 */ 1432 schlen = (uint16_t)seglen; 1433 U16_TO_ABE16(schlen, &(ch_hdr->sch_len)); 1434 1435 advtsn = (uint32_t *)(ch_hdr + 1); 1436 U32_TO_ABE32(sctp->sctp_adv_pap, advtsn); 1437 ftsn_entry = (ftsn_entry_t *)(advtsn + 1); 1438 while (nsets > 0) { 1439 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr); 1440 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid; 1441 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn; 1442 ftsn_entry++; 1443 sets = sets->next; 1444 nsets--; 1445 } 1446 return (ftsn_mp); 1447 } 1448 1449 /* 1450 * Given a starting message, the routine steps through all the 1451 * messages whose TSN is less than sctp->sctp_adv_pap and creates 1452 * ftsn sets. The ftsn sets is then used to create an Forward TSN 1453 * chunk. 
All the messages, that have chunks that are included in the 1454 * ftsn sets, are flagged abandonded. If a message is partially sent 1455 * and is deemed abandoned, all remaining unsent chunks are marked 1456 * abandoned and are deducted from sctp_unsent. 1457 */ 1458 void 1459 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp, 1460 sctp_faddr_t *fp, uint32_t *seglen) 1461 { 1462 mblk_t *mp1 = mp; 1463 mblk_t *mp_head = mp; 1464 mblk_t *meta_head = meta; 1465 mblk_t *head; 1466 sctp_ftsn_set_t *sets = NULL; 1467 uint_t nsets = 0; 1468 uint16_t clen; 1469 sctp_data_hdr_t *sdc; 1470 uint32_t sacklen; 1471 uint32_t adv_pap = sctp->sctp_adv_pap; 1472 uint32_t unsent = 0; 1473 boolean_t ubit; 1474 sctp_stack_t *sctps = sctp->sctp_sctps; 1475 1476 *seglen = sizeof (uint32_t); 1477 1478 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1479 while (meta != NULL && 1480 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { 1481 /* 1482 * Skip adding FTSN sets for un-ordered messages as they do 1483 * not have SSNs. 1484 */ 1485 ubit = SCTP_DATA_GET_UBIT(sdc); 1486 if (!ubit && 1487 !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) { 1488 meta = NULL; 1489 sctp->sctp_adv_pap = adv_pap; 1490 goto ftsn_done; 1491 } 1492 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { 1493 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1494 adv_pap = ntohl(sdc->sdh_tsn); 1495 mp1 = mp1->b_next; 1496 } 1497 meta = meta->b_next; 1498 if (meta != NULL) { 1499 mp1 = meta->b_cont; 1500 if (!SCTP_CHUNK_ISSENT(mp1)) 1501 break; 1502 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1503 } 1504 } 1505 ftsn_done: 1506 /* 1507 * Can't compare with sets == NULL, since we don't add any 1508 * sets for un-ordered messages. 
1509 */ 1510 if (meta == meta_head) 1511 return; 1512 *nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen); 1513 sctp_free_ftsn_set(sets); 1514 if (*nmp == NULL) 1515 return; 1516 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1517 sacklen = 0; 1518 } else { 1519 sacklen = sizeof (sctp_chunk_hdr_t) + 1520 sizeof (sctp_sack_chunk_t) + 1521 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1522 if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1523 /* piggybacked SACK doesn't fit */ 1524 sacklen = 0; 1525 } else { 1526 fp = sctp->sctp_lastdata; 1527 } 1528 } 1529 head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL); 1530 if (head == NULL) { 1531 freemsg(*nmp); 1532 *nmp = NULL; 1533 SCTP_KSTAT(sctps, sctp_send_ftsn_failed); 1534 return; 1535 } 1536 *seglen += sacklen; 1537 *nmp = head; 1538 1539 /* 1540 * XXXNeed to optimise this, the reason it is done here is so 1541 * that we don't have to undo in case of failure. 1542 */ 1543 mp1 = mp_head; 1544 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1545 while (meta_head != NULL && 1546 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { 1547 if (!SCTP_IS_MSG_ABANDONED(meta_head)) 1548 SCTP_MSG_SET_ABANDONED(meta_head); 1549 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { 1550 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1551 if (!SCTP_CHUNK_ISACKED(mp1)) { 1552 clen = ntohs(sdc->sdh_len) - sizeof (*sdc); 1553 SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen, 1554 meta_head); 1555 } 1556 mp1 = mp1->b_next; 1557 } 1558 while (mp1 != NULL) { 1559 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1560 if (!SCTP_CHUNK_ABANDONED(mp1)) { 1561 ASSERT(!SCTP_CHUNK_ISSENT(mp1)); 1562 unsent += ntohs(sdc->sdh_len) - sizeof (*sdc); 1563 SCTP_ABANDON_CHUNK(mp1); 1564 } 1565 mp1 = mp1->b_next; 1566 } 1567 meta_head = meta_head->b_next; 1568 if (meta_head != NULL) { 1569 mp1 = meta_head->b_cont; 1570 if (!SCTP_CHUNK_ISSENT(mp1)) 1571 break; 1572 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1573 } 1574 } 1575 if (unsent > 0) { 1576 ASSERT(sctp->sctp_unsent 
>= unsent); 1577 sctp->sctp_unsent -= unsent; 1578 /* 1579 * Update ULP the amount of queued data, which is 1580 * sent-unack'ed + unsent. 1581 */ 1582 if (!SCTP_IS_DETACHED(sctp)) 1583 SCTP_TXQ_UPDATE(sctp); 1584 } 1585 } 1586 1587 /* 1588 * This function steps through messages starting at meta and checks if 1589 * the message is abandoned. It stops when it hits an unsent chunk or 1590 * a message that has all its chunk acked. This is the only place 1591 * where the sctp_adv_pap is moved forward to indicated abandoned 1592 * messages. 1593 */ 1594 void 1595 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp) 1596 { 1597 uint32_t tsn = sctp->sctp_adv_pap; 1598 sctp_data_hdr_t *sdc; 1599 sctp_msg_hdr_t *msg_hdr; 1600 1601 ASSERT(mp != NULL); 1602 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1603 ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd)); 1604 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1605 if (!SCTP_IS_MSG_ABANDONED(meta) && 1606 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 1607 return; 1608 } 1609 while (meta != NULL) { 1610 while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) { 1611 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1612 tsn = ntohl(sdc->sdh_tsn); 1613 mp = mp->b_next; 1614 } 1615 if (mp != NULL) 1616 break; 1617 /* 1618 * We continue checking for successive messages only if there 1619 * is a chunk marked for retransmission. Else, we might 1620 * end up sending FTSN prematurely for chunks that have been 1621 * sent, but not yet acked. 
1622 */ 1623 if ((meta = meta->b_next) != NULL) { 1624 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1625 if (!SCTP_IS_MSG_ABANDONED(meta) && 1626 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 1627 break; 1628 } 1629 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1630 if (!SCTP_CHUNK_ISSENT(mp)) { 1631 sctp->sctp_adv_pap = tsn; 1632 return; 1633 } 1634 if (SCTP_CHUNK_WANT_REXMIT(mp)) 1635 break; 1636 } 1637 if (mp == NULL) 1638 break; 1639 } 1640 } 1641 sctp->sctp_adv_pap = tsn; 1642 } 1643 1644 1645 /* 1646 * Determine if we should bundle a data chunk with the chunk being 1647 * retransmitted. We bundle if 1648 * 1649 * - the chunk is sent to the same destination and unack'ed. 1650 * 1651 * OR 1652 * 1653 * - the chunk is unsent, i.e. new data. 1654 */ 1655 #define SCTP_CHUNK_RX_CANBUNDLE(mp, fp) \ 1656 (!SCTP_CHUNK_ABANDONED((mp)) && \ 1657 ((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) && \ 1658 !SCTP_CHUNK_ISACKED(mp))) || \ 1659 (((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \ 1660 SCTP_CHUNK_FLAG_SENT))) 1661 1662 /* 1663 * Retransmit first segment which hasn't been acked with cumtsn or send 1664 * a Forward TSN chunk, if appropriate. 
1665 */ 1666 void 1667 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) 1668 { 1669 mblk_t *mp; 1670 mblk_t *nmp = NULL; 1671 mblk_t *head; 1672 mblk_t *meta = sctp->sctp_xmit_head; 1673 mblk_t *fill; 1674 uint32_t seglen = 0; 1675 uint32_t sacklen; 1676 uint16_t chunklen; 1677 int extra; 1678 sctp_data_hdr_t *sdc; 1679 sctp_faddr_t *fp; 1680 uint32_t adv_pap = sctp->sctp_adv_pap; 1681 boolean_t do_ftsn = B_FALSE; 1682 boolean_t ftsn_check = B_TRUE; 1683 uint32_t first_ua_tsn; 1684 sctp_msg_hdr_t *mhdr; 1685 sctp_stack_t *sctps = sctp->sctp_sctps; 1686 int error; 1687 1688 while (meta != NULL) { 1689 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1690 uint32_t tsn; 1691 1692 if (!SCTP_CHUNK_ISSENT(mp)) 1693 goto window_probe; 1694 /* 1695 * We break in the following cases - 1696 * 1697 * if the advanced peer ack point includes the next 1698 * chunk to be retransmited - possibly the Forward 1699 * TSN was lost. 1700 * 1701 * if we are PRSCTP aware and the next chunk to be 1702 * retransmitted is now abandoned 1703 * 1704 * if the next chunk to be retransmitted is for 1705 * the dest on which the timer went off. (this 1706 * message is not abandoned). 1707 * 1708 * We check for Forward TSN only for the first 1709 * eligible chunk to be retransmitted. The reason 1710 * being if the first eligible chunk is skipped (say 1711 * it was sent to a destination other than oldfp) 1712 * then we cannot advance the cum TSN via Forward 1713 * TSN chunk. 1714 * 1715 * Also, ftsn_check is B_TRUE only for the first 1716 * eligible chunk, it will be B_FALSE for all 1717 * subsequent candidate messages for retransmission. 
1718 */ 1719 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1720 tsn = ntohl(sdc->sdh_tsn); 1721 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) { 1722 if (sctp->sctp_prsctp_aware && ftsn_check) { 1723 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) { 1724 ASSERT(sctp->sctp_prsctp_aware); 1725 do_ftsn = B_TRUE; 1726 goto out; 1727 } else { 1728 sctp_check_adv_ack_pt(sctp, 1729 meta, mp); 1730 if (SEQ_GT(sctp->sctp_adv_pap, 1731 adv_pap)) { 1732 do_ftsn = B_TRUE; 1733 goto out; 1734 } 1735 } 1736 ftsn_check = B_FALSE; 1737 } 1738 if (SCTP_CHUNK_DEST(mp) == oldfp) 1739 goto out; 1740 } 1741 } 1742 meta = meta->b_next; 1743 if (meta != NULL && sctp->sctp_prsctp_aware) { 1744 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1745 1746 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || 1747 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { 1748 meta = meta->b_next; 1749 } 1750 } 1751 } 1752 window_probe: 1753 /* 1754 * Retransmit fired for a destination which didn't have 1755 * any unacked data pending. 1756 */ 1757 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) { 1758 /* 1759 * Send a window probe. Inflate frwnd to allow 1760 * sending one segment. 1761 */ 1762 if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) 1763 sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc); 1764 1765 /* next TSN to send */ 1766 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn; 1767 1768 /* 1769 * The above sctp_frwnd adjustment is coarse. The "changed" 1770 * sctp_frwnd may allow us to send more than 1 packet. So 1771 * tell sctp_output() to send only 1 packet. 1772 */ 1773 sctp_output(sctp, 1); 1774 1775 /* Last sent TSN */ 1776 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1777 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); 1778 sctp->sctp_zero_win_probe = B_TRUE; 1779 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1780 } 1781 return; 1782 out: 1783 /* 1784 * After a time out, assume that everything has left the network. So 1785 * we can clear rxt_unacked for the original peer address. 
1786 */ 1787 oldfp->rxt_unacked = 0; 1788 1789 /* 1790 * If we were probing for zero window, don't adjust retransmission 1791 * variables, but the timer is still backed off. 1792 */ 1793 if (sctp->sctp_zero_win_probe) { 1794 mblk_t *pkt; 1795 uint_t pkt_len; 1796 1797 /* 1798 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn 1799 * and sctp_rxt_maxtsn will specify the ZWP packet. 1800 */ 1801 fp = oldfp; 1802 if (oldfp->state != SCTP_FADDRS_ALIVE) 1803 fp = sctp_rotate_faddr(sctp, oldfp); 1804 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 1805 if (pkt != NULL) { 1806 ASSERT(pkt_len <= fp->sfa_pmss); 1807 sctp_set_iplen(sctp, pkt); 1808 sctp_add_sendq(sctp, pkt); 1809 } else { 1810 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 1811 } 1812 1813 /* 1814 * The strikes will be clear by sctp_faddr_alive() when the 1815 * other side sends us an ack. 1816 */ 1817 oldfp->strikes++; 1818 sctp->sctp_strikes++; 1819 1820 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1821 if (oldfp != fp && oldfp->suna != 0) 1822 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); 1823 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1824 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1825 return; 1826 } 1827 1828 /* 1829 * Enter slowstart for this destination 1830 */ 1831 oldfp->ssthresh = oldfp->cwnd / 2; 1832 if (oldfp->ssthresh < 2 * oldfp->sfa_pmss) 1833 oldfp->ssthresh = 2 * oldfp->sfa_pmss; 1834 oldfp->cwnd = oldfp->sfa_pmss; 1835 oldfp->pba = 0; 1836 fp = sctp_rotate_faddr(sctp, oldfp); 1837 ASSERT(fp != NULL); 1838 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1839 1840 first_ua_tsn = ntohl(sdc->sdh_tsn); 1841 if (do_ftsn) { 1842 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); 1843 if (nmp == NULL) { 1844 sctp->sctp_adv_pap = adv_pap; 1845 goto restart_timer; 1846 } 1847 head = nmp; 1848 /* 1849 * Move to the next unabandoned chunk. XXXCheck if meta will 1850 * always be marked abandoned. 
1851 */ 1852 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) 1853 meta = meta->b_next; 1854 if (meta != NULL) 1855 mp = mp->b_cont; 1856 else 1857 mp = NULL; 1858 goto try_bundle; 1859 } 1860 seglen = ntohs(sdc->sdh_len); 1861 chunklen = seglen - sizeof (*sdc); 1862 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1863 extra = SCTP_ALIGN - extra; 1864 1865 /* Find out if we need to piggyback SACK. */ 1866 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1867 sacklen = 0; 1868 } else { 1869 sacklen = sizeof (sctp_chunk_hdr_t) + 1870 sizeof (sctp_sack_chunk_t) + 1871 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1872 if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1873 /* piggybacked SACK doesn't fit */ 1874 sacklen = 0; 1875 } else { 1876 /* 1877 * OK, we have room to send SACK back. But we 1878 * should send it back to the last fp where we 1879 * receive data from, unless sctp_lastdata equals 1880 * oldfp, then we should probably not send it 1881 * back to that fp. Also we should check that 1882 * the fp is alive. 1883 */ 1884 if (sctp->sctp_lastdata != oldfp && 1885 sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { 1886 fp = sctp->sctp_lastdata; 1887 } 1888 } 1889 } 1890 1891 /* 1892 * Cancel RTT measurement if the retransmitted TSN is before the 1893 * TSN used for timimg. 1894 */ 1895 if (sctp->sctp_out_time != 0 && 1896 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { 1897 sctp->sctp_out_time = 0; 1898 } 1899 /* Clear the counter as the RTT calculation may be off. */ 1900 fp->rtt_updates = 0; 1901 oldfp->rtt_updates = 0; 1902 1903 /* 1904 * After a timeout, we should change the current faddr so that 1905 * new chunks will be sent to the alternate address. 
1906 */ 1907 sctp_set_faddr_current(sctp, fp); 1908 1909 nmp = dupmsg(mp); 1910 if (nmp == NULL) 1911 goto restart_timer; 1912 if (extra > 0) { 1913 fill = sctp_get_padding(sctp, extra); 1914 if (fill != NULL) { 1915 linkb(nmp, fill); 1916 seglen += extra; 1917 } else { 1918 freemsg(nmp); 1919 goto restart_timer; 1920 } 1921 } 1922 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1923 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); 1924 if (head == NULL) { 1925 freemsg(nmp); 1926 SCTP_KSTAT(sctps, sctp_rexmit_failed); 1927 goto restart_timer; 1928 } 1929 seglen += sacklen; 1930 1931 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1932 1933 mp = mp->b_next; 1934 1935 try_bundle: 1936 /* We can at least and at most send 1 packet at timeout. */ 1937 while (seglen < fp->sfa_pmss) { 1938 int32_t new_len; 1939 1940 /* Go through the list to find more chunks to be bundled. */ 1941 while (mp != NULL) { 1942 /* Check if the chunk can be bundled. */ 1943 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) 1944 break; 1945 mp = mp->b_next; 1946 } 1947 /* Go to the next message. */ 1948 if (mp == NULL) { 1949 for (meta = meta->b_next; meta != NULL; 1950 meta = meta->b_next) { 1951 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1952 1953 if (SCTP_IS_MSG_ABANDONED(meta) || 1954 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, 1955 sctp)) { 1956 continue; 1957 } 1958 1959 mp = meta->b_cont; 1960 goto try_bundle; 1961 } 1962 /* 1963 * Check if there is a new message which potentially 1964 * could be bundled with this retransmission. 1965 */ 1966 meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error, 1967 seglen, fp->sfa_pmss - seglen, NULL); 1968 if (error != 0 || meta == NULL) { 1969 /* No more chunk to be bundled. 
*/ 1970 break; 1971 } else { 1972 goto try_bundle; 1973 } 1974 } 1975 1976 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1977 new_len = ntohs(sdc->sdh_len); 1978 chunklen = new_len - sizeof (*sdc); 1979 1980 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 1981 extra = SCTP_ALIGN - extra; 1982 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 1983 break; 1984 if ((nmp = dupmsg(mp)) == NULL) 1985 break; 1986 1987 if (extra > 0) { 1988 fill = sctp_get_padding(sctp, extra); 1989 if (fill != NULL) { 1990 linkb(nmp, fill); 1991 } else { 1992 freemsg(nmp); 1993 break; 1994 } 1995 } 1996 linkb(head, nmp); 1997 1998 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1999 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 2000 2001 seglen = new_len; 2002 mp = mp->b_next; 2003 } 2004 done_bundle: 2005 if ((seglen > fp->sfa_pmss) && fp->isv4) { 2006 ipha_t *iph = (ipha_t *)head->b_rptr; 2007 2008 /* 2009 * Path MTU is different from path we thought it would 2010 * be when we created chunks, or IP headers have grown. 2011 * Need to clear the DF bit. 2012 */ 2013 iph->ipha_fragment_offset_and_flags = 0; 2014 } 2015 fp->rxt_unacked += seglen; 2016 2017 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x " 2018 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n", 2019 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), 2020 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); 2021 2022 sctp->sctp_rexmitting = B_TRUE; 2023 sctp->sctp_rxt_nxttsn = first_ua_tsn; 2024 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 2025 sctp_set_iplen(sctp, head); 2026 sctp_add_sendq(sctp, head); 2027 2028 /* 2029 * Restart the oldfp timer with exponential backoff and 2030 * the new fp timer for the retransmitted chunks. 2031 */ 2032 restart_timer: 2033 oldfp->strikes++; 2034 sctp->sctp_strikes++; 2035 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 2036 /* 2037 * If there is still some data in the oldfp, restart the 2038 * retransmission timer. 
If there is no data, the heartbeat will 2039 * continue to run so it will do its job in checking the reachability 2040 * of the oldfp. 2041 */ 2042 if (oldfp != fp && oldfp->suna != 0) 2043 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); 2044 2045 /* 2046 * Should we restart the timer of the new fp? If there is 2047 * outstanding data to the new fp, the timer should be 2048 * running already. So restarting it means that the timer 2049 * will fire later for those outstanding data. But if 2050 * we don't restart it, the timer will fire too early for the 2051 * just retransmitted chunks to the new fp. The reason is that we 2052 * don't keep a timestamp on when a chunk is retransmitted. 2053 * So when the timer fires, it will just search for the 2054 * chunk with the earliest TSN sent to new fp. This probably 2055 * is the chunk we just retransmitted. So for now, let's 2056 * be conservative and restart the timer of the new fp. 2057 */ 2058 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2059 2060 sctp->sctp_active = lbolt64; 2061 } 2062 2063 /* 2064 * This function is called by sctp_ss_rexmit() to create a packet 2065 * to be retransmitted to the given fp. The given meta and mp 2066 * parameters are respectively the sctp_msg_hdr_t and the mblk of the 2067 * first chunk to be retransmitted. This is also called when we want 2068 * to retransmit a zero window probe from sctp_rexmit() or when we 2069 * want to retransmit the zero window probe after the window has 2070 * opened from sctp_got_sack(). 
2071 */ 2072 mblk_t * 2073 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, 2074 uint_t *packet_len) 2075 { 2076 uint32_t seglen = 0; 2077 uint16_t chunklen; 2078 int extra; 2079 mblk_t *nmp; 2080 mblk_t *head; 2081 mblk_t *fill; 2082 sctp_data_hdr_t *sdc; 2083 sctp_msg_hdr_t *mhdr; 2084 2085 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2086 seglen = ntohs(sdc->sdh_len); 2087 chunklen = seglen - sizeof (*sdc); 2088 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 2089 extra = SCTP_ALIGN - extra; 2090 2091 nmp = dupmsg(*mp); 2092 if (nmp == NULL) 2093 return (NULL); 2094 if (extra > 0) { 2095 fill = sctp_get_padding(sctp, extra); 2096 if (fill != NULL) { 2097 linkb(nmp, fill); 2098 seglen += extra; 2099 } else { 2100 freemsg(nmp); 2101 return (NULL); 2102 } 2103 } 2104 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2105 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL); 2106 if (head == NULL) { 2107 freemsg(nmp); 2108 return (NULL); 2109 } 2110 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2111 /* 2112 * Don't update the TSN if we are doing a Zero Win Probe. 2113 */ 2114 if (!sctp->sctp_zero_win_probe) 2115 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2116 *mp = (*mp)->b_next; 2117 2118 try_bundle: 2119 while (seglen < fp->sfa_pmss) { 2120 int32_t new_len; 2121 2122 /* 2123 * Go through the list to find more chunks to be bundled. 2124 * We should only retransmit sent by unack'ed chunks. Since 2125 * they were sent before, the peer's receive window should 2126 * be able to receive them. 2127 */ 2128 while (*mp != NULL) { 2129 /* Check if the chunk can be bundled. */ 2130 if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp)) 2131 break; 2132 *mp = (*mp)->b_next; 2133 } 2134 /* Go to the next message. 
*/ 2135 if (*mp == NULL) { 2136 for (*meta = (*meta)->b_next; *meta != NULL; 2137 *meta = (*meta)->b_next) { 2138 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr; 2139 2140 if (SCTP_IS_MSG_ABANDONED(*meta) || 2141 SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr, 2142 sctp)) { 2143 continue; 2144 } 2145 2146 *mp = (*meta)->b_cont; 2147 goto try_bundle; 2148 } 2149 /* No more chunk to be bundled. */ 2150 break; 2151 } 2152 2153 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2154 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */ 2155 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn)) 2156 break; 2157 new_len = ntohs(sdc->sdh_len); 2158 chunklen = new_len - sizeof (*sdc); 2159 2160 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 2161 extra = SCTP_ALIGN - extra; 2162 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 2163 break; 2164 if ((nmp = dupmsg(*mp)) == NULL) 2165 break; 2166 2167 if (extra > 0) { 2168 fill = sctp_get_padding(sctp, extra); 2169 if (fill != NULL) { 2170 linkb(nmp, fill); 2171 } else { 2172 freemsg(nmp); 2173 break; 2174 } 2175 } 2176 linkb(head, nmp); 2177 2178 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2179 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2180 /* 2181 * Don't update the TSN if we are doing a Zero Win Probe. 2182 */ 2183 if (!sctp->sctp_zero_win_probe) 2184 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2185 2186 seglen = new_len; 2187 *mp = (*mp)->b_next; 2188 } 2189 *packet_len = seglen; 2190 fp->rxt_unacked += seglen; 2191 return (head); 2192 } 2193 2194 /* 2195 * sctp_ss_rexmit() is called when we get a SACK after a timeout which 2196 * advances the cum_tsn but the cum_tsn is still less than what we have sent 2197 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial" 2198 * SACK. We retransmit unacked chunks without having to wait for another 2199 * timeout. The rationale is that the SACK should not be "partial" if all the 2200 * lost chunks have been retransmitted. 
Since the SACK is "partial," 2201 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still 2202 * be missing. It is better for us to retransmit them now instead 2203 * of waiting for a timeout. 2204 */ 2205 void 2206 sctp_ss_rexmit(sctp_t *sctp) 2207 { 2208 mblk_t *meta; 2209 mblk_t *mp; 2210 mblk_t *pkt; 2211 sctp_faddr_t *fp; 2212 uint_t pkt_len; 2213 uint32_t tot_wnd; 2214 sctp_data_hdr_t *sdc; 2215 int burst; 2216 sctp_stack_t *sctps = sctp->sctp_sctps; 2217 2218 ASSERT(!sctp->sctp_zero_win_probe); 2219 2220 /* 2221 * If the last cum ack is smaller than what we have just 2222 * retransmitted, simply return. 2223 */ 2224 if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn)) 2225 sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1; 2226 else 2227 return; 2228 ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn)); 2229 2230 /* 2231 * After a timer fires, sctp_current should be set to the new 2232 * fp where the retransmitted chunks are sent. 2233 */ 2234 fp = sctp->sctp_current; 2235 2236 /* 2237 * Since we are retransmitting, we only need to use cwnd to determine 2238 * how much we can send as we were allowed (by peer's receive window) 2239 * to send those retransmitted chunks previously when they are first 2240 * sent. If we record how much we have retransmitted but 2241 * unacknowledged using rxt_unacked, then the amount we can now send 2242 * is equal to cwnd minus rxt_unacked. 2243 * 2244 * The field rxt_unacked is incremented when we retransmit a packet 2245 * and decremented when we got a SACK acknowledging something. And 2246 * it is reset when the retransmission timer fires as we assume that 2247 * all packets have left the network after a timeout. If this 2248 * assumption is not true, it means that after a timeout, we can 2249 * get a SACK acknowledging more than rxt_unacked (its value only 2250 * contains what is retransmitted when the timer fires). 
So 2251 * rxt_unacked will become very big (it is an unsiged int so going 2252 * negative means that the value is huge). This is the reason we 2253 * always send at least 1 MSS bytes. 2254 * 2255 * The reason why we do not have an accurate count is that we 2256 * only know how many packets are outstanding (using the TSN numbers). 2257 * But we do not know how many bytes those packets contain. To 2258 * have an accurate count, we need to walk through the send list. 2259 * As it is not really important to have an accurate count during 2260 * retransmission, we skip this walk to save some time. This should 2261 * not make the retransmission too aggressive to cause congestion. 2262 */ 2263 if (fp->cwnd <= fp->rxt_unacked) 2264 tot_wnd = fp->sfa_pmss; 2265 else 2266 tot_wnd = fp->cwnd - fp->rxt_unacked; 2267 2268 /* Find the first unack'ed chunk */ 2269 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 2270 sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 2271 2272 if (SCTP_IS_MSG_ABANDONED(meta) || 2273 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) { 2274 continue; 2275 } 2276 2277 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 2278 /* Again, this may not be possible */ 2279 if (!SCTP_CHUNK_ISSENT(mp)) 2280 return; 2281 sdc = (sctp_data_hdr_t *)mp->b_rptr; 2282 if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn) 2283 goto found_msg; 2284 } 2285 } 2286 2287 /* Everything is abandoned... */ 2288 return; 2289 2290 found_msg: 2291 if (!fp->timer_running) 2292 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2293 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 2294 if (pkt == NULL) { 2295 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 2296 return; 2297 } 2298 if ((pkt_len > fp->sfa_pmss) && fp->isv4) { 2299 ipha_t *iph = (ipha_t *)pkt->b_rptr; 2300 2301 /* 2302 * Path MTU is different from path we thought it would 2303 * be when we created chunks, or IP headers have grown. 2304 * Need to clear the DF bit. 
2305 */ 2306 iph->ipha_fragment_offset_and_flags = 0; 2307 } 2308 sctp_set_iplen(sctp, pkt); 2309 sctp_add_sendq(sctp, pkt); 2310 2311 /* Check and see if there is more chunk to be retransmitted. */ 2312 if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss || 2313 meta == NULL) 2314 return; 2315 if (mp == NULL) 2316 meta = meta->b_next; 2317 if (meta == NULL) 2318 return; 2319 2320 /* Retransmit another packet if the window allows. */ 2321 for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1; 2322 meta != NULL && burst > 0; meta = meta->b_next, burst--) { 2323 if (mp == NULL) 2324 mp = meta->b_cont; 2325 for (; mp != NULL; mp = mp->b_next) { 2326 /* Again, this may not be possible */ 2327 if (!SCTP_CHUNK_ISSENT(mp)) 2328 return; 2329 if (!SCTP_CHUNK_ISACKED(mp)) 2330 goto found_msg; 2331 } 2332 } 2333 } 2334