1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #define _SUN_TPI_VERSION 2 34 #include <sys/tihdr.h> 35 #include <sys/socket.h> 36 #include <sys/stropts.h> 37 #include <sys/strsun.h> 38 #include <sys/strsubr.h> 39 #include <sys/socketvar.h> 40 /* swilly code in sys/socketvar.h turns off DEBUG */ 41 #ifdef __lint 42 #define DEBUG 43 #endif 44 45 #include <inet/common.h> 46 #include <inet/mi.h> 47 #include <inet/ip.h> 48 #include <inet/ip6.h> 49 #include <inet/sctp_ip.h> 50 #include <inet/ipclassifier.h> 51 52 /* 53 * PR-SCTP comments. 54 * 55 * A message can expire before it gets to the transmit list (i.e. it is still 56 * in the unsent list - unchunked), after it gets to the transmit list, but 57 * before transmission has actually started, or after transmission has begun. 
 * Accordingly, we check for the status of a message in sctp_chunkify() when
 * the message is being transferred from the unsent list to the transmit list;
 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
 * When we nuke a message in sctp_chunkify(), all we need to do is take it
 * out of the unsent list and update sctp_unsent; when a message is deemed
 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
 * list, update sctp_unsent IFF transmission for the message has not yet begun
 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
 * message has started, then we cannot just take it out of the list, we need
 * to send Forward TSN chunk to the peer so that the peer can clear its
 * fragment list for this message. However, we cannot just send the Forward
 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
 * messages preceding this abandoned message. So, we send a Forward TSN
 * IFF all messages prior to this abandoned message have been SACKed, if not
 * we defer sending the Forward TSN to sctp_cumack(), which will check for
 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
 * sctp_rexmit() when we check for retransmissions, we need to determine if
 * the advanced peer ack point can be moved ahead, and if so, send a Forward
 * TSN to the peer instead of retransmitting the chunk. Note that when
 * we send a Forward TSN for a message, there may be yet unsent chunks for
 * this message; we need to mark all such chunks as abandoned, so that
 * sctp_cumack() can take the message out of the transmit list, additionally
 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
82 * decremented when a message/chunk is deemed abandoned), sockfs needs to 83 * be notified so that it can adjust its idea of the queued message. 84 */ 85 86 #include "sctp_impl.h" 87 88 static struct kmem_cache *sctp_kmem_ftsn_set_cache; 89 90 #ifdef DEBUG 91 static boolean_t sctp_verify_chain(mblk_t *, mblk_t *); 92 #endif 93 94 /* 95 * Called to allocate a header mblk when sending data to SCTP. 96 * Data will follow in b_cont of this mblk. 97 */ 98 mblk_t * 99 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen, 100 int flags) 101 { 102 mblk_t *mp; 103 struct T_unitdata_req *tudr; 104 size_t size; 105 int error; 106 107 size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen; 108 size = MAX(size, sizeof (sctp_msg_hdr_t)); 109 if (flags & SCTP_CAN_BLOCK) { 110 mp = allocb_wait(size, BPRI_MED, 0, &error); 111 } else { 112 mp = allocb(size, BPRI_MED); 113 } 114 if (mp) { 115 tudr = (struct T_unitdata_req *)mp->b_rptr; 116 tudr->PRIM_type = T_UNITDATA_REQ; 117 tudr->DEST_length = nlen; 118 tudr->DEST_offset = sizeof (*tudr); 119 tudr->OPT_length = clen; 120 tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) + 121 _TPI_ALIGN_TOPT(nlen)); 122 if (nlen > 0) 123 bcopy(name, tudr + 1, nlen); 124 if (clen > 0) 125 bcopy(control, (char *)tudr + tudr->OPT_offset, clen); 126 mp->b_wptr += (tudr ->OPT_offset + clen); 127 mp->b_datap->db_type = M_PROTO; 128 } 129 return (mp); 130 } 131 132 /*ARGSUSED2*/ 133 int 134 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags) 135 { 136 sctp_faddr_t *fp = NULL; 137 struct T_unitdata_req *tudr; 138 int error = 0; 139 mblk_t *mproto = mp; 140 in6_addr_t *addr; 141 in6_addr_t tmpaddr; 142 uint16_t sid = sctp->sctp_def_stream; 143 uint32_t ppid = sctp->sctp_def_ppid; 144 uint32_t context = sctp->sctp_def_context; 145 uint16_t msg_flags = sctp->sctp_def_flags; 146 sctp_msg_hdr_t *sctp_msg_hdr; 147 uint32_t msg_len = 0; 148 uint32_t timetolive = sctp->sctp_def_timetolive; 149 150 ASSERT(DB_TYPE(mproto) == M_PROTO); 151 152 mp = 
mp->b_cont; 153 ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA); 154 155 tudr = (struct T_unitdata_req *)mproto->b_rptr; 156 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 157 158 /* Get destination address, if specified */ 159 if (tudr->DEST_length > 0) { 160 sin_t *sin; 161 sin6_t *sin6; 162 163 sin = (struct sockaddr_in *) 164 (mproto->b_rptr + tudr->DEST_offset); 165 switch (sin->sin_family) { 166 case AF_INET: 167 if (tudr->DEST_length < sizeof (*sin)) { 168 return (EINVAL); 169 } 170 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr); 171 addr = &tmpaddr; 172 break; 173 case AF_INET6: 174 if (tudr->DEST_length < sizeof (*sin6)) { 175 return (EINVAL); 176 } 177 sin6 = (struct sockaddr_in6 *) 178 (mproto->b_rptr + tudr->DEST_offset); 179 addr = &sin6->sin6_addr; 180 break; 181 default: 182 return (EAFNOSUPPORT); 183 } 184 fp = sctp_lookup_faddr(sctp, addr); 185 if (fp == NULL) { 186 return (EINVAL); 187 } 188 } 189 /* Ancillary Data? */ 190 if (tudr->OPT_length > 0) { 191 struct cmsghdr *cmsg; 192 char *cend; 193 struct sctp_sndrcvinfo *sndrcv; 194 195 cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset); 196 cend = ((char *)cmsg + tudr->OPT_length); 197 ASSERT(cend <= (char *)mproto->b_wptr); 198 199 for (;;) { 200 if ((char *)(cmsg + 1) > cend || 201 ((char *)cmsg + cmsg->cmsg_len) > cend) { 202 break; 203 } 204 if ((cmsg->cmsg_level == IPPROTO_SCTP) && 205 (cmsg->cmsg_type == SCTP_SNDRCV)) { 206 if (cmsg->cmsg_len < 207 (sizeof (*sndrcv) + sizeof (*cmsg))) { 208 return (EINVAL); 209 } 210 sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1); 211 sid = sndrcv->sinfo_stream; 212 msg_flags = sndrcv->sinfo_flags; 213 ppid = sndrcv->sinfo_ppid; 214 context = sndrcv->sinfo_context; 215 timetolive = sndrcv->sinfo_timetolive; 216 break; 217 } 218 if (cmsg->cmsg_len > 0) 219 cmsg = CMSG_NEXT(cmsg); 220 else 221 break; 222 } 223 } 224 if (msg_flags & MSG_ABORT) { 225 if (mp && mp->b_cont) { 226 mblk_t *pump = msgpullup(mp, -1); 227 if (!pump) { 228 return (ENOMEM); 229 } 
230 freemsg(mp); 231 mp = pump; 232 mproto->b_cont = mp; 233 } 234 RUN_SCTP(sctp); 235 sctp_user_abort(sctp, mp, B_TRUE); 236 sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL); 237 sctp_clean_death(sctp, ECONNRESET); 238 freemsg(mproto); 239 goto process_sendq; 240 } 241 if (mp == NULL) 242 goto done; 243 244 RUN_SCTP(sctp); 245 246 /* Reject any new data requests if we are shutting down */ 247 if (sctp->sctp_state > SCTPS_ESTABLISHED || 248 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) { 249 error = EPIPE; 250 goto unlock_done; 251 } 252 253 /* Re-use the mproto to store relevant info. */ 254 ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr)); 255 256 mproto->b_rptr = mproto->b_datap->db_base; 257 mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr); 258 259 sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr; 260 bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr)); 261 sctp_msg_hdr->smh_context = context; 262 sctp_msg_hdr->smh_sid = sid; 263 sctp_msg_hdr->smh_ppid = ppid; 264 sctp_msg_hdr->smh_flags = msg_flags; 265 sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive); 266 sctp_msg_hdr->smh_tob = lbolt64; 267 for (; mp != NULL; mp = mp->b_cont) 268 msg_len += MBLKL(mp); 269 sctp_msg_hdr->smh_msglen = msg_len; 270 271 /* User requested specific destination */ 272 SCTP_SET_CHUNK_DEST(mproto, fp); 273 274 if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED && 275 sid >= sctp->sctp_num_ostr) { 276 /* Send sendfail event */ 277 sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID, 278 B_FALSE); 279 error = EINVAL; 280 goto unlock_done; 281 } 282 283 /* no data */ 284 if (msg_len == 0) { 285 sctp_sendfail_event(sctp, dupmsg(mproto), 286 SCTP_ERR_NO_USR_DATA, B_FALSE); 287 error = EINVAL; 288 goto unlock_done; 289 } 290 291 /* Add it to the unsent list */ 292 if (sctp->sctp_xmit_unsent == NULL) { 293 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto; 294 } else { 295 sctp->sctp_xmit_unsent_tail->b_next = mproto; 296 sctp->sctp_xmit_unsent_tail = mproto; 297 } 298 
sctp->sctp_unsent += msg_len; 299 BUMP_LOCAL(sctp->sctp_msgcount); 300 if (sctp->sctp_state == SCTPS_ESTABLISHED) 301 sctp_output(sctp, UINT_MAX); 302 process_sendq: 303 WAKE_SCTP(sctp); 304 sctp_process_sendq(sctp); 305 return (0); 306 unlock_done: 307 WAKE_SCTP(sctp); 308 done: 309 return (error); 310 } 311 312 void 313 sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send) 314 { 315 mblk_t *mp; 316 mblk_t *chunk_mp; 317 mblk_t *chunk_head; 318 mblk_t *chunk_hdr; 319 mblk_t *chunk_tail = NULL; 320 int count; 321 int chunksize; 322 sctp_data_hdr_t *sdc; 323 mblk_t *mdblk = sctp->sctp_xmit_unsent; 324 sctp_faddr_t *fp; 325 sctp_faddr_t *fp1; 326 size_t xtralen; 327 sctp_msg_hdr_t *msg_hdr; 328 sctp_stack_t *sctps = sctp->sctp_sctps; 329 330 fp = SCTP_CHUNK_DEST(mdblk); 331 if (fp == NULL) 332 fp = sctp->sctp_current; 333 if (fp->isv4) 334 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra + 335 sizeof (*sdc); 336 else 337 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra + 338 sizeof (*sdc); 339 count = chunksize = first_len - sizeof (*sdc); 340 nextmsg: 341 chunk_mp = mdblk->b_cont; 342 343 /* 344 * If this partially chunked, we ignore the first_len for now 345 * and use the one already present. For the unchunked bits, we 346 * use the length of the last chunk. 
347 */ 348 if (SCTP_IS_MSG_CHUNKED(mdblk)) { 349 int chunk_len; 350 351 ASSERT(chunk_mp->b_next != NULL); 352 mdblk->b_cont = chunk_mp->b_next; 353 chunk_mp->b_next = NULL; 354 SCTP_MSG_CLEAR_CHUNKED(mdblk); 355 mp = mdblk->b_cont; 356 while (mp->b_next != NULL) 357 mp = mp->b_next; 358 chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len); 359 if (fp->sfa_pmss - chunk_len > sizeof (*sdc)) 360 count = chunksize = fp->sfa_pmss - chunk_len; 361 else 362 count = chunksize = fp->sfa_pmss; 363 count = chunksize = count - sizeof (*sdc); 364 } else { 365 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; 366 if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) { 367 sctp->sctp_xmit_unsent = mdblk->b_next; 368 if (sctp->sctp_xmit_unsent == NULL) 369 sctp->sctp_xmit_unsent_tail = NULL; 370 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); 371 sctp->sctp_unsent -= msg_hdr->smh_msglen; 372 mdblk->b_next = NULL; 373 BUMP_LOCAL(sctp->sctp_prsctpdrop); 374 /* 375 * Update ULP the amount of queued data, which is 376 * sent-unack'ed + unsent. 
377 */ 378 if (!SCTP_IS_DETACHED(sctp)) { 379 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, 380 sctp->sctp_unacked + sctp->sctp_unsent); 381 } 382 sctp_sendfail_event(sctp, mdblk, 0, B_FALSE); 383 goto try_next; 384 } 385 mdblk->b_cont = NULL; 386 } 387 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; 388 nextchunk: 389 chunk_head = chunk_mp; 390 chunk_tail = NULL; 391 392 /* Skip as many mblk's as we need */ 393 while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) { 394 count -= MBLKL(chunk_mp); 395 chunk_tail = chunk_mp; 396 chunk_mp = chunk_mp->b_cont; 397 } 398 /* Split the chain, if needed */ 399 if (chunk_mp != NULL) { 400 if (count > 0) { 401 mblk_t *split_mp = dupb(chunk_mp); 402 403 if (split_mp == NULL) { 404 if (mdblk->b_cont == NULL) { 405 mdblk->b_cont = chunk_head; 406 } else { 407 SCTP_MSG_SET_CHUNKED(mdblk); 408 ASSERT(chunk_head->b_next == NULL); 409 chunk_head->b_next = mdblk->b_cont; 410 mdblk->b_cont = chunk_head; 411 } 412 return; 413 } 414 if (chunk_tail != NULL) { 415 chunk_tail->b_cont = split_mp; 416 chunk_tail = chunk_tail->b_cont; 417 } else { 418 chunk_head = chunk_tail = split_mp; 419 } 420 chunk_tail->b_wptr = chunk_tail->b_rptr + count; 421 chunk_mp->b_rptr = chunk_tail->b_wptr; 422 count = 0; 423 } else if (chunk_tail == NULL) { 424 goto next; 425 } else { 426 chunk_tail->b_cont = NULL; 427 } 428 } 429 /* Alloc chunk hdr, if needed */ 430 if (DB_REF(chunk_head) > 1 || 431 ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) || 432 MBLKHEAD(chunk_head) < sizeof (*sdc)) { 433 if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) { 434 if (mdblk->b_cont == NULL) { 435 if (chunk_mp != NULL) 436 linkb(chunk_head, chunk_mp); 437 mdblk->b_cont = chunk_head; 438 } else { 439 SCTP_MSG_SET_CHUNKED(mdblk); 440 if (chunk_mp != NULL) 441 linkb(chunk_head, chunk_mp); 442 ASSERT(chunk_head->b_next == NULL); 443 chunk_head->b_next = mdblk->b_cont; 444 mdblk->b_cont = chunk_head; 445 } 446 return; 447 } 448 chunk_hdr->b_rptr += xtralen - sizeof (*sdc); 449 
chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc); 450 chunk_hdr->b_cont = chunk_head; 451 } else { 452 chunk_hdr = chunk_head; 453 chunk_hdr->b_rptr -= sizeof (*sdc); 454 } 455 ASSERT(chunk_hdr->b_datap->db_ref == 1); 456 sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr; 457 sdc->sdh_id = CHUNK_DATA; 458 sdc->sdh_flags = 0; 459 sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count); 460 ASSERT(sdc->sdh_len); 461 sdc->sdh_sid = htons(msg_hdr->smh_sid); 462 /* 463 * We defer assigning the SSN just before sending the chunk, else 464 * if we drop the chunk in sctp_get_msg_to_send(), we would need 465 * to send a Forward TSN to let the peer know. Some more comments 466 * about this in sctp_impl.h for SCTP_CHUNK_SENT. 467 */ 468 sdc->sdh_payload_id = msg_hdr->smh_ppid; 469 470 if (mdblk->b_cont == NULL) { 471 mdblk->b_cont = chunk_hdr; 472 SCTP_DATA_SET_BBIT(sdc); 473 } else { 474 mp = mdblk->b_cont; 475 while (mp->b_next != NULL) 476 mp = mp->b_next; 477 mp->b_next = chunk_hdr; 478 } 479 480 bytes_to_send -= (chunksize - count); 481 if (chunk_mp != NULL) { 482 next: 483 count = chunksize = fp->sfa_pmss - sizeof (*sdc); 484 goto nextchunk; 485 } 486 SCTP_DATA_SET_EBIT(sdc); 487 sctp->sctp_xmit_unsent = mdblk->b_next; 488 if (mdblk->b_next == NULL) { 489 sctp->sctp_xmit_unsent_tail = NULL; 490 } 491 mdblk->b_next = NULL; 492 493 if (sctp->sctp_xmit_tail == NULL) { 494 sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk; 495 } else { 496 mp = sctp->sctp_xmit_tail; 497 while (mp->b_next != NULL) 498 mp = mp->b_next; 499 mp->b_next = mdblk; 500 mdblk->b_prev = mp; 501 } 502 try_next: 503 if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) { 504 mdblk = sctp->sctp_xmit_unsent; 505 fp1 = SCTP_CHUNK_DEST(mdblk); 506 if (fp1 == NULL) 507 fp1 = sctp->sctp_current; 508 if (fp == fp1) { 509 size_t len = MBLKL(mdblk->b_cont); 510 if ((count > 0) && 511 ((len > fp->sfa_pmss - sizeof (*sdc)) || 512 (len <= count))) { 513 count -= sizeof (*sdc); 514 count = chunksize = count - 
(count & 0x3); 515 } else { 516 count = chunksize = fp->sfa_pmss - 517 sizeof (*sdc); 518 } 519 } else { 520 if (fp1->isv4) 521 xtralen = sctp->sctp_hdr_len; 522 else 523 xtralen = sctp->sctp_hdr6_len; 524 xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc); 525 count = chunksize = fp1->sfa_pmss - sizeof (*sdc); 526 fp = fp1; 527 } 528 goto nextmsg; 529 } 530 } 531 532 void 533 sctp_free_msg(mblk_t *ump) 534 { 535 mblk_t *mp, *nmp; 536 537 for (mp = ump->b_cont; mp; mp = nmp) { 538 nmp = mp->b_next; 539 mp->b_next = mp->b_prev = NULL; 540 freemsg(mp); 541 } 542 ASSERT(!ump->b_prev); 543 ump->b_next = NULL; 544 freeb(ump); 545 } 546 547 mblk_t * 548 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, 549 int *error) 550 { 551 int hdrlen; 552 char *hdr; 553 int isv4 = fp->isv4; 554 sctp_stack_t *sctps = sctp->sctp_sctps; 555 556 if (error != NULL) 557 *error = 0; 558 559 if (isv4) { 560 hdrlen = sctp->sctp_hdr_len; 561 hdr = sctp->sctp_iphc; 562 } else { 563 hdrlen = sctp->sctp_hdr6_len; 564 hdr = sctp->sctp_iphc6; 565 } 566 /* 567 * A null fp->ire could mean that the address is 'down'. Similarly, 568 * it is possible that the address went down, we tried to send an 569 * heartbeat and ended up setting fp->saddr as unspec because we 570 * didn't have any usable source address. In either case 571 * sctp_get_ire() will try find an IRE, if available, and set 572 * the source address, if needed. If we still don't have any 573 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and 574 * we return EHOSTUNREACH. 575 */ 576 if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) { 577 sctp_get_ire(sctp, fp); 578 if (fp->state == SCTP_FADDRS_UNREACH) { 579 if (error != NULL) 580 *error = EHOSTUNREACH; 581 return (NULL); 582 } 583 } 584 /* Copy in IP header. 
*/ 585 if ((mp->b_rptr - mp->b_datap->db_base) < 586 (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 || 587 !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) { 588 mblk_t *nmp; 589 590 /* 591 * This can happen if IP headers are adjusted after 592 * data was moved into chunks, or during retransmission, 593 * or things like snoop is running. 594 */ 595 nmp = allocb_cred(sctps->sctps_wroff_xtra + hdrlen + sacklen, 596 CONN_CRED(sctp->sctp_connp)); 597 if (nmp == NULL) { 598 if (error != NULL) 599 *error = ENOMEM; 600 return (NULL); 601 } 602 nmp->b_rptr += sctps->sctps_wroff_xtra; 603 nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen; 604 nmp->b_cont = mp; 605 mp = nmp; 606 } else { 607 mp->b_rptr -= (hdrlen + sacklen); 608 mblk_setcred(mp, CONN_CRED(sctp->sctp_connp)); 609 } 610 bcopy(hdr, mp->b_rptr, hdrlen); 611 if (sacklen) { 612 sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen); 613 } 614 if (fp != sctp->sctp_current) { 615 /* change addresses in header */ 616 if (isv4) { 617 ipha_t *iph = (ipha_t *)mp->b_rptr; 618 619 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst); 620 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) { 621 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 622 iph->ipha_src); 623 } else if (sctp->sctp_bound_to_all) { 624 iph->ipha_src = INADDR_ANY; 625 } 626 } else { 627 ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr; 628 if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 629 ((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr; 630 } else if (sctp->sctp_bound_to_all) { 631 V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src); 632 } 633 } 634 } 635 /* 636 * IP will not free this IRE if it is condemned. SCTP needs to 637 * free it. 
638 */ 639 if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) { 640 IRE_REFRELE_NOTR(fp->ire); 641 fp->ire = NULL; 642 } 643 644 /* Stash the conn and ire ptr info for IP */ 645 SCTP_STASH_IPINFO(mp, fp->ire); 646 647 return (mp); 648 } 649 650 /* 651 * SCTP requires every chunk to be padded so that the total length 652 * is a multiple of SCTP_ALIGN. This function returns a mblk with 653 * the specified pad length. 654 */ 655 static mblk_t * 656 sctp_get_padding(int pad, sctp_stack_t *sctps) 657 { 658 mblk_t *fill; 659 660 ASSERT(pad < SCTP_ALIGN); 661 if ((fill = dupb(sctps->sctps_pad_mp)) != NULL) { 662 fill->b_wptr += pad; 663 return (fill); 664 } 665 666 /* 667 * The memory saving path of reusing the sctp_pad_mp 668 * fails may be because it has been dupb() too 669 * many times (DBLK_REFMAX). Use the memory consuming 670 * path of allocating the pad mblk. 671 */ 672 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) { 673 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */ 674 *(int32_t *)fill->b_rptr = 0; 675 fill->b_wptr += pad; 676 } 677 return (fill); 678 } 679 680 static mblk_t * 681 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) 682 { 683 mblk_t *meta; 684 mblk_t *start_mp = NULL; 685 mblk_t *end_mp = NULL; 686 mblk_t *mp, *nmp; 687 mblk_t *fill; 688 sctp_data_hdr_t *sdh; 689 int msglen; 690 int extra; 691 sctp_msg_hdr_t *msg_hdr; 692 sctp_faddr_t *old_fp = NULL; 693 sctp_faddr_t *chunk_fp; 694 sctp_stack_t *sctps = sctp->sctp_sctps; 695 696 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 697 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 698 if (SCTP_IS_MSG_ABANDONED(meta) || 699 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 700 continue; 701 } 702 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 703 if (SCTP_CHUNK_WANT_REXMIT(mp)) { 704 /* 705 * Use the same peer address to do fast 706 * retransmission. If the original peer 707 * address is dead, switch to the current 708 * one. 
Record the old one so that we 709 * will pick the chunks sent to the old 710 * one for fast retransmission. 711 */ 712 chunk_fp = SCTP_CHUNK_DEST(mp); 713 if (*fp == NULL) { 714 *fp = chunk_fp; 715 if ((*fp)->state != SCTP_FADDRS_ALIVE) { 716 old_fp = *fp; 717 *fp = sctp->sctp_current; 718 } 719 } else if (old_fp == NULL && *fp != chunk_fp) { 720 continue; 721 } else if (old_fp != NULL && 722 old_fp != chunk_fp) { 723 continue; 724 } 725 726 sdh = (sctp_data_hdr_t *)mp->b_rptr; 727 msglen = ntohs(sdh->sdh_len); 728 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) { 729 extra = SCTP_ALIGN - extra; 730 } 731 732 /* 733 * We still return at least the first message 734 * even if that message cannot fit in as 735 * PMTU may have changed. 736 */ 737 if (*total + msglen + extra > 738 (*fp)->sfa_pmss && start_mp != NULL) { 739 return (start_mp); 740 } 741 if ((nmp = dupmsg(mp)) == NULL) 742 return (start_mp); 743 if (extra > 0) { 744 fill = sctp_get_padding(extra, sctps); 745 if (fill != NULL) { 746 linkb(nmp, fill); 747 } else { 748 return (start_mp); 749 } 750 } 751 BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans); 752 BUMP_LOCAL(sctp->sctp_rxtchunks); 753 SCTP_CHUNK_CLEAR_REXMIT(mp); 754 if (start_mp == NULL) { 755 start_mp = nmp; 756 } else { 757 linkb(end_mp, nmp); 758 } 759 end_mp = nmp; 760 *total += msglen + extra; 761 dprint(2, ("sctp_find_fast_rexmit_mblks: " 762 "tsn %x\n", sdh->sdh_tsn)); 763 } 764 } 765 } 766 /* Clear the flag as there is no more message to be fast rexmitted. 
*/ 767 sctp->sctp_chk_fast_rexmit = B_FALSE; 768 return (start_mp); 769 } 770 771 /* A debug function just to make sure that a mblk chain is not broken */ 772 #ifdef DEBUG 773 static boolean_t 774 sctp_verify_chain(mblk_t *head, mblk_t *tail) 775 { 776 mblk_t *mp = head; 777 778 if (head == NULL || tail == NULL) 779 return (B_TRUE); 780 while (mp != NULL) { 781 if (mp == tail) 782 return (B_TRUE); 783 mp = mp->b_next; 784 } 785 return (B_FALSE); 786 } 787 #endif 788 789 /* 790 * Gets the next unsent chunk to transmit. Messages that are abandoned are 791 * skipped. A message can be abandoned if it has a non-zero timetolive and 792 * transmission has not yet started or if it is a partially reliable 793 * message and its time is up (assuming we are PR-SCTP aware). 794 * 'cansend' is used to determine if need to try and chunkify messages from 795 * the unsent list, if any, and also as an input to sctp_chunkify() if so. 796 * When called from sctp_rexmit(), we don't want to chunkify, so 'cansend' 797 * will be set to 0. 
798 */ 799 mblk_t * 800 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error, 801 int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp) 802 { 803 mblk_t *mp1; 804 sctp_msg_hdr_t *msg_hdr; 805 mblk_t *tmp_meta; 806 sctp_faddr_t *fp1; 807 808 ASSERT(error != NULL && mp != NULL); 809 *error = 0; 810 811 ASSERT(sctp->sctp_current != NULL); 812 813 chunkified: 814 while (meta != NULL) { 815 tmp_meta = meta->b_next; 816 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 817 mp1 = meta->b_cont; 818 if (SCTP_IS_MSG_ABANDONED(meta)) 819 goto next_msg; 820 if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 821 while (mp1 != NULL) { 822 if (SCTP_CHUNK_CANSEND(mp1)) { 823 *mp = mp1; 824 #ifdef DEBUG 825 ASSERT(sctp_verify_chain( 826 sctp->sctp_xmit_head, meta)); 827 #endif 828 return (meta); 829 } 830 mp1 = mp1->b_next; 831 } 832 goto next_msg; 833 } 834 /* 835 * If we come here and the first chunk is sent, then we 836 * we are PR-SCTP aware, in which case if the cumulative 837 * TSN has moved upto or beyond the first chunk (which 838 * means all the previous messages have been cumulative 839 * SACK'd), then we send a Forward TSN with the last 840 * chunk that was sent in this message. If we can't send 841 * a Forward TSN because previous non-abandoned messages 842 * have not been acked then we will defer the Forward TSN 843 * to sctp_rexmit() or sctp_cumack(). 
844 */ 845 if (SCTP_CHUNK_ISSENT(mp1)) { 846 *error = sctp_check_abandoned_msg(sctp, meta); 847 if (*error != 0) { 848 #ifdef DEBUG 849 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, 850 sctp->sctp_xmit_tail)); 851 #endif 852 return (NULL); 853 } 854 goto next_msg; 855 } 856 BUMP_LOCAL(sctp->sctp_prsctpdrop); 857 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); 858 if (meta->b_prev == NULL) { 859 ASSERT(sctp->sctp_xmit_head == meta); 860 sctp->sctp_xmit_head = tmp_meta; 861 if (sctp->sctp_xmit_tail == meta) 862 sctp->sctp_xmit_tail = tmp_meta; 863 meta->b_next = NULL; 864 if (tmp_meta != NULL) 865 tmp_meta->b_prev = NULL; 866 } else if (meta->b_next == NULL) { 867 if (sctp->sctp_xmit_tail == meta) 868 sctp->sctp_xmit_tail = meta->b_prev; 869 meta->b_prev->b_next = NULL; 870 meta->b_prev = NULL; 871 } else { 872 meta->b_prev->b_next = tmp_meta; 873 tmp_meta->b_prev = meta->b_prev; 874 if (sctp->sctp_xmit_tail == meta) 875 sctp->sctp_xmit_tail = tmp_meta; 876 meta->b_prev = NULL; 877 meta->b_next = NULL; 878 } 879 sctp->sctp_unsent -= msg_hdr->smh_msglen; 880 /* 881 * Update ULP the amount of queued data, which is 882 * sent-unack'ed + unsent. 883 */ 884 if (!SCTP_IS_DETACHED(sctp)) { 885 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, 886 sctp->sctp_unacked + sctp->sctp_unsent); 887 } 888 sctp_sendfail_event(sctp, meta, 0, B_TRUE); 889 next_msg: 890 meta = tmp_meta; 891 } 892 /* chunkify, if needed */ 893 if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) { 894 ASSERT(sctp->sctp_unsent > 0); 895 if (fp == NULL) { 896 fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent); 897 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 898 fp = sctp->sctp_current; 899 } else { 900 /* 901 * If user specified destination, try to honor that. 
902 */ 903 fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent); 904 if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE && 905 fp1 != fp) { 906 goto chunk_done; 907 } 908 } 909 sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend); 910 if ((meta = sctp->sctp_xmit_tail) == NULL) 911 goto chunk_done; 912 /* 913 * sctp_chunkify() won't advance sctp_xmit_tail if it adds 914 * new chunk(s) to the tail, so we need to skip the 915 * sctp_xmit_tail, which would have already been processed. 916 * This could happen when there is unacked chunks, but 917 * nothing new to send. 918 * When sctp_chunkify() is called when the transmit queue 919 * is empty then we need to start from sctp_xmit_tail. 920 */ 921 if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) { 922 #ifdef DEBUG 923 mp1 = sctp->sctp_xmit_tail->b_cont; 924 while (mp1 != NULL) { 925 ASSERT(!SCTP_CHUNK_CANSEND(mp1)); 926 mp1 = mp1->b_next; 927 } 928 #endif 929 if ((meta = sctp->sctp_xmit_tail->b_next) == NULL) 930 goto chunk_done; 931 } 932 goto chunkified; 933 } 934 chunk_done: 935 #ifdef DEBUG 936 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail)); 937 #endif 938 return (NULL); 939 } 940 941 void 942 sctp_fast_rexmit(sctp_t *sctp) 943 { 944 mblk_t *mp, *head; 945 int pktlen = 0; 946 sctp_faddr_t *fp = NULL; 947 sctp_stack_t *sctps = sctp->sctp_sctps; 948 949 ASSERT(sctp->sctp_xmit_head != NULL); 950 mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp); 951 if (mp == NULL) { 952 SCTP_KSTAT(sctps, sctp_fr_not_found); 953 return; 954 } 955 if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) { 956 freemsg(mp); 957 SCTP_KSTAT(sctps, sctp_fr_add_hdr); 958 return; 959 } 960 if ((pktlen > fp->sfa_pmss) && fp->isv4) { 961 ipha_t *iph = (ipha_t *)head->b_rptr; 962 963 iph->ipha_fragment_offset_and_flags = 0; 964 } 965 966 sctp_set_iplen(sctp, head); 967 sctp_add_sendq(sctp, head); 968 sctp->sctp_active = fp->lastactive = lbolt64; 969 } 970 971 void 972 sctp_output(sctp_t *sctp, uint_t num_pkt) 973 { 
974 mblk_t *mp = NULL; 975 mblk_t *nmp; 976 mblk_t *head; 977 mblk_t *meta = sctp->sctp_xmit_tail; 978 mblk_t *fill = NULL; 979 uint16_t chunklen; 980 uint32_t cansend; 981 int32_t seglen; 982 int32_t xtralen; 983 int32_t sacklen; 984 int32_t pad = 0; 985 int32_t pathmax; 986 int extra; 987 int64_t now = lbolt64; 988 sctp_faddr_t *fp; 989 sctp_faddr_t *lfp; 990 sctp_data_hdr_t *sdc; 991 int error; 992 boolean_t notsent = B_TRUE; 993 sctp_stack_t *sctps = sctp->sctp_sctps; 994 995 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 996 sacklen = 0; 997 } else { 998 /* send a SACK chunk */ 999 sacklen = sizeof (sctp_chunk_hdr_t) + 1000 sizeof (sctp_sack_chunk_t) + 1001 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1002 lfp = sctp->sctp_lastdata; 1003 ASSERT(lfp != NULL); 1004 if (lfp->state != SCTP_FADDRS_ALIVE) 1005 lfp = sctp->sctp_current; 1006 } 1007 1008 cansend = sctp->sctp_frwnd; 1009 if (sctp->sctp_unsent < cansend) 1010 cansend = sctp->sctp_unsent; 1011 if ((cansend < sctp->sctp_current->sfa_pmss / 2) && 1012 sctp->sctp_unacked && 1013 (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) && 1014 !sctp->sctp_ndelay) { 1015 head = NULL; 1016 fp = sctp->sctp_current; 1017 goto unsent_data; 1018 } 1019 if (meta != NULL) 1020 mp = meta->b_cont; 1021 while (cansend > 0 && num_pkt-- != 0) { 1022 pad = 0; 1023 1024 /* 1025 * Find first segment eligible for transmit. 1026 */ 1027 while (mp != NULL) { 1028 if (SCTP_CHUNK_CANSEND(mp)) 1029 break; 1030 mp = mp->b_next; 1031 } 1032 if (mp == NULL) { 1033 meta = sctp_get_msg_to_send(sctp, &mp, 1034 meta == NULL ? NULL : meta->b_next, &error, sacklen, 1035 cansend, NULL); 1036 if (error != 0 || meta == NULL) { 1037 head = NULL; 1038 fp = sctp->sctp_current; 1039 goto unsent_data; 1040 } 1041 sctp->sctp_xmit_tail = meta; 1042 } 1043 1044 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1045 seglen = ntohs(sdc->sdh_len); 1046 xtralen = sizeof (*sdc); 1047 chunklen = seglen - xtralen; 1048 1049 /* 1050 * Check rwnd. 
1051 */ 1052 if (chunklen > cansend) { 1053 head = NULL; 1054 fp = SCTP_CHUNK_DEST(meta); 1055 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1056 fp = sctp->sctp_current; 1057 goto unsent_data; 1058 } 1059 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1060 extra = SCTP_ALIGN - extra; 1061 1062 /* 1063 * Pick destination address, and check cwnd. 1064 */ 1065 if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) && 1066 (seglen + sacklen + extra <= lfp->sfa_pmss)) { 1067 /* 1068 * Only include SACK chunk if it can be bundled 1069 * with a data chunk, and sent to sctp_lastdata. 1070 */ 1071 pathmax = lfp->cwnd - lfp->suna; 1072 1073 fp = lfp; 1074 if ((nmp = dupmsg(mp)) == NULL) { 1075 head = NULL; 1076 goto unsent_data; 1077 } 1078 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1079 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, 1080 &error); 1081 if (head == NULL) { 1082 /* 1083 * If none of the source addresses are 1084 * available (i.e error == EHOSTUNREACH), 1085 * pretend we have sent the data. We will 1086 * eventually time out trying to retramsmit 1087 * the data if the interface never comes up. 1088 * If we have already sent some stuff (i.e., 1089 * notsent is B_FALSE) then we are fine, else 1090 * just mark this packet as sent. 1091 */ 1092 if (notsent && error == EHOSTUNREACH) { 1093 SCTP_CHUNK_SENT(sctp, mp, sdc, 1094 fp, chunklen, meta); 1095 } 1096 freemsg(nmp); 1097 SCTP_KSTAT(sctps, sctp_output_failed); 1098 goto unsent_data; 1099 } 1100 seglen += sacklen; 1101 xtralen += sacklen; 1102 sacklen = 0; 1103 } else { 1104 fp = SCTP_CHUNK_DEST(meta); 1105 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1106 fp = sctp->sctp_current; 1107 /* 1108 * If we haven't sent data to this destination for 1109 * a while, do slow start again. 
1110 */ 1111 if (now - fp->lastactive > fp->rto) { 1112 SET_CWND(fp, fp->sfa_pmss, 1113 sctps->sctps_slow_start_after_idle); 1114 } 1115 1116 pathmax = fp->cwnd - fp->suna; 1117 if (seglen + extra > pathmax) { 1118 head = NULL; 1119 goto unsent_data; 1120 } 1121 if ((nmp = dupmsg(mp)) == NULL) { 1122 head = NULL; 1123 goto unsent_data; 1124 } 1125 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1126 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error); 1127 if (head == NULL) { 1128 /* 1129 * If none of the source addresses are 1130 * available (i.e error == EHOSTUNREACH), 1131 * pretend we have sent the data. We will 1132 * eventually time out trying to retramsmit 1133 * the data if the interface never comes up. 1134 * If we have already sent some stuff (i.e., 1135 * notsent is B_FALSE) then we are fine, else 1136 * just mark this packet as sent. 1137 */ 1138 if (notsent && error == EHOSTUNREACH) { 1139 SCTP_CHUNK_SENT(sctp, mp, sdc, 1140 fp, chunklen, meta); 1141 } 1142 freemsg(nmp); 1143 SCTP_KSTAT(sctps, sctp_output_failed); 1144 goto unsent_data; 1145 } 1146 } 1147 fp->lastactive = now; 1148 if (pathmax > fp->sfa_pmss) 1149 pathmax = fp->sfa_pmss; 1150 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1151 mp = mp->b_next; 1152 1153 /* Use this chunk to measure RTT? */ 1154 if (sctp->sctp_out_time == 0) { 1155 sctp->sctp_out_time = now; 1156 sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1; 1157 ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); 1158 } 1159 if (extra > 0) { 1160 fill = sctp_get_padding(extra, sctps); 1161 if (fill != NULL) { 1162 linkb(head, fill); 1163 pad = extra; 1164 seglen += extra; 1165 } else { 1166 goto unsent_data; 1167 } 1168 } 1169 /* See if we can bundle more. 
*/ 1170 while (seglen < pathmax) { 1171 int32_t new_len; 1172 int32_t new_xtralen; 1173 1174 while (mp != NULL) { 1175 if (SCTP_CHUNK_CANSEND(mp)) 1176 break; 1177 mp = mp->b_next; 1178 } 1179 if (mp == NULL) { 1180 meta = sctp_get_msg_to_send(sctp, &mp, 1181 meta->b_next, &error, seglen, 1182 (seglen - xtralen) >= cansend ? 0 : 1183 cansend - seglen, fp); 1184 if (error != 0 || meta == NULL) 1185 break; 1186 sctp->sctp_xmit_tail = meta; 1187 } 1188 ASSERT(mp != NULL); 1189 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) && 1190 fp != SCTP_CHUNK_DEST(meta)) { 1191 break; 1192 } 1193 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1194 chunklen = ntohs(sdc->sdh_len); 1195 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0) 1196 extra = SCTP_ALIGN - extra; 1197 1198 new_len = seglen + chunklen; 1199 new_xtralen = xtralen + sizeof (*sdc); 1200 chunklen -= sizeof (*sdc); 1201 1202 if (new_len - new_xtralen > cansend || 1203 new_len + extra > pathmax) { 1204 break; 1205 } 1206 if ((nmp = dupmsg(mp)) == NULL) 1207 break; 1208 if (extra > 0) { 1209 fill = sctp_get_padding(extra, sctps); 1210 if (fill != NULL) { 1211 pad += extra; 1212 new_len += extra; 1213 linkb(nmp, fill); 1214 } else { 1215 freemsg(nmp); 1216 break; 1217 } 1218 } 1219 seglen = new_len; 1220 xtralen = new_xtralen; 1221 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1222 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1223 linkb(head, nmp); 1224 mp = mp->b_next; 1225 } 1226 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1227 ipha_t *iph = (ipha_t *)head->b_rptr; 1228 1229 /* 1230 * Path MTU is different from what we thought it would 1231 * be when we created chunks, or IP headers have grown. 1232 * Need to clear the DF bit. 
1233 */ 1234 iph->ipha_fragment_offset_and_flags = 0; 1235 } 1236 /* xmit segment */ 1237 ASSERT(cansend >= seglen - pad - xtralen); 1238 cansend -= (seglen - pad - xtralen); 1239 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x " 1240 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n", 1241 seglen - xtralen, ntohl(sdc->sdh_tsn), 1242 ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, 1243 cansend, sctp->sctp_lastack_rxd)); 1244 sctp_set_iplen(sctp, head); 1245 sctp_add_sendq(sctp, head); 1246 /* arm rto timer (if not set) */ 1247 if (!fp->timer_running) 1248 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1249 notsent = B_FALSE; 1250 } 1251 sctp->sctp_active = now; 1252 return; 1253 unsent_data: 1254 /* arm persist timer (if rto timer not set) */ 1255 if (!fp->timer_running) 1256 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1257 if (head != NULL) 1258 freemsg(head); 1259 } 1260 1261 /* 1262 * The following two functions initialize and destroy the cache 1263 * associated with the sets used for PR-SCTP. 1264 */ 1265 void 1266 sctp_ftsn_sets_init(void) 1267 { 1268 sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache", 1269 sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL, 1270 NULL, 0); 1271 } 1272 1273 void 1274 sctp_ftsn_sets_fini(void) 1275 { 1276 kmem_cache_destroy(sctp_kmem_ftsn_set_cache); 1277 } 1278 1279 1280 /* Free PR-SCTP sets */ 1281 void 1282 sctp_free_ftsn_set(sctp_ftsn_set_t *s) 1283 { 1284 sctp_ftsn_set_t *p; 1285 1286 while (s != NULL) { 1287 p = s->next; 1288 s->next = NULL; 1289 kmem_cache_free(sctp_kmem_ftsn_set_cache, s); 1290 s = p; 1291 } 1292 } 1293 1294 /* 1295 * Given a message meta block, meta, this routine creates or modifies 1296 * the set that will be used to generate a Forward TSN chunk. If the 1297 * entry for stream id, sid, for this message already exists, the 1298 * sequence number, ssn, is updated if it is greater than the existing 1299 * one. 
If an entry for this sid does not exist, one is created if 1300 * the size does not exceed fp->sfa_pmss. We return false in case 1301 * or an error. 1302 */ 1303 boolean_t 1304 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta, 1305 uint_t *nsets, uint32_t *slen) 1306 { 1307 sctp_ftsn_set_t *p; 1308 sctp_msg_hdr_t *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1309 uint16_t sid = htons(msg_hdr->smh_sid); 1310 /* msg_hdr->smh_ssn is already in NBO */ 1311 uint16_t ssn = msg_hdr->smh_ssn; 1312 1313 ASSERT(s != NULL && nsets != NULL); 1314 ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL)); 1315 1316 if (*s == NULL) { 1317 ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss); 1318 *s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP); 1319 if (*s == NULL) 1320 return (B_FALSE); 1321 (*s)->ftsn_entries.ftsn_sid = sid; 1322 (*s)->ftsn_entries.ftsn_ssn = ssn; 1323 (*s)->next = NULL; 1324 *nsets = 1; 1325 *slen += sizeof (uint32_t); 1326 return (B_TRUE); 1327 } 1328 for (p = *s; p->next != NULL; p = p->next) { 1329 if (p->ftsn_entries.ftsn_sid == sid) { 1330 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1331 p->ftsn_entries.ftsn_ssn = ssn; 1332 return (B_TRUE); 1333 } 1334 } 1335 /* the last one */ 1336 if (p->ftsn_entries.ftsn_sid == sid) { 1337 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1338 p->ftsn_entries.ftsn_ssn = ssn; 1339 } else { 1340 if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss) 1341 return (B_FALSE); 1342 p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, 1343 KM_NOSLEEP); 1344 if (p->next == NULL) 1345 return (B_FALSE); 1346 p = p->next; 1347 p->ftsn_entries.ftsn_sid = sid; 1348 p->ftsn_entries.ftsn_ssn = ssn; 1349 p->next = NULL; 1350 (*nsets)++; 1351 *slen += sizeof (uint32_t); 1352 } 1353 return (B_TRUE); 1354 } 1355 1356 /* 1357 * Given a set of stream id - sequence number pairs, this routing creates 1358 * a Forward TSN chunk. 
The cumulative TSN (advanced peer ack point) 1359 * for the chunk is obtained from sctp->sctp_adv_pap. The caller 1360 * will add the IP/SCTP header. 1361 */ 1362 mblk_t * 1363 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets, 1364 uint_t nsets, uint32_t seglen) 1365 { 1366 mblk_t *ftsn_mp; 1367 sctp_chunk_hdr_t *ch_hdr; 1368 uint32_t *advtsn; 1369 uint16_t schlen; 1370 size_t xtralen; 1371 ftsn_entry_t *ftsn_entry; 1372 sctp_stack_t *sctps = sctp->sctp_sctps; 1373 1374 seglen += sizeof (sctp_chunk_hdr_t); 1375 if (fp->isv4) 1376 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra; 1377 else 1378 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra; 1379 ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp)); 1380 if (ftsn_mp == NULL) 1381 return (NULL); 1382 ftsn_mp->b_rptr += xtralen; 1383 ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen; 1384 1385 ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr; 1386 ch_hdr->sch_id = CHUNK_FORWARD_TSN; 1387 ch_hdr->sch_flags = 0; 1388 /* 1389 * The cast here should not be an issue since seglen is 1390 * the length of the Forward TSN chunk. 1391 */ 1392 schlen = (uint16_t)seglen; 1393 U16_TO_ABE16(schlen, &(ch_hdr->sch_len)); 1394 1395 advtsn = (uint32_t *)(ch_hdr + 1); 1396 U32_TO_ABE32(sctp->sctp_adv_pap, advtsn); 1397 ftsn_entry = (ftsn_entry_t *)(advtsn + 1); 1398 while (nsets > 0) { 1399 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr); 1400 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid; 1401 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn; 1402 ftsn_entry++; 1403 sets = sets->next; 1404 nsets--; 1405 } 1406 return (ftsn_mp); 1407 } 1408 1409 /* 1410 * Given a starting message, the routine steps through all the 1411 * messages whose TSN is less than sctp->sctp_adv_pap and creates 1412 * ftsn sets. The ftsn sets is then used to create an Forward TSN 1413 * chunk. All the messages, that have chunks that are included in the 1414 * ftsn sets, are flagged abandonded. 
If a message is partially sent 1415 * and is deemed abandoned, all remaining unsent chunks are marked 1416 * abandoned and are deducted from sctp_unsent. 1417 */ 1418 void 1419 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp, 1420 sctp_faddr_t *fp, uint32_t *seglen) 1421 { 1422 mblk_t *mp1 = mp; 1423 mblk_t *mp_head = mp; 1424 mblk_t *meta_head = meta; 1425 mblk_t *head; 1426 sctp_ftsn_set_t *sets = NULL; 1427 uint_t nsets = 0; 1428 uint16_t clen; 1429 sctp_data_hdr_t *sdc; 1430 uint32_t sacklen; 1431 uint32_t adv_pap = sctp->sctp_adv_pap; 1432 uint32_t unsent = 0; 1433 boolean_t ubit; 1434 sctp_stack_t *sctps = sctp->sctp_sctps; 1435 1436 *seglen = sizeof (uint32_t); 1437 1438 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1439 while (meta != NULL && 1440 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { 1441 /* 1442 * Skip adding FTSN sets for un-ordered messages as they do 1443 * not have SSNs. 1444 */ 1445 ubit = SCTP_DATA_GET_UBIT(sdc); 1446 if (!ubit && 1447 !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) { 1448 meta = NULL; 1449 sctp->sctp_adv_pap = adv_pap; 1450 goto ftsn_done; 1451 } 1452 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { 1453 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1454 adv_pap = ntohl(sdc->sdh_tsn); 1455 mp1 = mp1->b_next; 1456 } 1457 meta = meta->b_next; 1458 if (meta != NULL) { 1459 mp1 = meta->b_cont; 1460 if (!SCTP_CHUNK_ISSENT(mp1)) 1461 break; 1462 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1463 } 1464 } 1465 ftsn_done: 1466 /* 1467 * Can't compare with sets == NULL, since we don't add any 1468 * sets for un-ordered messages. 
1469 */ 1470 if (meta == meta_head) 1471 return; 1472 *nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen); 1473 sctp_free_ftsn_set(sets); 1474 if (*nmp == NULL) 1475 return; 1476 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1477 sacklen = 0; 1478 } else { 1479 sacklen = sizeof (sctp_chunk_hdr_t) + 1480 sizeof (sctp_sack_chunk_t) + 1481 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1482 if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1483 /* piggybacked SACK doesn't fit */ 1484 sacklen = 0; 1485 } else { 1486 fp = sctp->sctp_lastdata; 1487 } 1488 } 1489 head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL); 1490 if (head == NULL) { 1491 freemsg(*nmp); 1492 *nmp = NULL; 1493 SCTP_KSTAT(sctps, sctp_send_ftsn_failed); 1494 return; 1495 } 1496 *seglen += sacklen; 1497 *nmp = head; 1498 1499 /* 1500 * XXXNeed to optimise this, the reason it is done here is so 1501 * that we don't have to undo in case of failure. 1502 */ 1503 mp1 = mp_head; 1504 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1505 while (meta_head != NULL && 1506 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { 1507 if (!SCTP_IS_MSG_ABANDONED(meta_head)) 1508 SCTP_MSG_SET_ABANDONED(meta_head); 1509 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { 1510 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1511 if (!SCTP_CHUNK_ISACKED(mp1)) { 1512 clen = ntohs(sdc->sdh_len) - sizeof (*sdc); 1513 SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen, 1514 meta_head); 1515 } 1516 mp1 = mp1->b_next; 1517 } 1518 while (mp1 != NULL) { 1519 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1520 if (!SCTP_CHUNK_ABANDONED(mp1)) { 1521 ASSERT(!SCTP_CHUNK_ISSENT(mp1)); 1522 unsent += ntohs(sdc->sdh_len) - sizeof (*sdc); 1523 SCTP_ABANDON_CHUNK(mp1); 1524 } 1525 mp1 = mp1->b_next; 1526 } 1527 meta_head = meta_head->b_next; 1528 if (meta_head != NULL) { 1529 mp1 = meta_head->b_cont; 1530 if (!SCTP_CHUNK_ISSENT(mp1)) 1531 break; 1532 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1533 } 1534 } 1535 if (unsent > 0) { 1536 ASSERT(sctp->sctp_unsent 
>= unsent); 1537 sctp->sctp_unsent -= unsent; 1538 /* 1539 * Update ULP the amount of queued data, which is 1540 * sent-unack'ed + unsent. 1541 */ 1542 if (!SCTP_IS_DETACHED(sctp)) { 1543 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, 1544 sctp->sctp_unacked + sctp->sctp_unsent); 1545 } 1546 } 1547 } 1548 1549 /* 1550 * This function steps through messages starting at meta and checks if 1551 * the message is abandoned. It stops when it hits an unsent chunk or 1552 * a message that has all its chunk acked. This is the only place 1553 * where the sctp_adv_pap is moved forward to indicated abandoned 1554 * messages. 1555 */ 1556 void 1557 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp) 1558 { 1559 uint32_t tsn = sctp->sctp_adv_pap; 1560 sctp_data_hdr_t *sdc; 1561 sctp_msg_hdr_t *msg_hdr; 1562 1563 ASSERT(mp != NULL); 1564 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1565 ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd)); 1566 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1567 if (!SCTP_IS_MSG_ABANDONED(meta) && 1568 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 1569 return; 1570 } 1571 while (meta != NULL) { 1572 while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) { 1573 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1574 tsn = ntohl(sdc->sdh_tsn); 1575 mp = mp->b_next; 1576 } 1577 if (mp != NULL) 1578 break; 1579 /* 1580 * We continue checking for successive messages only if there 1581 * is a chunk marked for retransmission. Else, we might 1582 * end up sending FTSN prematurely for chunks that have been 1583 * sent, but not yet acked. 
1584 */ 1585 if ((meta = meta->b_next) != NULL) { 1586 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1587 if (!SCTP_IS_MSG_ABANDONED(meta) && 1588 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 1589 break; 1590 } 1591 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1592 if (!SCTP_CHUNK_ISSENT(mp)) { 1593 sctp->sctp_adv_pap = tsn; 1594 return; 1595 } 1596 if (SCTP_CHUNK_WANT_REXMIT(mp)) 1597 break; 1598 } 1599 if (mp == NULL) 1600 break; 1601 } 1602 } 1603 sctp->sctp_adv_pap = tsn; 1604 } 1605 1606 1607 /* 1608 * Determine if we should bundle a data chunk with the chunk being 1609 * retransmitted. We bundle if 1610 * 1611 * - the chunk is sent to the same destination and unack'ed. 1612 * 1613 * OR 1614 * 1615 * - the chunk is unsent, i.e. new data. 1616 */ 1617 #define SCTP_CHUNK_RX_CANBUNDLE(mp, fp) \ 1618 (!SCTP_CHUNK_ABANDONED((mp)) && \ 1619 ((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) && \ 1620 !SCTP_CHUNK_ISACKED(mp))) || \ 1621 (((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \ 1622 SCTP_CHUNK_FLAG_SENT))) 1623 1624 /* 1625 * Retransmit first segment which hasn't been acked with cumtsn or send 1626 * a Forward TSN chunk, if appropriate. 
1627 */ 1628 void 1629 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) 1630 { 1631 mblk_t *mp; 1632 mblk_t *nmp = NULL; 1633 mblk_t *head; 1634 mblk_t *meta = sctp->sctp_xmit_head; 1635 mblk_t *fill; 1636 uint32_t seglen = 0; 1637 uint32_t sacklen; 1638 uint16_t chunklen; 1639 int extra; 1640 sctp_data_hdr_t *sdc; 1641 sctp_faddr_t *fp; 1642 uint32_t adv_pap = sctp->sctp_adv_pap; 1643 boolean_t do_ftsn = B_FALSE; 1644 boolean_t ftsn_check = B_TRUE; 1645 uint32_t first_ua_tsn; 1646 sctp_msg_hdr_t *mhdr; 1647 sctp_stack_t *sctps = sctp->sctp_sctps; 1648 1649 while (meta != NULL) { 1650 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1651 uint32_t tsn; 1652 1653 if (!SCTP_CHUNK_ISSENT(mp)) 1654 goto window_probe; 1655 /* 1656 * We break in the following cases - 1657 * 1658 * if the advanced peer ack point includes the next 1659 * chunk to be retransmited - possibly the Forward 1660 * TSN was lost. 1661 * 1662 * if we are PRSCTP aware and the next chunk to be 1663 * retransmitted is now abandoned 1664 * 1665 * if the next chunk to be retransmitted is for 1666 * the dest on which the timer went off. (this 1667 * message is not abandoned). 1668 * 1669 * We check for Forward TSN only for the first 1670 * eligible chunk to be retransmitted. The reason 1671 * being if the first eligible chunk is skipped (say 1672 * it was sent to a destination other than oldfp) 1673 * then we cannot advance the cum TSN via Forward 1674 * TSN chunk. 1675 * 1676 * Also, ftsn_check is B_TRUE only for the first 1677 * eligible chunk, it will be B_FALSE for all 1678 * subsequent candidate messages for retransmission. 
1679 */ 1680 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1681 tsn = ntohl(sdc->sdh_tsn); 1682 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) { 1683 if (sctp->sctp_prsctp_aware && ftsn_check) { 1684 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) { 1685 ASSERT(sctp->sctp_prsctp_aware); 1686 do_ftsn = B_TRUE; 1687 goto out; 1688 } else { 1689 sctp_check_adv_ack_pt(sctp, 1690 meta, mp); 1691 if (SEQ_GT(sctp->sctp_adv_pap, 1692 adv_pap)) { 1693 do_ftsn = B_TRUE; 1694 goto out; 1695 } 1696 } 1697 ftsn_check = B_FALSE; 1698 } 1699 if (SCTP_CHUNK_DEST(mp) == oldfp) 1700 goto out; 1701 } 1702 } 1703 meta = meta->b_next; 1704 if (meta != NULL && sctp->sctp_prsctp_aware) { 1705 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1706 1707 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || 1708 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { 1709 meta = meta->b_next; 1710 } 1711 } 1712 } 1713 window_probe: 1714 /* 1715 * Retransmit fired for a destination which didn't have 1716 * any unacked data pending. 1717 */ 1718 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) { 1719 /* 1720 * Send a window probe. Inflate frwnd to allow 1721 * sending one segment. 1722 */ 1723 if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) 1724 sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc); 1725 1726 /* next TSN to send */ 1727 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn; 1728 1729 /* 1730 * The above sctp_frwnd adjustment is coarse. The "changed" 1731 * sctp_frwnd may allow us to send more than 1 packet. So 1732 * tell sctp_output() to send only 1 packet. 1733 */ 1734 sctp_output(sctp, 1); 1735 1736 /* Last sent TSN */ 1737 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1738 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); 1739 sctp->sctp_zero_win_probe = B_TRUE; 1740 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1741 } 1742 return; 1743 out: 1744 /* 1745 * After a time out, assume that everything has left the network. So 1746 * we can clear rxt_unacked for the original peer address. 
1747 */ 1748 oldfp->rxt_unacked = 0; 1749 1750 /* 1751 * If we were probing for zero window, don't adjust retransmission 1752 * variables, but the timer is still backed off. 1753 */ 1754 if (sctp->sctp_zero_win_probe) { 1755 mblk_t *pkt; 1756 uint_t pkt_len; 1757 1758 /* 1759 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn 1760 * and sctp_rxt_maxtsn will specify the ZWP packet. 1761 */ 1762 fp = oldfp; 1763 if (oldfp->state != SCTP_FADDRS_ALIVE) 1764 fp = sctp_rotate_faddr(sctp, oldfp); 1765 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 1766 if (pkt != NULL) { 1767 ASSERT(pkt_len <= fp->sfa_pmss); 1768 sctp_set_iplen(sctp, pkt); 1769 sctp_add_sendq(sctp, pkt); 1770 } else { 1771 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 1772 } 1773 1774 /* 1775 * The strikes will be clear by sctp_faddr_alive() when the 1776 * other side sends us an ack. 1777 */ 1778 oldfp->strikes++; 1779 sctp->sctp_strikes++; 1780 1781 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1782 if (oldfp != fp && oldfp->suna != 0) 1783 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); 1784 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1785 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1786 return; 1787 } 1788 1789 /* 1790 * Enter slowstart for this destination 1791 */ 1792 oldfp->ssthresh = oldfp->cwnd / 2; 1793 if (oldfp->ssthresh < 2 * oldfp->sfa_pmss) 1794 oldfp->ssthresh = 2 * oldfp->sfa_pmss; 1795 oldfp->cwnd = oldfp->sfa_pmss; 1796 oldfp->pba = 0; 1797 fp = sctp_rotate_faddr(sctp, oldfp); 1798 ASSERT(fp != NULL); 1799 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1800 1801 first_ua_tsn = ntohl(sdc->sdh_tsn); 1802 if (do_ftsn) { 1803 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); 1804 if (nmp == NULL) { 1805 sctp->sctp_adv_pap = adv_pap; 1806 goto restart_timer; 1807 } 1808 head = nmp; 1809 /* 1810 * Move to the next unabandoned chunk. XXXCheck if meta will 1811 * always be marked abandoned. 
1812 */ 1813 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) 1814 meta = meta->b_next; 1815 if (meta != NULL) 1816 mp = mp->b_cont; 1817 else 1818 mp = NULL; 1819 goto try_bundle; 1820 } 1821 seglen = ntohs(sdc->sdh_len); 1822 chunklen = seglen - sizeof (*sdc); 1823 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1824 extra = SCTP_ALIGN - extra; 1825 1826 /* Find out if we need to piggyback SACK. */ 1827 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1828 sacklen = 0; 1829 } else { 1830 sacklen = sizeof (sctp_chunk_hdr_t) + 1831 sizeof (sctp_sack_chunk_t) + 1832 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1833 if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1834 /* piggybacked SACK doesn't fit */ 1835 sacklen = 0; 1836 } else { 1837 /* 1838 * OK, we have room to send SACK back. But we 1839 * should send it back to the last fp where we 1840 * receive data from, unless sctp_lastdata equals 1841 * oldfp, then we should probably not send it 1842 * back to that fp. Also we should check that 1843 * the fp is alive. 1844 */ 1845 if (sctp->sctp_lastdata != oldfp && 1846 sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { 1847 fp = sctp->sctp_lastdata; 1848 } 1849 } 1850 } 1851 1852 /* 1853 * Cancel RTT measurement if the retransmitted TSN is before the 1854 * TSN used for timimg. 1855 */ 1856 if (sctp->sctp_out_time != 0 && 1857 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { 1858 sctp->sctp_out_time = 0; 1859 } 1860 /* Clear the counter as the RTT calculation may be off. */ 1861 fp->rtt_updates = 0; 1862 oldfp->rtt_updates = 0; 1863 1864 /* 1865 * After a timeout, we should change the current faddr so that 1866 * new chunks will be sent to the alternate address. 
1867 */ 1868 sctp_set_faddr_current(sctp, fp); 1869 1870 nmp = dupmsg(mp); 1871 if (nmp == NULL) 1872 goto restart_timer; 1873 if (extra > 0) { 1874 fill = sctp_get_padding(extra, sctps); 1875 if (fill != NULL) { 1876 linkb(nmp, fill); 1877 seglen += extra; 1878 } else { 1879 freemsg(nmp); 1880 goto restart_timer; 1881 } 1882 } 1883 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1884 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); 1885 if (head == NULL) { 1886 freemsg(nmp); 1887 SCTP_KSTAT(sctps, sctp_rexmit_failed); 1888 goto restart_timer; 1889 } 1890 seglen += sacklen; 1891 1892 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1893 1894 mp = mp->b_next; 1895 1896 try_bundle: 1897 /* We can at least and at most send 1 packet at timeout. */ 1898 while (seglen < fp->sfa_pmss) { 1899 int32_t new_len; 1900 1901 /* Go through the list to find more chunks to be bundled. */ 1902 while (mp != NULL) { 1903 /* Check if the chunk can be bundled. */ 1904 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) 1905 break; 1906 mp = mp->b_next; 1907 } 1908 /* Go to the next message. */ 1909 if (mp == NULL) { 1910 for (meta = meta->b_next; meta != NULL; 1911 meta = meta->b_next) { 1912 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1913 1914 if (SCTP_IS_MSG_ABANDONED(meta) || 1915 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, 1916 sctp)) { 1917 continue; 1918 } 1919 1920 mp = meta->b_cont; 1921 goto try_bundle; 1922 } 1923 /* No more chunk to be bundled. 
*/ 1924 break; 1925 } 1926 1927 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1928 new_len = ntohs(sdc->sdh_len); 1929 chunklen = new_len - sizeof (*sdc); 1930 1931 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 1932 extra = SCTP_ALIGN - extra; 1933 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 1934 break; 1935 if ((nmp = dupmsg(mp)) == NULL) 1936 break; 1937 1938 if (extra > 0) { 1939 fill = sctp_get_padding(extra, sctps); 1940 if (fill != NULL) { 1941 linkb(nmp, fill); 1942 } else { 1943 freemsg(nmp); 1944 break; 1945 } 1946 } 1947 linkb(head, nmp); 1948 1949 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1950 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1951 1952 seglen = new_len; 1953 mp = mp->b_next; 1954 } 1955 done_bundle: 1956 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1957 ipha_t *iph = (ipha_t *)head->b_rptr; 1958 1959 /* 1960 * Path MTU is different from path we thought it would 1961 * be when we created chunks, or IP headers have grown. 1962 * Need to clear the DF bit. 1963 */ 1964 iph->ipha_fragment_offset_and_flags = 0; 1965 } 1966 fp->rxt_unacked += seglen; 1967 1968 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x " 1969 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n", 1970 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), 1971 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); 1972 1973 sctp->sctp_rexmitting = B_TRUE; 1974 sctp->sctp_rxt_nxttsn = first_ua_tsn; 1975 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1976 sctp_set_iplen(sctp, head); 1977 sctp_add_sendq(sctp, head); 1978 1979 /* 1980 * Restart the oldfp timer with exponential backoff and 1981 * the new fp timer for the retransmitted chunks. 1982 */ 1983 restart_timer: 1984 oldfp->strikes++; 1985 sctp->sctp_strikes++; 1986 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1987 if (oldfp->suna != 0) 1988 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); 1989 sctp->sctp_active = lbolt64; 1990 1991 /* 1992 * Should we restart the timer of the new fp? 
If there is 1993 * outstanding data to the new fp, the timer should be 1994 * running already. So restarting it means that the timer 1995 * will fire later for those outstanding data. But if 1996 * we don't restart it, the timer will fire too early for the 1997 * just retransmitted chunks to the new fp. The reason is that we 1998 * don't keep a timestamp on when a chunk is retransmitted. 1999 * So when the timer fires, it will just search for the 2000 * chunk with the earliest TSN sent to new fp. This probably 2001 * is the chunk we just retransmitted. So for now, let's 2002 * be conservative and restart the timer of the new fp. 2003 */ 2004 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2005 } 2006 2007 /* 2008 * The SCTP write put procedure called from IP. 2009 */ 2010 void 2011 sctp_wput(queue_t *q, mblk_t *mp) 2012 { 2013 uchar_t *rptr; 2014 t_scalar_t type; 2015 2016 switch (mp->b_datap->db_type) { 2017 case M_IOCTL: 2018 sctp_wput_ioctl(q, mp); 2019 break; 2020 case M_DATA: 2021 /* Should be handled in sctp_output() */ 2022 ASSERT(0); 2023 freemsg(mp); 2024 break; 2025 case M_PROTO: 2026 case M_PCPROTO: 2027 rptr = mp->b_rptr; 2028 if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) { 2029 type = ((union T_primitives *)rptr)->type; 2030 /* 2031 * There is no "standard" way on how to respond 2032 * to T_CAPABILITY_REQ if a module does not 2033 * understand it. And the current TI mod 2034 * has problems handling an error ack. So we 2035 * catch the request here and reply with a response 2036 * which the TI mod knows how to respond to. 
2037 */ 2038 switch (type) { 2039 case T_CAPABILITY_REQ: 2040 (void) putnextctl1(RD(q), M_ERROR, EPROTO); 2041 break; 2042 default: 2043 if ((mp = mi_tpi_err_ack_alloc(mp, 2044 TNOTSUPPORT, 0)) != NULL) { 2045 qreply(q, mp); 2046 return; 2047 } 2048 } 2049 } 2050 /* FALLTHRU */ 2051 default: 2052 freemsg(mp); 2053 return; 2054 } 2055 } 2056 2057 /* 2058 * This function is called by sctp_ss_rexmit() to create a packet 2059 * to be retransmitted to the given fp. The given meta and mp 2060 * parameters are respectively the sctp_msg_hdr_t and the mblk of the 2061 * first chunk to be retransmitted. This is also called when we want 2062 * to retransmit a zero window probe from sctp_rexmit() or when we 2063 * want to retransmit the zero window probe after the window has 2064 * opened from sctp_got_sack(). 2065 */ 2066 mblk_t * 2067 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, 2068 uint_t *packet_len) 2069 { 2070 uint32_t seglen = 0; 2071 uint16_t chunklen; 2072 int extra; 2073 mblk_t *nmp; 2074 mblk_t *head; 2075 mblk_t *fill; 2076 sctp_data_hdr_t *sdc; 2077 sctp_msg_hdr_t *mhdr; 2078 sctp_stack_t *sctps = sctp->sctp_sctps; 2079 2080 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2081 seglen = ntohs(sdc->sdh_len); 2082 chunklen = seglen - sizeof (*sdc); 2083 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 2084 extra = SCTP_ALIGN - extra; 2085 2086 nmp = dupmsg(*mp); 2087 if (nmp == NULL) 2088 return (NULL); 2089 if (extra > 0) { 2090 fill = sctp_get_padding(extra, sctps); 2091 if (fill != NULL) { 2092 linkb(nmp, fill); 2093 seglen += extra; 2094 } else { 2095 freemsg(nmp); 2096 return (NULL); 2097 } 2098 } 2099 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2100 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL); 2101 if (head == NULL) { 2102 freemsg(nmp); 2103 return (NULL); 2104 } 2105 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2106 /* 2107 * Don't update the TSN if we are doing a Zero Win Probe. 
2108 */ 2109 if (!sctp->sctp_zero_win_probe) 2110 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2111 *mp = (*mp)->b_next; 2112 2113 try_bundle: 2114 while (seglen < fp->sfa_pmss) { 2115 int32_t new_len; 2116 2117 /* 2118 * Go through the list to find more chunks to be bundled. 2119 * We should only retransmit sent by unack'ed chunks. Since 2120 * they were sent before, the peer's receive window should 2121 * be able to receive them. 2122 */ 2123 while (*mp != NULL) { 2124 /* Check if the chunk can be bundled. */ 2125 if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp)) 2126 break; 2127 *mp = (*mp)->b_next; 2128 } 2129 /* Go to the next message. */ 2130 if (*mp == NULL) { 2131 for (*meta = (*meta)->b_next; *meta != NULL; 2132 *meta = (*meta)->b_next) { 2133 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr; 2134 2135 if (SCTP_IS_MSG_ABANDONED(*meta) || 2136 SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr, 2137 sctp)) { 2138 continue; 2139 } 2140 2141 *mp = (*meta)->b_cont; 2142 goto try_bundle; 2143 } 2144 /* No more chunk to be bundled. */ 2145 break; 2146 } 2147 2148 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2149 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */ 2150 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn)) 2151 break; 2152 new_len = ntohs(sdc->sdh_len); 2153 chunklen = new_len - sizeof (*sdc); 2154 2155 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 2156 extra = SCTP_ALIGN - extra; 2157 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 2158 break; 2159 if ((nmp = dupmsg(*mp)) == NULL) 2160 break; 2161 2162 if (extra > 0) { 2163 fill = sctp_get_padding(extra, sctps); 2164 if (fill != NULL) { 2165 linkb(nmp, fill); 2166 } else { 2167 freemsg(nmp); 2168 break; 2169 } 2170 } 2171 linkb(head, nmp); 2172 2173 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2174 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2175 /* 2176 * Don't update the TSN if we are doing a Zero Win Probe. 
2177 */ 2178 if (!sctp->sctp_zero_win_probe) 2179 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2180 2181 seglen = new_len; 2182 *mp = (*mp)->b_next; 2183 } 2184 *packet_len = seglen; 2185 fp->rxt_unacked += seglen; 2186 return (head); 2187 } 2188 2189 /* 2190 * sctp_ss_rexmit() is called when we get a SACK after a timeout which 2191 * advances the cum_tsn but the cum_tsn is still less than what we have sent 2192 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial" 2193 * SACK. We retransmit unacked chunks without having to wait for another 2194 * timeout. The rationale is that the SACK should not be "partial" if all the 2195 * lost chunks have been retransmitted. Since the SACK is "partial," 2196 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still 2197 * be missing. It is better for us to retransmit them now instead 2198 * of waiting for a timeout. 2199 */ 2200 void 2201 sctp_ss_rexmit(sctp_t *sctp) 2202 { 2203 mblk_t *meta; 2204 mblk_t *mp; 2205 mblk_t *pkt; 2206 sctp_faddr_t *fp; 2207 uint_t pkt_len; 2208 uint32_t tot_wnd; 2209 sctp_data_hdr_t *sdc; 2210 int burst; 2211 sctp_stack_t *sctps = sctp->sctp_sctps; 2212 2213 ASSERT(!sctp->sctp_zero_win_probe); 2214 2215 /* 2216 * If the last cum ack is smaller than what we have just 2217 * retransmitted, simply return. 2218 */ 2219 if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn)) 2220 sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1; 2221 else 2222 return; 2223 ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn)); 2224 2225 /* 2226 * After a timer fires, sctp_current should be set to the new 2227 * fp where the retransmitted chunks are sent. 2228 */ 2229 fp = sctp->sctp_current; 2230 2231 /* 2232 * Since we are retransmitting, we only need to use cwnd to determine 2233 * how much we can send as we were allowed (by peer's receive window) 2234 * to send those retransmitted chunks previously when they are first 2235 * sent. 
If we record how much we have retransmitted but 2236 * unacknowledged using rxt_unacked, then the amount we can now send 2237 * is equal to cwnd minus rxt_unacked. 2238 * 2239 * The field rxt_unacked is incremented when we retransmit a packet 2240 * and decremented when we got a SACK acknowledging something. And 2241 * it is reset when the retransmission timer fires as we assume that 2242 * all packets have left the network after a timeout. If this 2243 * assumption is not true, it means that after a timeout, we can 2244 * get a SACK acknowledging more than rxt_unacked (its value only 2245 * contains what is retransmitted when the timer fires). So 2246 * rxt_unacked will become very big (it is an unsiged int so going 2247 * negative means that the value is huge). This is the reason we 2248 * always send at least 1 MSS bytes. 2249 * 2250 * The reason why we do not have an accurate count is that we 2251 * only know how many packets are outstanding (using the TSN numbers). 2252 * But we do not know how many bytes those packets contain. To 2253 * have an accurate count, we need to walk through the send list. 2254 * As it is not really important to have an accurate count during 2255 * retransmission, we skip this walk to save some time. This should 2256 * not make the retransmission too aggressive to cause congestion. 
2257 */ 2258 if (fp->cwnd <= fp->rxt_unacked) 2259 tot_wnd = fp->sfa_pmss; 2260 else 2261 tot_wnd = fp->cwnd - fp->rxt_unacked; 2262 2263 /* Find the first unack'ed chunk */ 2264 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 2265 sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 2266 2267 if (SCTP_IS_MSG_ABANDONED(meta) || 2268 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) { 2269 continue; 2270 } 2271 2272 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 2273 /* Again, this may not be possible */ 2274 if (!SCTP_CHUNK_ISSENT(mp)) 2275 return; 2276 sdc = (sctp_data_hdr_t *)mp->b_rptr; 2277 if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn) 2278 goto found_msg; 2279 } 2280 } 2281 2282 /* Everything is abandoned... */ 2283 return; 2284 2285 found_msg: 2286 if (!fp->timer_running) 2287 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2288 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 2289 if (pkt == NULL) { 2290 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 2291 return; 2292 } 2293 if ((pkt_len > fp->sfa_pmss) && fp->isv4) { 2294 ipha_t *iph = (ipha_t *)pkt->b_rptr; 2295 2296 /* 2297 * Path MTU is different from path we thought it would 2298 * be when we created chunks, or IP headers have grown. 2299 * Need to clear the DF bit. 2300 */ 2301 iph->ipha_fragment_offset_and_flags = 0; 2302 } 2303 sctp_set_iplen(sctp, pkt); 2304 sctp_add_sendq(sctp, pkt); 2305 2306 /* Check and see if there is more chunk to be retransmitted. */ 2307 if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss || 2308 meta == NULL) 2309 return; 2310 if (mp == NULL) 2311 meta = meta->b_next; 2312 if (meta == NULL) 2313 return; 2314 2315 /* Retransmit another packet if the window allows. 
*/ 2316 for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1; 2317 meta != NULL && burst > 0; meta = meta->b_next, burst--) { 2318 if (mp == NULL) 2319 mp = meta->b_cont; 2320 for (; mp != NULL; mp = mp->b_next) { 2321 /* Again, this may not be possible */ 2322 if (!SCTP_CHUNK_ISSENT(mp)) 2323 return; 2324 if (!SCTP_CHUNK_ISACKED(mp)) 2325 goto found_msg; 2326 } 2327 } 2328 } 2329