1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #define _SUN_TPI_VERSION 2 34 #include <sys/tihdr.h> 35 #include <sys/socket.h> 36 #include <sys/stropts.h> 37 #include <sys/strsun.h> 38 #include <sys/strsubr.h> 39 #include <sys/socketvar.h> 40 /* swilly code in sys/socketvar.h turns off DEBUG */ 41 #ifdef __lint 42 #define DEBUG 43 #endif 44 45 #include <inet/common.h> 46 #include <inet/mi.h> 47 #include <inet/ip.h> 48 #include <inet/ip6.h> 49 #include <inet/sctp_ip.h> 50 #include <inet/ipclassifier.h> 51 52 /* 53 * PR-SCTP comments. 54 * 55 * A message can expire before it gets to the transmit list (i.e. it is still 56 * in the unsent list - unchunked), after it gets to the transmit list, but 57 * before transmission has actually started, or after transmission has begun. 
 * Accordingly, we check for the status of a message in sctp_chunkify() when
 * the message is being transferred from the unsent list to the transmit list;
 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
 * When we nuke a message in sctp_chunkify(), all we need to do is take it
 * out of the unsent list and update sctp_unsent; when a message is deemed
 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
 * list, update sctp_unsent IFF transmission for the message has not yet begun
 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
 * message has started, then we cannot just take it out of the list; we need
 * to send a Forward TSN chunk to the peer so that the peer can clear its
 * fragment list for this message. However, we cannot just send the Forward
 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
 * messages preceding this abandoned message. So, we send a Forward TSN
 * IFF all messages prior to this abandoned message have been SACKed; if not,
 * we defer sending the Forward TSN to sctp_cumack(), which will check for
 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
 * sctp_rexmit() when we check for retransmissions, we need to determine if
 * the advanced peer ack point can be moved ahead, and if so, send a Forward
 * TSN to the peer instead of retransmitting the chunk. Note that when
 * we send a Forward TSN for a message, there may be yet unsent chunks for
 * this message; we need to mark all such chunks as abandoned, so that
 * sctp_cumack() can take the message out of the transmit list; additionally,
 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
 * decremented when a message/chunk is deemed abandoned), sockfs needs to
 * be notified so that it can adjust its idea of the queued message.
 */

#include "sctp_impl.h"

static struct kmem_cache	*sctp_kmem_ftsn_set_cache;

/* Padding mblk for SCTP chunks. */
mblk_t *sctp_pad_mp;

#ifdef	DEBUG
static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
#endif

/*
 * Called to allocate a header mblk when sending data to SCTP.
 * Data will follow in b_cont of this mblk.
 *
 * The mblk is laid out as a T_unitdata_req followed by 'nlen' bytes of
 * destination address ('name') and, at a _TPI_ALIGN_TOPT()-aligned offset,
 * 'clen' bytes of option/ancillary data ('control').  It is always at least
 * sizeof (sctp_msg_hdr_t) bytes, because sctp_sendmsg() later overwrites the
 * same buffer with a sctp_msg_hdr_t.
 *
 * If SCTP_CAN_BLOCK is set in 'flags' the allocation uses allocb_wait(),
 * otherwise allocb().  Returns the M_PROTO mblk, or NULL if no mblk was
 * obtained.
 */
mblk_t *
sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
    int flags)
{
	mblk_t *mp;
	struct T_unitdata_req *tudr;
	size_t size;
	int error;

	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
	/* Leave room for the sctp_msg_hdr_t that will replace the TPI header */
	size = MAX(size, sizeof (sctp_msg_hdr_t));
	if (flags & SCTP_CAN_BLOCK) {
		mp = allocb_wait(size, BPRI_MED, 0, &error);
	} else {
		mp = allocb(size, BPRI_MED);
	}
	if (mp) {
		tudr = (struct T_unitdata_req *)mp->b_rptr;
		tudr->PRIM_type = T_UNITDATA_REQ;
		tudr->DEST_length = nlen;
		tudr->DEST_offset = sizeof (*tudr);
		tudr->OPT_length = clen;
		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
		    _TPI_ALIGN_TOPT(nlen));
		if (nlen > 0)
			bcopy(name, tudr + 1, nlen);
		if (clen > 0)
			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
		mp->b_wptr += (tudr ->OPT_offset + clen);
		mp->b_datap->db_type = M_PROTO;
	}
	return (mp);
}

/*
 * Queue a user message for transmission on an association.
 *
 * 'mp' is the M_PROTO header built by sctp_alloc_hdr() (a T_UNITDATA_REQ);
 * the user data, if any, hangs off mp->b_cont.  An optional destination
 * address and an optional SCTP_SNDRCV ancillary object in the header
 * override the per-association defaults for peer address, stream id, flags,
 * ppid, context and timetolive.  'flags' is not referenced in this function.
 *
 * If MSG_ABORT is set in the effective message flags the association is
 * aborted (with the data as the abort cause) instead of sending.
 *
 * Otherwise the header mblk is re-used as a sctp_msg_hdr_t, the message is
 * appended to the unsent list, and sctp_output() is called if the
 * association is established.
 *
 * Returns 0 on success, else EINVAL, EAFNOSUPPORT, ENOMEM or EPIPE.  On the
 * error returns below the header mblk is not freed by this function.
 *
 * NOTE(review): when there is no data block (mp->b_cont == NULL) and
 * MSG_ABORT is not set, 0 is returned without queueing or freeing the
 * header; confirm the caller's ownership expectations for that case.
 */
/*ARGSUSED2*/
int
sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
{
	sctp_faddr_t	*fp = NULL;
	struct T_unitdata_req	*tudr;
	int		error = 0;
	mblk_t		*mproto = mp;
	in6_addr_t	*addr;
	in6_addr_t	tmpaddr;
	uint16_t	sid = sctp->sctp_def_stream;
	uint32_t	ppid = sctp->sctp_def_ppid;
	uint32_t	context = sctp->sctp_def_context;
	uint16_t	msg_flags = sctp->sctp_def_flags;
	sctp_msg_hdr_t	*sctp_msg_hdr;
	uint32_t	msg_len = 0;
	uint32_t	timetolive = sctp->sctp_def_timetolive;

	ASSERT(DB_TYPE(mproto) == M_PROTO);

	mp = mp->b_cont;
	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);

	tudr = (struct T_unitdata_req *)mproto->b_rptr;
	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);

	/* Get destination address, if specified */
	if (tudr->DEST_length > 0) {
		sin_t *sin;
		sin6_t *sin6;

		sin = (struct sockaddr_in *)
		    (mproto->b_rptr + tudr->DEST_offset);
		switch (sin->sin_family) {
		case AF_INET:
			if (tudr->DEST_length < sizeof (*sin)) {
				return (EINVAL);
			}
			/* Peer addresses are looked up as V4-mapped IPv6 */
			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
			addr = &tmpaddr;
			break;
		case AF_INET6:
			if (tudr->DEST_length < sizeof (*sin6)) {
				return (EINVAL);
			}
			sin6 = (struct sockaddr_in6 *)
			    (mproto->b_rptr + tudr->DEST_offset);
			addr = &sin6->sin6_addr;
			break;
		default:
			return (EAFNOSUPPORT);
		}
		/* The address must be one of this association's peers */
		fp = sctp_lookup_faddr(sctp, addr);
		if (fp == NULL) {
			return (EINVAL);
		}
	}
	/* Ancillary Data? */
	if (tudr->OPT_length > 0) {
		struct cmsghdr		*cmsg;
		char			*cend;
		struct sctp_sndrcvinfo	*sndrcv;

		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
		cend = ((char *)cmsg + tudr->OPT_length);
		ASSERT(cend <= (char *)mproto->b_wptr);

		/*
		 * Walk the cmsg objects until the first SCTP_SNDRCV one is
		 * found or the next object would run past the option area.
		 */
		for (;;) {
			if ((char *)(cmsg + 1) > cend ||
			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
				break;
			}
			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
				if (cmsg->cmsg_len <
				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
					return (EINVAL);
				}
				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
				sid = sndrcv->sinfo_stream;
				msg_flags = sndrcv->sinfo_flags;
				ppid = sndrcv->sinfo_ppid;
				context = sndrcv->sinfo_context;
				timetolive = sndrcv->sinfo_timetolive;
				break;
			}
			/* A zero-length object would never advance; stop. */
			if (cmsg->cmsg_len > 0)
				cmsg = CMSG_NEXT(cmsg);
			else
				break;
		}
	}
	if (msg_flags & MSG_ABORT) {
		/* Flatten multi-mblk data into one block for the abort */
		if (mp && mp->b_cont) {
			mblk_t *pump = msgpullup(mp, -1);
			if (!pump) {
				return (ENOMEM);
			}
			freemsg(mp);
			mp = pump;
			mproto->b_cont = mp;
		}
		RUN_SCTP(sctp);
		sctp_user_abort(sctp, mp, B_TRUE);
		sctp_clean_death(sctp, ECONNRESET);
		freemsg(mproto);
		goto process_sendq;
	}
	if (mp == NULL)
		goto done;

	RUN_SCTP(sctp);

	/* Reject any new data requests if we are shutting down */
	if (sctp->sctp_state > SCTPS_ESTABLISHED) {
		error = EPIPE;
		goto unlock_done;
	}

	/* Re-use the mproto to store relevant info. */
	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));

	mproto->b_rptr = mproto->b_datap->db_base;
	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);

	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
	sctp_msg_hdr->smh_context = context;
	sctp_msg_hdr->smh_sid = sid;
	sctp_msg_hdr->smh_ppid = ppid;
	sctp_msg_hdr->smh_flags = msg_flags;
	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
	sctp_msg_hdr->smh_tob = lbolt64;
	/* Total payload length is the sum over the whole b_cont chain */
	for (; mp != NULL; mp = mp->b_cont)
		msg_len += MBLKL(mp);
	sctp_msg_hdr->smh_msglen = msg_len;

	/* User requested specific destination */
	SCTP_SET_CHUNK_DEST(mproto, fp);

	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
	    sid >= sctp->sctp_num_ostr) {
		/* Send sendfail event */
		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
		    B_FALSE);
		error = EINVAL;
		goto unlock_done;
	}

	/* no data */
	if (msg_len == 0) {
		sctp_sendfail_event(sctp, dupmsg(mproto),
		    SCTP_ERR_NO_USR_DATA, B_FALSE);
		error = EINVAL;
		goto unlock_done;
	}

	/* Add it to the unsent list */
	if (sctp->sctp_xmit_unsent == NULL) {
		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
	} else {
		sctp->sctp_xmit_unsent_tail->b_next = mproto;
		sctp->sctp_xmit_unsent_tail = mproto;
	}
	sctp->sctp_unsent += msg_len;
	BUMP_LOCAL(sctp->sctp_msgcount);
	if (sctp->sctp_state == SCTPS_ESTABLISHED)
		sctp_output(sctp);
process_sendq:
	WAKE_SCTP(sctp);
	sctp_process_sendq(sctp);
	return (0);
unlock_done:
	WAKE_SCTP(sctp);
done:
	return (error);
}

/*
 * Move messages from the head of the unsent list to the tail of the
 * transmit list, splitting each message's data into DATA chunks.
 *
 * Each chunk is a b_cont chain headed by a sctp_data_hdr_t; the chunks of
 * one message are linked through b_next off the message header's b_cont.
 * 'first_len' is the room available for the first chunk, data header
 * included; later chunks are sized from the destination's sfa_pmss.
 * 'bytes_to_send' is a budget: once a message is done, the next unsent
 * message is processed only while the budget is still positive.
 *
 * Messages that SCTP_MSG_TO_BE_ABANDONED() flags before any chunking are
 * dropped from the unsent list and reported via sctp_sendfail_event().
 *
 * If dupb() or allocb() fails part-way through a message, the chunks built
 * so far and the unchunked remainder are hung back on the message header,
 * the header is marked SCTP_MSG_SET_CHUNKED if at least one chunk exists,
 * and the function returns with the message still on the unsent list.
 *
 * The unsent list is dereferenced without a NULL check; callers must ensure
 * sctp_xmit_unsent is non-NULL.
 */
void
sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send)
{
	mblk_t			*mp;
	mblk_t			*chunk_mp;
	mblk_t			*chunk_head;
	mblk_t			*chunk_hdr;
	mblk_t			*chunk_tail = NULL;
	int			count;
	int			chunksize;
	sctp_data_hdr_t		*sdc;
	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
	sctp_faddr_t		*fp;
	sctp_faddr_t		*fp1;
	size_t			xtralen;
	sctp_msg_hdr_t		*msg_hdr;

	fp = SCTP_CHUNK_DEST(mdblk);
	if (fp == NULL)
		fp = sctp->sctp_current;
	/* Headroom to reserve in any chunk header mblk we allocate */
	if (fp->isv4)
		xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra + sizeof (*sdc);
	else
		xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra + sizeof (*sdc);
	/* 'count' is the payload room left in the chunk being built */
	count = chunksize = first_len - sizeof (*sdc);
nextmsg:
	chunk_mp = mdblk->b_cont;

	/*
	 * If this is partially chunked, we ignore the first_len for now
	 * and use the one already present. For the unchunked bits, we
	 * use the length of the last chunk.
	 */
	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
		int	chunk_len;

		/*
		 * The unchunked remainder was parked at the head of b_cont,
		 * with the finished chunks following via b_next; detach it.
		 */
		ASSERT(chunk_mp->b_next != NULL);
		mdblk->b_cont = chunk_mp->b_next;
		chunk_mp->b_next = NULL;
		SCTP_MSG_CLEAR_CHUNKED(mdblk);
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
			count = chunksize = fp->sfa_pmss - chunk_len;
		else
			count = chunksize = fp->sfa_pmss;
		count = chunksize = count - sizeof (*sdc);
	} else {
		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
			sctp->sctp_xmit_unsent = mdblk->b_next;
			if (sctp->sctp_xmit_unsent == NULL)
				sctp->sctp_xmit_unsent_tail = NULL;
			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
			sctp->sctp_unsent -= msg_hdr->smh_msglen;
			mdblk->b_next = NULL;
			BUMP_LOCAL(sctp->sctp_prsctpdrop);
			/*
			 * Update ULP the amount of queued data, which is
			 * sent-unack'ed + unsent.
			 */
			if (!SCTP_IS_DETACHED(sctp)) {
				sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
				    sctp->sctp_unacked + sctp->sctp_unsent);
			}
			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
			goto try_next;
		}
		/* Chunks will be re-attached to b_cont as they are built */
		mdblk->b_cont = NULL;
	}
	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
nextchunk:
	chunk_head = chunk_mp;
	chunk_tail = NULL;

	/* Skip as many mblk's as we need */
	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
		count -= MBLKL(chunk_mp);
		chunk_tail = chunk_mp;
		chunk_mp = chunk_mp->b_cont;
	}
	/* Split the chain, if needed */
	if (chunk_mp != NULL) {
		if (count > 0) {
			mblk_t	*split_mp = dupb(chunk_mp);

			if (split_mp == NULL) {
				/* Put back what we have and give up for now */
				if (mdblk->b_cont == NULL) {
					mdblk->b_cont = chunk_head;
				} else  {
					SCTP_MSG_SET_CHUNKED(mdblk);
					ASSERT(chunk_head->b_next == NULL);
					chunk_head->b_next = mdblk->b_cont;
					mdblk->b_cont = chunk_head;
				}
				return;
			}
			if (chunk_tail != NULL) {
				chunk_tail->b_cont = split_mp;
				chunk_tail = chunk_tail->b_cont;
			} else {
				chunk_head = chunk_tail = split_mp;
			}
			/*
			 * The dup'ed mblk keeps the first 'count' bytes; the
			 * original mblk keeps the rest for the next chunk.
			 */
			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
			chunk_mp->b_rptr = chunk_tail->b_wptr;
			count = 0;
		} else if (chunk_tail == NULL) {
			/* No room at all: restart with a full-sized chunk */
			goto next;
		} else {
			chunk_tail->b_cont = NULL;
		}
	}
	/* Alloc chunk hdr, if needed */
	if (DB_REF(chunk_head) > 1 ||
	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/* Re-join the remainder and put everything back */
			if (mdblk->b_cont == NULL) {
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				mdblk->b_cont = chunk_head;
			} else {
				SCTP_MSG_SET_CHUNKED(mdblk);
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				ASSERT(chunk_head->b_next == NULL);
				chunk_head->b_next = mdblk->b_cont;
				mdblk->b_cont = chunk_head;
			}
			return;
		}
		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
		chunk_hdr->b_cont = chunk_head;
	} else {
		/* Enough private, aligned headroom: prepend in place */
		chunk_hdr = chunk_head;
		chunk_hdr->b_rptr -= sizeof (*sdc);
	}
	ASSERT(chunk_hdr->b_datap->db_ref == 1);
	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
	sdc->sdh_id = CHUNK_DATA;
	sdc->sdh_flags = 0;
	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
	ASSERT(sdc->sdh_len);
	sdc->sdh_sid = htons(msg_hdr->smh_sid);
	/*
	 * We defer assigning the SSN just before sending the chunk, else
	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
	 * to send a Forward TSN to let the peer know. Some more comments
	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
	 */
	sdc->sdh_payload_id = msg_hdr->smh_ppid;

	if (mdblk->b_cont == NULL) {
		/* First chunk of the message */
		mdblk->b_cont = chunk_hdr;
		SCTP_DATA_SET_BBIT(sdc);
	} else {
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = chunk_hdr;
	}

	bytes_to_send -= (chunksize - count);
	if (chunk_mp != NULL) {
next:
		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
		goto nextchunk;
	}
	/* Last chunk: the message moves from unsent to the transmit list */
	SCTP_DATA_SET_EBIT(sdc);
	sctp->sctp_xmit_unsent = mdblk->b_next;
	if (mdblk->b_next == NULL) {
		sctp->sctp_xmit_unsent_tail = NULL;
	}
	mdblk->b_next = NULL;

	if (sctp->sctp_xmit_tail == NULL) {
		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
	} else {
		/* sctp_xmit_tail itself is not advanced here */
		mp = sctp->sctp_xmit_tail;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = mdblk;
		mdblk->b_prev = mp;
	}
try_next:
	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
		mdblk = sctp->sctp_xmit_unsent;
		fp1 = SCTP_CHUNK_DEST(mdblk);
		if (fp1 == NULL)
			fp1 = sctp->sctp_current;
		if (fp == fp1) {
			size_t len = MBLKL(mdblk->b_cont);
			/*
			 * Same destination: use the room left over from the
			 * previous chunk (less a data header, rounded down
			 * to a multiple of 4) when the first mblk is larger
			 * than a full chunk or fits in what is left;
			 * otherwise start with a full-sized chunk.
			 */
			if ((count > 0) &&
			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
			    (len <= count))) {
				count -= sizeof (*sdc);
				count = chunksize = count - (count & 0x3);
			} else {
				count = chunksize = fp->sfa_pmss -
				    sizeof (*sdc);
			}
		} else {
			/* Different destination: recompute headroom and size */
			if (fp1->isv4)
				xtralen = sctp->sctp_hdr_len;
			else
				xtralen = sctp->sctp_hdr6_len;
			xtralen += sctp_wroff_xtra + sizeof (*sdc);
			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
			fp = fp1;
		}
		goto nextmsg;
	}
}

/*
 * Free a message: every chunk hanging off the header's b_cont (chunks are
 * linked to each other through b_next) and then the header mblk itself.
 * The header must already be unlinked from its predecessor (b_prev NULL).
 */
void
sctp_free_msg(mblk_t *ump)
{
	mblk_t *mp, *nmp;

	for (mp = ump->b_cont; mp; mp = nmp) {
		nmp = mp->b_next;
		/* Clear the list linkage before handing to freemsg() */
		mp->b_next = mp->b_prev = NULL;
		freemsg(mp);
	}
	ASSERT(!ump->b_prev);
	ump->b_next = NULL;
	freeb(ump);
}

/*
 * Prepend the association's IP + SCTP header template for destination 'fp'
 * to 'mp', leaving 'sacklen' bytes after the headers which, if non-zero,
 * are filled in by sctp_fill_sack().
 *
 * The headers are written into mp's own headroom when it is large enough,
 * suitably aligned and the dblk reference count is at most 2; otherwise a
 * new mblk is allocated and mp is linked behind it.
 *
 * Returns the mblk to send.  On failure returns NULL and, if 'error' is
 * non-NULL, sets *error to EHOSTUNREACH (no usable route/source address) or
 * ENOMEM.  'mp' is not freed on failure; that is left to the caller.
 */
mblk_t *
sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
    int *error)
{
	int hdrlen;
	char *hdr;
	int isv4 = fp->isv4;

	if (error != NULL)
		*error = 0;

	if (isv4) {
		hdrlen = sctp->sctp_hdr_len;
		hdr = sctp->sctp_iphc;
	} else {
		hdrlen = sctp->sctp_hdr6_len;
		hdr = sctp->sctp_iphc6;
	}
	/*
	 * A null fp->ire could mean that the address is 'down'. Similarly,
	 * it is possible that the address went down, we tried to send an
	 * heartbeat and ended up setting fp->saddr as unspec because we
	 * didn't have any usable source address.  In either case
	 * sctp_get_ire() will try find an IRE, if available, and set
	 * the source address, if needed.  If we still don't have any
	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
	 * we return EHOSTUNREACH.
	 */
	if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
		sctp_get_ire(sctp, fp);
		if (fp->state == SCTP_FADDRS_UNREACH) {
			if (error != NULL)
				*error = EHOSTUNREACH;
			return (NULL);
		}
	}
	/* Copy in IP header. */
	if ((mp->b_rptr - mp->b_datap->db_base) <
	    (sctp_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 ||
	    !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) {
		mblk_t *nmp;

		/*
		 * This can happen if IP headers are adjusted after
		 * data was moved into chunks, or during retransmission,
		 * or things like snoop is running.
		 */
		nmp = allocb_cred(sctp_wroff_xtra + hdrlen + sacklen,
		    CONN_CRED(sctp->sctp_connp));
		if (nmp == NULL) {
			if (error != NULL)
				*error = ENOMEM;
			return (NULL);
		}
		nmp->b_rptr += sctp_wroff_xtra;
		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
		nmp->b_cont = mp;
		mp = nmp;
	} else {
		mp->b_rptr -= (hdrlen + sacklen);
		mblk_setcred(mp, CONN_CRED(sctp->sctp_connp));
	}
	bcopy(hdr, mp->b_rptr, hdrlen);
	if (sacklen) {
		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
	}
	/* The header template is only rewritten for a non-current peer */
	if (fp != sctp->sctp_current) {
		/* change addresses in header */
		if (isv4) {
			ipha_t *iph = (ipha_t *)mp->b_rptr;

			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
				    iph->ipha_src);
			} else if (sctp->sctp_bound_to_all) {
				iph->ipha_src = INADDR_ANY;
			}
		} else {
			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
			} else if (sctp->sctp_bound_to_all) {
				V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src);
			}
		}
	}
	/*
	 * IP will not free this IRE if it is condemned.  SCTP needs to
	 * free it.
	 */
	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
		IRE_REFRELE_NOTR(fp->ire);
		fp->ire = NULL;
	}

	/* Stash the conn and ire ptr info for IP */
	SCTP_STASH_IPINFO(mp, fp->ire);

	return (mp);
}

/*
 * SCTP requires every chunk to be padded so that the total length
 * is a multiple of SCTP_ALIGN.
This function returns a mblk with 650 * the specified pad length. 651 */ 652 static mblk_t * 653 sctp_get_padding(int pad) 654 { 655 mblk_t *fill; 656 657 ASSERT(pad < SCTP_ALIGN); 658 if ((fill = dupb(sctp_pad_mp)) != NULL) { 659 fill->b_wptr += pad; 660 return (fill); 661 } 662 663 /* 664 * The memory saving path of reusing the sctp_pad_mp 665 * fails may be because it has been dupb() too 666 * many times (DBLK_REFMAX). Use the memory consuming 667 * path of allocating the pad mblk. 668 */ 669 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) { 670 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */ 671 *(int32_t *)fill->b_rptr = 0; 672 fill->b_wptr += pad; 673 } 674 return (fill); 675 } 676 677 static mblk_t * 678 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) 679 { 680 mblk_t *meta; 681 mblk_t *start_mp = NULL; 682 mblk_t *end_mp = NULL; 683 mblk_t *mp, *nmp; 684 mblk_t *fill; 685 sctp_data_hdr_t *sdh; 686 int msglen; 687 int extra; 688 sctp_msg_hdr_t *msg_hdr; 689 sctp_faddr_t *old_fp = NULL; 690 sctp_faddr_t *chunk_fp; 691 692 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 693 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 694 if (SCTP_IS_MSG_ABANDONED(meta) || 695 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 696 continue; 697 } 698 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 699 if (SCTP_CHUNK_WANT_REXMIT(mp)) { 700 /* 701 * Use the same peer address to do fast 702 * retransmission. If the original peer 703 * address is dead, switch to the current 704 * one. Record the old one so that we 705 * will pick the chunks sent to the old 706 * one for fast retransmission. 
707 */ 708 chunk_fp = SCTP_CHUNK_DEST(mp); 709 if (*fp == NULL) { 710 *fp = chunk_fp; 711 if ((*fp)->state != SCTP_FADDRS_ALIVE) { 712 old_fp = *fp; 713 *fp = sctp->sctp_current; 714 } 715 } else if (old_fp == NULL && *fp != chunk_fp) { 716 continue; 717 } else if (old_fp != NULL && 718 old_fp != chunk_fp) { 719 continue; 720 } 721 722 sdh = (sctp_data_hdr_t *)mp->b_rptr; 723 msglen = ntohs(sdh->sdh_len); 724 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) { 725 extra = SCTP_ALIGN - extra; 726 } 727 728 /* 729 * We still return at least the first message 730 * even if that message cannot fit in as 731 * PMTU may have changed. 732 */ 733 if (*total + msglen + extra > 734 (*fp)->sfa_pmss && start_mp != NULL) { 735 return (start_mp); 736 } 737 if ((nmp = dupmsg(mp)) == NULL) 738 return (start_mp); 739 if (extra > 0) { 740 fill = sctp_get_padding(extra); 741 if (fill != NULL) { 742 linkb(nmp, fill); 743 } else { 744 return (start_mp); 745 } 746 } 747 BUMP_MIB(&sctp_mib, sctpOutFastRetrans); 748 BUMP_LOCAL(sctp->sctp_rxtchunks); 749 SCTP_CHUNK_CLEAR_REXMIT(mp); 750 if (start_mp == NULL) { 751 start_mp = nmp; 752 } else { 753 linkb(end_mp, nmp); 754 } 755 end_mp = nmp; 756 *total += msglen + extra; 757 dprint(2, ("sctp_find_fast_rexmit_mblks: " 758 "tsn %x\n", sdh->sdh_tsn)); 759 } 760 } 761 } 762 /* Clear the flag as there is no more message to be fast rexmitted. */ 763 sctp->sctp_chk_fast_rexmit = B_FALSE; 764 return (start_mp); 765 } 766 767 /* A debug function just to make sure that a mblk chain is not broken */ 768 #ifdef DEBUG 769 static boolean_t 770 sctp_verify_chain(mblk_t *head, mblk_t *tail) 771 { 772 mblk_t *mp = head; 773 774 if (head == NULL || tail == NULL) 775 return (B_TRUE); 776 while (mp != NULL) { 777 if (mp == tail) 778 return (B_TRUE); 779 mp = mp->b_next; 780 } 781 return (B_FALSE); 782 } 783 #endif 784 785 /* 786 * Gets the next unsent chunk to transmit. Messages that are abandoned are 787 * skipped. 
 * A message can be abandoned if it has a non-zero timetolive and
 * transmission has not yet started or if it is a partially reliable
 * message and its time is up (assuming we are PR-SCTP aware).
 * 'cansend' is used to determine if need to try and chunkify messages from
 * the unsent list, if any, and also as an input to sctp_chunkify() if so.
 * When called from sctp_rexmit(), we don't want to chunkify, so 'cansend'
 * will be set to 0.
 *
 * 'meta' is the message header at which the scan of the transmit list
 * starts.  On success the header of the message owning the chunk is
 * returned and *mp is set to the chunk.  NULL is returned when nothing can
 * be sent; *error is then non-zero only if sctp_check_abandoned_msg()
 * failed.  'firstseg' is subtracted from the destination's sfa_pmss to get
 * the room for the first chunk when chunkifying.  If 'fp' is non-NULL,
 * nothing is chunkified when the head of the unsent list is bound to a
 * different, alive destination.
 */
mblk_t *
sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
    int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp)
{
	mblk_t		*mp1;
	sctp_msg_hdr_t	*msg_hdr;
	mblk_t		*tmp_meta;
	sctp_faddr_t	*fp1;

	ASSERT(error != NULL && mp != NULL);
	*error = 0;

	ASSERT(sctp->sctp_current != NULL);

chunkified:
	while (meta != NULL) {
		/* Remember the successor: meta may be unlinked below */
		tmp_meta = meta->b_next;
		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
		mp1 = meta->b_cont;
		if (SCTP_IS_MSG_ABANDONED(meta))
			goto next_msg;
		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			while (mp1 != NULL) {
				if (SCTP_CHUNK_CANSEND(mp1)) {
					*mp = mp1;
#ifdef	DEBUG
					ASSERT(sctp_verify_chain(
					    sctp->sctp_xmit_head, meta));
#endif
					return (meta);
				}
				mp1 = mp1->b_next;
			}
			goto next_msg;
		}
		/*
		 * If we come here and the first chunk is sent, then we
		 * are PR-SCTP aware, in which case if the cumulative
		 * TSN has moved up to or beyond the first chunk (which
		 * means all the previous messages have been cumulative
		 * SACK'd), then we send a Forward TSN with the last
		 * chunk that was sent in this message. If we can't send
		 * a Forward TSN because previous non-abandoned messages
		 * have not been acked then we will defer the Forward TSN
		 * to sctp_rexmit() or sctp_cumack().
		 */
		if (SCTP_CHUNK_ISSENT(mp1)) {
			*error = sctp_check_abandoned_msg(sctp, meta);
			if (*error != 0) {
#ifdef	DEBUG
				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
				    sctp->sctp_xmit_tail));
#endif
				return (NULL);
			}
			goto next_msg;
		}
		/*
		 * Nothing of this message has been sent: unlink it from the
		 * transmit list (head, tail and middle cases) and drop it.
		 */
		BUMP_LOCAL(sctp->sctp_prsctpdrop);
		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
		if (meta->b_prev == NULL) {
			ASSERT(sctp->sctp_xmit_head == meta);
			sctp->sctp_xmit_head = tmp_meta;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_next = NULL;
			if (tmp_meta != NULL)
				tmp_meta->b_prev = NULL;
		} else if (meta->b_next == NULL) {
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = meta->b_prev;
			meta->b_prev->b_next = NULL;
			meta->b_prev = NULL;
		} else {
			meta->b_prev->b_next = tmp_meta;
			tmp_meta->b_prev = meta->b_prev;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_prev = NULL;
			meta->b_next = NULL;
		}
		sctp->sctp_unsent -= msg_hdr->smh_msglen;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp)) {
			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
			    sctp->sctp_unacked + sctp->sctp_unsent);
		}
		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
next_msg:
		meta = tmp_meta;
	}
	/* chunkify, if needed */
	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
		ASSERT(sctp->sctp_unsent > 0);
		if (fp == NULL) {
			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
				fp = sctp->sctp_current;
		} else {
			/*
			 * If user specified destination, try to honor that.
			 */
			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
			    fp1 != fp) {
				goto chunk_done;
			}
		}
		sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend);
		if ((meta = sctp->sctp_xmit_tail) == NULL)
			goto chunk_done;
		/*
		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
		 * new chunk(s) to the tail, so we need to skip the
		 * sctp_xmit_tail, which would have already been processed.
		 * This could happen when there is unacked chunks, but
		 * nothing new to send.
		 * When sctp_chunkify() is called when the transmit queue
		 * is empty then we need to start from sctp_xmit_tail.
		 */
		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
#ifdef	DEBUG
			mp1 = sctp->sctp_xmit_tail->b_cont;
			while (mp1 != NULL) {
				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
				mp1 = mp1->b_next;
			}
#endif
			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
				goto chunk_done;
		}
		/* Rescan, starting at the newly chunkified message(s) */
		goto chunkified;
	}
chunk_done:
#ifdef	DEBUG
	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
#endif
	return (NULL);
}

/*
 * Fast-retransmit the chunks currently marked for it: collect them with
 * sctp_find_fast_rexmit_mblks(), prepend the protocol headers for the
 * chosen destination and queue the packet on the send queue.  Nothing is
 * sent if no chunk is found or the headers cannot be added (the collected
 * copies are freed in that case).
 */
void
sctp_fast_rexmit(sctp_t *sctp)
{
	mblk_t		*mp, *head;
	int		pktlen = 0;
	sctp_faddr_t	*fp = NULL;

	ASSERT(sctp->sctp_xmit_head != NULL);
	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
	if (mp == NULL) {
		SCTP_KSTAT(sctp_fr_not_found);
		return;
	}
	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
		freemsg(mp);
		SCTP_KSTAT(sctp_fr_add_hdr);
		return;
	}
	/*
	 * The packet is larger than the path's sfa_pmss (a single oversized
	 * chunk is still returned by the collector).
	 * NOTE(review): presumably clearing the fragment flags lets IP
	 * fragment the packet -- confirm.
	 */
	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
		ipha_t *iph = (ipha_t *)head->b_rptr;

		iph->ipha_fragment_offset_and_flags = 0;
	}

	sctp_set_iplen(sctp, head);
	sctp_add_sendq(sctp, head);
	sctp->sctp_active = fp->lastactive = lbolt64;
}

void
sctp_output(sctp_t *sctp)
{
	mblk_t			*mp = NULL;
	mblk_t			*nmp;
	mblk_t			*head;
	mblk_t			*meta
= sctp->sctp_xmit_tail; 973 mblk_t *fill = NULL; 974 uint16_t chunklen; 975 uint32_t cansend; 976 int32_t seglen; 977 int32_t xtralen; 978 int32_t sacklen; 979 int32_t pad = 0; 980 int32_t pathmax; 981 int extra; 982 int64_t now = lbolt64; 983 sctp_faddr_t *fp; 984 sctp_faddr_t *lfp; 985 sctp_data_hdr_t *sdc; 986 int error; 987 boolean_t notsent = B_TRUE; 988 989 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 990 sacklen = 0; 991 } else { 992 /* send a SACK chunk */ 993 sacklen = sizeof (sctp_chunk_hdr_t) + 994 sizeof (sctp_sack_chunk_t) + 995 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 996 lfp = sctp->sctp_lastdata; 997 ASSERT(lfp != NULL); 998 if (lfp->state != SCTP_FADDRS_ALIVE) 999 lfp = sctp->sctp_current; 1000 } 1001 1002 cansend = sctp->sctp_frwnd; 1003 if (sctp->sctp_unsent < cansend) 1004 cansend = sctp->sctp_unsent; 1005 if ((cansend < sctp->sctp_current->sfa_pmss / 2) && 1006 sctp->sctp_unacked && 1007 (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) && 1008 !sctp->sctp_ndelay) { 1009 head = NULL; 1010 fp = sctp->sctp_current; 1011 goto unsent_data; 1012 } 1013 if (meta != NULL) 1014 mp = meta->b_cont; 1015 while (cansend > 0) { 1016 pad = 0; 1017 1018 /* 1019 * Find first segment eligible for transmit. 1020 */ 1021 while (mp != NULL) { 1022 if (SCTP_CHUNK_CANSEND(mp)) 1023 break; 1024 mp = mp->b_next; 1025 } 1026 if (mp == NULL) { 1027 meta = sctp_get_msg_to_send(sctp, &mp, 1028 meta == NULL ? NULL : meta->b_next, &error, sacklen, 1029 cansend, NULL); 1030 if (error != 0 || meta == NULL) { 1031 head = NULL; 1032 fp = sctp->sctp_current; 1033 goto unsent_data; 1034 } 1035 sctp->sctp_xmit_tail = meta; 1036 } 1037 1038 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1039 seglen = ntohs(sdc->sdh_len); 1040 xtralen = sizeof (*sdc); 1041 chunklen = seglen - xtralen; 1042 1043 /* 1044 * Check rwnd. 
1045 */ 1046 if (chunklen > cansend) { 1047 head = NULL; 1048 fp = SCTP_CHUNK_DEST(meta); 1049 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1050 fp = sctp->sctp_current; 1051 goto unsent_data; 1052 } 1053 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1054 extra = SCTP_ALIGN - extra; 1055 1056 /* 1057 * Pick destination address, and check cwnd. 1058 */ 1059 if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) && 1060 (seglen + sacklen + extra <= lfp->sfa_pmss)) { 1061 /* 1062 * Only include SACK chunk if it can be bundled 1063 * with a data chunk, and sent to sctp_lastdata. 1064 */ 1065 pathmax = lfp->cwnd - lfp->suna; 1066 1067 fp = lfp; 1068 if ((nmp = dupmsg(mp)) == NULL) { 1069 head = NULL; 1070 goto unsent_data; 1071 } 1072 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1073 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, 1074 &error); 1075 if (head == NULL) { 1076 /* 1077 * If none of the source addresses are 1078 * available (i.e error == EHOSTUNREACH), 1079 * pretend we have sent the data. We will 1080 * eventually time out trying to retramsmit 1081 * the data if the interface never comes up. 1082 * If we have already sent some stuff (i.e., 1083 * notsent is B_FALSE) then we are fine, else 1084 * just mark this packet as sent. 1085 */ 1086 if (notsent && error == EHOSTUNREACH) { 1087 SCTP_CHUNK_SENT(sctp, mp, sdc, 1088 fp, chunklen, meta); 1089 } 1090 freemsg(nmp); 1091 SCTP_KSTAT(sctp_output_failed); 1092 goto unsent_data; 1093 } 1094 seglen += sacklen; 1095 xtralen += sacklen; 1096 sacklen = 0; 1097 } else { 1098 fp = SCTP_CHUNK_DEST(meta); 1099 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1100 fp = sctp->sctp_current; 1101 /* 1102 * If we haven't sent data to this destination for 1103 * a while, do slow start again. 
1104 */ 1105 if (now - fp->lastactive > fp->rto) { 1106 fp->cwnd = sctp_slow_start_after_idle * 1107 fp->sfa_pmss; 1108 } 1109 1110 pathmax = fp->cwnd - fp->suna; 1111 if (seglen + extra > pathmax) { 1112 head = NULL; 1113 goto unsent_data; 1114 } 1115 if ((nmp = dupmsg(mp)) == NULL) { 1116 head = NULL; 1117 goto unsent_data; 1118 } 1119 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1120 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error); 1121 if (head == NULL) { 1122 /* 1123 * If none of the source addresses are 1124 * available (i.e error == EHOSTUNREACH), 1125 * pretend we have sent the data. We will 1126 * eventually time out trying to retramsmit 1127 * the data if the interface never comes up. 1128 * If we have already sent some stuff (i.e., 1129 * notsent is B_FALSE) then we are fine, else 1130 * just mark this packet as sent. 1131 */ 1132 if (notsent && error == EHOSTUNREACH) { 1133 SCTP_CHUNK_SENT(sctp, mp, sdc, 1134 fp, chunklen, meta); 1135 } 1136 freemsg(nmp); 1137 SCTP_KSTAT(sctp_output_failed); 1138 goto unsent_data; 1139 } 1140 } 1141 fp->lastactive = now; 1142 if (pathmax > fp->sfa_pmss) 1143 pathmax = fp->sfa_pmss; 1144 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1145 mp = mp->b_next; 1146 1147 /* Use this chunk to measure RTT? */ 1148 if (sctp->sctp_out_time == 0) { 1149 sctp->sctp_out_time = now; 1150 sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1; 1151 ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); 1152 } 1153 if (extra > 0) { 1154 fill = sctp_get_padding(extra); 1155 if (fill != NULL) { 1156 linkb(head, fill); 1157 pad = extra; 1158 seglen += extra; 1159 } else { 1160 goto unsent_data; 1161 } 1162 } 1163 /* See if we can bundle more. */ 1164 while (seglen < pathmax) { 1165 int32_t new_len; 1166 int32_t new_xtralen; 1167 1168 while (mp != NULL) { 1169 if (SCTP_CHUNK_CANSEND(mp)) 1170 break; 1171 mp = mp->b_next; 1172 } 1173 if (mp == NULL) { 1174 meta = sctp_get_msg_to_send(sctp, &mp, 1175 meta->b_next, &error, seglen, 1176 (seglen - xtralen) >= cansend ? 
0 : 1177 cansend - seglen, fp); 1178 if (error != 0 || meta == NULL) 1179 break; 1180 sctp->sctp_xmit_tail = meta; 1181 } 1182 ASSERT(mp != NULL); 1183 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) && 1184 fp != SCTP_CHUNK_DEST(meta)) { 1185 break; 1186 } 1187 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1188 chunklen = ntohs(sdc->sdh_len); 1189 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0) 1190 extra = SCTP_ALIGN - extra; 1191 1192 new_len = seglen + chunklen; 1193 new_xtralen = xtralen + sizeof (*sdc); 1194 chunklen -= sizeof (*sdc); 1195 1196 if (new_len - new_xtralen > cansend || 1197 new_len + extra > pathmax) { 1198 break; 1199 } 1200 if ((nmp = dupmsg(mp)) == NULL) 1201 break; 1202 if (extra > 0) { 1203 fill = sctp_get_padding(extra); 1204 if (fill != NULL) { 1205 pad += extra; 1206 new_len += extra; 1207 linkb(nmp, fill); 1208 } else { 1209 freemsg(nmp); 1210 break; 1211 } 1212 } 1213 seglen = new_len; 1214 xtralen = new_xtralen; 1215 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1216 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1217 linkb(head, nmp); 1218 mp = mp->b_next; 1219 } 1220 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1221 ipha_t *iph = (ipha_t *)head->b_rptr; 1222 1223 /* 1224 * Path MTU is different from what we thought it would 1225 * be when we created chunks, or IP headers have grown. 1226 * Need to clear the DF bit. 
1227 */ 1228 iph->ipha_fragment_offset_and_flags = 0; 1229 } 1230 /* xmit segment */ 1231 ASSERT(cansend >= seglen - pad - xtralen); 1232 cansend -= (seglen - pad - xtralen); 1233 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x " 1234 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n", 1235 seglen - xtralen, ntohl(sdc->sdh_tsn), 1236 ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, 1237 cansend, sctp->sctp_lastack_rxd)); 1238 sctp_set_iplen(sctp, head); 1239 sctp_add_sendq(sctp, head); 1240 /* arm rto timer (if not set) */ 1241 if (!fp->timer_running) 1242 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1243 notsent = B_FALSE; 1244 } 1245 sctp->sctp_active = now; 1246 return; 1247 unsent_data: 1248 /* arm persist timer (if rto timer not set) */ 1249 if (!fp->timer_running) 1250 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1251 if (head != NULL) 1252 freemsg(head); 1253 } 1254 1255 /* 1256 * The following two functions initialize and destroy the cache 1257 * associated with the sets used for PR-SCTP. 1258 */ 1259 void 1260 sctp_ftsn_sets_init(void) 1261 { 1262 sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache", 1263 sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL, 1264 NULL, 0); 1265 } 1266 1267 void 1268 sctp_ftsn_sets_fini(void) 1269 { 1270 kmem_cache_destroy(sctp_kmem_ftsn_set_cache); 1271 } 1272 1273 1274 /* Free PR-SCTP sets */ 1275 void 1276 sctp_free_ftsn_set(sctp_ftsn_set_t *s) 1277 { 1278 sctp_ftsn_set_t *p; 1279 1280 while (s != NULL) { 1281 p = s->next; 1282 s->next = NULL; 1283 kmem_cache_free(sctp_kmem_ftsn_set_cache, s); 1284 s = p; 1285 } 1286 } 1287 1288 /* 1289 * Given a message meta block, meta, this routine creates or modifies 1290 * the set that will be used to generate a Forward TSN chunk. If the 1291 * entry for stream id, sid, for this message already exists, the 1292 * sequence number, ssn, is updated if it is greater than the existing 1293 * one. 
If an entry for this sid does not exist, one is created if
 * the size does not exceed fp->sfa_pmss. We return false in case
 * of an error.
 */
boolean_t
sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
    uint_t *nsets, uint32_t *slen)
{
	sctp_msg_hdr_t	*hdr = (sctp_msg_hdr_t *)meta->b_rptr;
	sctp_ftsn_set_t	*cur;
	sctp_ftsn_set_t	*tail = NULL;
	sctp_ftsn_set_t	*fresh;
	uint16_t	sid = htons(hdr->smh_sid);
	/* hdr->smh_ssn is already in NBO */
	uint16_t	ssn = hdr->smh_ssn;

	ASSERT(s != NULL && nsets != NULL);
	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));

	/*
	 * Walk the whole list once.  An existing entry for this stream
	 * only ever has its ssn raised; otherwise remember the last node
	 * so that a new entry can be appended behind it.
	 */
	for (cur = *s; cur != NULL; cur = cur->next) {
		if (cur->ftsn_entries.ftsn_sid == sid) {
			if (SSN_GT(ssn, cur->ftsn_entries.ftsn_ssn))
				cur->ftsn_entries.ftsn_ssn = ssn;
			return (B_TRUE);
		}
		tail = cur;
	}

	if (tail == NULL) {
		/* Very first entry: room for it is only asserted. */
		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
	} else if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss) {
		/* One more sid/ssn pair would exceed fp->sfa_pmss. */
		return (B_FALSE);
	}

	fresh = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
	if (fresh == NULL)
		return (B_FALSE);
	fresh->ftsn_entries.ftsn_sid = sid;
	fresh->ftsn_entries.ftsn_ssn = ssn;
	fresh->next = NULL;

	if (tail == NULL) {
		*s = fresh;
		*nsets = 1;
	} else {
		tail->next = fresh;
		(*nsets)++;
	}
	/* Each entry adds one 32-bit sid/ssn pair to the chunk length. */
	*slen += sizeof (uint32_t);
	return (B_TRUE);
}

/*
 * Given a set of stream id - sequence number pairs, this routine creates
 * a Forward TSN chunk.
The cumulative TSN (advanced peer ack point) 1353 * for the chunk is obtained from sctp->sctp_adv_pap. The caller 1354 * will add the IP/SCTP header. 1355 */ 1356 mblk_t * 1357 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets, 1358 uint_t nsets, uint32_t seglen) 1359 { 1360 mblk_t *ftsn_mp; 1361 sctp_chunk_hdr_t *ch_hdr; 1362 uint32_t *advtsn; 1363 uint16_t schlen; 1364 size_t xtralen; 1365 ftsn_entry_t *ftsn_entry; 1366 1367 seglen += sizeof (sctp_chunk_hdr_t); 1368 if (fp->isv4) 1369 xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra; 1370 else 1371 xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra; 1372 ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp)); 1373 if (ftsn_mp == NULL) 1374 return (NULL); 1375 ftsn_mp->b_rptr += xtralen; 1376 ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen; 1377 1378 ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr; 1379 ch_hdr->sch_id = CHUNK_FORWARD_TSN; 1380 ch_hdr->sch_flags = 0; 1381 /* 1382 * The cast here should not be an issue since seglen is 1383 * the length of the Forward TSN chunk. 1384 */ 1385 schlen = (uint16_t)seglen; 1386 U16_TO_ABE16(schlen, &(ch_hdr->sch_len)); 1387 1388 advtsn = (uint32_t *)(ch_hdr + 1); 1389 U32_TO_ABE32(sctp->sctp_adv_pap, advtsn); 1390 ftsn_entry = (ftsn_entry_t *)(advtsn + 1); 1391 while (nsets > 0) { 1392 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr); 1393 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid; 1394 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn; 1395 ftsn_entry++; 1396 sets = sets->next; 1397 nsets--; 1398 } 1399 return (ftsn_mp); 1400 } 1401 1402 /* 1403 * Given a starting message, the routine steps through all the 1404 * messages whose TSN is less than sctp->sctp_adv_pap and creates 1405 * ftsn sets. The ftsn sets is then used to create an Forward TSN 1406 * chunk. All the messages, that have chunks that are included in the 1407 * ftsn sets, are flagged abandonded. 
If a message is partially sent 1408 * and is deemed abandoned, all remaining unsent chunks are marked 1409 * abandoned and are deducted from sctp_unsent. 1410 */ 1411 void 1412 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp, 1413 sctp_faddr_t *fp, uint32_t *seglen) 1414 { 1415 mblk_t *mp1 = mp; 1416 mblk_t *mp_head = mp; 1417 mblk_t *meta_head = meta; 1418 mblk_t *head; 1419 sctp_ftsn_set_t *sets = NULL; 1420 uint_t nsets = 0; 1421 uint16_t clen; 1422 sctp_data_hdr_t *sdc; 1423 uint32_t sacklen; 1424 uint32_t adv_pap = sctp->sctp_adv_pap; 1425 uint32_t unsent = 0; 1426 boolean_t ubit; 1427 1428 *seglen = sizeof (uint32_t); 1429 1430 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1431 while (meta != NULL && 1432 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { 1433 /* 1434 * Skip adding FTSN sets for un-ordered messages as they do 1435 * not have SSNs. 1436 */ 1437 ubit = SCTP_DATA_GET_UBIT(sdc); 1438 if (!ubit && 1439 !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) { 1440 meta = NULL; 1441 sctp->sctp_adv_pap = adv_pap; 1442 goto ftsn_done; 1443 } 1444 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { 1445 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1446 adv_pap = ntohl(sdc->sdh_tsn); 1447 mp1 = mp1->b_next; 1448 } 1449 meta = meta->b_next; 1450 if (meta != NULL) { 1451 mp1 = meta->b_cont; 1452 if (!SCTP_CHUNK_ISSENT(mp1)) 1453 break; 1454 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1455 } 1456 } 1457 ftsn_done: 1458 /* 1459 * Can't compare with sets == NULL, since we don't add any 1460 * sets for un-ordered messages. 
1461 */ 1462 if (meta == meta_head) 1463 return; 1464 *nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen); 1465 sctp_free_ftsn_set(sets); 1466 if (*nmp == NULL) 1467 return; 1468 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1469 sacklen = 0; 1470 } else { 1471 sacklen = sizeof (sctp_chunk_hdr_t) + 1472 sizeof (sctp_sack_chunk_t) + 1473 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1474 if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1475 /* piggybacked SACK doesn't fit */ 1476 sacklen = 0; 1477 } else { 1478 fp = sctp->sctp_lastdata; 1479 } 1480 } 1481 head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL); 1482 if (head == NULL) { 1483 freemsg(*nmp); 1484 *nmp = NULL; 1485 SCTP_KSTAT(sctp_send_ftsn_failed); 1486 return; 1487 } 1488 *seglen += sacklen; 1489 *nmp = head; 1490 1491 /* 1492 * XXXNeed to optimise this, the reason it is done here is so 1493 * that we don't have to undo in case of failure. 1494 */ 1495 mp1 = mp_head; 1496 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1497 while (meta_head != NULL && 1498 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { 1499 if (!SCTP_IS_MSG_ABANDONED(meta_head)) 1500 SCTP_MSG_SET_ABANDONED(meta_head); 1501 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { 1502 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1503 if (!SCTP_CHUNK_ISACKED(mp1)) { 1504 clen = ntohs(sdc->sdh_len) - sizeof (*sdc); 1505 SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen, 1506 meta_head); 1507 } 1508 mp1 = mp1->b_next; 1509 } 1510 while (mp1 != NULL) { 1511 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1512 if (!SCTP_CHUNK_ABANDONED(mp1)) { 1513 ASSERT(!SCTP_CHUNK_ISSENT(mp1)); 1514 unsent += ntohs(sdc->sdh_len) - sizeof (*sdc); 1515 SCTP_ABANDON_CHUNK(mp1); 1516 } 1517 mp1 = mp1->b_next; 1518 } 1519 meta_head = meta_head->b_next; 1520 if (meta_head != NULL) { 1521 mp1 = meta_head->b_cont; 1522 if (!SCTP_CHUNK_ISSENT(mp1)) 1523 break; 1524 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1525 } 1526 } 1527 if (unsent > 0) { 1528 ASSERT(sctp->sctp_unsent >= 
unsent); 1529 sctp->sctp_unsent -= unsent; 1530 /* 1531 * Update ULP the amount of queued data, which is 1532 * sent-unack'ed + unsent. 1533 */ 1534 if (!SCTP_IS_DETACHED(sctp)) { 1535 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, 1536 sctp->sctp_unacked + sctp->sctp_unsent); 1537 } 1538 } 1539 } 1540 1541 /* 1542 * This function steps through messages starting at meta and checks if 1543 * the message is abandoned. It stops when it hits an unsent chunk or 1544 * a message that has all its chunk acked. This is the only place 1545 * where the sctp_adv_pap is moved forward to indicated abandoned 1546 * messages. 1547 */ 1548 void 1549 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp) 1550 { 1551 uint32_t tsn = sctp->sctp_adv_pap; 1552 sctp_data_hdr_t *sdc; 1553 sctp_msg_hdr_t *msg_hdr; 1554 1555 ASSERT(mp != NULL); 1556 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1557 ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd)); 1558 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1559 if (!SCTP_IS_MSG_ABANDONED(meta) && 1560 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 1561 return; 1562 } 1563 while (meta != NULL) { 1564 while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) { 1565 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1566 tsn = ntohl(sdc->sdh_tsn); 1567 mp = mp->b_next; 1568 } 1569 if (mp != NULL) 1570 break; 1571 /* 1572 * We continue checking for successive messages only if there 1573 * is a chunk marked for retransmission. Else, we might 1574 * end up sending FTSN prematurely for chunks that have been 1575 * sent, but not yet acked. 
1576 */ 1577 if ((meta = meta->b_next) != NULL) { 1578 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1579 if (!SCTP_IS_MSG_ABANDONED(meta) && 1580 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 1581 break; 1582 } 1583 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1584 if (!SCTP_CHUNK_ISSENT(mp)) { 1585 sctp->sctp_adv_pap = tsn; 1586 return; 1587 } 1588 if (SCTP_CHUNK_WANT_REXMIT(mp)) 1589 break; 1590 } 1591 if (mp == NULL) 1592 break; 1593 } 1594 } 1595 sctp->sctp_adv_pap = tsn; 1596 } 1597 1598 1599 /* 1600 * Determine if we should bundle a data chunk with the chunk being 1601 * retransmitted. We bundle if 1602 * 1603 * - the chunk is sent to the same destination and unack'ed. 1604 * 1605 * OR 1606 * 1607 * - the chunk is unsent, i.e. new data. 1608 */ 1609 #define SCTP_CHUNK_RX_CANBUNDLE(mp, fp) \ 1610 (!SCTP_CHUNK_ABANDONED((mp)) && \ 1611 ((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) && \ 1612 !SCTP_CHUNK_ISACKED(mp))) || \ 1613 (((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \ 1614 SCTP_CHUNK_FLAG_SENT))) 1615 1616 /* 1617 * Retransmit first segment which hasn't been acked with cumtsn or send 1618 * a Forward TSN chunk, if appropriate. 
1619 */ 1620 void 1621 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) 1622 { 1623 mblk_t *mp; 1624 mblk_t *nmp = NULL; 1625 mblk_t *head; 1626 mblk_t *meta = sctp->sctp_xmit_head; 1627 mblk_t *fill; 1628 uint32_t seglen = 0; 1629 uint32_t sacklen; 1630 uint16_t chunklen; 1631 int extra; 1632 sctp_data_hdr_t *sdc; 1633 sctp_faddr_t *fp; 1634 uint32_t adv_pap = sctp->sctp_adv_pap; 1635 boolean_t do_ftsn = B_FALSE; 1636 boolean_t ftsn_check = B_TRUE; 1637 uint32_t first_ua_tsn; 1638 sctp_msg_hdr_t *mhdr; 1639 uint32_t tot_wnd; 1640 1641 while (meta != NULL) { 1642 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1643 uint32_t tsn; 1644 1645 if (!SCTP_CHUNK_ISSENT(mp)) 1646 goto window_probe; 1647 /* 1648 * We break in the following cases - 1649 * 1650 * if the advanced peer ack point includes the next 1651 * chunk to be retransmited - possibly the Forward 1652 * TSN was lost. 1653 * 1654 * if we are PRSCTP aware and the next chunk to be 1655 * retransmitted is now abandoned 1656 * 1657 * if the next chunk to be retransmitted is for 1658 * the dest on which the timer went off. (this 1659 * message is not abandoned). 1660 * 1661 * We check for Forward TSN only for the first 1662 * eligible chunk to be retransmitted. The reason 1663 * being if the first eligible chunk is skipped (say 1664 * it was sent to a destination other than oldfp) 1665 * then we cannot advance the cum TSN via Forward 1666 * TSN chunk. 1667 * 1668 * Also, ftsn_check is B_TRUE only for the first 1669 * eligible chunk, it will be B_FALSE for all 1670 * subsequent candidate messages for retransmission. 
1671 */ 1672 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1673 tsn = ntohl(sdc->sdh_tsn); 1674 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) { 1675 if (sctp->sctp_prsctp_aware && ftsn_check) { 1676 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) { 1677 ASSERT(sctp->sctp_prsctp_aware); 1678 do_ftsn = B_TRUE; 1679 goto out; 1680 } else { 1681 sctp_check_adv_ack_pt(sctp, 1682 meta, mp); 1683 if (SEQ_GT(sctp->sctp_adv_pap, 1684 adv_pap)) { 1685 do_ftsn = B_TRUE; 1686 goto out; 1687 } 1688 } 1689 ftsn_check = B_FALSE; 1690 } 1691 if (SCTP_CHUNK_DEST(mp) == oldfp) 1692 goto out; 1693 } 1694 } 1695 meta = meta->b_next; 1696 if (meta != NULL && sctp->sctp_prsctp_aware) { 1697 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1698 1699 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || 1700 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { 1701 meta = meta->b_next; 1702 } 1703 } 1704 } 1705 window_probe: 1706 /* 1707 * Retransmit fired for a destination which didn't have 1708 * any unacked data pending. 1709 */ 1710 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) { 1711 /* 1712 * Send a window probe. Inflate frwnd to allow 1713 * sending one segment. 1714 */ 1715 if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) 1716 sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc); 1717 /* next TSN to send */ 1718 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn; 1719 sctp_output(sctp); 1720 /* Last sent TSN */ 1721 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1722 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); 1723 sctp->sctp_zero_win_probe = B_TRUE; 1724 BUMP_MIB(&sctp_mib, sctpOutWinProbe); 1725 } 1726 return; 1727 out: 1728 /* 1729 * If were are probing for zero window, don't adjust retransmission 1730 * variables, but the timer is still backed off. 1731 */ 1732 if (sctp->sctp_zero_win_probe) { 1733 mblk_t *pkt; 1734 uint_t pkt_len; 1735 1736 /* 1737 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn 1738 * and sctp_rxt_maxtsn will specify the ZWP packet. 
1739 */ 1740 fp = oldfp; 1741 if (oldfp->state != SCTP_FADDRS_ALIVE) 1742 fp = sctp_rotate_faddr(sctp, oldfp); 1743 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 1744 if (pkt != NULL) { 1745 ASSERT(pkt_len <= fp->sfa_pmss); 1746 sctp_set_iplen(sctp, pkt); 1747 sctp_add_sendq(sctp, pkt); 1748 } else { 1749 SCTP_KSTAT(sctp_ss_rexmit_failed); 1750 } 1751 oldfp->strikes++; 1752 sctp->sctp_strikes++; 1753 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1754 if (oldfp != fp && oldfp->suna != 0) 1755 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); 1756 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1757 BUMP_MIB(&sctp_mib, sctpOutWinProbe); 1758 return; 1759 } 1760 1761 /* 1762 * Enter slowstart for this destination 1763 */ 1764 oldfp->ssthresh = oldfp->cwnd / 2; 1765 if (oldfp->ssthresh < 2 * oldfp->sfa_pmss) 1766 oldfp->ssthresh = 2 * oldfp->sfa_pmss; 1767 oldfp->cwnd = oldfp->sfa_pmss; 1768 oldfp->pba = 0; 1769 fp = sctp_rotate_faddr(sctp, oldfp); 1770 ASSERT(fp != NULL); 1771 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1772 1773 first_ua_tsn = ntohl(sdc->sdh_tsn); 1774 if (do_ftsn) { 1775 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); 1776 if (nmp == NULL) { 1777 sctp->sctp_adv_pap = adv_pap; 1778 goto restart_timer; 1779 } 1780 head = nmp; 1781 /* 1782 * Move to the next unabandoned chunk. XXXCheck if meta will 1783 * always be marked abandoned. 1784 */ 1785 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) 1786 meta = meta->b_next; 1787 if (meta != NULL) 1788 mp = mp->b_cont; 1789 else 1790 mp = NULL; 1791 goto try_bundle; 1792 } 1793 seglen = ntohs(sdc->sdh_len); 1794 chunklen = seglen - sizeof (*sdc); 1795 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1796 extra = SCTP_ALIGN - extra; 1797 1798 /* Find out if we need to piggyback SACK. 
*/ 1799 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1800 sacklen = 0; 1801 } else { 1802 sacklen = sizeof (sctp_chunk_hdr_t) + 1803 sizeof (sctp_sack_chunk_t) + 1804 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1805 if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1806 /* piggybacked SACK doesn't fit */ 1807 sacklen = 0; 1808 } else { 1809 /* 1810 * OK, we have room to send SACK back. But we 1811 * should send it back to the last fp where we 1812 * receive data from, unless sctp_lastdata equals 1813 * oldfp, then we should probably not send it 1814 * back to that fp. Also we should check that 1815 * the fp is alive. 1816 */ 1817 if (sctp->sctp_lastdata != oldfp && 1818 sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { 1819 fp = sctp->sctp_lastdata; 1820 } 1821 } 1822 } 1823 1824 /* 1825 * Cancel RTT measurement if the retransmitted TSN is before the 1826 * TSN used for timimg. 1827 */ 1828 if (sctp->sctp_out_time != 0 && 1829 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { 1830 sctp->sctp_out_time = 0; 1831 } 1832 /* Clear the counter as the RTT calculation may be off. */ 1833 fp->rtt_updates = 0; 1834 oldfp->rtt_updates = 0; 1835 1836 /* 1837 * After a timeout, we should change the current faddr so that 1838 * new chunks will be sent to the alternate address. 1839 */ 1840 sctp_set_faddr_current(sctp, fp); 1841 1842 nmp = dupmsg(mp); 1843 if (nmp == NULL) 1844 goto restart_timer; 1845 if (extra > 0) { 1846 fill = sctp_get_padding(extra); 1847 if (fill != NULL) { 1848 linkb(nmp, fill); 1849 seglen += extra; 1850 } else { 1851 freemsg(nmp); 1852 goto restart_timer; 1853 } 1854 } 1855 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1856 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); 1857 if (head == NULL) { 1858 freemsg(nmp); 1859 SCTP_KSTAT(sctp_rexmit_failed); 1860 goto restart_timer; 1861 } 1862 seglen += sacklen; 1863 1864 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1865 1866 mp = mp->b_next; 1867 1868 /* Check how much more we can send. 
*/ 1869 tot_wnd = MIN(fp->cwnd, sctp->sctp_frwnd); 1870 /* 1871 * If the number of outstanding bytes is more than what we are 1872 * allowed to send, stop. 1873 */ 1874 if (tot_wnd <= chunklen || tot_wnd < fp->suna + chunklen) 1875 goto done_bundle; 1876 else 1877 tot_wnd -= chunklen; 1878 1879 try_bundle: 1880 while (seglen < fp->sfa_pmss) { 1881 int32_t new_len; 1882 1883 /* Go through the list to find more chunks to be bundled. */ 1884 while (mp != NULL) { 1885 /* Check if the chunk can be bundled. */ 1886 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) 1887 break; 1888 mp = mp->b_next; 1889 } 1890 /* Go to the next message. */ 1891 if (mp == NULL) { 1892 for (meta = meta->b_next; meta != NULL; 1893 meta = meta->b_next) { 1894 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1895 1896 if (SCTP_IS_MSG_ABANDONED(meta) || 1897 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, 1898 sctp)) { 1899 continue; 1900 } 1901 1902 mp = meta->b_cont; 1903 goto try_bundle; 1904 } 1905 /* No more chunk to be bundled. */ 1906 break; 1907 } 1908 1909 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1910 new_len = ntohs(sdc->sdh_len); 1911 chunklen = new_len - sizeof (*sdc); 1912 if (chunklen > tot_wnd) 1913 break; 1914 1915 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 1916 extra = SCTP_ALIGN - extra; 1917 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 1918 break; 1919 if ((nmp = dupmsg(mp)) == NULL) 1920 break; 1921 1922 if (extra > 0) { 1923 fill = sctp_get_padding(extra); 1924 if (fill != NULL) { 1925 linkb(nmp, fill); 1926 } else { 1927 freemsg(nmp); 1928 break; 1929 } 1930 } 1931 linkb(head, nmp); 1932 1933 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1934 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1935 1936 seglen = new_len; 1937 tot_wnd -= chunklen; 1938 mp = mp->b_next; 1939 } 1940 done_bundle: 1941 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1942 ipha_t *iph = (ipha_t *)head->b_rptr; 1943 1944 /* 1945 * Path MTU is different from path we thought it would 1946 * be when we created chunks, or IP headers have 
grown. 1947 * Need to clear the DF bit. 1948 */ 1949 iph->ipha_fragment_offset_and_flags = 0; 1950 } 1951 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x " 1952 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n", 1953 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), 1954 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); 1955 1956 sctp->sctp_rexmitting = B_TRUE; 1957 sctp->sctp_rxt_nxttsn = first_ua_tsn; 1958 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1959 sctp_set_iplen(sctp, head); 1960 sctp_add_sendq(sctp, head); 1961 1962 /* 1963 * Restart the oldfp timer with exponential backoff and 1964 * the new fp timer for the retransmitted chunks. 1965 */ 1966 restart_timer: 1967 oldfp->strikes++; 1968 sctp->sctp_strikes++; 1969 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1970 if (oldfp->suna != 0) 1971 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); 1972 sctp->sctp_active = lbolt64; 1973 1974 /* 1975 * Should we restart the timer of the new fp? If there is 1976 * outstanding data to the new fp, the timer should be 1977 * running already. So restarting it means that the timer 1978 * will fire later for those outstanding data. But if 1979 * we don't restart it, the timer will fire too early for the 1980 * just retransmitted chunks to the new fp. The reason is that we 1981 * don't keep a timestamp on when a chunk is retransmitted. 1982 * So when the timer fires, it will just search for the 1983 * chunk with the earliest TSN sent to new fp. This probably 1984 * is the chunk we just retransmitted. So for now, let's 1985 * be conservative and restart the timer of the new fp. 1986 */ 1987 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1988 } 1989 1990 /* 1991 * The SCTP write put procedure called from IP. 
1992 */ 1993 void 1994 sctp_wput(queue_t *q, mblk_t *mp) 1995 { 1996 uchar_t *rptr; 1997 t_scalar_t type; 1998 1999 switch (mp->b_datap->db_type) { 2000 case M_IOCTL: 2001 sctp_wput_ioctl(q, mp); 2002 break; 2003 case M_DATA: 2004 /* Should be handled in sctp_output() */ 2005 ASSERT(0); 2006 freemsg(mp); 2007 break; 2008 case M_PROTO: 2009 case M_PCPROTO: 2010 rptr = mp->b_rptr; 2011 if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) { 2012 type = ((union T_primitives *)rptr)->type; 2013 /* 2014 * There is no "standard" way on how to respond 2015 * to T_CAPABILITY_REQ if a module does not 2016 * understand it. And the current TI mod 2017 * has problems handling an error ack. So we 2018 * catch the request here and reply with a response 2019 * which the TI mod knows how to respond to. 2020 */ 2021 switch (type) { 2022 case T_CAPABILITY_REQ: 2023 (void) putnextctl1(RD(q), M_ERROR, EPROTO); 2024 break; 2025 default: 2026 if ((mp = mi_tpi_err_ack_alloc(mp, 2027 TNOTSUPPORT, 0)) != NULL) { 2028 qreply(q, mp); 2029 return; 2030 } 2031 } 2032 } 2033 /* FALLTHRU */ 2034 default: 2035 freemsg(mp); 2036 return; 2037 } 2038 } 2039 2040 /* 2041 * This function is called by sctp_ss_rexmit() to create a packet 2042 * to be retransmitted to the given fp. The given meta and mp 2043 * parameters are respectively the sctp_msg_hdr_t and the mblk of the 2044 * first chunk to be retransmitted. This is also called when we want 2045 * to retransmit a zero window probe from sctp_rexmit() or when we 2046 * want to retransmit the zero window probe after the window has 2047 * opened from sctp_got_sack(). 
2048 */ 2049 mblk_t * 2050 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, 2051 uint_t *packet_len) 2052 { 2053 uint32_t seglen = 0; 2054 uint16_t chunklen; 2055 int extra; 2056 mblk_t *nmp; 2057 mblk_t *head; 2058 mblk_t *fill; 2059 sctp_data_hdr_t *sdc; 2060 sctp_msg_hdr_t *mhdr; 2061 2062 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2063 seglen = ntohs(sdc->sdh_len); 2064 chunklen = seglen - sizeof (*sdc); 2065 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 2066 extra = SCTP_ALIGN - extra; 2067 2068 nmp = dupmsg(*mp); 2069 if (nmp == NULL) 2070 return (NULL); 2071 if (extra > 0) { 2072 fill = sctp_get_padding(extra); 2073 if (fill != NULL) { 2074 linkb(nmp, fill); 2075 seglen += extra; 2076 } else { 2077 freemsg(nmp); 2078 return (NULL); 2079 } 2080 } 2081 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2082 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL); 2083 if (head == NULL) { 2084 freemsg(nmp); 2085 return (NULL); 2086 } 2087 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2088 /* 2089 * Don't update the TSN if we are doing a Zero Win Probe. 2090 */ 2091 if (!sctp->sctp_zero_win_probe) 2092 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2093 *mp = (*mp)->b_next; 2094 2095 try_bundle: 2096 while (seglen < fp->sfa_pmss) { 2097 int32_t new_len; 2098 2099 /* 2100 * Go through the list to find more chunks to be bundled. 2101 * We should only retransmit sent by unack'ed chunks. Since 2102 * they were sent before, the peer's receive window should 2103 * be able to receive them. 2104 */ 2105 while (*mp != NULL) { 2106 /* Check if the chunk can be bundled. */ 2107 if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp)) 2108 break; 2109 *mp = (*mp)->b_next; 2110 } 2111 /* Go to the next message. 
*/ 2112 if (*mp == NULL) { 2113 for (*meta = (*meta)->b_next; *meta != NULL; 2114 *meta = (*meta)->b_next) { 2115 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr; 2116 2117 if (SCTP_IS_MSG_ABANDONED(*meta) || 2118 SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr, 2119 sctp)) { 2120 continue; 2121 } 2122 2123 *mp = (*meta)->b_cont; 2124 goto try_bundle; 2125 } 2126 /* No more chunk to be bundled. */ 2127 break; 2128 } 2129 2130 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2131 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */ 2132 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn)) 2133 break; 2134 new_len = ntohs(sdc->sdh_len); 2135 chunklen = new_len - sizeof (*sdc); 2136 2137 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 2138 extra = SCTP_ALIGN - extra; 2139 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 2140 break; 2141 if ((nmp = dupmsg(*mp)) == NULL) 2142 break; 2143 2144 if (extra > 0) { 2145 fill = sctp_get_padding(extra); 2146 if (fill != NULL) { 2147 linkb(nmp, fill); 2148 } else { 2149 freemsg(nmp); 2150 break; 2151 } 2152 } 2153 linkb(head, nmp); 2154 2155 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2156 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2157 /* 2158 * Don't update the TSN if we are doing a Zero Win Probe. 2159 */ 2160 if (!sctp->sctp_zero_win_probe) 2161 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2162 2163 seglen = new_len; 2164 *mp = (*mp)->b_next; 2165 } 2166 *packet_len = seglen; 2167 return (head); 2168 } 2169 2170 /* 2171 * sctp_ss_rexmit() is called when we get a SACK after a timeout which 2172 * advances the cum_tsn but the cum_tsn is still less than what we have sent 2173 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial" 2174 * SACK. We retransmit unacked chunks without having to wait for another 2175 * timeout. The rationale is that the SACK should not be "partial" if all the 2176 * lost chunks have been retransmitted. 
Since the SACK is "partial,"
 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
 * be missing.  It is better for us to retransmit them now instead
 * of waiting for a timeout.
 *
 * Packets are built by sctp_rexmit_packet() and sent to sctp_current.
 * At most sctp_maxburst packets are sent per call (always at least one),
 * and sending also stops once less than one pmss of the congestion
 * window is left.
 */
void
sctp_ss_rexmit(sctp_t *sctp)
{
	mblk_t		*meta;
	mblk_t		*mp;
	mblk_t		*pkt;
	sctp_faddr_t	*fp;
	uint_t		pkt_len;
	uint32_t	tot_wnd;
	sctp_data_hdr_t	*sdc;
	int		burst;

	ASSERT(!sctp->sctp_zero_win_probe);

	/*
	 * If the last cum ack is smaller than what we have just
	 * retransmitted, simply return.
	 */
	if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
		sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
	else
		return;
	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));

	/*
	 * After a timer fires, sctp_current should be set to the new
	 * fp where the retransmitted chunks are sent.
	 */
	fp = sctp->sctp_current;

	/*
	 * Since we are retransmitting, we can only use cwnd to determine
	 * how much we can send as we were allowed to send those chunks
	 * previously.
	 */
	tot_wnd = fp->cwnd;
	/* So we have sent more than we can, just return. */
	if (tot_wnd < fp->suna || tot_wnd - fp->suna < fp->sfa_pmss)
		return;
	else
		tot_wnd -= fp->suna;

	/* Find the first unack'ed chunk */
	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;

		if (SCTP_IS_MSG_ABANDONED(meta) ||
		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
			continue;
		}

		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
			/* Again, this may not be possible */
			if (!SCTP_CHUNK_ISSENT(mp))
				return;
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
				goto found_msg;
		}
	}

	/* Everything is abandoned... */
	return;

found_msg:
	/*
	 * The burst budget is set exactly once, before the first packet.
	 * Control comes back to send_pkt for every further packet, so it
	 * must not be reset there.
	 */
	burst = sctp_maxburst;

send_pkt:
	if (!fp->timer_running)
		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
	pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
	if (pkt == NULL) {
		SCTP_KSTAT(sctp_ss_rexmit_failed);
		return;
	}
	if ((pkt_len > fp->sfa_pmss) && fp->isv4) {
		ipha_t	*iph = (ipha_t *)pkt->b_rptr;

		/*
		 * Path MTU is different from path we thought it would
		 * be when we created chunks, or IP headers have grown.
		 *  Need to clear the DF bit.
		 */
		iph->ipha_fragment_offset_and_flags = 0;
	}
	sctp_set_iplen(sctp, pkt);
	sctp_add_sendq(sctp, pkt);

	/* One packet of the burst has been used up. */
	if (--burst <= 0)
		return;

	/* Check and see if there is more chunk to be retransmitted. */
	if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss ||
	    meta == NULL)
		return;
	if (mp == NULL)
		meta = meta->b_next;
	if (meta == NULL)
		return;
	tot_wnd -= pkt_len;

	/* Retransmit another packet if the window allows. */
	for (; meta != NULL; meta = meta->b_next) {
		if (mp == NULL)
			mp = meta->b_cont;
		for (; mp != NULL; mp = mp->b_next) {
			/* Again, this may not be possible */
			if (!SCTP_CHUNK_ISSENT(mp))
				return;
			if (!SCTP_CHUNK_ISACKED(mp))
				goto send_pkt;
		}
	}
}