1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #define _SUN_TPI_VERSION 2 34 #include <sys/tihdr.h> 35 #include <sys/socket.h> 36 #include <sys/stropts.h> 37 #include <sys/strsun.h> 38 #include <sys/strsubr.h> 39 #include <sys/socketvar.h> 40 /* swilly code in sys/socketvar.h turns off DEBUG */ 41 #ifdef __lint 42 #define DEBUG 43 #endif 44 45 #include <inet/common.h> 46 #include <inet/mi.h> 47 #include <inet/ip.h> 48 #include <inet/ip6.h> 49 #include <inet/sctp_ip.h> 50 #include <inet/ipclassifier.h> 51 52 /* 53 * PR-SCTP comments. 54 * 55 * A message can expire before it gets to the transmit list (i.e. it is still 56 * in the unsent list - unchunked), after it gets to the transmit list, but 57 * before transmission has actually started, or after transmission has begun. 
58 * Accordingly, we check for the status of a message in sctp_chunkify() when 59 * the message is being transferred from the unsent list to the transmit list; 60 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit 61 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted. 62 * When we nuke a message in sctp_chunkify(), all we need to do is take it 63 * out of the unsent list and update sctp_unsent; when a message is deemed 64 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit 65 * list, update sctp_unsent IFF transmission for the message has not yet begun 66 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the 67 * message has started, then we cannot just take it out of the list, we need 68 * to send a Forward TSN chunk to the peer so that the peer can clear its 69 * fragment list for this message. However, we cannot just send the Forward 70 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for 71 * messages preceding this abandoned message. So, we send a Forward TSN 72 * IFF all messages prior to this abandoned message have been SACKed, if not 73 * we defer sending the Forward TSN to sctp_cumack(), which will check for 74 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In 75 * sctp_rexmit() when we check for retransmissions, we need to determine if 76 * the advanced peer ack point can be moved ahead, and if so, send a Forward 77 * TSN to the peer instead of retransmitting the chunk. Note that when 78 * we send a Forward TSN for a message, there may be yet unsent chunks for 79 * this message; we need to mark all such chunks as abandoned, so that 80 * sctp_cumack() can take the message out of the transmit list, additionally 81 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e. 
82 * decremented when a message/chunk is deemed abandoned), sockfs needs to 83 * be notified so that it can adjust its idea of the queued message. 84 */ 85 86 #include "sctp_impl.h" 87 88 static struct kmem_cache *sctp_kmem_ftsn_set_cache; 89 90 #ifdef DEBUG 91 static boolean_t sctp_verify_chain(mblk_t *, mblk_t *); 92 #endif 93 94 /* 95 * Called to allocate a header mblk when sending data to SCTP. 96 * Data will follow in b_cont of this mblk. 97 */ 98 mblk_t * 99 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen, 100 int flags) 101 { 102 mblk_t *mp; 103 struct T_unitdata_req *tudr; 104 size_t size; 105 int error; 106 107 size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen; 108 size = MAX(size, sizeof (sctp_msg_hdr_t)); 109 if (flags & SCTP_CAN_BLOCK) { 110 mp = allocb_wait(size, BPRI_MED, 0, &error); 111 } else { 112 mp = allocb(size, BPRI_MED); 113 } 114 if (mp) { 115 tudr = (struct T_unitdata_req *)mp->b_rptr; 116 tudr->PRIM_type = T_UNITDATA_REQ; 117 tudr->DEST_length = nlen; 118 tudr->DEST_offset = sizeof (*tudr); 119 tudr->OPT_length = clen; 120 tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) + 121 _TPI_ALIGN_TOPT(nlen)); 122 if (nlen > 0) 123 bcopy(name, tudr + 1, nlen); 124 if (clen > 0) 125 bcopy(control, (char *)tudr + tudr->OPT_offset, clen); 126 mp->b_wptr += (tudr ->OPT_offset + clen); 127 mp->b_datap->db_type = M_PROTO; 128 } 129 return (mp); 130 } 131 132 /*ARGSUSED2*/ 133 int 134 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags) 135 { 136 sctp_faddr_t *fp = NULL; 137 struct T_unitdata_req *tudr; 138 int error = 0; 139 mblk_t *mproto = mp; 140 in6_addr_t *addr; 141 in6_addr_t tmpaddr; 142 uint16_t sid = sctp->sctp_def_stream; 143 uint32_t ppid = sctp->sctp_def_ppid; 144 uint32_t context = sctp->sctp_def_context; 145 uint16_t msg_flags = sctp->sctp_def_flags; 146 sctp_msg_hdr_t *sctp_msg_hdr; 147 uint32_t msg_len = 0; 148 uint32_t timetolive = sctp->sctp_def_timetolive; 149 150 ASSERT(DB_TYPE(mproto) == M_PROTO); 151 152 mp = 
mp->b_cont; 153 ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA); 154 155 tudr = (struct T_unitdata_req *)mproto->b_rptr; 156 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 157 158 /* Get destination address, if specified */ 159 if (tudr->DEST_length > 0) { 160 sin_t *sin; 161 sin6_t *sin6; 162 163 sin = (struct sockaddr_in *) 164 (mproto->b_rptr + tudr->DEST_offset); 165 switch (sin->sin_family) { 166 case AF_INET: 167 if (tudr->DEST_length < sizeof (*sin)) { 168 return (EINVAL); 169 } 170 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr); 171 addr = &tmpaddr; 172 break; 173 case AF_INET6: 174 if (tudr->DEST_length < sizeof (*sin6)) { 175 return (EINVAL); 176 } 177 sin6 = (struct sockaddr_in6 *) 178 (mproto->b_rptr + tudr->DEST_offset); 179 addr = &sin6->sin6_addr; 180 break; 181 default: 182 return (EAFNOSUPPORT); 183 } 184 fp = sctp_lookup_faddr(sctp, addr); 185 if (fp == NULL) { 186 return (EINVAL); 187 } 188 } 189 /* Ancillary Data? */ 190 if (tudr->OPT_length > 0) { 191 struct cmsghdr *cmsg; 192 char *cend; 193 struct sctp_sndrcvinfo *sndrcv; 194 195 cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset); 196 cend = ((char *)cmsg + tudr->OPT_length); 197 ASSERT(cend <= (char *)mproto->b_wptr); 198 199 for (;;) { 200 if ((char *)(cmsg + 1) > cend || 201 ((char *)cmsg + cmsg->cmsg_len) > cend) { 202 break; 203 } 204 if ((cmsg->cmsg_level == IPPROTO_SCTP) && 205 (cmsg->cmsg_type == SCTP_SNDRCV)) { 206 if (cmsg->cmsg_len < 207 (sizeof (*sndrcv) + sizeof (*cmsg))) { 208 return (EINVAL); 209 } 210 sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1); 211 sid = sndrcv->sinfo_stream; 212 msg_flags = sndrcv->sinfo_flags; 213 ppid = sndrcv->sinfo_ppid; 214 context = sndrcv->sinfo_context; 215 timetolive = sndrcv->sinfo_timetolive; 216 break; 217 } 218 if (cmsg->cmsg_len > 0) 219 cmsg = CMSG_NEXT(cmsg); 220 else 221 break; 222 } 223 } 224 if (msg_flags & MSG_ABORT) { 225 if (mp && mp->b_cont) { 226 mblk_t *pump = msgpullup(mp, -1); 227 if (!pump) { 228 return (ENOMEM); 229 } 
230 freemsg(mp); 231 mp = pump; 232 mproto->b_cont = mp; 233 } 234 RUN_SCTP(sctp); 235 sctp_user_abort(sctp, mp, B_TRUE); 236 sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL); 237 sctp_clean_death(sctp, ECONNRESET); 238 freemsg(mproto); 239 goto process_sendq; 240 } 241 if (mp == NULL) 242 goto done; 243 244 RUN_SCTP(sctp); 245 246 /* Reject any new data requests if we are shutting down */ 247 if (sctp->sctp_state > SCTPS_ESTABLISHED) { 248 error = EPIPE; 249 goto unlock_done; 250 } 251 252 /* Re-use the mproto to store relevant info. */ 253 ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr)); 254 255 mproto->b_rptr = mproto->b_datap->db_base; 256 mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr); 257 258 sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr; 259 bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr)); 260 sctp_msg_hdr->smh_context = context; 261 sctp_msg_hdr->smh_sid = sid; 262 sctp_msg_hdr->smh_ppid = ppid; 263 sctp_msg_hdr->smh_flags = msg_flags; 264 sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive); 265 sctp_msg_hdr->smh_tob = lbolt64; 266 for (; mp != NULL; mp = mp->b_cont) 267 msg_len += MBLKL(mp); 268 sctp_msg_hdr->smh_msglen = msg_len; 269 270 /* User requested specific destination */ 271 SCTP_SET_CHUNK_DEST(mproto, fp); 272 273 if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED && 274 sid >= sctp->sctp_num_ostr) { 275 /* Send sendfail event */ 276 sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID, 277 B_FALSE); 278 error = EINVAL; 279 goto unlock_done; 280 } 281 282 /* no data */ 283 if (msg_len == 0) { 284 sctp_sendfail_event(sctp, dupmsg(mproto), 285 SCTP_ERR_NO_USR_DATA, B_FALSE); 286 error = EINVAL; 287 goto unlock_done; 288 } 289 290 /* Add it to the unsent list */ 291 if (sctp->sctp_xmit_unsent == NULL) { 292 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto; 293 } else { 294 sctp->sctp_xmit_unsent_tail->b_next = mproto; 295 sctp->sctp_xmit_unsent_tail = mproto; 296 } 297 sctp->sctp_unsent += msg_len; 298 
BUMP_LOCAL(sctp->sctp_msgcount); 299 if (sctp->sctp_state == SCTPS_ESTABLISHED) 300 sctp_output(sctp, UINT_MAX); 301 process_sendq: 302 WAKE_SCTP(sctp); 303 sctp_process_sendq(sctp); 304 return (0); 305 unlock_done: 306 WAKE_SCTP(sctp); 307 done: 308 return (error); 309 } 310 311 void 312 sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send) 313 { 314 mblk_t *mp; 315 mblk_t *chunk_mp; 316 mblk_t *chunk_head; 317 mblk_t *chunk_hdr; 318 mblk_t *chunk_tail = NULL; 319 int count; 320 int chunksize; 321 sctp_data_hdr_t *sdc; 322 mblk_t *mdblk = sctp->sctp_xmit_unsent; 323 sctp_faddr_t *fp; 324 sctp_faddr_t *fp1; 325 size_t xtralen; 326 sctp_msg_hdr_t *msg_hdr; 327 sctp_stack_t *sctps = sctp->sctp_sctps; 328 329 fp = SCTP_CHUNK_DEST(mdblk); 330 if (fp == NULL) 331 fp = sctp->sctp_current; 332 if (fp->isv4) 333 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra + 334 sizeof (*sdc); 335 else 336 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra + 337 sizeof (*sdc); 338 count = chunksize = first_len - sizeof (*sdc); 339 nextmsg: 340 chunk_mp = mdblk->b_cont; 341 342 /* 343 * If this partially chunked, we ignore the first_len for now 344 * and use the one already present. For the unchunked bits, we 345 * use the length of the last chunk. 
346 */ 347 if (SCTP_IS_MSG_CHUNKED(mdblk)) { 348 int chunk_len; 349 350 ASSERT(chunk_mp->b_next != NULL); 351 mdblk->b_cont = chunk_mp->b_next; 352 chunk_mp->b_next = NULL; 353 SCTP_MSG_CLEAR_CHUNKED(mdblk); 354 mp = mdblk->b_cont; 355 while (mp->b_next != NULL) 356 mp = mp->b_next; 357 chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len); 358 if (fp->sfa_pmss - chunk_len > sizeof (*sdc)) 359 count = chunksize = fp->sfa_pmss - chunk_len; 360 else 361 count = chunksize = fp->sfa_pmss; 362 count = chunksize = count - sizeof (*sdc); 363 } else { 364 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; 365 if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) { 366 sctp->sctp_xmit_unsent = mdblk->b_next; 367 if (sctp->sctp_xmit_unsent == NULL) 368 sctp->sctp_xmit_unsent_tail = NULL; 369 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); 370 sctp->sctp_unsent -= msg_hdr->smh_msglen; 371 mdblk->b_next = NULL; 372 BUMP_LOCAL(sctp->sctp_prsctpdrop); 373 /* 374 * Update ULP the amount of queued data, which is 375 * sent-unack'ed + unsent. 
376 */ 377 if (!SCTP_IS_DETACHED(sctp)) { 378 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, 379 sctp->sctp_unacked + sctp->sctp_unsent); 380 } 381 sctp_sendfail_event(sctp, mdblk, 0, B_FALSE); 382 goto try_next; 383 } 384 mdblk->b_cont = NULL; 385 } 386 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; 387 nextchunk: 388 chunk_head = chunk_mp; 389 chunk_tail = NULL; 390 391 /* Skip as many mblk's as we need */ 392 while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) { 393 count -= MBLKL(chunk_mp); 394 chunk_tail = chunk_mp; 395 chunk_mp = chunk_mp->b_cont; 396 } 397 /* Split the chain, if needed */ 398 if (chunk_mp != NULL) { 399 if (count > 0) { 400 mblk_t *split_mp = dupb(chunk_mp); 401 402 if (split_mp == NULL) { 403 if (mdblk->b_cont == NULL) { 404 mdblk->b_cont = chunk_head; 405 } else { 406 SCTP_MSG_SET_CHUNKED(mdblk); 407 ASSERT(chunk_head->b_next == NULL); 408 chunk_head->b_next = mdblk->b_cont; 409 mdblk->b_cont = chunk_head; 410 } 411 return; 412 } 413 if (chunk_tail != NULL) { 414 chunk_tail->b_cont = split_mp; 415 chunk_tail = chunk_tail->b_cont; 416 } else { 417 chunk_head = chunk_tail = split_mp; 418 } 419 chunk_tail->b_wptr = chunk_tail->b_rptr + count; 420 chunk_mp->b_rptr = chunk_tail->b_wptr; 421 count = 0; 422 } else if (chunk_tail == NULL) { 423 goto next; 424 } else { 425 chunk_tail->b_cont = NULL; 426 } 427 } 428 /* Alloc chunk hdr, if needed */ 429 if (DB_REF(chunk_head) > 1 || 430 ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) || 431 MBLKHEAD(chunk_head) < sizeof (*sdc)) { 432 if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) { 433 if (mdblk->b_cont == NULL) { 434 if (chunk_mp != NULL) 435 linkb(chunk_head, chunk_mp); 436 mdblk->b_cont = chunk_head; 437 } else { 438 SCTP_MSG_SET_CHUNKED(mdblk); 439 if (chunk_mp != NULL) 440 linkb(chunk_head, chunk_mp); 441 ASSERT(chunk_head->b_next == NULL); 442 chunk_head->b_next = mdblk->b_cont; 443 mdblk->b_cont = chunk_head; 444 } 445 return; 446 } 447 chunk_hdr->b_rptr += xtralen - sizeof (*sdc); 448 
chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc); 449 chunk_hdr->b_cont = chunk_head; 450 } else { 451 chunk_hdr = chunk_head; 452 chunk_hdr->b_rptr -= sizeof (*sdc); 453 } 454 ASSERT(chunk_hdr->b_datap->db_ref == 1); 455 sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr; 456 sdc->sdh_id = CHUNK_DATA; 457 sdc->sdh_flags = 0; 458 sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count); 459 ASSERT(sdc->sdh_len); 460 sdc->sdh_sid = htons(msg_hdr->smh_sid); 461 /* 462 * We defer assigning the SSN just before sending the chunk, else 463 * if we drop the chunk in sctp_get_msg_to_send(), we would need 464 * to send a Forward TSN to let the peer know. Some more comments 465 * about this in sctp_impl.h for SCTP_CHUNK_SENT. 466 */ 467 sdc->sdh_payload_id = msg_hdr->smh_ppid; 468 469 if (mdblk->b_cont == NULL) { 470 mdblk->b_cont = chunk_hdr; 471 SCTP_DATA_SET_BBIT(sdc); 472 } else { 473 mp = mdblk->b_cont; 474 while (mp->b_next != NULL) 475 mp = mp->b_next; 476 mp->b_next = chunk_hdr; 477 } 478 479 bytes_to_send -= (chunksize - count); 480 if (chunk_mp != NULL) { 481 next: 482 count = chunksize = fp->sfa_pmss - sizeof (*sdc); 483 goto nextchunk; 484 } 485 SCTP_DATA_SET_EBIT(sdc); 486 sctp->sctp_xmit_unsent = mdblk->b_next; 487 if (mdblk->b_next == NULL) { 488 sctp->sctp_xmit_unsent_tail = NULL; 489 } 490 mdblk->b_next = NULL; 491 492 if (sctp->sctp_xmit_tail == NULL) { 493 sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk; 494 } else { 495 mp = sctp->sctp_xmit_tail; 496 while (mp->b_next != NULL) 497 mp = mp->b_next; 498 mp->b_next = mdblk; 499 mdblk->b_prev = mp; 500 } 501 try_next: 502 if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) { 503 mdblk = sctp->sctp_xmit_unsent; 504 fp1 = SCTP_CHUNK_DEST(mdblk); 505 if (fp1 == NULL) 506 fp1 = sctp->sctp_current; 507 if (fp == fp1) { 508 size_t len = MBLKL(mdblk->b_cont); 509 if ((count > 0) && 510 ((len > fp->sfa_pmss - sizeof (*sdc)) || 511 (len <= count))) { 512 count -= sizeof (*sdc); 513 count = chunksize = count - 
(count & 0x3); 514 } else { 515 count = chunksize = fp->sfa_pmss - 516 sizeof (*sdc); 517 } 518 } else { 519 if (fp1->isv4) 520 xtralen = sctp->sctp_hdr_len; 521 else 522 xtralen = sctp->sctp_hdr6_len; 523 xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc); 524 count = chunksize = fp1->sfa_pmss - sizeof (*sdc); 525 fp = fp1; 526 } 527 goto nextmsg; 528 } 529 } 530 531 void 532 sctp_free_msg(mblk_t *ump) 533 { 534 mblk_t *mp, *nmp; 535 536 for (mp = ump->b_cont; mp; mp = nmp) { 537 nmp = mp->b_next; 538 mp->b_next = mp->b_prev = NULL; 539 freemsg(mp); 540 } 541 ASSERT(!ump->b_prev); 542 ump->b_next = NULL; 543 freeb(ump); 544 } 545 546 mblk_t * 547 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, 548 int *error) 549 { 550 int hdrlen; 551 char *hdr; 552 int isv4 = fp->isv4; 553 sctp_stack_t *sctps = sctp->sctp_sctps; 554 555 if (error != NULL) 556 *error = 0; 557 558 if (isv4) { 559 hdrlen = sctp->sctp_hdr_len; 560 hdr = sctp->sctp_iphc; 561 } else { 562 hdrlen = sctp->sctp_hdr6_len; 563 hdr = sctp->sctp_iphc6; 564 } 565 /* 566 * A null fp->ire could mean that the address is 'down'. Similarly, 567 * it is possible that the address went down, we tried to send an 568 * heartbeat and ended up setting fp->saddr as unspec because we 569 * didn't have any usable source address. In either case 570 * sctp_get_ire() will try find an IRE, if available, and set 571 * the source address, if needed. If we still don't have any 572 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and 573 * we return EHOSTUNREACH. 574 */ 575 if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) { 576 sctp_get_ire(sctp, fp); 577 if (fp->state == SCTP_FADDRS_UNREACH) { 578 if (error != NULL) 579 *error = EHOSTUNREACH; 580 return (NULL); 581 } 582 } 583 /* Copy in IP header. 
*/ 584 if ((mp->b_rptr - mp->b_datap->db_base) < 585 (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 || 586 !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) { 587 mblk_t *nmp; 588 589 /* 590 * This can happen if IP headers are adjusted after 591 * data was moved into chunks, or during retransmission, 592 * or things like snoop is running. 593 */ 594 nmp = allocb_cred(sctps->sctps_wroff_xtra + hdrlen + sacklen, 595 CONN_CRED(sctp->sctp_connp)); 596 if (nmp == NULL) { 597 if (error != NULL) 598 *error = ENOMEM; 599 return (NULL); 600 } 601 nmp->b_rptr += sctps->sctps_wroff_xtra; 602 nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen; 603 nmp->b_cont = mp; 604 mp = nmp; 605 } else { 606 mp->b_rptr -= (hdrlen + sacklen); 607 mblk_setcred(mp, CONN_CRED(sctp->sctp_connp)); 608 } 609 bcopy(hdr, mp->b_rptr, hdrlen); 610 if (sacklen) { 611 sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen); 612 } 613 if (fp != sctp->sctp_current) { 614 /* change addresses in header */ 615 if (isv4) { 616 ipha_t *iph = (ipha_t *)mp->b_rptr; 617 618 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst); 619 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) { 620 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 621 iph->ipha_src); 622 } else if (sctp->sctp_bound_to_all) { 623 iph->ipha_src = INADDR_ANY; 624 } 625 } else { 626 ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr; 627 if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 628 ((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr; 629 } else if (sctp->sctp_bound_to_all) { 630 V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src); 631 } 632 } 633 } 634 /* 635 * IP will not free this IRE if it is condemned. SCTP needs to 636 * free it. 
637 */ 638 if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) { 639 IRE_REFRELE_NOTR(fp->ire); 640 fp->ire = NULL; 641 } 642 643 /* Stash the conn and ire ptr info for IP */ 644 SCTP_STASH_IPINFO(mp, fp->ire); 645 646 return (mp); 647 } 648 649 /* 650 * SCTP requires every chunk to be padded so that the total length 651 * is a multiple of SCTP_ALIGN. This function returns a mblk with 652 * the specified pad length. 653 */ 654 static mblk_t * 655 sctp_get_padding(int pad, sctp_stack_t *sctps) 656 { 657 mblk_t *fill; 658 659 ASSERT(pad < SCTP_ALIGN); 660 if ((fill = dupb(sctps->sctps_pad_mp)) != NULL) { 661 fill->b_wptr += pad; 662 return (fill); 663 } 664 665 /* 666 * The memory saving path of reusing the sctp_pad_mp 667 * fails may be because it has been dupb() too 668 * many times (DBLK_REFMAX). Use the memory consuming 669 * path of allocating the pad mblk. 670 */ 671 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) { 672 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */ 673 *(int32_t *)fill->b_rptr = 0; 674 fill->b_wptr += pad; 675 } 676 return (fill); 677 } 678 679 static mblk_t * 680 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) 681 { 682 mblk_t *meta; 683 mblk_t *start_mp = NULL; 684 mblk_t *end_mp = NULL; 685 mblk_t *mp, *nmp; 686 mblk_t *fill; 687 sctp_data_hdr_t *sdh; 688 int msglen; 689 int extra; 690 sctp_msg_hdr_t *msg_hdr; 691 sctp_faddr_t *old_fp = NULL; 692 sctp_faddr_t *chunk_fp; 693 sctp_stack_t *sctps = sctp->sctp_sctps; 694 695 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 696 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 697 if (SCTP_IS_MSG_ABANDONED(meta) || 698 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 699 continue; 700 } 701 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 702 if (SCTP_CHUNK_WANT_REXMIT(mp)) { 703 /* 704 * Use the same peer address to do fast 705 * retransmission. If the original peer 706 * address is dead, switch to the current 707 * one. 
Record the old one so that we 708 * will pick the chunks sent to the old 709 * one for fast retransmission. 710 */ 711 chunk_fp = SCTP_CHUNK_DEST(mp); 712 if (*fp == NULL) { 713 *fp = chunk_fp; 714 if ((*fp)->state != SCTP_FADDRS_ALIVE) { 715 old_fp = *fp; 716 *fp = sctp->sctp_current; 717 } 718 } else if (old_fp == NULL && *fp != chunk_fp) { 719 continue; 720 } else if (old_fp != NULL && 721 old_fp != chunk_fp) { 722 continue; 723 } 724 725 sdh = (sctp_data_hdr_t *)mp->b_rptr; 726 msglen = ntohs(sdh->sdh_len); 727 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) { 728 extra = SCTP_ALIGN - extra; 729 } 730 731 /* 732 * We still return at least the first message 733 * even if that message cannot fit in as 734 * PMTU may have changed. 735 */ 736 if (*total + msglen + extra > 737 (*fp)->sfa_pmss && start_mp != NULL) { 738 return (start_mp); 739 } 740 if ((nmp = dupmsg(mp)) == NULL) 741 return (start_mp); 742 if (extra > 0) { 743 fill = sctp_get_padding(extra, sctps); 744 if (fill != NULL) { 745 linkb(nmp, fill); 746 } else { 747 return (start_mp); 748 } 749 } 750 BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans); 751 BUMP_LOCAL(sctp->sctp_rxtchunks); 752 SCTP_CHUNK_CLEAR_REXMIT(mp); 753 if (start_mp == NULL) { 754 start_mp = nmp; 755 } else { 756 linkb(end_mp, nmp); 757 } 758 end_mp = nmp; 759 *total += msglen + extra; 760 dprint(2, ("sctp_find_fast_rexmit_mblks: " 761 "tsn %x\n", sdh->sdh_tsn)); 762 } 763 } 764 } 765 /* Clear the flag as there is no more message to be fast rexmitted. 
*/ 766 sctp->sctp_chk_fast_rexmit = B_FALSE; 767 return (start_mp); 768 } 769 770 /* A debug function just to make sure that a mblk chain is not broken */ 771 #ifdef DEBUG 772 static boolean_t 773 sctp_verify_chain(mblk_t *head, mblk_t *tail) 774 { 775 mblk_t *mp = head; 776 777 if (head == NULL || tail == NULL) 778 return (B_TRUE); 779 while (mp != NULL) { 780 if (mp == tail) 781 return (B_TRUE); 782 mp = mp->b_next; 783 } 784 return (B_FALSE); 785 } 786 #endif 787 788 /* 789 * Gets the next unsent chunk to transmit. Messages that are abandoned are 790 * skipped. A message can be abandoned if it has a non-zero timetolive and 791 * transmission has not yet started or if it is a partially reliable 792 * message and its time is up (assuming we are PR-SCTP aware). 793 * 'cansend' is used to determine if need to try and chunkify messages from 794 * the unsent list, if any, and also as an input to sctp_chunkify() if so. 795 * When called from sctp_rexmit(), we don't want to chunkify, so 'cansend' 796 * will be set to 0. 
*
 * The search starts at 'meta' and follows b_next.  On success the message
 * header mblk is returned and the chunk to send is stored in *mp.  NULL is
 * returned when there is nothing to send; *error is then 0, unless
 * sctp_check_abandoned_msg() failed, in which case *error holds its
 * return value.
 *
 * 'firstseg' is subtracted from the destination's sfa_pmss to give the
 * room for the first chunk passed to sctp_chunkify().  When 'fp' is
 * non-NULL, nothing is chunkified if the first unsent message is bound to
 * a different address that is alive.
 */
mblk_t *
sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error,
    int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp)
{
	mblk_t		*mp1;
	sctp_msg_hdr_t	*msg_hdr;
	mblk_t		*tmp_meta;
	sctp_faddr_t	*fp1;

	ASSERT(error != NULL && mp != NULL);
	*error = 0;

	ASSERT(sctp->sctp_current != NULL);

chunkified:
	while (meta != NULL) {
		/* Remember the successor now; meta may be unlinked below. */
		tmp_meta = meta->b_next;
		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
		mp1 = meta->b_cont;
		if (SCTP_IS_MSG_ABANDONED(meta))
			goto next_msg;
		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			/* Live message: hand back its first sendable chunk. */
			while (mp1 != NULL) {
				if (SCTP_CHUNK_CANSEND(mp1)) {
					*mp = mp1;
#ifdef	DEBUG
					ASSERT(sctp_verify_chain(
					    sctp->sctp_xmit_head, meta));
#endif
					return (meta);
				}
				mp1 = mp1->b_next;
			}
			goto next_msg;
		}
		/*
		 * If we come here and the first chunk is sent, then we
		 * are PR-SCTP aware, in which case if the cumulative
		 * TSN has moved up to or beyond the first chunk (which
		 * means all the previous messages have been cumulative
		 * SACK'd), then we send a Forward TSN with the last
		 * chunk that was sent in this message. If we can't send
		 * a Forward TSN because previous non-abandoned messages
		 * have not been acked then we will defer the Forward TSN
		 * to sctp_rexmit() or sctp_cumack().
		 */
		if (SCTP_CHUNK_ISSENT(mp1)) {
			*error = sctp_check_abandoned_msg(sctp, meta);
			if (*error != 0) {
#ifdef	DEBUG
				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
				    sctp->sctp_xmit_tail));
#endif
				return (NULL);
			}
			goto next_msg;
		}
		/*
		 * Nothing of this message has been sent: unlink it from
		 * the transmit list (head, tail or middle) and drop it.
		 */
		BUMP_LOCAL(sctp->sctp_prsctpdrop);
		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
		if (meta->b_prev == NULL) {
			ASSERT(sctp->sctp_xmit_head == meta);
			sctp->sctp_xmit_head = tmp_meta;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_next = NULL;
			if (tmp_meta != NULL)
				tmp_meta->b_prev = NULL;
		} else if (meta->b_next == NULL) {
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = meta->b_prev;
			meta->b_prev->b_next = NULL;
			meta->b_prev = NULL;
		} else {
			meta->b_prev->b_next = tmp_meta;
			tmp_meta->b_prev = meta->b_prev;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_prev = NULL;
			meta->b_next = NULL;
		}
		sctp->sctp_unsent -= msg_hdr->smh_msglen;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp)) {
			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
			    sctp->sctp_unacked + sctp->sctp_unsent);
		}
		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
next_msg:
		meta = tmp_meta;
	}
	/* chunkify, if needed */
	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
		ASSERT(sctp->sctp_unsent > 0);
		if (fp == NULL) {
			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
				fp = sctp->sctp_current;
		} else {
			/*
			 * If user specified destination, try to honor that.
			 */
			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
			    fp1 != fp) {
				goto chunk_done;
			}
		}
		sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend);
		if ((meta = sctp->sctp_xmit_tail) == NULL)
			goto chunk_done;
		/*
		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
		 * new chunk(s) to the tail, so we need to skip the
		 * sctp_xmit_tail, which would have already been processed.
		 * This could happen when there is unacked chunks, but
		 * nothing new to send.
		 * When sctp_chunkify() is called when the transmit queue
		 * is empty then we need to start from sctp_xmit_tail.
		 */
		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
#ifdef	DEBUG
			mp1 = sctp->sctp_xmit_tail->b_cont;
			while (mp1 != NULL) {
				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
				mp1 = mp1->b_next;
			}
#endif
			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
				goto chunk_done;
		}
		/* Rescan, starting at the newly chunked message(s). */
		goto chunkified;
	}
chunk_done:
#ifdef	DEBUG
	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
#endif
	return (NULL);
}

/*
 * Send one packet of fast-retransmitted chunks.
 *
 * The chunks are gathered by sctp_find_fast_rexmit_mblks(), which also
 * picks the destination.  The IP/SCTP header is prepended, the packet is
 * queued on the send queue and the association's and destination's
 * last-active times are updated.  A kstat is bumped and nothing is sent
 * if no chunk was found or the header could not be added.
 */
void
sctp_fast_rexmit(sctp_t *sctp)
{
	mblk_t		*mp, *head;
	int		pktlen = 0;
	sctp_faddr_t	*fp = NULL;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	ASSERT(sctp->sctp_xmit_head != NULL);
	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
	if (mp == NULL) {
		SCTP_KSTAT(sctps, sctp_fr_not_found);
		return;
	}
	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
		/* sctp_add_proto_hdr() does not free the chain on failure. */
		freemsg(mp);
		SCTP_KSTAT(sctps, sctp_fr_add_hdr);
		return;
	}
	/*
	 * The packet is larger than the path MSS: zero the IPv4
	 * fragment offset/flags field of the header.
	 */
	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
		ipha_t *iph = (ipha_t *)head->b_rptr;

		iph->ipha_fragment_offset_and_flags = 0;
	}

	sctp_set_iplen(sctp, head);
	sctp_add_sendq(sctp, head);
	sctp->sctp_active = fp->lastactive = lbolt64;
}

void
sctp_output(sctp_t *sctp, uint_t num_pkt)
{
973 mblk_t *mp = NULL; 974 mblk_t *nmp; 975 mblk_t *head; 976 mblk_t *meta = sctp->sctp_xmit_tail; 977 mblk_t *fill = NULL; 978 uint16_t chunklen; 979 uint32_t cansend; 980 int32_t seglen; 981 int32_t xtralen; 982 int32_t sacklen; 983 int32_t pad = 0; 984 int32_t pathmax; 985 int extra; 986 int64_t now = lbolt64; 987 sctp_faddr_t *fp; 988 sctp_faddr_t *lfp; 989 sctp_data_hdr_t *sdc; 990 int error; 991 boolean_t notsent = B_TRUE; 992 sctp_stack_t *sctps = sctp->sctp_sctps; 993 994 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 995 sacklen = 0; 996 } else { 997 /* send a SACK chunk */ 998 sacklen = sizeof (sctp_chunk_hdr_t) + 999 sizeof (sctp_sack_chunk_t) + 1000 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1001 lfp = sctp->sctp_lastdata; 1002 ASSERT(lfp != NULL); 1003 if (lfp->state != SCTP_FADDRS_ALIVE) 1004 lfp = sctp->sctp_current; 1005 } 1006 1007 cansend = sctp->sctp_frwnd; 1008 if (sctp->sctp_unsent < cansend) 1009 cansend = sctp->sctp_unsent; 1010 if ((cansend < sctp->sctp_current->sfa_pmss / 2) && 1011 sctp->sctp_unacked && 1012 (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) && 1013 !sctp->sctp_ndelay) { 1014 head = NULL; 1015 fp = sctp->sctp_current; 1016 goto unsent_data; 1017 } 1018 if (meta != NULL) 1019 mp = meta->b_cont; 1020 while (cansend > 0 && num_pkt-- != 0) { 1021 pad = 0; 1022 1023 /* 1024 * Find first segment eligible for transmit. 1025 */ 1026 while (mp != NULL) { 1027 if (SCTP_CHUNK_CANSEND(mp)) 1028 break; 1029 mp = mp->b_next; 1030 } 1031 if (mp == NULL) { 1032 meta = sctp_get_msg_to_send(sctp, &mp, 1033 meta == NULL ? NULL : meta->b_next, &error, sacklen, 1034 cansend, NULL); 1035 if (error != 0 || meta == NULL) { 1036 head = NULL; 1037 fp = sctp->sctp_current; 1038 goto unsent_data; 1039 } 1040 sctp->sctp_xmit_tail = meta; 1041 } 1042 1043 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1044 seglen = ntohs(sdc->sdh_len); 1045 xtralen = sizeof (*sdc); 1046 chunklen = seglen - xtralen; 1047 1048 /* 1049 * Check rwnd. 
1050 */ 1051 if (chunklen > cansend) { 1052 head = NULL; 1053 fp = SCTP_CHUNK_DEST(meta); 1054 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1055 fp = sctp->sctp_current; 1056 goto unsent_data; 1057 } 1058 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1059 extra = SCTP_ALIGN - extra; 1060 1061 /* 1062 * Pick destination address, and check cwnd. 1063 */ 1064 if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) && 1065 (seglen + sacklen + extra <= lfp->sfa_pmss)) { 1066 /* 1067 * Only include SACK chunk if it can be bundled 1068 * with a data chunk, and sent to sctp_lastdata. 1069 */ 1070 pathmax = lfp->cwnd - lfp->suna; 1071 1072 fp = lfp; 1073 if ((nmp = dupmsg(mp)) == NULL) { 1074 head = NULL; 1075 goto unsent_data; 1076 } 1077 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1078 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, 1079 &error); 1080 if (head == NULL) { 1081 /* 1082 * If none of the source addresses are 1083 * available (i.e error == EHOSTUNREACH), 1084 * pretend we have sent the data. We will 1085 * eventually time out trying to retramsmit 1086 * the data if the interface never comes up. 1087 * If we have already sent some stuff (i.e., 1088 * notsent is B_FALSE) then we are fine, else 1089 * just mark this packet as sent. 1090 */ 1091 if (notsent && error == EHOSTUNREACH) { 1092 SCTP_CHUNK_SENT(sctp, mp, sdc, 1093 fp, chunklen, meta); 1094 } 1095 freemsg(nmp); 1096 SCTP_KSTAT(sctps, sctp_output_failed); 1097 goto unsent_data; 1098 } 1099 seglen += sacklen; 1100 xtralen += sacklen; 1101 sacklen = 0; 1102 } else { 1103 fp = SCTP_CHUNK_DEST(meta); 1104 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1105 fp = sctp->sctp_current; 1106 /* 1107 * If we haven't sent data to this destination for 1108 * a while, do slow start again. 
1109 */ 1110 if (now - fp->lastactive > fp->rto) { 1111 SET_CWND(fp, fp->sfa_pmss, 1112 sctps->sctps_slow_start_after_idle); 1113 } 1114 1115 pathmax = fp->cwnd - fp->suna; 1116 if (seglen + extra > pathmax) { 1117 head = NULL; 1118 goto unsent_data; 1119 } 1120 if ((nmp = dupmsg(mp)) == NULL) { 1121 head = NULL; 1122 goto unsent_data; 1123 } 1124 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1125 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error); 1126 if (head == NULL) { 1127 /* 1128 * If none of the source addresses are 1129 * available (i.e error == EHOSTUNREACH), 1130 * pretend we have sent the data. We will 1131 * eventually time out trying to retramsmit 1132 * the data if the interface never comes up. 1133 * If we have already sent some stuff (i.e., 1134 * notsent is B_FALSE) then we are fine, else 1135 * just mark this packet as sent. 1136 */ 1137 if (notsent && error == EHOSTUNREACH) { 1138 SCTP_CHUNK_SENT(sctp, mp, sdc, 1139 fp, chunklen, meta); 1140 } 1141 freemsg(nmp); 1142 SCTP_KSTAT(sctps, sctp_output_failed); 1143 goto unsent_data; 1144 } 1145 } 1146 fp->lastactive = now; 1147 if (pathmax > fp->sfa_pmss) 1148 pathmax = fp->sfa_pmss; 1149 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1150 mp = mp->b_next; 1151 1152 /* Use this chunk to measure RTT? */ 1153 if (sctp->sctp_out_time == 0) { 1154 sctp->sctp_out_time = now; 1155 sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1; 1156 ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); 1157 } 1158 if (extra > 0) { 1159 fill = sctp_get_padding(extra, sctps); 1160 if (fill != NULL) { 1161 linkb(head, fill); 1162 pad = extra; 1163 seglen += extra; 1164 } else { 1165 goto unsent_data; 1166 } 1167 } 1168 /* See if we can bundle more. 
*/ 1169 while (seglen < pathmax) { 1170 int32_t new_len; 1171 int32_t new_xtralen; 1172 1173 while (mp != NULL) { 1174 if (SCTP_CHUNK_CANSEND(mp)) 1175 break; 1176 mp = mp->b_next; 1177 } 1178 if (mp == NULL) { 1179 meta = sctp_get_msg_to_send(sctp, &mp, 1180 meta->b_next, &error, seglen, 1181 (seglen - xtralen) >= cansend ? 0 : 1182 cansend - seglen, fp); 1183 if (error != 0 || meta == NULL) 1184 break; 1185 sctp->sctp_xmit_tail = meta; 1186 } 1187 ASSERT(mp != NULL); 1188 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) && 1189 fp != SCTP_CHUNK_DEST(meta)) { 1190 break; 1191 } 1192 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1193 chunklen = ntohs(sdc->sdh_len); 1194 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0) 1195 extra = SCTP_ALIGN - extra; 1196 1197 new_len = seglen + chunklen; 1198 new_xtralen = xtralen + sizeof (*sdc); 1199 chunklen -= sizeof (*sdc); 1200 1201 if (new_len - new_xtralen > cansend || 1202 new_len + extra > pathmax) { 1203 break; 1204 } 1205 if ((nmp = dupmsg(mp)) == NULL) 1206 break; 1207 if (extra > 0) { 1208 fill = sctp_get_padding(extra, sctps); 1209 if (fill != NULL) { 1210 pad += extra; 1211 new_len += extra; 1212 linkb(nmp, fill); 1213 } else { 1214 freemsg(nmp); 1215 break; 1216 } 1217 } 1218 seglen = new_len; 1219 xtralen = new_xtralen; 1220 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1221 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1222 linkb(head, nmp); 1223 mp = mp->b_next; 1224 } 1225 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1226 ipha_t *iph = (ipha_t *)head->b_rptr; 1227 1228 /* 1229 * Path MTU is different from what we thought it would 1230 * be when we created chunks, or IP headers have grown. 1231 * Need to clear the DF bit. 
1232 */ 1233 iph->ipha_fragment_offset_and_flags = 0; 1234 } 1235 /* xmit segment */ 1236 ASSERT(cansend >= seglen - pad - xtralen); 1237 cansend -= (seglen - pad - xtralen); 1238 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x " 1239 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n", 1240 seglen - xtralen, ntohl(sdc->sdh_tsn), 1241 ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, 1242 cansend, sctp->sctp_lastack_rxd)); 1243 sctp_set_iplen(sctp, head); 1244 sctp_add_sendq(sctp, head); 1245 /* arm rto timer (if not set) */ 1246 if (!fp->timer_running) 1247 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1248 notsent = B_FALSE; 1249 } 1250 sctp->sctp_active = now; 1251 return; 1252 unsent_data: 1253 /* arm persist timer (if rto timer not set) */ 1254 if (!fp->timer_running) 1255 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1256 if (head != NULL) 1257 freemsg(head); 1258 } 1259 1260 /* 1261 * The following two functions initialize and destroy the cache 1262 * associated with the sets used for PR-SCTP. 1263 */ 1264 void 1265 sctp_ftsn_sets_init(void) 1266 { 1267 sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache", 1268 sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL, 1269 NULL, 0); 1270 } 1271 1272 void 1273 sctp_ftsn_sets_fini(void) 1274 { 1275 kmem_cache_destroy(sctp_kmem_ftsn_set_cache); 1276 } 1277 1278 1279 /* Free PR-SCTP sets */ 1280 void 1281 sctp_free_ftsn_set(sctp_ftsn_set_t *s) 1282 { 1283 sctp_ftsn_set_t *p; 1284 1285 while (s != NULL) { 1286 p = s->next; 1287 s->next = NULL; 1288 kmem_cache_free(sctp_kmem_ftsn_set_cache, s); 1289 s = p; 1290 } 1291 } 1292 1293 /* 1294 * Given a message meta block, meta, this routine creates or modifies 1295 * the set that will be used to generate a Forward TSN chunk. If the 1296 * entry for stream id, sid, for this message already exists, the 1297 * sequence number, ssn, is updated if it is greater than the existing 1298 * one. 
If an entry for this sid does not exist, one is created if 1299 * the size does not exceed fp->sfa_pmss. We return false in case 1300 * or an error. 1301 */ 1302 boolean_t 1303 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta, 1304 uint_t *nsets, uint32_t *slen) 1305 { 1306 sctp_ftsn_set_t *p; 1307 sctp_msg_hdr_t *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1308 uint16_t sid = htons(msg_hdr->smh_sid); 1309 /* msg_hdr->smh_ssn is already in NBO */ 1310 uint16_t ssn = msg_hdr->smh_ssn; 1311 1312 ASSERT(s != NULL && nsets != NULL); 1313 ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL)); 1314 1315 if (*s == NULL) { 1316 ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss); 1317 *s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP); 1318 if (*s == NULL) 1319 return (B_FALSE); 1320 (*s)->ftsn_entries.ftsn_sid = sid; 1321 (*s)->ftsn_entries.ftsn_ssn = ssn; 1322 (*s)->next = NULL; 1323 *nsets = 1; 1324 *slen += sizeof (uint32_t); 1325 return (B_TRUE); 1326 } 1327 for (p = *s; p->next != NULL; p = p->next) { 1328 if (p->ftsn_entries.ftsn_sid == sid) { 1329 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1330 p->ftsn_entries.ftsn_ssn = ssn; 1331 return (B_TRUE); 1332 } 1333 } 1334 /* the last one */ 1335 if (p->ftsn_entries.ftsn_sid == sid) { 1336 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1337 p->ftsn_entries.ftsn_ssn = ssn; 1338 } else { 1339 if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss) 1340 return (B_FALSE); 1341 p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, 1342 KM_NOSLEEP); 1343 if (p->next == NULL) 1344 return (B_FALSE); 1345 p = p->next; 1346 p->ftsn_entries.ftsn_sid = sid; 1347 p->ftsn_entries.ftsn_ssn = ssn; 1348 p->next = NULL; 1349 (*nsets)++; 1350 *slen += sizeof (uint32_t); 1351 } 1352 return (B_TRUE); 1353 } 1354 1355 /* 1356 * Given a set of stream id - sequence number pairs, this routing creates 1357 * a Forward TSN chunk. 
The cumulative TSN (advanced peer ack point) 1358 * for the chunk is obtained from sctp->sctp_adv_pap. The caller 1359 * will add the IP/SCTP header. 1360 */ 1361 mblk_t * 1362 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets, 1363 uint_t nsets, uint32_t seglen) 1364 { 1365 mblk_t *ftsn_mp; 1366 sctp_chunk_hdr_t *ch_hdr; 1367 uint32_t *advtsn; 1368 uint16_t schlen; 1369 size_t xtralen; 1370 ftsn_entry_t *ftsn_entry; 1371 sctp_stack_t *sctps = sctp->sctp_sctps; 1372 1373 seglen += sizeof (sctp_chunk_hdr_t); 1374 if (fp->isv4) 1375 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra; 1376 else 1377 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra; 1378 ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp)); 1379 if (ftsn_mp == NULL) 1380 return (NULL); 1381 ftsn_mp->b_rptr += xtralen; 1382 ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen; 1383 1384 ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr; 1385 ch_hdr->sch_id = CHUNK_FORWARD_TSN; 1386 ch_hdr->sch_flags = 0; 1387 /* 1388 * The cast here should not be an issue since seglen is 1389 * the length of the Forward TSN chunk. 1390 */ 1391 schlen = (uint16_t)seglen; 1392 U16_TO_ABE16(schlen, &(ch_hdr->sch_len)); 1393 1394 advtsn = (uint32_t *)(ch_hdr + 1); 1395 U32_TO_ABE32(sctp->sctp_adv_pap, advtsn); 1396 ftsn_entry = (ftsn_entry_t *)(advtsn + 1); 1397 while (nsets > 0) { 1398 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr); 1399 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid; 1400 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn; 1401 ftsn_entry++; 1402 sets = sets->next; 1403 nsets--; 1404 } 1405 return (ftsn_mp); 1406 } 1407 1408 /* 1409 * Given a starting message, the routine steps through all the 1410 * messages whose TSN is less than sctp->sctp_adv_pap and creates 1411 * ftsn sets. The ftsn sets is then used to create an Forward TSN 1412 * chunk. All the messages, that have chunks that are included in the 1413 * ftsn sets, are flagged abandonded. 
If a message is partially sent 1414 * and is deemed abandoned, all remaining unsent chunks are marked 1415 * abandoned and are deducted from sctp_unsent. 1416 */ 1417 void 1418 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp, 1419 sctp_faddr_t *fp, uint32_t *seglen) 1420 { 1421 mblk_t *mp1 = mp; 1422 mblk_t *mp_head = mp; 1423 mblk_t *meta_head = meta; 1424 mblk_t *head; 1425 sctp_ftsn_set_t *sets = NULL; 1426 uint_t nsets = 0; 1427 uint16_t clen; 1428 sctp_data_hdr_t *sdc; 1429 uint32_t sacklen; 1430 uint32_t adv_pap = sctp->sctp_adv_pap; 1431 uint32_t unsent = 0; 1432 boolean_t ubit; 1433 sctp_stack_t *sctps = sctp->sctp_sctps; 1434 1435 *seglen = sizeof (uint32_t); 1436 1437 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1438 while (meta != NULL && 1439 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { 1440 /* 1441 * Skip adding FTSN sets for un-ordered messages as they do 1442 * not have SSNs. 1443 */ 1444 ubit = SCTP_DATA_GET_UBIT(sdc); 1445 if (!ubit && 1446 !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) { 1447 meta = NULL; 1448 sctp->sctp_adv_pap = adv_pap; 1449 goto ftsn_done; 1450 } 1451 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { 1452 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1453 adv_pap = ntohl(sdc->sdh_tsn); 1454 mp1 = mp1->b_next; 1455 } 1456 meta = meta->b_next; 1457 if (meta != NULL) { 1458 mp1 = meta->b_cont; 1459 if (!SCTP_CHUNK_ISSENT(mp1)) 1460 break; 1461 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1462 } 1463 } 1464 ftsn_done: 1465 /* 1466 * Can't compare with sets == NULL, since we don't add any 1467 * sets for un-ordered messages. 
1468 */ 1469 if (meta == meta_head) 1470 return; 1471 *nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen); 1472 sctp_free_ftsn_set(sets); 1473 if (*nmp == NULL) 1474 return; 1475 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1476 sacklen = 0; 1477 } else { 1478 sacklen = sizeof (sctp_chunk_hdr_t) + 1479 sizeof (sctp_sack_chunk_t) + 1480 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1481 if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1482 /* piggybacked SACK doesn't fit */ 1483 sacklen = 0; 1484 } else { 1485 fp = sctp->sctp_lastdata; 1486 } 1487 } 1488 head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL); 1489 if (head == NULL) { 1490 freemsg(*nmp); 1491 *nmp = NULL; 1492 SCTP_KSTAT(sctps, sctp_send_ftsn_failed); 1493 return; 1494 } 1495 *seglen += sacklen; 1496 *nmp = head; 1497 1498 /* 1499 * XXXNeed to optimise this, the reason it is done here is so 1500 * that we don't have to undo in case of failure. 1501 */ 1502 mp1 = mp_head; 1503 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1504 while (meta_head != NULL && 1505 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) { 1506 if (!SCTP_IS_MSG_ABANDONED(meta_head)) 1507 SCTP_MSG_SET_ABANDONED(meta_head); 1508 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) { 1509 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1510 if (!SCTP_CHUNK_ISACKED(mp1)) { 1511 clen = ntohs(sdc->sdh_len) - sizeof (*sdc); 1512 SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen, 1513 meta_head); 1514 } 1515 mp1 = mp1->b_next; 1516 } 1517 while (mp1 != NULL) { 1518 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1519 if (!SCTP_CHUNK_ABANDONED(mp1)) { 1520 ASSERT(!SCTP_CHUNK_ISSENT(mp1)); 1521 unsent += ntohs(sdc->sdh_len) - sizeof (*sdc); 1522 SCTP_ABANDON_CHUNK(mp1); 1523 } 1524 mp1 = mp1->b_next; 1525 } 1526 meta_head = meta_head->b_next; 1527 if (meta_head != NULL) { 1528 mp1 = meta_head->b_cont; 1529 if (!SCTP_CHUNK_ISSENT(mp1)) 1530 break; 1531 sdc = (sctp_data_hdr_t *)mp1->b_rptr; 1532 } 1533 } 1534 if (unsent > 0) { 1535 ASSERT(sctp->sctp_unsent 
>= unsent); 1536 sctp->sctp_unsent -= unsent; 1537 /* 1538 * Update ULP the amount of queued data, which is 1539 * sent-unack'ed + unsent. 1540 */ 1541 if (!SCTP_IS_DETACHED(sctp)) { 1542 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, 1543 sctp->sctp_unacked + sctp->sctp_unsent); 1544 } 1545 } 1546 } 1547 1548 /* 1549 * This function steps through messages starting at meta and checks if 1550 * the message is abandoned. It stops when it hits an unsent chunk or 1551 * a message that has all its chunk acked. This is the only place 1552 * where the sctp_adv_pap is moved forward to indicated abandoned 1553 * messages. 1554 */ 1555 void 1556 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp) 1557 { 1558 uint32_t tsn = sctp->sctp_adv_pap; 1559 sctp_data_hdr_t *sdc; 1560 sctp_msg_hdr_t *msg_hdr; 1561 1562 ASSERT(mp != NULL); 1563 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1564 ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd)); 1565 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1566 if (!SCTP_IS_MSG_ABANDONED(meta) && 1567 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 1568 return; 1569 } 1570 while (meta != NULL) { 1571 while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) { 1572 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1573 tsn = ntohl(sdc->sdh_tsn); 1574 mp = mp->b_next; 1575 } 1576 if (mp != NULL) 1577 break; 1578 /* 1579 * We continue checking for successive messages only if there 1580 * is a chunk marked for retransmission. Else, we might 1581 * end up sending FTSN prematurely for chunks that have been 1582 * sent, but not yet acked. 
1583 */ 1584 if ((meta = meta->b_next) != NULL) { 1585 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1586 if (!SCTP_IS_MSG_ABANDONED(meta) && 1587 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 1588 break; 1589 } 1590 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1591 if (!SCTP_CHUNK_ISSENT(mp)) { 1592 sctp->sctp_adv_pap = tsn; 1593 return; 1594 } 1595 if (SCTP_CHUNK_WANT_REXMIT(mp)) 1596 break; 1597 } 1598 if (mp == NULL) 1599 break; 1600 } 1601 } 1602 sctp->sctp_adv_pap = tsn; 1603 } 1604 1605 1606 /* 1607 * Determine if we should bundle a data chunk with the chunk being 1608 * retransmitted. We bundle if 1609 * 1610 * - the chunk is sent to the same destination and unack'ed. 1611 * 1612 * OR 1613 * 1614 * - the chunk is unsent, i.e. new data. 1615 */ 1616 #define SCTP_CHUNK_RX_CANBUNDLE(mp, fp) \ 1617 (!SCTP_CHUNK_ABANDONED((mp)) && \ 1618 ((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) && \ 1619 !SCTP_CHUNK_ISACKED(mp))) || \ 1620 (((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \ 1621 SCTP_CHUNK_FLAG_SENT))) 1622 1623 /* 1624 * Retransmit first segment which hasn't been acked with cumtsn or send 1625 * a Forward TSN chunk, if appropriate. 
1626 */ 1627 void 1628 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) 1629 { 1630 mblk_t *mp; 1631 mblk_t *nmp = NULL; 1632 mblk_t *head; 1633 mblk_t *meta = sctp->sctp_xmit_head; 1634 mblk_t *fill; 1635 uint32_t seglen = 0; 1636 uint32_t sacklen; 1637 uint16_t chunklen; 1638 int extra; 1639 sctp_data_hdr_t *sdc; 1640 sctp_faddr_t *fp; 1641 uint32_t adv_pap = sctp->sctp_adv_pap; 1642 boolean_t do_ftsn = B_FALSE; 1643 boolean_t ftsn_check = B_TRUE; 1644 uint32_t first_ua_tsn; 1645 sctp_msg_hdr_t *mhdr; 1646 sctp_stack_t *sctps = sctp->sctp_sctps; 1647 1648 while (meta != NULL) { 1649 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1650 uint32_t tsn; 1651 1652 if (!SCTP_CHUNK_ISSENT(mp)) 1653 goto window_probe; 1654 /* 1655 * We break in the following cases - 1656 * 1657 * if the advanced peer ack point includes the next 1658 * chunk to be retransmited - possibly the Forward 1659 * TSN was lost. 1660 * 1661 * if we are PRSCTP aware and the next chunk to be 1662 * retransmitted is now abandoned 1663 * 1664 * if the next chunk to be retransmitted is for 1665 * the dest on which the timer went off. (this 1666 * message is not abandoned). 1667 * 1668 * We check for Forward TSN only for the first 1669 * eligible chunk to be retransmitted. The reason 1670 * being if the first eligible chunk is skipped (say 1671 * it was sent to a destination other than oldfp) 1672 * then we cannot advance the cum TSN via Forward 1673 * TSN chunk. 1674 * 1675 * Also, ftsn_check is B_TRUE only for the first 1676 * eligible chunk, it will be B_FALSE for all 1677 * subsequent candidate messages for retransmission. 
1678 */ 1679 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1680 tsn = ntohl(sdc->sdh_tsn); 1681 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) { 1682 if (sctp->sctp_prsctp_aware && ftsn_check) { 1683 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) { 1684 ASSERT(sctp->sctp_prsctp_aware); 1685 do_ftsn = B_TRUE; 1686 goto out; 1687 } else { 1688 sctp_check_adv_ack_pt(sctp, 1689 meta, mp); 1690 if (SEQ_GT(sctp->sctp_adv_pap, 1691 adv_pap)) { 1692 do_ftsn = B_TRUE; 1693 goto out; 1694 } 1695 } 1696 ftsn_check = B_FALSE; 1697 } 1698 if (SCTP_CHUNK_DEST(mp) == oldfp) 1699 goto out; 1700 } 1701 } 1702 meta = meta->b_next; 1703 if (meta != NULL && sctp->sctp_prsctp_aware) { 1704 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1705 1706 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || 1707 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { 1708 meta = meta->b_next; 1709 } 1710 } 1711 } 1712 window_probe: 1713 /* 1714 * Retransmit fired for a destination which didn't have 1715 * any unacked data pending. 1716 */ 1717 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) { 1718 /* 1719 * Send a window probe. Inflate frwnd to allow 1720 * sending one segment. 1721 */ 1722 if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) 1723 sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc); 1724 1725 /* next TSN to send */ 1726 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn; 1727 1728 /* 1729 * The above sctp_frwnd adjustment is coarse. The "changed" 1730 * sctp_frwnd may allow us to send more than 1 packet. So 1731 * tell sctp_output() to send only 1 packet. 1732 */ 1733 sctp_output(sctp, 1); 1734 1735 /* Last sent TSN */ 1736 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1737 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); 1738 sctp->sctp_zero_win_probe = B_TRUE; 1739 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1740 } 1741 return; 1742 out: 1743 /* 1744 * After a time out, assume that everything has left the network. So 1745 * we can clear rxt_unacked for the original peer address. 
1746 */ 1747 oldfp->rxt_unacked = 0; 1748 1749 /* 1750 * If we were probing for zero window, don't adjust retransmission 1751 * variables, but the timer is still backed off. 1752 */ 1753 if (sctp->sctp_zero_win_probe) { 1754 mblk_t *pkt; 1755 uint_t pkt_len; 1756 1757 /* 1758 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn 1759 * and sctp_rxt_maxtsn will specify the ZWP packet. 1760 */ 1761 fp = oldfp; 1762 if (oldfp->state != SCTP_FADDRS_ALIVE) 1763 fp = sctp_rotate_faddr(sctp, oldfp); 1764 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 1765 if (pkt != NULL) { 1766 ASSERT(pkt_len <= fp->sfa_pmss); 1767 sctp_set_iplen(sctp, pkt); 1768 sctp_add_sendq(sctp, pkt); 1769 } else { 1770 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 1771 } 1772 1773 /* 1774 * The strikes will be clear by sctp_faddr_alive() when the 1775 * other side sends us an ack. 1776 */ 1777 oldfp->strikes++; 1778 sctp->sctp_strikes++; 1779 1780 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1781 if (oldfp != fp && oldfp->suna != 0) 1782 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); 1783 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1784 BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); 1785 return; 1786 } 1787 1788 /* 1789 * Enter slowstart for this destination 1790 */ 1791 oldfp->ssthresh = oldfp->cwnd / 2; 1792 if (oldfp->ssthresh < 2 * oldfp->sfa_pmss) 1793 oldfp->ssthresh = 2 * oldfp->sfa_pmss; 1794 oldfp->cwnd = oldfp->sfa_pmss; 1795 oldfp->pba = 0; 1796 fp = sctp_rotate_faddr(sctp, oldfp); 1797 ASSERT(fp != NULL); 1798 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1799 1800 first_ua_tsn = ntohl(sdc->sdh_tsn); 1801 if (do_ftsn) { 1802 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); 1803 if (nmp == NULL) { 1804 sctp->sctp_adv_pap = adv_pap; 1805 goto restart_timer; 1806 } 1807 head = nmp; 1808 /* 1809 * Move to the next unabandoned chunk. XXXCheck if meta will 1810 * always be marked abandoned. 
1811 */ 1812 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) 1813 meta = meta->b_next; 1814 if (meta != NULL) 1815 mp = mp->b_cont; 1816 else 1817 mp = NULL; 1818 goto try_bundle; 1819 } 1820 seglen = ntohs(sdc->sdh_len); 1821 chunklen = seglen - sizeof (*sdc); 1822 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1823 extra = SCTP_ALIGN - extra; 1824 1825 /* Find out if we need to piggyback SACK. */ 1826 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1827 sacklen = 0; 1828 } else { 1829 sacklen = sizeof (sctp_chunk_hdr_t) + 1830 sizeof (sctp_sack_chunk_t) + 1831 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1832 if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1833 /* piggybacked SACK doesn't fit */ 1834 sacklen = 0; 1835 } else { 1836 /* 1837 * OK, we have room to send SACK back. But we 1838 * should send it back to the last fp where we 1839 * receive data from, unless sctp_lastdata equals 1840 * oldfp, then we should probably not send it 1841 * back to that fp. Also we should check that 1842 * the fp is alive. 1843 */ 1844 if (sctp->sctp_lastdata != oldfp && 1845 sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { 1846 fp = sctp->sctp_lastdata; 1847 } 1848 } 1849 } 1850 1851 /* 1852 * Cancel RTT measurement if the retransmitted TSN is before the 1853 * TSN used for timimg. 1854 */ 1855 if (sctp->sctp_out_time != 0 && 1856 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { 1857 sctp->sctp_out_time = 0; 1858 } 1859 /* Clear the counter as the RTT calculation may be off. */ 1860 fp->rtt_updates = 0; 1861 oldfp->rtt_updates = 0; 1862 1863 /* 1864 * After a timeout, we should change the current faddr so that 1865 * new chunks will be sent to the alternate address. 
1866 */ 1867 sctp_set_faddr_current(sctp, fp); 1868 1869 nmp = dupmsg(mp); 1870 if (nmp == NULL) 1871 goto restart_timer; 1872 if (extra > 0) { 1873 fill = sctp_get_padding(extra, sctps); 1874 if (fill != NULL) { 1875 linkb(nmp, fill); 1876 seglen += extra; 1877 } else { 1878 freemsg(nmp); 1879 goto restart_timer; 1880 } 1881 } 1882 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1883 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); 1884 if (head == NULL) { 1885 freemsg(nmp); 1886 SCTP_KSTAT(sctps, sctp_rexmit_failed); 1887 goto restart_timer; 1888 } 1889 seglen += sacklen; 1890 1891 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1892 1893 mp = mp->b_next; 1894 1895 try_bundle: 1896 /* We can at least and at most send 1 packet at timeout. */ 1897 while (seglen < fp->sfa_pmss) { 1898 int32_t new_len; 1899 1900 /* Go through the list to find more chunks to be bundled. */ 1901 while (mp != NULL) { 1902 /* Check if the chunk can be bundled. */ 1903 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) 1904 break; 1905 mp = mp->b_next; 1906 } 1907 /* Go to the next message. */ 1908 if (mp == NULL) { 1909 for (meta = meta->b_next; meta != NULL; 1910 meta = meta->b_next) { 1911 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1912 1913 if (SCTP_IS_MSG_ABANDONED(meta) || 1914 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, 1915 sctp)) { 1916 continue; 1917 } 1918 1919 mp = meta->b_cont; 1920 goto try_bundle; 1921 } 1922 /* No more chunk to be bundled. 
*/ 1923 break; 1924 } 1925 1926 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1927 new_len = ntohs(sdc->sdh_len); 1928 chunklen = new_len - sizeof (*sdc); 1929 1930 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 1931 extra = SCTP_ALIGN - extra; 1932 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 1933 break; 1934 if ((nmp = dupmsg(mp)) == NULL) 1935 break; 1936 1937 if (extra > 0) { 1938 fill = sctp_get_padding(extra, sctps); 1939 if (fill != NULL) { 1940 linkb(nmp, fill); 1941 } else { 1942 freemsg(nmp); 1943 break; 1944 } 1945 } 1946 linkb(head, nmp); 1947 1948 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1949 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1950 1951 seglen = new_len; 1952 mp = mp->b_next; 1953 } 1954 done_bundle: 1955 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1956 ipha_t *iph = (ipha_t *)head->b_rptr; 1957 1958 /* 1959 * Path MTU is different from path we thought it would 1960 * be when we created chunks, or IP headers have grown. 1961 * Need to clear the DF bit. 1962 */ 1963 iph->ipha_fragment_offset_and_flags = 0; 1964 } 1965 fp->rxt_unacked += seglen; 1966 1967 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x " 1968 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n", 1969 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), 1970 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); 1971 1972 sctp->sctp_rexmitting = B_TRUE; 1973 sctp->sctp_rxt_nxttsn = first_ua_tsn; 1974 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1975 sctp_set_iplen(sctp, head); 1976 sctp_add_sendq(sctp, head); 1977 1978 /* 1979 * Restart the oldfp timer with exponential backoff and 1980 * the new fp timer for the retransmitted chunks. 1981 */ 1982 restart_timer: 1983 oldfp->strikes++; 1984 sctp->sctp_strikes++; 1985 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1986 if (oldfp->suna != 0) 1987 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); 1988 sctp->sctp_active = lbolt64; 1989 1990 /* 1991 * Should we restart the timer of the new fp? 
If there is 1992 * outstanding data to the new fp, the timer should be 1993 * running already. So restarting it means that the timer 1994 * will fire later for those outstanding data. But if 1995 * we don't restart it, the timer will fire too early for the 1996 * just retransmitted chunks to the new fp. The reason is that we 1997 * don't keep a timestamp on when a chunk is retransmitted. 1998 * So when the timer fires, it will just search for the 1999 * chunk with the earliest TSN sent to new fp. This probably 2000 * is the chunk we just retransmitted. So for now, let's 2001 * be conservative and restart the timer of the new fp. 2002 */ 2003 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2004 } 2005 2006 /* 2007 * The SCTP write put procedure called from IP. 2008 */ 2009 void 2010 sctp_wput(queue_t *q, mblk_t *mp) 2011 { 2012 uchar_t *rptr; 2013 t_scalar_t type; 2014 2015 switch (mp->b_datap->db_type) { 2016 case M_IOCTL: 2017 sctp_wput_ioctl(q, mp); 2018 break; 2019 case M_DATA: 2020 /* Should be handled in sctp_output() */ 2021 ASSERT(0); 2022 freemsg(mp); 2023 break; 2024 case M_PROTO: 2025 case M_PCPROTO: 2026 rptr = mp->b_rptr; 2027 if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) { 2028 type = ((union T_primitives *)rptr)->type; 2029 /* 2030 * There is no "standard" way on how to respond 2031 * to T_CAPABILITY_REQ if a module does not 2032 * understand it. And the current TI mod 2033 * has problems handling an error ack. So we 2034 * catch the request here and reply with a response 2035 * which the TI mod knows how to respond to. 
2036 */ 2037 switch (type) { 2038 case T_CAPABILITY_REQ: 2039 (void) putnextctl1(RD(q), M_ERROR, EPROTO); 2040 break; 2041 default: 2042 if ((mp = mi_tpi_err_ack_alloc(mp, 2043 TNOTSUPPORT, 0)) != NULL) { 2044 qreply(q, mp); 2045 return; 2046 } 2047 } 2048 } 2049 /* FALLTHRU */ 2050 default: 2051 freemsg(mp); 2052 return; 2053 } 2054 } 2055 2056 /* 2057 * This function is called by sctp_ss_rexmit() to create a packet 2058 * to be retransmitted to the given fp. The given meta and mp 2059 * parameters are respectively the sctp_msg_hdr_t and the mblk of the 2060 * first chunk to be retransmitted. This is also called when we want 2061 * to retransmit a zero window probe from sctp_rexmit() or when we 2062 * want to retransmit the zero window probe after the window has 2063 * opened from sctp_got_sack(). 2064 */ 2065 mblk_t * 2066 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, 2067 uint_t *packet_len) 2068 { 2069 uint32_t seglen = 0; 2070 uint16_t chunklen; 2071 int extra; 2072 mblk_t *nmp; 2073 mblk_t *head; 2074 mblk_t *fill; 2075 sctp_data_hdr_t *sdc; 2076 sctp_msg_hdr_t *mhdr; 2077 sctp_stack_t *sctps = sctp->sctp_sctps; 2078 2079 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2080 seglen = ntohs(sdc->sdh_len); 2081 chunklen = seglen - sizeof (*sdc); 2082 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 2083 extra = SCTP_ALIGN - extra; 2084 2085 nmp = dupmsg(*mp); 2086 if (nmp == NULL) 2087 return (NULL); 2088 if (extra > 0) { 2089 fill = sctp_get_padding(extra, sctps); 2090 if (fill != NULL) { 2091 linkb(nmp, fill); 2092 seglen += extra; 2093 } else { 2094 freemsg(nmp); 2095 return (NULL); 2096 } 2097 } 2098 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2099 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL); 2100 if (head == NULL) { 2101 freemsg(nmp); 2102 return (NULL); 2103 } 2104 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2105 /* 2106 * Don't update the TSN if we are doing a Zero Win Probe. 
2107 */ 2108 if (!sctp->sctp_zero_win_probe) 2109 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2110 *mp = (*mp)->b_next; 2111 2112 try_bundle: 2113 while (seglen < fp->sfa_pmss) { 2114 int32_t new_len; 2115 2116 /* 2117 * Go through the list to find more chunks to be bundled. 2118 * We should only retransmit sent by unack'ed chunks. Since 2119 * they were sent before, the peer's receive window should 2120 * be able to receive them. 2121 */ 2122 while (*mp != NULL) { 2123 /* Check if the chunk can be bundled. */ 2124 if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp)) 2125 break; 2126 *mp = (*mp)->b_next; 2127 } 2128 /* Go to the next message. */ 2129 if (*mp == NULL) { 2130 for (*meta = (*meta)->b_next; *meta != NULL; 2131 *meta = (*meta)->b_next) { 2132 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr; 2133 2134 if (SCTP_IS_MSG_ABANDONED(*meta) || 2135 SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr, 2136 sctp)) { 2137 continue; 2138 } 2139 2140 *mp = (*meta)->b_cont; 2141 goto try_bundle; 2142 } 2143 /* No more chunk to be bundled. */ 2144 break; 2145 } 2146 2147 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2148 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */ 2149 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn)) 2150 break; 2151 new_len = ntohs(sdc->sdh_len); 2152 chunklen = new_len - sizeof (*sdc); 2153 2154 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 2155 extra = SCTP_ALIGN - extra; 2156 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 2157 break; 2158 if ((nmp = dupmsg(*mp)) == NULL) 2159 break; 2160 2161 if (extra > 0) { 2162 fill = sctp_get_padding(extra, sctps); 2163 if (fill != NULL) { 2164 linkb(nmp, fill); 2165 } else { 2166 freemsg(nmp); 2167 break; 2168 } 2169 } 2170 linkb(head, nmp); 2171 2172 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2173 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2174 /* 2175 * Don't update the TSN if we are doing a Zero Win Probe. 
2176 */ 2177 if (!sctp->sctp_zero_win_probe) 2178 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2179 2180 seglen = new_len; 2181 *mp = (*mp)->b_next; 2182 } 2183 *packet_len = seglen; 2184 fp->rxt_unacked += seglen; 2185 return (head); 2186 } 2187 2188 /* 2189 * sctp_ss_rexmit() is called when we get a SACK after a timeout which 2190 * advances the cum_tsn but the cum_tsn is still less than what we have sent 2191 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial" 2192 * SACK. We retransmit unacked chunks without having to wait for another 2193 * timeout. The rationale is that the SACK should not be "partial" if all the 2194 * lost chunks have been retransmitted. Since the SACK is "partial," 2195 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still 2196 * be missing. It is better for us to retransmit them now instead 2197 * of waiting for a timeout. 2198 */ 2199 void 2200 sctp_ss_rexmit(sctp_t *sctp) 2201 { 2202 mblk_t *meta; 2203 mblk_t *mp; 2204 mblk_t *pkt; 2205 sctp_faddr_t *fp; 2206 uint_t pkt_len; 2207 uint32_t tot_wnd; 2208 sctp_data_hdr_t *sdc; 2209 int burst; 2210 sctp_stack_t *sctps = sctp->sctp_sctps; 2211 2212 ASSERT(!sctp->sctp_zero_win_probe); 2213 2214 /* 2215 * If the last cum ack is smaller than what we have just 2216 * retransmitted, simply return. 2217 */ 2218 if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn)) 2219 sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1; 2220 else 2221 return; 2222 ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn)); 2223 2224 /* 2225 * After a timer fires, sctp_current should be set to the new 2226 * fp where the retransmitted chunks are sent. 2227 */ 2228 fp = sctp->sctp_current; 2229 2230 /* 2231 * Since we are retransmitting, we only need to use cwnd to determine 2232 * how much we can send as we were allowed (by peer's receive window) 2233 * to send those retransmitted chunks previously when they are first 2234 * sent. 
If we record how much we have retransmitted but 2235 * unacknowledged using rxt_unacked, then the amount we can now send 2236 * is equal to cwnd minus rxt_unacked. 2237 * 2238 * The field rxt_unacked is incremented when we retransmit a packet 2239 * and decremented when we got a SACK acknowledging something. And 2240 * it is reset when the retransmission timer fires as we assume that 2241 * all packets have left the network after a timeout. If this 2242 * assumption is not true, it means that after a timeout, we can 2243 * get a SACK acknowledging more than rxt_unacked (its value only 2244 * contains what is retransmitted when the timer fires). So 2245 * rxt_unacked will become very big (it is an unsiged int so going 2246 * negative means that the value is huge). This is the reason we 2247 * always send at least 1 MSS bytes. 2248 * 2249 * The reason why we do not have an accurate count is that we 2250 * only know how many packets are outstanding (using the TSN numbers). 2251 * But we do not know how many bytes those packets contain. To 2252 * have an accurate count, we need to walk through the send list. 2253 * As it is not really important to have an accurate count during 2254 * retransmission, we skip this walk to save some time. This should 2255 * not make the retransmission too aggressive to cause congestion. 
2256 */ 2257 if (fp->cwnd <= fp->rxt_unacked) 2258 tot_wnd = fp->sfa_pmss; 2259 else 2260 tot_wnd = fp->cwnd - fp->rxt_unacked; 2261 2262 /* Find the first unack'ed chunk */ 2263 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 2264 sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 2265 2266 if (SCTP_IS_MSG_ABANDONED(meta) || 2267 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) { 2268 continue; 2269 } 2270 2271 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 2272 /* Again, this may not be possible */ 2273 if (!SCTP_CHUNK_ISSENT(mp)) 2274 return; 2275 sdc = (sctp_data_hdr_t *)mp->b_rptr; 2276 if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn) 2277 goto found_msg; 2278 } 2279 } 2280 2281 /* Everything is abandoned... */ 2282 return; 2283 2284 found_msg: 2285 if (!fp->timer_running) 2286 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 2287 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 2288 if (pkt == NULL) { 2289 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed); 2290 return; 2291 } 2292 if ((pkt_len > fp->sfa_pmss) && fp->isv4) { 2293 ipha_t *iph = (ipha_t *)pkt->b_rptr; 2294 2295 /* 2296 * Path MTU is different from path we thought it would 2297 * be when we created chunks, or IP headers have grown. 2298 * Need to clear the DF bit. 2299 */ 2300 iph->ipha_fragment_offset_and_flags = 0; 2301 } 2302 sctp_set_iplen(sctp, pkt); 2303 sctp_add_sendq(sctp, pkt); 2304 2305 /* Check and see if there is more chunk to be retransmitted. */ 2306 if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss || 2307 meta == NULL) 2308 return; 2309 if (mp == NULL) 2310 meta = meta->b_next; 2311 if (meta == NULL) 2312 return; 2313 2314 /* Retransmit another packet if the window allows. 
*/ 2315 for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1; 2316 meta != NULL && burst > 0; meta = meta->b_next, burst--) { 2317 if (mp == NULL) 2318 mp = meta->b_cont; 2319 for (; mp != NULL; mp = mp->b_next) { 2320 /* Again, this may not be possible */ 2321 if (!SCTP_CHUNK_ISSENT(mp)) 2322 return; 2323 if (!SCTP_CHUNK_ISACKED(mp)) 2324 goto found_msg; 2325 } 2326 } 2327 } 2328