1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #define _SUN_TPI_VERSION 2 34 #include <sys/tihdr.h> 35 #include <sys/socket.h> 36 #include <sys/stropts.h> 37 #include <sys/strsun.h> 38 #include <sys/strsubr.h> 39 #include <sys/socketvar.h> 40 /* swilly code in sys/socketvar.h turns off DEBUG */ 41 #ifdef __lint 42 #define DEBUG 43 #endif 44 45 #include <inet/common.h> 46 #include <inet/mi.h> 47 #include <inet/ip.h> 48 #include <inet/ip6.h> 49 #include <inet/sctp_ip.h> 50 #include <inet/ipclassifier.h> 51 52 /* 53 * PR-SCTP comments. 54 * 55 * A message can expire before it gets to the transmit list (i.e. it is still 56 * in the unsent list - unchunked), after it gets to the transmit list, but 57 * before transmission has actually started, or after transmission has begun. 
58 * Accordingly, we check for the status of a message in sctp_chunkify() when 59 * the message is being transferred from the unsent list to the transmit list; 60 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit 61 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted. 62 * When we nuke a message in sctp_chunkify(), all we need to do is take it 63 * out of the unsent list and update sctp_unsent; when a message is deemed 64 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit 65 * list, update sctp_unsent IFF transmission for the message has not yet begun 66 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the 67 * message has started, then we cannot just take it out of the list, we need 68 * to send a Forward TSN chunk to the peer so that the peer can clear its 69 * fragment list for this message. However, we cannot just send the Forward 70 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for 71 * messages preceding this abandoned message. So, we send a Forward TSN 72 * IFF all messages prior to this abandoned message have been SACKd, if not 73 * we defer sending the Forward TSN to sctp_cumack(), which will check for 74 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In 75 * sctp_rexmit() when we check for retransmissions, we need to determine if 76 * the advanced peer ack point can be moved ahead, and if so, send a Forward 77 * TSN to the peer instead of retransmitting the chunk. Note that when 78 * we send a Forward TSN for a message, there may be yet unsent chunks for 79 * this message; we need to mark all such chunks as abandoned, so that 80 * sctp_cumack() can take the message out of the transmit list, additionally 81 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e. 
82 * decremented when a message/chunk is deemed abandoned), sockfs needs to 83 * be notified so that it can adjust its idea of the queued message. 84 */ 85 86 #include "sctp_impl.h" 87 88 static struct kmem_cache *sctp_kmem_ftsn_set_cache; 89 90 /* Padding mblk for SCTP chunks. */ 91 mblk_t *sctp_pad_mp; 92 93 #ifdef DEBUG 94 static boolean_t sctp_verify_chain(mblk_t *, mblk_t *); 95 #endif 96 97 /* 98 * Called to allocate a header mblk when sending data to SCTP. 99 * Data will follow in b_cont of this mblk. 100 */ 101 mblk_t * 102 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen, 103 int flags) 104 { 105 mblk_t *mp; 106 struct T_unitdata_req *tudr; 107 size_t size; 108 int error; 109 110 size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen; 111 size = MAX(size, sizeof (sctp_msg_hdr_t)); 112 if (flags & SCTP_CAN_BLOCK) { 113 mp = allocb_wait(size, BPRI_MED, 0, &error); 114 } else { 115 mp = allocb(size, BPRI_MED); 116 } 117 if (mp) { 118 tudr = (struct T_unitdata_req *)mp->b_rptr; 119 tudr->PRIM_type = T_UNITDATA_REQ; 120 tudr->DEST_length = nlen; 121 tudr->DEST_offset = sizeof (*tudr); 122 tudr->OPT_length = clen; 123 tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) + 124 _TPI_ALIGN_TOPT(nlen)); 125 if (nlen > 0) 126 bcopy(name, tudr + 1, nlen); 127 if (clen > 0) 128 bcopy(control, (char *)tudr + tudr->OPT_offset, clen); 129 mp->b_wptr += (tudr ->OPT_offset + clen); 130 mp->b_datap->db_type = M_PROTO; 131 } 132 return (mp); 133 } 134 135 /*ARGSUSED2*/ 136 int 137 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags) 138 { 139 sctp_faddr_t *fp = NULL; 140 struct T_unitdata_req *tudr; 141 int error = 0; 142 mblk_t *mproto = mp; 143 in6_addr_t *addr; 144 in6_addr_t tmpaddr; 145 uint16_t sid = sctp->sctp_def_stream; 146 uint32_t ppid = sctp->sctp_def_ppid; 147 uint32_t context = sctp->sctp_def_context; 148 uint16_t msg_flags = sctp->sctp_def_flags; 149 sctp_msg_hdr_t *sctp_msg_hdr; 150 uint32_t msg_len = 0; 151 uint32_t timetolive = 
sctp->sctp_def_timetolive; 152 153 ASSERT(DB_TYPE(mproto) == M_PROTO); 154 155 mp = mp->b_cont; 156 ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA); 157 158 tudr = (struct T_unitdata_req *)mproto->b_rptr; 159 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 160 161 /* Get destination address, if specified */ 162 if (tudr->DEST_length > 0) { 163 sin_t *sin; 164 sin6_t *sin6; 165 166 sin = (struct sockaddr_in *) 167 (mproto->b_rptr + tudr->DEST_offset); 168 switch (sin->sin_family) { 169 case AF_INET: 170 if (tudr->DEST_length < sizeof (*sin)) { 171 return (EINVAL); 172 } 173 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr); 174 addr = &tmpaddr; 175 break; 176 case AF_INET6: 177 if (tudr->DEST_length < sizeof (*sin6)) { 178 return (EINVAL); 179 } 180 sin6 = (struct sockaddr_in6 *) 181 (mproto->b_rptr + tudr->DEST_offset); 182 addr = &sin6->sin6_addr; 183 break; 184 default: 185 return (EAFNOSUPPORT); 186 } 187 fp = sctp_lookup_faddr(sctp, addr); 188 if (fp == NULL) { 189 return (EINVAL); 190 } 191 } 192 /* Ancillary Data? 
*/ 193 if (tudr->OPT_length > 0) { 194 struct cmsghdr *cmsg; 195 char *cend; 196 struct sctp_sndrcvinfo *sndrcv; 197 198 cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset); 199 cend = ((char *)cmsg + tudr->OPT_length); 200 ASSERT(cend <= (char *)mproto->b_wptr); 201 202 for (;;) { 203 if ((char *)(cmsg + 1) > cend || 204 ((char *)cmsg + cmsg->cmsg_len) > cend) { 205 break; 206 } 207 if ((cmsg->cmsg_level == IPPROTO_SCTP) && 208 (cmsg->cmsg_type == SCTP_SNDRCV)) { 209 if (cmsg->cmsg_len < 210 (sizeof (*sndrcv) + sizeof (*cmsg))) { 211 return (EINVAL); 212 } 213 sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1); 214 sid = sndrcv->sinfo_stream; 215 msg_flags = sndrcv->sinfo_flags; 216 ppid = sndrcv->sinfo_ppid; 217 context = sndrcv->sinfo_context; 218 timetolive = sndrcv->sinfo_timetolive; 219 break; 220 } 221 if (cmsg->cmsg_len > 0) 222 cmsg = CMSG_NEXT(cmsg); 223 else 224 break; 225 } 226 } 227 if (msg_flags & MSG_ABORT) { 228 if (mp && mp->b_cont) { 229 mblk_t *pump = msgpullup(mp, -1); 230 if (!pump) { 231 return (ENOMEM); 232 } 233 freemsg(mp); 234 mp = pump; 235 mproto->b_cont = mp; 236 } 237 RUN_SCTP(sctp); 238 sctp_user_abort(sctp, mp, B_TRUE); 239 sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL); 240 sctp_clean_death(sctp, ECONNRESET); 241 freemsg(mproto); 242 goto process_sendq; 243 } 244 if (mp == NULL) 245 goto done; 246 247 RUN_SCTP(sctp); 248 249 /* Reject any new data requests if we are shutting down */ 250 if (sctp->sctp_state > SCTPS_ESTABLISHED) { 251 error = EPIPE; 252 goto unlock_done; 253 } 254 255 /* Re-use the mproto to store relevant info. 
*/ 256 ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr)); 257 258 mproto->b_rptr = mproto->b_datap->db_base; 259 mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr); 260 261 sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr; 262 bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr)); 263 sctp_msg_hdr->smh_context = context; 264 sctp_msg_hdr->smh_sid = sid; 265 sctp_msg_hdr->smh_ppid = ppid; 266 sctp_msg_hdr->smh_flags = msg_flags; 267 sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive); 268 sctp_msg_hdr->smh_tob = lbolt64; 269 for (; mp != NULL; mp = mp->b_cont) 270 msg_len += MBLKL(mp); 271 sctp_msg_hdr->smh_msglen = msg_len; 272 273 /* User requested specific destination */ 274 SCTP_SET_CHUNK_DEST(mproto, fp); 275 276 if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED && 277 sid >= sctp->sctp_num_ostr) { 278 /* Send sendfail event */ 279 sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID, 280 B_FALSE); 281 error = EINVAL; 282 goto unlock_done; 283 } 284 285 /* no data */ 286 if (msg_len == 0) { 287 sctp_sendfail_event(sctp, dupmsg(mproto), 288 SCTP_ERR_NO_USR_DATA, B_FALSE); 289 error = EINVAL; 290 goto unlock_done; 291 } 292 293 /* Add it to the unsent list */ 294 if (sctp->sctp_xmit_unsent == NULL) { 295 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto; 296 } else { 297 sctp->sctp_xmit_unsent_tail->b_next = mproto; 298 sctp->sctp_xmit_unsent_tail = mproto; 299 } 300 sctp->sctp_unsent += msg_len; 301 BUMP_LOCAL(sctp->sctp_msgcount); 302 if (sctp->sctp_state == SCTPS_ESTABLISHED) 303 sctp_output(sctp); 304 process_sendq: 305 WAKE_SCTP(sctp); 306 sctp_process_sendq(sctp); 307 return (0); 308 unlock_done: 309 WAKE_SCTP(sctp); 310 done: 311 return (error); 312 } 313 314 void 315 sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send) 316 { 317 mblk_t *mp; 318 mblk_t *chunk_mp; 319 mblk_t *chunk_head; 320 mblk_t *chunk_hdr; 321 mblk_t *chunk_tail = NULL; 322 int count; 323 int chunksize; 324 sctp_data_hdr_t *sdc; 325 mblk_t *mdblk = sctp->sctp_xmit_unsent; 
326 sctp_faddr_t *fp; 327 sctp_faddr_t *fp1; 328 size_t xtralen; 329 sctp_msg_hdr_t *msg_hdr; 330 331 fp = SCTP_CHUNK_DEST(mdblk); 332 if (fp == NULL) 333 fp = sctp->sctp_current; 334 if (fp->isv4) 335 xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra + sizeof (*sdc); 336 else 337 xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra + sizeof (*sdc); 338 count = chunksize = first_len - sizeof (*sdc); 339 nextmsg: 340 chunk_mp = mdblk->b_cont; 341 342 /* 343 * If this partially chunked, we ignore the first_len for now 344 * and use the one already present. For the unchunked bits, we 345 * use the length of the last chunk. 346 */ 347 if (SCTP_IS_MSG_CHUNKED(mdblk)) { 348 int chunk_len; 349 350 ASSERT(chunk_mp->b_next != NULL); 351 mdblk->b_cont = chunk_mp->b_next; 352 chunk_mp->b_next = NULL; 353 SCTP_MSG_CLEAR_CHUNKED(mdblk); 354 mp = mdblk->b_cont; 355 while (mp->b_next != NULL) 356 mp = mp->b_next; 357 chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len); 358 if (fp->sfa_pmss - chunk_len > sizeof (*sdc)) 359 count = chunksize = fp->sfa_pmss - chunk_len; 360 else 361 count = chunksize = fp->sfa_pmss; 362 count = chunksize = count - sizeof (*sdc); 363 } else { 364 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; 365 if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) { 366 sctp->sctp_xmit_unsent = mdblk->b_next; 367 if (sctp->sctp_xmit_unsent == NULL) 368 sctp->sctp_xmit_unsent_tail = NULL; 369 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); 370 sctp->sctp_unsent -= msg_hdr->smh_msglen; 371 mdblk->b_next = NULL; 372 BUMP_LOCAL(sctp->sctp_prsctpdrop); 373 /* 374 * Update ULP the amount of queued data, which is 375 * sent-unack'ed + unsent. 
376 */ 377 if (!SCTP_IS_DETACHED(sctp)) { 378 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, 379 sctp->sctp_unacked + sctp->sctp_unsent); 380 } 381 sctp_sendfail_event(sctp, mdblk, 0, B_FALSE); 382 goto try_next; 383 } 384 mdblk->b_cont = NULL; 385 } 386 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr; 387 nextchunk: 388 chunk_head = chunk_mp; 389 chunk_tail = NULL; 390 391 /* Skip as many mblk's as we need */ 392 while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) { 393 count -= MBLKL(chunk_mp); 394 chunk_tail = chunk_mp; 395 chunk_mp = chunk_mp->b_cont; 396 } 397 /* Split the chain, if needed */ 398 if (chunk_mp != NULL) { 399 if (count > 0) { 400 mblk_t *split_mp = dupb(chunk_mp); 401 402 if (split_mp == NULL) { 403 if (mdblk->b_cont == NULL) { 404 mdblk->b_cont = chunk_head; 405 } else { 406 SCTP_MSG_SET_CHUNKED(mdblk); 407 ASSERT(chunk_head->b_next == NULL); 408 chunk_head->b_next = mdblk->b_cont; 409 mdblk->b_cont = chunk_head; 410 } 411 return; 412 } 413 if (chunk_tail != NULL) { 414 chunk_tail->b_cont = split_mp; 415 chunk_tail = chunk_tail->b_cont; 416 } else { 417 chunk_head = chunk_tail = split_mp; 418 } 419 chunk_tail->b_wptr = chunk_tail->b_rptr + count; 420 chunk_mp->b_rptr = chunk_tail->b_wptr; 421 count = 0; 422 } else if (chunk_tail == NULL) { 423 goto next; 424 } else { 425 chunk_tail->b_cont = NULL; 426 } 427 } 428 /* Alloc chunk hdr, if needed */ 429 if (DB_REF(chunk_head) > 1 || 430 ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) || 431 MBLKHEAD(chunk_head) < sizeof (*sdc)) { 432 if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) { 433 if (mdblk->b_cont == NULL) { 434 if (chunk_mp != NULL) 435 linkb(chunk_head, chunk_mp); 436 mdblk->b_cont = chunk_head; 437 } else { 438 SCTP_MSG_SET_CHUNKED(mdblk); 439 if (chunk_mp != NULL) 440 linkb(chunk_head, chunk_mp); 441 ASSERT(chunk_head->b_next == NULL); 442 chunk_head->b_next = mdblk->b_cont; 443 mdblk->b_cont = chunk_head; 444 } 445 return; 446 } 447 chunk_hdr->b_rptr += xtralen - sizeof (*sdc); 448 
chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc); 449 chunk_hdr->b_cont = chunk_head; 450 } else { 451 chunk_hdr = chunk_head; 452 chunk_hdr->b_rptr -= sizeof (*sdc); 453 } 454 ASSERT(chunk_hdr->b_datap->db_ref == 1); 455 sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr; 456 sdc->sdh_id = CHUNK_DATA; 457 sdc->sdh_flags = 0; 458 sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count); 459 ASSERT(sdc->sdh_len); 460 sdc->sdh_sid = htons(msg_hdr->smh_sid); 461 /* 462 * We defer assigning the SSN just before sending the chunk, else 463 * if we drop the chunk in sctp_get_msg_to_send(), we would need 464 * to send a Forward TSN to let the peer know. Some more comments 465 * about this in sctp_impl.h for SCTP_CHUNK_SENT. 466 */ 467 sdc->sdh_payload_id = msg_hdr->smh_ppid; 468 469 if (mdblk->b_cont == NULL) { 470 mdblk->b_cont = chunk_hdr; 471 SCTP_DATA_SET_BBIT(sdc); 472 } else { 473 mp = mdblk->b_cont; 474 while (mp->b_next != NULL) 475 mp = mp->b_next; 476 mp->b_next = chunk_hdr; 477 } 478 479 bytes_to_send -= (chunksize - count); 480 if (chunk_mp != NULL) { 481 next: 482 count = chunksize = fp->sfa_pmss - sizeof (*sdc); 483 goto nextchunk; 484 } 485 SCTP_DATA_SET_EBIT(sdc); 486 sctp->sctp_xmit_unsent = mdblk->b_next; 487 if (mdblk->b_next == NULL) { 488 sctp->sctp_xmit_unsent_tail = NULL; 489 } 490 mdblk->b_next = NULL; 491 492 if (sctp->sctp_xmit_tail == NULL) { 493 sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk; 494 } else { 495 mp = sctp->sctp_xmit_tail; 496 while (mp->b_next != NULL) 497 mp = mp->b_next; 498 mp->b_next = mdblk; 499 mdblk->b_prev = mp; 500 } 501 try_next: 502 if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) { 503 mdblk = sctp->sctp_xmit_unsent; 504 fp1 = SCTP_CHUNK_DEST(mdblk); 505 if (fp1 == NULL) 506 fp1 = sctp->sctp_current; 507 if (fp == fp1) { 508 size_t len = MBLKL(mdblk->b_cont); 509 if ((count > 0) && 510 ((len > fp->sfa_pmss - sizeof (*sdc)) || 511 (len <= count))) { 512 count -= sizeof (*sdc); 513 count = chunksize = count - 
(count & 0x3); 514 } else { 515 count = chunksize = fp->sfa_pmss - 516 sizeof (*sdc); 517 } 518 } else { 519 if (fp1->isv4) 520 xtralen = sctp->sctp_hdr_len; 521 else 522 xtralen = sctp->sctp_hdr6_len; 523 xtralen += sctp_wroff_xtra + sizeof (*sdc); 524 count = chunksize = fp1->sfa_pmss - sizeof (*sdc); 525 fp = fp1; 526 } 527 goto nextmsg; 528 } 529 } 530 531 void 532 sctp_free_msg(mblk_t *ump) 533 { 534 mblk_t *mp, *nmp; 535 536 for (mp = ump->b_cont; mp; mp = nmp) { 537 nmp = mp->b_next; 538 mp->b_next = mp->b_prev = NULL; 539 freemsg(mp); 540 } 541 ASSERT(!ump->b_prev); 542 ump->b_next = NULL; 543 freeb(ump); 544 } 545 546 mblk_t * 547 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen, 548 int *error) 549 { 550 int hdrlen; 551 char *hdr; 552 int isv4 = fp->isv4; 553 554 if (error != NULL) 555 *error = 0; 556 557 if (isv4) { 558 hdrlen = sctp->sctp_hdr_len; 559 hdr = sctp->sctp_iphc; 560 } else { 561 hdrlen = sctp->sctp_hdr6_len; 562 hdr = sctp->sctp_iphc6; 563 } 564 /* 565 * A null fp->ire could mean that the address is 'down'. Similarly, 566 * it is possible that the address went down, we tried to send an 567 * heartbeat and ended up setting fp->saddr as unspec because we 568 * didn't have any usable source address. In either case 569 * sctp_get_ire() will try find an IRE, if available, and set 570 * the source address, if needed. If we still don't have any 571 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and 572 * we return EHOSTUNREACH. 573 */ 574 if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) { 575 sctp_get_ire(sctp, fp); 576 if (fp->state == SCTP_FADDRS_UNREACH) { 577 if (error != NULL) 578 *error = EHOSTUNREACH; 579 return (NULL); 580 } 581 } 582 /* Copy in IP header. 
*/ 583 if ((mp->b_rptr - mp->b_datap->db_base) < 584 (sctp_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 || 585 !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) { 586 mblk_t *nmp; 587 588 /* 589 * This can happen if IP headers are adjusted after 590 * data was moved into chunks, or during retransmission, 591 * or things like snoop is running. 592 */ 593 nmp = allocb_cred(sctp_wroff_xtra + hdrlen + sacklen, 594 CONN_CRED(sctp->sctp_connp)); 595 if (nmp == NULL) { 596 if (error != NULL) 597 *error = ENOMEM; 598 return (NULL); 599 } 600 nmp->b_rptr += sctp_wroff_xtra; 601 nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen; 602 nmp->b_cont = mp; 603 mp = nmp; 604 } else { 605 mp->b_rptr -= (hdrlen + sacklen); 606 mblk_setcred(mp, CONN_CRED(sctp->sctp_connp)); 607 } 608 bcopy(hdr, mp->b_rptr, hdrlen); 609 if (sacklen) { 610 sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen); 611 } 612 if (fp != sctp->sctp_current) { 613 /* change addresses in header */ 614 if (isv4) { 615 ipha_t *iph = (ipha_t *)mp->b_rptr; 616 617 IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst); 618 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) { 619 IN6_V4MAPPED_TO_IPADDR(&fp->saddr, 620 iph->ipha_src); 621 } else if (sctp->sctp_bound_to_all) { 622 iph->ipha_src = INADDR_ANY; 623 } 624 } else { 625 ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr; 626 if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) { 627 ((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr; 628 } else if (sctp->sctp_bound_to_all) { 629 V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src); 630 } 631 } 632 } 633 /* 634 * IP will not free this IRE if it is condemned. SCTP needs to 635 * free it. 636 */ 637 if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) { 638 IRE_REFRELE_NOTR(fp->ire); 639 fp->ire = NULL; 640 } 641 642 /* Stash the conn and ire ptr info for IP */ 643 SCTP_STASH_IPINFO(mp, fp->ire); 644 645 return (mp); 646 } 647 648 /* 649 * SCTP requires every chunk to be padded so that the total length 650 * is a multiple of SCTP_ALIGN. 
This function returns a mblk with 651 * the specified pad length. 652 */ 653 static mblk_t * 654 sctp_get_padding(int pad) 655 { 656 mblk_t *fill; 657 658 ASSERT(pad < SCTP_ALIGN); 659 if ((fill = dupb(sctp_pad_mp)) != NULL) { 660 fill->b_wptr += pad; 661 return (fill); 662 } 663 664 /* 665 * The memory saving path of reusing the sctp_pad_mp 666 * fails may be because it has been dupb() too 667 * many times (DBLK_REFMAX). Use the memory consuming 668 * path of allocating the pad mblk. 669 */ 670 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) { 671 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */ 672 *(int32_t *)fill->b_rptr = 0; 673 fill->b_wptr += pad; 674 } 675 return (fill); 676 } 677 678 static mblk_t * 679 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp) 680 { 681 mblk_t *meta; 682 mblk_t *start_mp = NULL; 683 mblk_t *end_mp = NULL; 684 mblk_t *mp, *nmp; 685 mblk_t *fill; 686 sctp_data_hdr_t *sdh; 687 int msglen; 688 int extra; 689 sctp_msg_hdr_t *msg_hdr; 690 sctp_faddr_t *old_fp = NULL; 691 sctp_faddr_t *chunk_fp; 692 693 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) { 694 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 695 if (SCTP_IS_MSG_ABANDONED(meta) || 696 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 697 continue; 698 } 699 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 700 if (SCTP_CHUNK_WANT_REXMIT(mp)) { 701 /* 702 * Use the same peer address to do fast 703 * retransmission. If the original peer 704 * address is dead, switch to the current 705 * one. Record the old one so that we 706 * will pick the chunks sent to the old 707 * one for fast retransmission. 
708 */ 709 chunk_fp = SCTP_CHUNK_DEST(mp); 710 if (*fp == NULL) { 711 *fp = chunk_fp; 712 if ((*fp)->state != SCTP_FADDRS_ALIVE) { 713 old_fp = *fp; 714 *fp = sctp->sctp_current; 715 } 716 } else if (old_fp == NULL && *fp != chunk_fp) { 717 continue; 718 } else if (old_fp != NULL && 719 old_fp != chunk_fp) { 720 continue; 721 } 722 723 sdh = (sctp_data_hdr_t *)mp->b_rptr; 724 msglen = ntohs(sdh->sdh_len); 725 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) { 726 extra = SCTP_ALIGN - extra; 727 } 728 729 /* 730 * We still return at least the first message 731 * even if that message cannot fit in as 732 * PMTU may have changed. 733 */ 734 if (*total + msglen + extra > 735 (*fp)->sfa_pmss && start_mp != NULL) { 736 return (start_mp); 737 } 738 if ((nmp = dupmsg(mp)) == NULL) 739 return (start_mp); 740 if (extra > 0) { 741 fill = sctp_get_padding(extra); 742 if (fill != NULL) { 743 linkb(nmp, fill); 744 } else { 745 return (start_mp); 746 } 747 } 748 BUMP_MIB(&sctp_mib, sctpOutFastRetrans); 749 BUMP_LOCAL(sctp->sctp_rxtchunks); 750 SCTP_CHUNK_CLEAR_REXMIT(mp); 751 if (start_mp == NULL) { 752 start_mp = nmp; 753 } else { 754 linkb(end_mp, nmp); 755 } 756 end_mp = nmp; 757 *total += msglen + extra; 758 dprint(2, ("sctp_find_fast_rexmit_mblks: " 759 "tsn %x\n", sdh->sdh_tsn)); 760 } 761 } 762 } 763 /* Clear the flag as there is no more message to be fast rexmitted. */ 764 sctp->sctp_chk_fast_rexmit = B_FALSE; 765 return (start_mp); 766 } 767 768 /* A debug function just to make sure that a mblk chain is not broken */ 769 #ifdef DEBUG 770 static boolean_t 771 sctp_verify_chain(mblk_t *head, mblk_t *tail) 772 { 773 mblk_t *mp = head; 774 775 if (head == NULL || tail == NULL) 776 return (B_TRUE); 777 while (mp != NULL) { 778 if (mp == tail) 779 return (B_TRUE); 780 mp = mp->b_next; 781 } 782 return (B_FALSE); 783 } 784 #endif 785 786 /* 787 * Gets the next unsent chunk to transmit. Messages that are abandoned are 788 * skipped. 
A message can be abandoned if it has a non-zero timetolive and 789 * transmission has not yet started or if it is a partially reliable 790 * message and its time is up (assuming we are PR-SCTP aware). 791 * 'cansend' is used to determine if need to try and chunkify messages from 792 * the unsent list, if any, and also as an input to sctp_chunkify() if so. 793 * When called from sctp_rexmit(), we don't want to chunkify, so 'cansend' 794 * will be set to 0. 795 */ 796 mblk_t * 797 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error, 798 int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp) 799 { 800 mblk_t *mp1; 801 sctp_msg_hdr_t *msg_hdr; 802 mblk_t *tmp_meta; 803 sctp_faddr_t *fp1; 804 805 ASSERT(error != NULL && mp != NULL); 806 *error = 0; 807 808 ASSERT(sctp->sctp_current != NULL); 809 810 chunkified: 811 while (meta != NULL) { 812 tmp_meta = meta->b_next; 813 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 814 mp1 = meta->b_cont; 815 if (SCTP_IS_MSG_ABANDONED(meta)) 816 goto next_msg; 817 if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) { 818 while (mp1 != NULL) { 819 if (SCTP_CHUNK_CANSEND(mp1)) { 820 *mp = mp1; 821 #ifdef DEBUG 822 ASSERT(sctp_verify_chain( 823 sctp->sctp_xmit_head, meta)); 824 #endif 825 return (meta); 826 } 827 mp1 = mp1->b_next; 828 } 829 goto next_msg; 830 } 831 /* 832 * If we come here and the first chunk is sent, then we 833 * we are PR-SCTP aware, in which case if the cumulative 834 * TSN has moved upto or beyond the first chunk (which 835 * means all the previous messages have been cumulative 836 * SACK'd), then we send a Forward TSN with the last 837 * chunk that was sent in this message. If we can't send 838 * a Forward TSN because previous non-abandoned messages 839 * have not been acked then we will defer the Forward TSN 840 * to sctp_rexmit() or sctp_cumack(). 
841 */ 842 if (SCTP_CHUNK_ISSENT(mp1)) { 843 *error = sctp_check_abandoned_msg(sctp, meta); 844 if (*error != 0) { 845 #ifdef DEBUG 846 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, 847 sctp->sctp_xmit_tail)); 848 #endif 849 return (NULL); 850 } 851 goto next_msg; 852 } 853 BUMP_LOCAL(sctp->sctp_prsctpdrop); 854 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen); 855 if (meta->b_prev == NULL) { 856 ASSERT(sctp->sctp_xmit_head == meta); 857 sctp->sctp_xmit_head = tmp_meta; 858 if (sctp->sctp_xmit_tail == meta) 859 sctp->sctp_xmit_tail = tmp_meta; 860 meta->b_next = NULL; 861 if (tmp_meta != NULL) 862 tmp_meta->b_prev = NULL; 863 } else if (meta->b_next == NULL) { 864 if (sctp->sctp_xmit_tail == meta) 865 sctp->sctp_xmit_tail = meta->b_prev; 866 meta->b_prev->b_next = NULL; 867 meta->b_prev = NULL; 868 } else { 869 meta->b_prev->b_next = tmp_meta; 870 tmp_meta->b_prev = meta->b_prev; 871 if (sctp->sctp_xmit_tail == meta) 872 sctp->sctp_xmit_tail = tmp_meta; 873 meta->b_prev = NULL; 874 meta->b_next = NULL; 875 } 876 sctp->sctp_unsent -= msg_hdr->smh_msglen; 877 /* 878 * Update ULP the amount of queued data, which is 879 * sent-unack'ed + unsent. 880 */ 881 if (!SCTP_IS_DETACHED(sctp)) { 882 sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, 883 sctp->sctp_unacked + sctp->sctp_unsent); 884 } 885 sctp_sendfail_event(sctp, meta, 0, B_TRUE); 886 next_msg: 887 meta = tmp_meta; 888 } 889 /* chunkify, if needed */ 890 if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) { 891 ASSERT(sctp->sctp_unsent > 0); 892 if (fp == NULL) { 893 fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent); 894 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 895 fp = sctp->sctp_current; 896 } else { 897 /* 898 * If user specified destination, try to honor that. 
899 */ 900 fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent); 901 if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE && 902 fp1 != fp) { 903 goto chunk_done; 904 } 905 } 906 sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend); 907 if ((meta = sctp->sctp_xmit_tail) == NULL) 908 goto chunk_done; 909 /* 910 * sctp_chunkify() won't advance sctp_xmit_tail if it adds 911 * new chunk(s) to the tail, so we need to skip the 912 * sctp_xmit_tail, which would have already been processed. 913 * This could happen when there is unacked chunks, but 914 * nothing new to send. 915 * When sctp_chunkify() is called when the transmit queue 916 * is empty then we need to start from sctp_xmit_tail. 917 */ 918 if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) { 919 #ifdef DEBUG 920 mp1 = sctp->sctp_xmit_tail->b_cont; 921 while (mp1 != NULL) { 922 ASSERT(!SCTP_CHUNK_CANSEND(mp1)); 923 mp1 = mp1->b_next; 924 } 925 #endif 926 if ((meta = sctp->sctp_xmit_tail->b_next) == NULL) 927 goto chunk_done; 928 } 929 goto chunkified; 930 } 931 chunk_done: 932 #ifdef DEBUG 933 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail)); 934 #endif 935 return (NULL); 936 } 937 938 void 939 sctp_fast_rexmit(sctp_t *sctp) 940 { 941 mblk_t *mp, *head; 942 int pktlen = 0; 943 sctp_faddr_t *fp = NULL; 944 945 ASSERT(sctp->sctp_xmit_head != NULL); 946 mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp); 947 if (mp == NULL) { 948 SCTP_KSTAT(sctp_fr_not_found); 949 return; 950 } 951 if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) { 952 freemsg(mp); 953 SCTP_KSTAT(sctp_fr_add_hdr); 954 return; 955 } 956 if ((pktlen > fp->sfa_pmss) && fp->isv4) { 957 ipha_t *iph = (ipha_t *)head->b_rptr; 958 959 iph->ipha_fragment_offset_and_flags = 0; 960 } 961 962 sctp_set_iplen(sctp, head); 963 sctp_add_sendq(sctp, head); 964 sctp->sctp_active = fp->lastactive = lbolt64; 965 } 966 967 void 968 sctp_output(sctp_t *sctp) 969 { 970 mblk_t *mp = NULL; 971 mblk_t *nmp; 972 mblk_t *head; 973 mblk_t *meta 
= sctp->sctp_xmit_tail; 974 mblk_t *fill = NULL; 975 uint16_t chunklen; 976 uint32_t cansend; 977 int32_t seglen; 978 int32_t xtralen; 979 int32_t sacklen; 980 int32_t pad = 0; 981 int32_t pathmax; 982 int extra; 983 int64_t now = lbolt64; 984 sctp_faddr_t *fp; 985 sctp_faddr_t *lfp; 986 sctp_data_hdr_t *sdc; 987 int error; 988 boolean_t notsent = B_TRUE; 989 990 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 991 sacklen = 0; 992 } else { 993 /* send a SACK chunk */ 994 sacklen = sizeof (sctp_chunk_hdr_t) + 995 sizeof (sctp_sack_chunk_t) + 996 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 997 lfp = sctp->sctp_lastdata; 998 ASSERT(lfp != NULL); 999 if (lfp->state != SCTP_FADDRS_ALIVE) 1000 lfp = sctp->sctp_current; 1001 } 1002 1003 cansend = sctp->sctp_frwnd; 1004 if (sctp->sctp_unsent < cansend) 1005 cansend = sctp->sctp_unsent; 1006 if ((cansend < sctp->sctp_current->sfa_pmss / 2) && 1007 sctp->sctp_unacked && 1008 (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) && 1009 !sctp->sctp_ndelay) { 1010 head = NULL; 1011 fp = sctp->sctp_current; 1012 goto unsent_data; 1013 } 1014 if (meta != NULL) 1015 mp = meta->b_cont; 1016 while (cansend > 0) { 1017 pad = 0; 1018 1019 /* 1020 * Find first segment eligible for transmit. 1021 */ 1022 while (mp != NULL) { 1023 if (SCTP_CHUNK_CANSEND(mp)) 1024 break; 1025 mp = mp->b_next; 1026 } 1027 if (mp == NULL) { 1028 meta = sctp_get_msg_to_send(sctp, &mp, 1029 meta == NULL ? NULL : meta->b_next, &error, sacklen, 1030 cansend, NULL); 1031 if (error != 0 || meta == NULL) { 1032 head = NULL; 1033 fp = sctp->sctp_current; 1034 goto unsent_data; 1035 } 1036 sctp->sctp_xmit_tail = meta; 1037 } 1038 1039 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1040 seglen = ntohs(sdc->sdh_len); 1041 xtralen = sizeof (*sdc); 1042 chunklen = seglen - xtralen; 1043 1044 /* 1045 * Check rwnd. 
1046 */ 1047 if (chunklen > cansend) { 1048 head = NULL; 1049 fp = SCTP_CHUNK_DEST(meta); 1050 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1051 fp = sctp->sctp_current; 1052 goto unsent_data; 1053 } 1054 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1055 extra = SCTP_ALIGN - extra; 1056 1057 /* 1058 * Pick destination address, and check cwnd. 1059 */ 1060 if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) && 1061 (seglen + sacklen + extra <= lfp->sfa_pmss)) { 1062 /* 1063 * Only include SACK chunk if it can be bundled 1064 * with a data chunk, and sent to sctp_lastdata. 1065 */ 1066 pathmax = lfp->cwnd - lfp->suna; 1067 1068 fp = lfp; 1069 if ((nmp = dupmsg(mp)) == NULL) { 1070 head = NULL; 1071 goto unsent_data; 1072 } 1073 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1074 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, 1075 &error); 1076 if (head == NULL) { 1077 /* 1078 * If none of the source addresses are 1079 * available (i.e error == EHOSTUNREACH), 1080 * pretend we have sent the data. We will 1081 * eventually time out trying to retramsmit 1082 * the data if the interface never comes up. 1083 * If we have already sent some stuff (i.e., 1084 * notsent is B_FALSE) then we are fine, else 1085 * just mark this packet as sent. 1086 */ 1087 if (notsent && error == EHOSTUNREACH) { 1088 SCTP_CHUNK_SENT(sctp, mp, sdc, 1089 fp, chunklen, meta); 1090 } 1091 freemsg(nmp); 1092 SCTP_KSTAT(sctp_output_failed); 1093 goto unsent_data; 1094 } 1095 seglen += sacklen; 1096 xtralen += sacklen; 1097 sacklen = 0; 1098 } else { 1099 fp = SCTP_CHUNK_DEST(meta); 1100 if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE) 1101 fp = sctp->sctp_current; 1102 /* 1103 * If we haven't sent data to this destination for 1104 * a while, do slow start again. 
1105 */ 1106 if (now - fp->lastactive > fp->rto) { 1107 fp->cwnd = sctp_slow_start_after_idle * 1108 fp->sfa_pmss; 1109 } 1110 1111 pathmax = fp->cwnd - fp->suna; 1112 if (seglen + extra > pathmax) { 1113 head = NULL; 1114 goto unsent_data; 1115 } 1116 if ((nmp = dupmsg(mp)) == NULL) { 1117 head = NULL; 1118 goto unsent_data; 1119 } 1120 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1121 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error); 1122 if (head == NULL) { 1123 /* 1124 * If none of the source addresses are 1125 * available (i.e error == EHOSTUNREACH), 1126 * pretend we have sent the data. We will 1127 * eventually time out trying to retramsmit 1128 * the data if the interface never comes up. 1129 * If we have already sent some stuff (i.e., 1130 * notsent is B_FALSE) then we are fine, else 1131 * just mark this packet as sent. 1132 */ 1133 if (notsent && error == EHOSTUNREACH) { 1134 SCTP_CHUNK_SENT(sctp, mp, sdc, 1135 fp, chunklen, meta); 1136 } 1137 freemsg(nmp); 1138 SCTP_KSTAT(sctp_output_failed); 1139 goto unsent_data; 1140 } 1141 } 1142 fp->lastactive = now; 1143 if (pathmax > fp->sfa_pmss) 1144 pathmax = fp->sfa_pmss; 1145 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1146 mp = mp->b_next; 1147 1148 /* Use this chunk to measure RTT? */ 1149 if (sctp->sctp_out_time == 0) { 1150 sctp->sctp_out_time = now; 1151 sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1; 1152 ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn)); 1153 } 1154 if (extra > 0) { 1155 fill = sctp_get_padding(extra); 1156 if (fill != NULL) { 1157 linkb(head, fill); 1158 pad = extra; 1159 seglen += extra; 1160 } else { 1161 goto unsent_data; 1162 } 1163 } 1164 /* See if we can bundle more. */ 1165 while (seglen < pathmax) { 1166 int32_t new_len; 1167 int32_t new_xtralen; 1168 1169 while (mp != NULL) { 1170 if (SCTP_CHUNK_CANSEND(mp)) 1171 break; 1172 mp = mp->b_next; 1173 } 1174 if (mp == NULL) { 1175 meta = sctp_get_msg_to_send(sctp, &mp, 1176 meta->b_next, &error, seglen, 1177 (seglen - xtralen) >= cansend ? 
0 : 1178 cansend - seglen, fp); 1179 if (error != 0 || meta == NULL) 1180 break; 1181 sctp->sctp_xmit_tail = meta; 1182 } 1183 ASSERT(mp != NULL); 1184 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) && 1185 fp != SCTP_CHUNK_DEST(meta)) { 1186 break; 1187 } 1188 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1189 chunklen = ntohs(sdc->sdh_len); 1190 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0) 1191 extra = SCTP_ALIGN - extra; 1192 1193 new_len = seglen + chunklen; 1194 new_xtralen = xtralen + sizeof (*sdc); 1195 chunklen -= sizeof (*sdc); 1196 1197 if (new_len - new_xtralen > cansend || 1198 new_len + extra > pathmax) { 1199 break; 1200 } 1201 if ((nmp = dupmsg(mp)) == NULL) 1202 break; 1203 if (extra > 0) { 1204 fill = sctp_get_padding(extra); 1205 if (fill != NULL) { 1206 pad += extra; 1207 new_len += extra; 1208 linkb(nmp, fill); 1209 } else { 1210 freemsg(nmp); 1211 break; 1212 } 1213 } 1214 seglen = new_len; 1215 xtralen = new_xtralen; 1216 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1217 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1218 linkb(head, nmp); 1219 mp = mp->b_next; 1220 } 1221 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1222 ipha_t *iph = (ipha_t *)head->b_rptr; 1223 1224 /* 1225 * Path MTU is different from what we thought it would 1226 * be when we created chunks, or IP headers have grown. 1227 * Need to clear the DF bit. 
1228 */ 1229 iph->ipha_fragment_offset_and_flags = 0; 1230 } 1231 /* xmit segment */ 1232 ASSERT(cansend >= seglen - pad - xtralen); 1233 cansend -= (seglen - pad - xtralen); 1234 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x " 1235 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n", 1236 seglen - xtralen, ntohl(sdc->sdh_tsn), 1237 ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd, 1238 cansend, sctp->sctp_lastack_rxd)); 1239 sctp_set_iplen(sctp, head); 1240 sctp_add_sendq(sctp, head); 1241 /* arm rto timer (if not set) */ 1242 if (!fp->timer_running) 1243 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1244 notsent = B_FALSE; 1245 } 1246 sctp->sctp_active = now; 1247 return; 1248 unsent_data: 1249 /* arm persist timer (if rto timer not set) */ 1250 if (!fp->timer_running) 1251 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1252 if (head != NULL) 1253 freemsg(head); 1254 } 1255 1256 /* 1257 * The following two functions initialize and destroy the cache 1258 * associated with the sets used for PR-SCTP. 1259 */ 1260 void 1261 sctp_ftsn_sets_init(void) 1262 { 1263 sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache", 1264 sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL, 1265 NULL, 0); 1266 } 1267 1268 void 1269 sctp_ftsn_sets_fini(void) 1270 { 1271 kmem_cache_destroy(sctp_kmem_ftsn_set_cache); 1272 } 1273 1274 1275 /* Free PR-SCTP sets */ 1276 void 1277 sctp_free_ftsn_set(sctp_ftsn_set_t *s) 1278 { 1279 sctp_ftsn_set_t *p; 1280 1281 while (s != NULL) { 1282 p = s->next; 1283 s->next = NULL; 1284 kmem_cache_free(sctp_kmem_ftsn_set_cache, s); 1285 s = p; 1286 } 1287 } 1288 1289 /* 1290 * Given a message meta block, meta, this routine creates or modifies 1291 * the set that will be used to generate a Forward TSN chunk. If the 1292 * entry for stream id, sid, for this message already exists, the 1293 * sequence number, ssn, is updated if it is greater than the existing 1294 * one. 
If an entry for this sid does not exist, one is created if 1295 * the size does not exceed fp->sfa_pmss. We return false in case 1296 * or an error. 1297 */ 1298 boolean_t 1299 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta, 1300 uint_t *nsets, uint32_t *slen) 1301 { 1302 sctp_ftsn_set_t *p; 1303 sctp_msg_hdr_t *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr; 1304 uint16_t sid = htons(msg_hdr->smh_sid); 1305 /* msg_hdr->smh_ssn is already in NBO */ 1306 uint16_t ssn = msg_hdr->smh_ssn; 1307 1308 ASSERT(s != NULL && nsets != NULL); 1309 ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL)); 1310 1311 if (*s == NULL) { 1312 ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss); 1313 *s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP); 1314 if (*s == NULL) 1315 return (B_FALSE); 1316 (*s)->ftsn_entries.ftsn_sid = sid; 1317 (*s)->ftsn_entries.ftsn_ssn = ssn; 1318 (*s)->next = NULL; 1319 *nsets = 1; 1320 *slen += sizeof (uint32_t); 1321 return (B_TRUE); 1322 } 1323 for (p = *s; p->next != NULL; p = p->next) { 1324 if (p->ftsn_entries.ftsn_sid == sid) { 1325 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1326 p->ftsn_entries.ftsn_ssn = ssn; 1327 return (B_TRUE); 1328 } 1329 } 1330 /* the last one */ 1331 if (p->ftsn_entries.ftsn_sid == sid) { 1332 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn)) 1333 p->ftsn_entries.ftsn_ssn = ssn; 1334 } else { 1335 if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss) 1336 return (B_FALSE); 1337 p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, 1338 KM_NOSLEEP); 1339 if (p->next == NULL) 1340 return (B_FALSE); 1341 p = p->next; 1342 p->ftsn_entries.ftsn_sid = sid; 1343 p->ftsn_entries.ftsn_ssn = ssn; 1344 p->next = NULL; 1345 (*nsets)++; 1346 *slen += sizeof (uint32_t); 1347 } 1348 return (B_TRUE); 1349 } 1350 1351 /* 1352 * Given a set of stream id - sequence number pairs, this routing creates 1353 * a Forward TSN chunk. 
The cumulative TSN (advanced peer ack point) 1354 * for the chunk is obtained from sctp->sctp_adv_pap. The caller 1355 * will add the IP/SCTP header. 1356 */ 1357 mblk_t * 1358 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets, 1359 uint_t nsets, uint32_t seglen) 1360 { 1361 mblk_t *ftsn_mp; 1362 sctp_chunk_hdr_t *ch_hdr; 1363 uint32_t *advtsn; 1364 uint16_t schlen; 1365 size_t xtralen; 1366 ftsn_entry_t *ftsn_entry; 1367 1368 seglen += sizeof (sctp_chunk_hdr_t); 1369 if (fp->isv4) 1370 xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra; 1371 else 1372 xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra; 1373 ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp)); 1374 if (ftsn_mp == NULL) 1375 return (NULL); 1376 ftsn_mp->b_rptr += xtralen; 1377 ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen; 1378 1379 ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr; 1380 ch_hdr->sch_id = CHUNK_FORWARD_TSN; 1381 ch_hdr->sch_flags = 0; 1382 /* 1383 * The cast here should not be an issue since seglen is 1384 * the length of the Forward TSN chunk. 1385 */ 1386 schlen = (uint16_t)seglen; 1387 U16_TO_ABE16(schlen, &(ch_hdr->sch_len)); 1388 1389 advtsn = (uint32_t *)(ch_hdr + 1); 1390 U32_TO_ABE32(sctp->sctp_adv_pap, advtsn); 1391 ftsn_entry = (ftsn_entry_t *)(advtsn + 1); 1392 while (nsets > 0) { 1393 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr); 1394 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid; 1395 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn; 1396 ftsn_entry++; 1397 sets = sets->next; 1398 nsets--; 1399 } 1400 return (ftsn_mp); 1401 } 1402 1403 /* 1404 * Given a starting message, the routine steps through all the 1405 * messages whose TSN is less than sctp->sctp_adv_pap and creates 1406 * ftsn sets. The ftsn sets is then used to create an Forward TSN 1407 * chunk. All the messages, that have chunks that are included in the 1408 * ftsn sets, are flagged abandonded. 
If a message is partially sent
 * and is deemed abandoned, all remaining unsent chunks are marked
 * abandoned and are deducted from sctp_unsent.
 *
 * meta/mp are the message and chunk to start from.  On return *nmp is
 * the packet to send (Forward TSN chunk with protocol headers, plus
 * room for a piggybacked SACK when one fits) and *seglen is the set
 * length plus any SACK length.  *nmp is set to NULL if the chunk or
 * its headers could not be built; it is left untouched if no message
 * was covered at all.
 */
void
sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
    sctp_faddr_t *fp, uint32_t *seglen)
{
	mblk_t		*mp1 = mp;
	mblk_t		*mp_head = mp;
	mblk_t		*meta_head = meta;
	mblk_t		*head;
	sctp_ftsn_set_t	*sets = NULL;
	uint_t		nsets = 0;
	uint16_t	clen;
	sctp_data_hdr_t	*sdc;
	uint32_t	sacklen;
	uint32_t	adv_pap = sctp->sctp_adv_pap;
	uint32_t	unsent = 0;
	boolean_t	ubit;

	/* The chunk body always carries the 32-bit cumulative TSN. */
	*seglen = sizeof (uint32_t);

	/*
	 * First pass: collect the (sid, ssn) sets for every message
	 * covered by sctp_adv_pap.  Nothing is modified in the transmit
	 * list yet, so a failure below needs no undo.
	 */
	sdc = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		/*
		 * Skip adding FTSN sets for un-ordered messages as they do
		 * not have SSNs.
		 */
		ubit = SCTP_DATA_GET_UBIT(sdc);
		if (!ubit &&
		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
			/*
			 * Could not add this message; pull the advanced
			 * peer ack point back to the last TSN that was
			 * covered.  meta is cleared so that the
			 * "nothing covered" test below does not trigger.
			 */
			meta = NULL;
			sctp->sctp_adv_pap = adv_pap;
			goto ftsn_done;
		}
		/* Track the TSN of the last sent chunk walked so far. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			adv_pap = ntohl(sdc->sdh_tsn);
			mp1 = mp1->b_next;
		}
		meta = meta->b_next;
		if (meta != NULL) {
			mp1 = meta->b_cont;
			/* Stop at a message that has not been started. */
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
ftsn_done:
	/*
	 * Can't compare with sets == NULL, since we don't add any
	 * sets for un-ordered messages.
	 */
	if (meta == meta_head)
		return;
	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
	/* The sets were copied into the chunk (or it failed); drop them. */
	sctp_free_ftsn_set(sets);
	if (*nmp == NULL)
		return;
	/* Work out whether a SACK is pending and fits alongside. */
	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
		sacklen = 0;
	} else {
		sacklen = sizeof (sctp_chunk_hdr_t) +
		    sizeof (sctp_sack_chunk_t) +
		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
			/* piggybacked SACK doesn't fit */
			sacklen = 0;
		} else {
			/* The SACK goes to the address data last came from. */
			fp = sctp->sctp_lastdata;
		}
	}
	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
	if (head == NULL) {
		freemsg(*nmp);
		*nmp = NULL;
		SCTP_KSTAT(sctp_send_ftsn_failed);
		return;
	}
	*seglen += sacklen;
	*nmp = head;

	/*
	 * XXXNeed to optimise this, the reason it is done here is so
	 * that we don't have to undo in case of failure.
	 *
	 * Second pass over the same messages: flag them abandoned,
	 * re-mark sent-but-unacked chunks as sent to fp and abandon the
	 * chunks that were never sent.
	 */
	mp1 = mp_head;
	sdc = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta_head != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		if (!SCTP_IS_MSG_ABANDONED(meta_head))
			SCTP_MSG_SET_ABANDONED(meta_head);
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ISACKED(mp1)) {
				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
				    meta_head);
			}
			mp1 = mp1->b_next;
		}
		/* Whatever is left of this message was never sent. */
		while (mp1 != NULL) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ABANDONED(mp1)) {
				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_ABANDON_CHUNK(mp1);
			}
			mp1 = mp1->b_next;
		}
		meta_head = meta_head->b_next;
		if (meta_head != NULL) {
			mp1 = meta_head->b_cont;
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
	if (unsent > 0) {
		ASSERT(sctp->sctp_unsent >= unsent);
		sctp->sctp_unsent -= unsent;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp)) {
			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
			    sctp->sctp_unacked + sctp->sctp_unsent);
		}
	}
}

/*
 * This function steps through messages starting at meta and checks if
 * the message is abandoned.  It stops when it hits an unsent chunk or
 * a message that has all its chunk acked.  This is the only place
 * where the sctp_adv_pap is moved forward to indicate abandoned
 * messages.
 *
 * mp is the first chunk of meta to look at; it must be non-NULL and
 * beyond the last cumulative ack received.  If meta itself is neither
 * abandoned nor due to be abandoned, sctp_adv_pap is left alone.
 */
void
sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
{
	uint32_t	tsn = sctp->sctp_adv_pap;
	sctp_data_hdr_t	*sdc;
	sctp_msg_hdr_t	*msg_hdr;

	ASSERT(mp != NULL);
	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
	if (!SCTP_IS_MSG_ABANDONED(meta) &&
	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
		return;
	}
	while (meta != NULL) {
		/* tsn follows the last sent chunk of an abandoned message. */
		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			tsn = ntohl(sdc->sdh_tsn);
			mp = mp->b_next;
		}
		/* Hit an unsent chunk: cannot advance any further. */
		if (mp != NULL)
			break;
		/*
		 * We continue checking for successive messages only if there
		 * is a chunk marked for retransmission. Else, we might
		 * end up sending FTSN prematurely for chunks that have been
		 * sent, but not yet acked.
		 */
		if ((meta = meta->b_next) != NULL) {
			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
			if (!SCTP_IS_MSG_ABANDONED(meta) &&
			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
				break;
			}
			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
				if (!SCTP_CHUNK_ISSENT(mp)) {
					sctp->sctp_adv_pap = tsn;
					return;
				}
				if (SCTP_CHUNK_WANT_REXMIT(mp))
					break;
			}
			/* No chunk of this message wants retransmission. */
			if (mp == NULL)
				break;
		}
	}
	sctp->sctp_adv_pap = tsn;
}


/*
 * Determine if we should bundle a data chunk with the chunk being
 * retransmitted.  We bundle if
 *
 * - the chunk is sent to the same destination and unack'ed.
 *
 * OR
 *
 * - the chunk is unsent, i.e. new data.
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED((mp)) &&					\
	((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) &&	\
	!SCTP_CHUNK_ISACKED(mp))) ||					\
	(((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \
	SCTP_CHUNK_FLAG_SENT)))

/*
 * Retransmit first segment which hasn't been acked with cumtsn or send
 * a Forward TSN chunk, if appropriate.
1620 */ 1621 void 1622 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) 1623 { 1624 mblk_t *mp; 1625 mblk_t *nmp = NULL; 1626 mblk_t *head; 1627 mblk_t *meta = sctp->sctp_xmit_head; 1628 mblk_t *fill; 1629 uint32_t seglen = 0; 1630 uint32_t sacklen; 1631 uint16_t chunklen; 1632 int extra; 1633 sctp_data_hdr_t *sdc; 1634 sctp_faddr_t *fp; 1635 uint32_t adv_pap = sctp->sctp_adv_pap; 1636 boolean_t do_ftsn = B_FALSE; 1637 boolean_t ftsn_check = B_TRUE; 1638 uint32_t first_ua_tsn; 1639 sctp_msg_hdr_t *mhdr; 1640 uint32_t tot_wnd; 1641 1642 while (meta != NULL) { 1643 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) { 1644 uint32_t tsn; 1645 1646 if (!SCTP_CHUNK_ISSENT(mp)) 1647 goto window_probe; 1648 /* 1649 * We break in the following cases - 1650 * 1651 * if the advanced peer ack point includes the next 1652 * chunk to be retransmited - possibly the Forward 1653 * TSN was lost. 1654 * 1655 * if we are PRSCTP aware and the next chunk to be 1656 * retransmitted is now abandoned 1657 * 1658 * if the next chunk to be retransmitted is for 1659 * the dest on which the timer went off. (this 1660 * message is not abandoned). 1661 * 1662 * We check for Forward TSN only for the first 1663 * eligible chunk to be retransmitted. The reason 1664 * being if the first eligible chunk is skipped (say 1665 * it was sent to a destination other than oldfp) 1666 * then we cannot advance the cum TSN via Forward 1667 * TSN chunk. 1668 * 1669 * Also, ftsn_check is B_TRUE only for the first 1670 * eligible chunk, it will be B_FALSE for all 1671 * subsequent candidate messages for retransmission. 
1672 */ 1673 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1674 tsn = ntohl(sdc->sdh_tsn); 1675 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) { 1676 if (sctp->sctp_prsctp_aware && ftsn_check) { 1677 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) { 1678 ASSERT(sctp->sctp_prsctp_aware); 1679 do_ftsn = B_TRUE; 1680 goto out; 1681 } else { 1682 sctp_check_adv_ack_pt(sctp, 1683 meta, mp); 1684 if (SEQ_GT(sctp->sctp_adv_pap, 1685 adv_pap)) { 1686 do_ftsn = B_TRUE; 1687 goto out; 1688 } 1689 } 1690 ftsn_check = B_FALSE; 1691 } 1692 if (SCTP_CHUNK_DEST(mp) == oldfp) 1693 goto out; 1694 } 1695 } 1696 meta = meta->b_next; 1697 if (meta != NULL && sctp->sctp_prsctp_aware) { 1698 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1699 1700 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) || 1701 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) { 1702 meta = meta->b_next; 1703 } 1704 } 1705 } 1706 window_probe: 1707 /* 1708 * Retransmit fired for a destination which didn't have 1709 * any unacked data pending. 1710 */ 1711 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) { 1712 /* 1713 * Send a window probe. Inflate frwnd to allow 1714 * sending one segment. 1715 */ 1716 if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) 1717 sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc); 1718 /* next TSN to send */ 1719 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn; 1720 sctp_output(sctp); 1721 /* Last sent TSN */ 1722 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1723 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); 1724 sctp->sctp_zero_win_probe = B_TRUE; 1725 BUMP_MIB(&sctp_mib, sctpOutWinProbe); 1726 } 1727 return; 1728 out: 1729 /* 1730 * If were are probing for zero window, don't adjust retransmission 1731 * variables, but the timer is still backed off. 1732 */ 1733 if (sctp->sctp_zero_win_probe) { 1734 mblk_t *pkt; 1735 uint_t pkt_len; 1736 1737 /* 1738 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn 1739 * and sctp_rxt_maxtsn will specify the ZWP packet. 
1740 */ 1741 fp = oldfp; 1742 if (oldfp->state != SCTP_FADDRS_ALIVE) 1743 fp = sctp_rotate_faddr(sctp, oldfp); 1744 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len); 1745 if (pkt != NULL) { 1746 ASSERT(pkt_len <= fp->sfa_pmss); 1747 sctp_set_iplen(sctp, pkt); 1748 sctp_add_sendq(sctp, pkt); 1749 } else { 1750 SCTP_KSTAT(sctp_ss_rexmit_failed); 1751 } 1752 oldfp->strikes++; 1753 sctp->sctp_strikes++; 1754 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1755 if (oldfp != fp && oldfp->suna != 0) 1756 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); 1757 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1758 BUMP_MIB(&sctp_mib, sctpOutWinProbe); 1759 return; 1760 } 1761 1762 /* 1763 * Enter slowstart for this destination 1764 */ 1765 oldfp->ssthresh = oldfp->cwnd / 2; 1766 if (oldfp->ssthresh < 2 * oldfp->sfa_pmss) 1767 oldfp->ssthresh = 2 * oldfp->sfa_pmss; 1768 oldfp->cwnd = oldfp->sfa_pmss; 1769 oldfp->pba = 0; 1770 fp = sctp_rotate_faddr(sctp, oldfp); 1771 ASSERT(fp != NULL); 1772 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1773 1774 first_ua_tsn = ntohl(sdc->sdh_tsn); 1775 if (do_ftsn) { 1776 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen); 1777 if (nmp == NULL) { 1778 sctp->sctp_adv_pap = adv_pap; 1779 goto restart_timer; 1780 } 1781 head = nmp; 1782 /* 1783 * Move to the next unabandoned chunk. XXXCheck if meta will 1784 * always be marked abandoned. 1785 */ 1786 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta)) 1787 meta = meta->b_next; 1788 if (meta != NULL) 1789 mp = mp->b_cont; 1790 else 1791 mp = NULL; 1792 goto try_bundle; 1793 } 1794 seglen = ntohs(sdc->sdh_len); 1795 chunklen = seglen - sizeof (*sdc); 1796 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 1797 extra = SCTP_ALIGN - extra; 1798 1799 /* Find out if we need to piggyback SACK. 
*/ 1800 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { 1801 sacklen = 0; 1802 } else { 1803 sacklen = sizeof (sctp_chunk_hdr_t) + 1804 sizeof (sctp_sack_chunk_t) + 1805 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps); 1806 if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) { 1807 /* piggybacked SACK doesn't fit */ 1808 sacklen = 0; 1809 } else { 1810 /* 1811 * OK, we have room to send SACK back. But we 1812 * should send it back to the last fp where we 1813 * receive data from, unless sctp_lastdata equals 1814 * oldfp, then we should probably not send it 1815 * back to that fp. Also we should check that 1816 * the fp is alive. 1817 */ 1818 if (sctp->sctp_lastdata != oldfp && 1819 sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) { 1820 fp = sctp->sctp_lastdata; 1821 } 1822 } 1823 } 1824 1825 /* 1826 * Cancel RTT measurement if the retransmitted TSN is before the 1827 * TSN used for timimg. 1828 */ 1829 if (sctp->sctp_out_time != 0 && 1830 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) { 1831 sctp->sctp_out_time = 0; 1832 } 1833 /* Clear the counter as the RTT calculation may be off. */ 1834 fp->rtt_updates = 0; 1835 oldfp->rtt_updates = 0; 1836 1837 /* 1838 * After a timeout, we should change the current faddr so that 1839 * new chunks will be sent to the alternate address. 1840 */ 1841 sctp_set_faddr_current(sctp, fp); 1842 1843 nmp = dupmsg(mp); 1844 if (nmp == NULL) 1845 goto restart_timer; 1846 if (extra > 0) { 1847 fill = sctp_get_padding(extra); 1848 if (fill != NULL) { 1849 linkb(nmp, fill); 1850 seglen += extra; 1851 } else { 1852 freemsg(nmp); 1853 goto restart_timer; 1854 } 1855 } 1856 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1857 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL); 1858 if (head == NULL) { 1859 freemsg(nmp); 1860 SCTP_KSTAT(sctp_rexmit_failed); 1861 goto restart_timer; 1862 } 1863 seglen += sacklen; 1864 1865 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1866 1867 mp = mp->b_next; 1868 1869 /* Check how much more we can send. 
*/ 1870 tot_wnd = MIN(fp->cwnd, sctp->sctp_frwnd); 1871 /* 1872 * If the number of outstanding bytes is more than what we are 1873 * allowed to send, stop. 1874 */ 1875 if (tot_wnd <= chunklen || tot_wnd < fp->suna + chunklen) 1876 goto done_bundle; 1877 else 1878 tot_wnd -= chunklen; 1879 1880 try_bundle: 1881 while (seglen < fp->sfa_pmss) { 1882 int32_t new_len; 1883 1884 /* Go through the list to find more chunks to be bundled. */ 1885 while (mp != NULL) { 1886 /* Check if the chunk can be bundled. */ 1887 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp)) 1888 break; 1889 mp = mp->b_next; 1890 } 1891 /* Go to the next message. */ 1892 if (mp == NULL) { 1893 for (meta = meta->b_next; meta != NULL; 1894 meta = meta->b_next) { 1895 mhdr = (sctp_msg_hdr_t *)meta->b_rptr; 1896 1897 if (SCTP_IS_MSG_ABANDONED(meta) || 1898 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, 1899 sctp)) { 1900 continue; 1901 } 1902 1903 mp = meta->b_cont; 1904 goto try_bundle; 1905 } 1906 /* No more chunk to be bundled. */ 1907 break; 1908 } 1909 1910 sdc = (sctp_data_hdr_t *)mp->b_rptr; 1911 new_len = ntohs(sdc->sdh_len); 1912 chunklen = new_len - sizeof (*sdc); 1913 if (chunklen > tot_wnd) 1914 break; 1915 1916 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 1917 extra = SCTP_ALIGN - extra; 1918 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 1919 break; 1920 if ((nmp = dupmsg(mp)) == NULL) 1921 break; 1922 1923 if (extra > 0) { 1924 fill = sctp_get_padding(extra); 1925 if (fill != NULL) { 1926 linkb(nmp, fill); 1927 } else { 1928 freemsg(nmp); 1929 break; 1930 } 1931 } 1932 linkb(head, nmp); 1933 1934 SCTP_CHUNK_CLEAR_FLAGS(nmp); 1935 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta); 1936 1937 seglen = new_len; 1938 tot_wnd -= chunklen; 1939 mp = mp->b_next; 1940 } 1941 done_bundle: 1942 if ((seglen > fp->sfa_pmss) && fp->isv4) { 1943 ipha_t *iph = (ipha_t *)head->b_rptr; 1944 1945 /* 1946 * Path MTU is different from path we thought it would 1947 * be when we created chunks, or IP headers have 
grown. 1948 * Need to clear the DF bit. 1949 */ 1950 iph->ipha_fragment_offset_and_flags = 0; 1951 } 1952 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x " 1953 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n", 1954 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), 1955 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd)); 1956 1957 sctp->sctp_rexmitting = B_TRUE; 1958 sctp->sctp_rxt_nxttsn = first_ua_tsn; 1959 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; 1960 sctp_set_iplen(sctp, head); 1961 sctp_add_sendq(sctp, head); 1962 1963 /* 1964 * Restart the oldfp timer with exponential backoff and 1965 * the new fp timer for the retransmitted chunks. 1966 */ 1967 restart_timer: 1968 oldfp->strikes++; 1969 sctp->sctp_strikes++; 1970 SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max); 1971 if (oldfp->suna != 0) 1972 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto); 1973 sctp->sctp_active = lbolt64; 1974 1975 /* 1976 * Should we restart the timer of the new fp? If there is 1977 * outstanding data to the new fp, the timer should be 1978 * running already. So restarting it means that the timer 1979 * will fire later for those outstanding data. But if 1980 * we don't restart it, the timer will fire too early for the 1981 * just retransmitted chunks to the new fp. The reason is that we 1982 * don't keep a timestamp on when a chunk is retransmitted. 1983 * So when the timer fires, it will just search for the 1984 * chunk with the earliest TSN sent to new fp. This probably 1985 * is the chunk we just retransmitted. So for now, let's 1986 * be conservative and restart the timer of the new fp. 1987 */ 1988 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); 1989 } 1990 1991 /* 1992 * The SCTP write put procedure called from IP. 
1993 */ 1994 void 1995 sctp_wput(queue_t *q, mblk_t *mp) 1996 { 1997 uchar_t *rptr; 1998 t_scalar_t type; 1999 2000 switch (mp->b_datap->db_type) { 2001 case M_IOCTL: 2002 sctp_wput_ioctl(q, mp); 2003 break; 2004 case M_DATA: 2005 /* Should be handled in sctp_output() */ 2006 ASSERT(0); 2007 freemsg(mp); 2008 break; 2009 case M_PROTO: 2010 case M_PCPROTO: 2011 rptr = mp->b_rptr; 2012 if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) { 2013 type = ((union T_primitives *)rptr)->type; 2014 /* 2015 * There is no "standard" way on how to respond 2016 * to T_CAPABILITY_REQ if a module does not 2017 * understand it. And the current TI mod 2018 * has problems handling an error ack. So we 2019 * catch the request here and reply with a response 2020 * which the TI mod knows how to respond to. 2021 */ 2022 switch (type) { 2023 case T_CAPABILITY_REQ: 2024 (void) putnextctl1(RD(q), M_ERROR, EPROTO); 2025 break; 2026 default: 2027 if ((mp = mi_tpi_err_ack_alloc(mp, 2028 TNOTSUPPORT, 0)) != NULL) { 2029 qreply(q, mp); 2030 return; 2031 } 2032 } 2033 } 2034 /* FALLTHRU */ 2035 default: 2036 freemsg(mp); 2037 return; 2038 } 2039 } 2040 2041 /* 2042 * This function is called by sctp_ss_rexmit() to create a packet 2043 * to be retransmitted to the given fp. The given meta and mp 2044 * parameters are respectively the sctp_msg_hdr_t and the mblk of the 2045 * first chunk to be retransmitted. This is also called when we want 2046 * to retransmit a zero window probe from sctp_rexmit() or when we 2047 * want to retransmit the zero window probe after the window has 2048 * opened from sctp_got_sack(). 
2049 */ 2050 mblk_t * 2051 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp, 2052 uint_t *packet_len) 2053 { 2054 uint32_t seglen = 0; 2055 uint16_t chunklen; 2056 int extra; 2057 mblk_t *nmp; 2058 mblk_t *head; 2059 mblk_t *fill; 2060 sctp_data_hdr_t *sdc; 2061 sctp_msg_hdr_t *mhdr; 2062 2063 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2064 seglen = ntohs(sdc->sdh_len); 2065 chunklen = seglen - sizeof (*sdc); 2066 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0) 2067 extra = SCTP_ALIGN - extra; 2068 2069 nmp = dupmsg(*mp); 2070 if (nmp == NULL) 2071 return (NULL); 2072 if (extra > 0) { 2073 fill = sctp_get_padding(extra); 2074 if (fill != NULL) { 2075 linkb(nmp, fill); 2076 seglen += extra; 2077 } else { 2078 freemsg(nmp); 2079 return (NULL); 2080 } 2081 } 2082 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2083 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL); 2084 if (head == NULL) { 2085 freemsg(nmp); 2086 return (NULL); 2087 } 2088 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2089 /* 2090 * Don't update the TSN if we are doing a Zero Win Probe. 2091 */ 2092 if (!sctp->sctp_zero_win_probe) 2093 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2094 *mp = (*mp)->b_next; 2095 2096 try_bundle: 2097 while (seglen < fp->sfa_pmss) { 2098 int32_t new_len; 2099 2100 /* 2101 * Go through the list to find more chunks to be bundled. 2102 * We should only retransmit sent by unack'ed chunks. Since 2103 * they were sent before, the peer's receive window should 2104 * be able to receive them. 2105 */ 2106 while (*mp != NULL) { 2107 /* Check if the chunk can be bundled. */ 2108 if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp)) 2109 break; 2110 *mp = (*mp)->b_next; 2111 } 2112 /* Go to the next message. 
*/ 2113 if (*mp == NULL) { 2114 for (*meta = (*meta)->b_next; *meta != NULL; 2115 *meta = (*meta)->b_next) { 2116 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr; 2117 2118 if (SCTP_IS_MSG_ABANDONED(*meta) || 2119 SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr, 2120 sctp)) { 2121 continue; 2122 } 2123 2124 *mp = (*meta)->b_cont; 2125 goto try_bundle; 2126 } 2127 /* No more chunk to be bundled. */ 2128 break; 2129 } 2130 2131 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr; 2132 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */ 2133 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn)) 2134 break; 2135 new_len = ntohs(sdc->sdh_len); 2136 chunklen = new_len - sizeof (*sdc); 2137 2138 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0) 2139 extra = SCTP_ALIGN - extra; 2140 if ((new_len = seglen + new_len + extra) > fp->sfa_pmss) 2141 break; 2142 if ((nmp = dupmsg(*mp)) == NULL) 2143 break; 2144 2145 if (extra > 0) { 2146 fill = sctp_get_padding(extra); 2147 if (fill != NULL) { 2148 linkb(nmp, fill); 2149 } else { 2150 freemsg(nmp); 2151 break; 2152 } 2153 } 2154 linkb(head, nmp); 2155 2156 SCTP_CHUNK_CLEAR_FLAGS(nmp); 2157 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta); 2158 /* 2159 * Don't update the TSN if we are doing a Zero Win Probe. 2160 */ 2161 if (!sctp->sctp_zero_win_probe) 2162 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn); 2163 2164 seglen = new_len; 2165 *mp = (*mp)->b_next; 2166 } 2167 *packet_len = seglen; 2168 return (head); 2169 } 2170 2171 /* 2172 * sctp_ss_rexmit() is called when we get a SACK after a timeout which 2173 * advances the cum_tsn but the cum_tsn is still less than what we have sent 2174 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial" 2175 * SACK. We retransmit unacked chunks without having to wait for another 2176 * timeout. The rationale is that the SACK should not be "partial" if all the 2177 * lost chunks have been retransmitted. 
Since the SACK is "partial,"
 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
 * be missing.  It is better for us to retransmit them now instead
 * of waiting for a timeout.
 *
 * Packets are sent to sctp_current, one sctp_rexmit_packet() at a
 * time, for as long as at least one more path MSS fits in the part of
 * cwnd not already taken by outstanding data.  Must not be called
 * while a zero window probe is in progress.
 */
void
sctp_ss_rexmit(sctp_t *sctp)
{
	mblk_t		*meta;
	mblk_t		*mp;
	mblk_t		*pkt;
	sctp_faddr_t	*fp;
	uint_t		pkt_len;
	uint32_t	tot_wnd;
	sctp_data_hdr_t	*sdc;
	int		burst;

	ASSERT(!sctp->sctp_zero_win_probe);

	/*
	 * If the last cum ack is smaller than what we have just
	 * retransmitted, simply return.  Otherwise retransmission
	 * resumes with the TSN right after the cum ack.
	 */
	if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
		sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
	else
		return;
	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));

	/*
	 * After a timer fires, sctp_current should be set to the new
	 * fp where the retransmitted chunks are sent.
	 */
	fp = sctp->sctp_current;

	/*
	 * Since we are retransmitting, we can only use cwnd to determine
	 * how much we can send as we were allowed to send those chunks
	 * previously.
	 */
	tot_wnd = fp->cwnd;
	/* So we have sent more than we can, just return. */
	if (tot_wnd < fp->suna || tot_wnd - fp->suna < fp->sfa_pmss)
		return;
	else
		tot_wnd -= fp->suna;

	/* Find the first unack'ed chunk */
	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;

		if (SCTP_IS_MSG_ABANDONED(meta) ||
		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
			continue;
		}

		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
			/* Again, this may not be possible */
			if (!SCTP_CHUNK_ISSENT(mp))
				return;
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
				goto found_msg;
		}
	}

	/* Everything is abandoned... */
	return;

found_msg:
	/* meta/mp identify the chunk the next packet starts with. */
	if (!fp->timer_running)
		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
	pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
	if (pkt == NULL) {
		SCTP_KSTAT(sctp_ss_rexmit_failed);
		return;
	}
	if ((pkt_len > fp->sfa_pmss) && fp->isv4) {
		ipha_t	*iph = (ipha_t *)pkt->b_rptr;

		/*
		 * Path MTU is different from path we thought it would
		 * be when we created chunks, or IP headers have grown.
		 * Need to clear the DF bit.
		 */
		iph->ipha_fragment_offset_and_flags = 0;
	}
	sctp_set_iplen(sctp, pkt);
	sctp_add_sendq(sctp, pkt);

	/*
	 * Check and see if there is more chunk to be retransmitted.
	 * sctp_rexmit_packet() has advanced meta and mp; meta is NULL
	 * once it has run off the end of the transmit list.
	 */
	if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss ||
	    meta == NULL)
		return;
	if (mp == NULL)
		meta = meta->b_next;
	if (meta == NULL)
		return;

	/*
	 * Retransmit another packet if the window allows.
	 *
	 * The init clause below runs every time control arrives here,
	 * i.e. once per packet sent through found_msg: the packet just
	 * sent is charged against tot_wnd and burst is set afresh.
	 * burst therefore bounds how many messages are scanned for an
	 * unacked chunk after each packet.
	 */
	for (tot_wnd -= pkt_len, burst = sctp_maxburst - 1;
	    meta != NULL && burst > 0; meta = meta->b_next, burst--) {
		if (mp == NULL)
			mp = meta->b_cont;
		for (; mp != NULL; mp = mp->b_next) {
			/* Again, this may not be possible */
			if (!SCTP_CHUNK_ISSENT(mp))
				return;
			if (!SCTP_CHUNK_ISACKED(mp))
				goto found_msg;
		}
	}
}