xref: /titanic_50/usr/src/uts/common/inet/sctp/sctp_output.c (revision 3e5bc1d795e8c41f3680a71e3954e72d079ee46d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/systm.h>
29 #include <sys/stream.h>
30 #include <sys/cmn_err.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/socket.h>
34 #include <sys/stropts.h>
35 #include <sys/strsun.h>
36 #include <sys/strsubr.h>
37 #include <sys/socketvar.h>
38 #include <inet/common.h>
39 #include <inet/mi.h>
40 #include <inet/ip.h>
41 #include <inet/ip6.h>
42 #include <inet/sctp_ip.h>
43 #include <inet/ipclassifier.h>
44 
45 /*
46  * PR-SCTP comments.
47  *
48  * A message can expire before it gets to the transmit list (i.e. it is still
49  * in the unsent list - unchunked), after it gets to the transmit list, but
50  * before transmission has actually started, or after transmission has begun.
51  * Accordingly, we check for the status of a message in sctp_chunkify() when
52  * the message is being transferred from the unsent list to the transmit list;
53  * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
54  * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
55  * When we nuke a message in sctp_chunkify(), all we need to do is take it
56  * out of the unsent list and update sctp_unsent; when a message is deemed
57  * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
58  * list, update sctp_unsent IFF transmission for the message has not yet begun
59  * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
60  * message has started, then we cannot just take it out of the list, we need
61  * to send Forward TSN chunk to the peer so that the peer can clear its
62  * fragment list for this message. However, we cannot just send the Forward
63  * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
64  * messages preceding this abandoned message. So, we send a Forward TSN
65  * IFF all messages prior to this abandoned message have been SACKed; if not,
66  * we defer sending the Forward TSN to sctp_cumack(), which will check for
67  * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
68  * sctp_rexmit() when we check for retransmissions, we need to determine if
69  * the advanced peer ack point can be moved ahead, and if so, send a Forward
70  * TSN to the peer instead of retransmitting the chunk. Note that when
71  * we send a Forward TSN for a message, there may be yet unsent chunks for
72  * this message; we need to mark all such chunks as abandoned, so that
73  * sctp_cumack() can take the message out of the transmit list, additionally
74  * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
75  * decremented when a message/chunk is deemed abandoned), sockfs needs to
76  * be notified so that it can adjust its idea of the queued message.
77  */
78 
79 #include "sctp_impl.h"
80 
81 static struct kmem_cache	*sctp_kmem_ftsn_set_cache;
82 
83 #ifdef	DEBUG
84 static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
85 #endif
86 
87 /*
88  * Called to allocate a header mblk when sending data to SCTP.
89  * Data will follow in b_cont of this mblk.
90  */
91 mblk_t *
92 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
93     int flags)
94 {
95 	mblk_t *mp;
96 	struct T_unitdata_req *tudr;
97 	size_t size;
98 	int error;
99 
100 	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
101 	size = MAX(size, sizeof (sctp_msg_hdr_t));
102 	if (flags & SCTP_CAN_BLOCK) {
103 		mp = allocb_wait(size, BPRI_MED, 0, &error);
104 	} else {
105 		mp = allocb(size, BPRI_MED);
106 	}
107 	if (mp) {
108 		tudr = (struct T_unitdata_req *)mp->b_rptr;
109 		tudr->PRIM_type = T_UNITDATA_REQ;
110 		tudr->DEST_length = nlen;
111 		tudr->DEST_offset = sizeof (*tudr);
112 		tudr->OPT_length = clen;
113 		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
114 		    _TPI_ALIGN_TOPT(nlen));
115 		if (nlen > 0)
116 			bcopy(name, tudr + 1, nlen);
117 		if (clen > 0)
118 			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
119 		mp->b_wptr += (tudr ->OPT_offset + clen);
120 		mp->b_datap->db_type = M_PROTO;
121 	}
122 	return (mp);
123 }
124 
125 /*ARGSUSED2*/
126 int
127 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
128 {
129 	sctp_faddr_t	*fp = NULL;
130 	struct T_unitdata_req	*tudr;
131 	int		error = 0;
132 	mblk_t		*mproto = mp;
133 	in6_addr_t	*addr;
134 	in6_addr_t	tmpaddr;
135 	uint16_t	sid = sctp->sctp_def_stream;
136 	uint32_t	ppid = sctp->sctp_def_ppid;
137 	uint32_t	context = sctp->sctp_def_context;
138 	uint16_t	msg_flags = sctp->sctp_def_flags;
139 	sctp_msg_hdr_t	*sctp_msg_hdr;
140 	uint32_t	msg_len = 0;
141 	uint32_t	timetolive = sctp->sctp_def_timetolive;
142 
143 	ASSERT(DB_TYPE(mproto) == M_PROTO);
144 
145 	mp = mp->b_cont;
146 	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);
147 
148 	tudr = (struct T_unitdata_req *)mproto->b_rptr;
149 	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
150 
151 	/* Get destination address, if specified */
152 	if (tudr->DEST_length > 0) {
153 		sin_t *sin;
154 		sin6_t *sin6;
155 
156 		sin = (struct sockaddr_in *)
157 		    (mproto->b_rptr + tudr->DEST_offset);
158 		switch (sin->sin_family) {
159 		case AF_INET:
160 			if (tudr->DEST_length < sizeof (*sin)) {
161 				return (EINVAL);
162 			}
163 			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
164 			addr = &tmpaddr;
165 			break;
166 		case AF_INET6:
167 			if (tudr->DEST_length < sizeof (*sin6)) {
168 				return (EINVAL);
169 			}
170 			sin6 = (struct sockaddr_in6 *)
171 			    (mproto->b_rptr + tudr->DEST_offset);
172 			addr = &sin6->sin6_addr;
173 			break;
174 		default:
175 			return (EAFNOSUPPORT);
176 		}
177 		fp = sctp_lookup_faddr(sctp, addr);
178 		if (fp == NULL) {
179 			return (EINVAL);
180 		}
181 	}
182 	/* Ancillary Data? */
183 	if (tudr->OPT_length > 0) {
184 		struct cmsghdr		*cmsg;
185 		char			*cend;
186 		struct sctp_sndrcvinfo	*sndrcv;
187 
188 		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
189 		cend = ((char *)cmsg + tudr->OPT_length);
190 		ASSERT(cend <= (char *)mproto->b_wptr);
191 
192 		for (;;) {
193 			if ((char *)(cmsg + 1) > cend ||
194 			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
195 				break;
196 			}
197 			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
198 			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
199 				if (cmsg->cmsg_len <
200 				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
201 					return (EINVAL);
202 				}
203 				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
204 				sid = sndrcv->sinfo_stream;
205 				msg_flags = sndrcv->sinfo_flags;
206 				ppid = sndrcv->sinfo_ppid;
207 				context = sndrcv->sinfo_context;
208 				timetolive = sndrcv->sinfo_timetolive;
209 				break;
210 			}
211 			if (cmsg->cmsg_len > 0)
212 				cmsg = CMSG_NEXT(cmsg);
213 			else
214 				break;
215 		}
216 	}
217 	if (msg_flags & MSG_ABORT) {
218 		if (mp && mp->b_cont) {
219 			mblk_t *pump = msgpullup(mp, -1);
220 			if (!pump) {
221 				return (ENOMEM);
222 			}
223 			freemsg(mp);
224 			mp = pump;
225 			mproto->b_cont = mp;
226 		}
227 		RUN_SCTP(sctp);
228 		sctp_user_abort(sctp, mp);
229 		freemsg(mproto);
230 		goto process_sendq;
231 	}
232 	if (mp == NULL)
233 		goto done;
234 
235 	RUN_SCTP(sctp);
236 
237 	/* Reject any new data requests if we are shutting down */
238 	if (sctp->sctp_state > SCTPS_ESTABLISHED ||
239 	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
240 		error = EPIPE;
241 		goto unlock_done;
242 	}
243 
244 	/* Re-use the mproto to store relevant info. */
245 	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));
246 
247 	mproto->b_rptr = mproto->b_datap->db_base;
248 	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);
249 
250 	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
251 	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
252 	sctp_msg_hdr->smh_context = context;
253 	sctp_msg_hdr->smh_sid = sid;
254 	sctp_msg_hdr->smh_ppid = ppid;
255 	sctp_msg_hdr->smh_flags = msg_flags;
256 	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
257 	sctp_msg_hdr->smh_tob = lbolt64;
258 	for (; mp != NULL; mp = mp->b_cont)
259 		msg_len += MBLKL(mp);
260 	sctp_msg_hdr->smh_msglen = msg_len;
261 
262 	/* User requested specific destination */
263 	SCTP_SET_CHUNK_DEST(mproto, fp);
264 
265 	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
266 	    sid >= sctp->sctp_num_ostr) {
267 		/* Send sendfail event */
268 		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
269 		    B_FALSE);
270 		error = EINVAL;
271 		goto unlock_done;
272 	}
273 
274 	/* no data */
275 	if (msg_len == 0) {
276 		sctp_sendfail_event(sctp, dupmsg(mproto),
277 		    SCTP_ERR_NO_USR_DATA, B_FALSE);
278 		error = EINVAL;
279 		goto unlock_done;
280 	}
281 
282 	/* Add it to the unsent list */
283 	if (sctp->sctp_xmit_unsent == NULL) {
284 		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
285 	} else {
286 		sctp->sctp_xmit_unsent_tail->b_next = mproto;
287 		sctp->sctp_xmit_unsent_tail = mproto;
288 	}
289 	sctp->sctp_unsent += msg_len;
290 	BUMP_LOCAL(sctp->sctp_msgcount);
291 	/*
292 	 * Notify sockfs if the tx queue is full.
293 	 */
294 	if (SCTP_TXQ_LEN(sctp) >= sctp->sctp_xmit_hiwater) {
295 		sctp->sctp_txq_full = 1;
296 		sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, B_TRUE);
297 	}
298 	if (sctp->sctp_state == SCTPS_ESTABLISHED)
299 		sctp_output(sctp, UINT_MAX);
300 process_sendq:
301 	WAKE_SCTP(sctp);
302 	sctp_process_sendq(sctp);
303 	return (0);
304 unlock_done:
305 	WAKE_SCTP(sctp);
306 done:
307 	return (error);
308 }
309 
310 void
311 sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send)
312 {
313 	mblk_t			*mp;
314 	mblk_t			*chunk_mp;
315 	mblk_t			*chunk_head;
316 	mblk_t			*chunk_hdr;
317 	mblk_t			*chunk_tail = NULL;
318 	int			count;
319 	int			chunksize;
320 	sctp_data_hdr_t		*sdc;
321 	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
322 	sctp_faddr_t		*fp;
323 	sctp_faddr_t		*fp1;
324 	size_t			xtralen;
325 	sctp_msg_hdr_t		*msg_hdr;
326 	sctp_stack_t	*sctps = sctp->sctp_sctps;
327 
328 	fp = SCTP_CHUNK_DEST(mdblk);
329 	if (fp == NULL)
330 		fp = sctp->sctp_current;
331 	if (fp->isv4)
332 		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
333 		    sizeof (*sdc);
334 	else
335 		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
336 		    sizeof (*sdc);
337 	count = chunksize = first_len - sizeof (*sdc);
338 nextmsg:
339 	chunk_mp = mdblk->b_cont;
340 
341 	/*
342 	 * If this partially chunked, we ignore the first_len for now
343 	 * and use the one already present. For the unchunked bits, we
344 	 * use the length of the last chunk.
345 	 */
346 	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
347 		int	chunk_len;
348 
349 		ASSERT(chunk_mp->b_next != NULL);
350 		mdblk->b_cont = chunk_mp->b_next;
351 		chunk_mp->b_next = NULL;
352 		SCTP_MSG_CLEAR_CHUNKED(mdblk);
353 		mp = mdblk->b_cont;
354 		while (mp->b_next != NULL)
355 			mp = mp->b_next;
356 		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
357 		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
358 			count = chunksize = fp->sfa_pmss - chunk_len;
359 		else
360 			count = chunksize = fp->sfa_pmss;
361 		count = chunksize = count - sizeof (*sdc);
362 	} else {
363 		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
364 		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
365 			sctp->sctp_xmit_unsent = mdblk->b_next;
366 			if (sctp->sctp_xmit_unsent == NULL)
367 				sctp->sctp_xmit_unsent_tail = NULL;
368 			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
369 			sctp->sctp_unsent -= msg_hdr->smh_msglen;
370 			mdblk->b_next = NULL;
371 			BUMP_LOCAL(sctp->sctp_prsctpdrop);
372 			/*
373 			 * Update ULP the amount of queued data, which is
374 			 * sent-unack'ed + unsent.
375 			 */
376 			if (!SCTP_IS_DETACHED(sctp))
377 				SCTP_TXQ_UPDATE(sctp);
378 			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
379 			goto try_next;
380 		}
381 		mdblk->b_cont = NULL;
382 	}
383 	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
384 nextchunk:
385 	chunk_head = chunk_mp;
386 	chunk_tail = NULL;
387 
388 	/* Skip as many mblk's as we need */
389 	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
390 		count -= MBLKL(chunk_mp);
391 		chunk_tail = chunk_mp;
392 		chunk_mp = chunk_mp->b_cont;
393 	}
394 	/* Split the chain, if needed */
395 	if (chunk_mp != NULL) {
396 		if (count > 0) {
397 			mblk_t	*split_mp = dupb(chunk_mp);
398 
399 			if (split_mp == NULL) {
400 				if (mdblk->b_cont == NULL) {
401 					mdblk->b_cont = chunk_head;
402 				} else  {
403 					SCTP_MSG_SET_CHUNKED(mdblk);
404 					ASSERT(chunk_head->b_next == NULL);
405 					chunk_head->b_next = mdblk->b_cont;
406 					mdblk->b_cont = chunk_head;
407 				}
408 				return;
409 			}
410 			if (chunk_tail != NULL) {
411 				chunk_tail->b_cont = split_mp;
412 				chunk_tail = chunk_tail->b_cont;
413 			} else {
414 				chunk_head = chunk_tail = split_mp;
415 			}
416 			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
417 			chunk_mp->b_rptr = chunk_tail->b_wptr;
418 			count = 0;
419 		} else if (chunk_tail == NULL) {
420 			goto next;
421 		} else {
422 			chunk_tail->b_cont = NULL;
423 		}
424 	}
425 	/* Alloc chunk hdr, if needed */
426 	if (DB_REF(chunk_head) > 1 ||
427 	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
428 	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
429 		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
430 			if (mdblk->b_cont == NULL) {
431 				if (chunk_mp != NULL)
432 					linkb(chunk_head, chunk_mp);
433 				mdblk->b_cont = chunk_head;
434 			} else {
435 				SCTP_MSG_SET_CHUNKED(mdblk);
436 				if (chunk_mp != NULL)
437 					linkb(chunk_head, chunk_mp);
438 				ASSERT(chunk_head->b_next == NULL);
439 				chunk_head->b_next = mdblk->b_cont;
440 				mdblk->b_cont = chunk_head;
441 			}
442 			return;
443 		}
444 		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
445 		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
446 		chunk_hdr->b_cont = chunk_head;
447 	} else {
448 		chunk_hdr = chunk_head;
449 		chunk_hdr->b_rptr -= sizeof (*sdc);
450 	}
451 	ASSERT(chunk_hdr->b_datap->db_ref == 1);
452 	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
453 	sdc->sdh_id = CHUNK_DATA;
454 	sdc->sdh_flags = 0;
455 	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
456 	ASSERT(sdc->sdh_len);
457 	sdc->sdh_sid = htons(msg_hdr->smh_sid);
458 	/*
459 	 * We defer assigning the SSN just before sending the chunk, else
460 	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
461 	 * to send a Forward TSN to let the peer know. Some more comments
462 	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
463 	 */
464 	sdc->sdh_payload_id = msg_hdr->smh_ppid;
465 
466 	if (mdblk->b_cont == NULL) {
467 		mdblk->b_cont = chunk_hdr;
468 		SCTP_DATA_SET_BBIT(sdc);
469 	} else {
470 		mp = mdblk->b_cont;
471 		while (mp->b_next != NULL)
472 			mp = mp->b_next;
473 		mp->b_next = chunk_hdr;
474 	}
475 
476 	bytes_to_send -= (chunksize - count);
477 	if (chunk_mp != NULL) {
478 next:
479 		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
480 		goto nextchunk;
481 	}
482 	SCTP_DATA_SET_EBIT(sdc);
483 	sctp->sctp_xmit_unsent = mdblk->b_next;
484 	if (mdblk->b_next == NULL) {
485 		sctp->sctp_xmit_unsent_tail = NULL;
486 	}
487 	mdblk->b_next = NULL;
488 
489 	if (sctp->sctp_xmit_tail == NULL) {
490 		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
491 	} else {
492 		mp = sctp->sctp_xmit_tail;
493 		while (mp->b_next != NULL)
494 			mp = mp->b_next;
495 		mp->b_next = mdblk;
496 		mdblk->b_prev = mp;
497 	}
498 try_next:
499 	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
500 		mdblk = sctp->sctp_xmit_unsent;
501 		fp1 = SCTP_CHUNK_DEST(mdblk);
502 		if (fp1 == NULL)
503 			fp1 = sctp->sctp_current;
504 		if (fp == fp1) {
505 			size_t len = MBLKL(mdblk->b_cont);
506 			if ((count > 0) &&
507 			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
508 			    (len <= count))) {
509 				count -= sizeof (*sdc);
510 				count = chunksize = count - (count & 0x3);
511 			} else {
512 				count = chunksize = fp->sfa_pmss -
513 				    sizeof (*sdc);
514 			}
515 		} else {
516 			if (fp1->isv4)
517 				xtralen = sctp->sctp_hdr_len;
518 			else
519 				xtralen = sctp->sctp_hdr6_len;
520 			xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
521 			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
522 			fp = fp1;
523 		}
524 		goto nextmsg;
525 	}
526 }
527 
528 void
529 sctp_free_msg(mblk_t *ump)
530 {
531 	mblk_t *mp, *nmp;
532 
533 	for (mp = ump->b_cont; mp; mp = nmp) {
534 		nmp = mp->b_next;
535 		mp->b_next = mp->b_prev = NULL;
536 		freemsg(mp);
537 	}
538 	ASSERT(!ump->b_prev);
539 	ump->b_next = NULL;
540 	freeb(ump);
541 }
542 
543 mblk_t *
544 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
545     int *error)
546 {
547 	int hdrlen;
548 	char *hdr;
549 	int isv4 = fp->isv4;
550 	sctp_stack_t	*sctps = sctp->sctp_sctps;
551 
552 	if (error != NULL)
553 		*error = 0;
554 
555 	if (isv4) {
556 		hdrlen = sctp->sctp_hdr_len;
557 		hdr = sctp->sctp_iphc;
558 	} else {
559 		hdrlen = sctp->sctp_hdr6_len;
560 		hdr = sctp->sctp_iphc6;
561 	}
562 	/*
563 	 * A null fp->ire could mean that the address is 'down'. Similarly,
564 	 * it is possible that the address went down, we tried to send an
565 	 * heartbeat and ended up setting fp->saddr as unspec because we
566 	 * didn't have any usable source address.  In either case
567 	 * sctp_get_ire() will try find an IRE, if available, and set
568 	 * the source address, if needed.  If we still don't have any
569 	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
570 	 * we return EHOSTUNREACH.
571 	 */
572 	if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
573 		sctp_get_ire(sctp, fp);
574 		if (fp->state == SCTP_FADDRS_UNREACH) {
575 			if (error != NULL)
576 				*error = EHOSTUNREACH;
577 			return (NULL);
578 		}
579 	}
580 	/* Copy in IP header. */
581 	if ((mp->b_rptr - mp->b_datap->db_base) <
582 	    (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 ||
583 	    !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) {
584 		mblk_t *nmp;
585 
586 		/*
587 		 * This can happen if IP headers are adjusted after
588 		 * data was moved into chunks, or during retransmission,
589 		 * or things like snoop is running.
590 		 */
591 		nmp = allocb_cred(sctps->sctps_wroff_xtra + hdrlen + sacklen,
592 		    CONN_CRED(sctp->sctp_connp));
593 		if (nmp == NULL) {
594 			if (error !=  NULL)
595 				*error = ENOMEM;
596 			return (NULL);
597 		}
598 		nmp->b_rptr += sctps->sctps_wroff_xtra;
599 		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
600 		nmp->b_cont = mp;
601 		mp = nmp;
602 	} else {
603 		mp->b_rptr -= (hdrlen + sacklen);
604 		mblk_setcred(mp, CONN_CRED(sctp->sctp_connp));
605 	}
606 	bcopy(hdr, mp->b_rptr, hdrlen);
607 	if (sacklen) {
608 		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
609 	}
610 	if (fp != sctp->sctp_current) {
611 		/* change addresses in header */
612 		if (isv4) {
613 			ipha_t *iph = (ipha_t *)mp->b_rptr;
614 
615 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
616 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
617 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
618 				    iph->ipha_src);
619 			} else if (sctp->sctp_bound_to_all) {
620 				iph->ipha_src = INADDR_ANY;
621 			}
622 		} else {
623 			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
624 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
625 				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
626 			} else if (sctp->sctp_bound_to_all) {
627 				V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src);
628 			}
629 		}
630 	}
631 	/*
632 	 * IP will not free this IRE if it is condemned.  SCTP needs to
633 	 * free it.
634 	 */
635 	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
636 		IRE_REFRELE_NOTR(fp->ire);
637 		fp->ire = NULL;
638 	}
639 
640 	/* Stash the conn and ire ptr info for IP */
641 	SCTP_STASH_IPINFO(mp, fp->ire);
642 
643 	return (mp);
644 }
645 
646 /*
647  * SCTP requires every chunk to be padded so that the total length
648  * is a multiple of SCTP_ALIGN.  This function returns a mblk with
649  * the specified pad length.
650  */
651 static mblk_t *
652 sctp_get_padding(sctp_t *sctp, int pad)
653 {
654 	mblk_t *fill;
655 
656 	ASSERT(pad < SCTP_ALIGN);
657 	ASSERT(sctp->sctp_pad_mp != NULL);
658 	if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) {
659 		fill->b_wptr += pad;
660 		return (fill);
661 	}
662 
663 	/*
664 	 * The memory saving path of reusing the sctp_pad_mp
665 	 * fails may be because it has been dupb() too
666 	 * many times (DBLK_REFMAX).  Use the memory consuming
667 	 * path of allocating the pad mblk.
668 	 */
669 	if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
670 		/* Zero it out.  SCTP_ALIGN is sizeof (int32_t) */
671 		*(int32_t *)fill->b_rptr = 0;
672 		fill->b_wptr += pad;
673 	}
674 	return (fill);
675 }
676 
677 static mblk_t *
678 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
679 {
680 	mblk_t		*meta;
681 	mblk_t		*start_mp = NULL;
682 	mblk_t		*end_mp = NULL;
683 	mblk_t		*mp, *nmp;
684 	mblk_t		*fill;
685 	sctp_data_hdr_t	*sdh;
686 	int		msglen;
687 	int		extra;
688 	sctp_msg_hdr_t	*msg_hdr;
689 	sctp_faddr_t	*old_fp = NULL;
690 	sctp_faddr_t	*chunk_fp;
691 	sctp_stack_t	*sctps = sctp->sctp_sctps;
692 
693 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
694 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
695 		if (SCTP_IS_MSG_ABANDONED(meta) ||
696 		    SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
697 			continue;
698 		}
699 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
700 			if (SCTP_CHUNK_WANT_REXMIT(mp)) {
701 				/*
702 				 * Use the same peer address to do fast
703 				 * retransmission.  If the original peer
704 				 * address is dead, switch to the current
705 				 * one.  Record the old one so that we
706 				 * will pick the chunks sent to the old
707 				 * one for fast retransmission.
708 				 */
709 				chunk_fp = SCTP_CHUNK_DEST(mp);
710 				if (*fp == NULL) {
711 					*fp = chunk_fp;
712 					if ((*fp)->state != SCTP_FADDRS_ALIVE) {
713 						old_fp = *fp;
714 						*fp = sctp->sctp_current;
715 					}
716 				} else if (old_fp == NULL && *fp != chunk_fp) {
717 					continue;
718 				} else if (old_fp != NULL &&
719 				    old_fp != chunk_fp) {
720 					continue;
721 				}
722 
723 				sdh = (sctp_data_hdr_t *)mp->b_rptr;
724 				msglen = ntohs(sdh->sdh_len);
725 				if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
726 					extra = SCTP_ALIGN - extra;
727 				}
728 
729 				/*
730 				 * We still return at least the first message
731 				 * even if that message cannot fit in as
732 				 * PMTU may have changed.
733 				 */
734 				if (*total + msglen + extra >
735 				    (*fp)->sfa_pmss && start_mp != NULL) {
736 					return (start_mp);
737 				}
738 				if ((nmp = dupmsg(mp)) == NULL)
739 					return (start_mp);
740 				if (extra > 0) {
741 					fill = sctp_get_padding(sctp, extra);
742 					if (fill != NULL) {
743 						linkb(nmp, fill);
744 					} else {
745 						return (start_mp);
746 					}
747 				}
748 				BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans);
749 				BUMP_LOCAL(sctp->sctp_rxtchunks);
750 				SCTP_CHUNK_CLEAR_REXMIT(mp);
751 				if (start_mp == NULL) {
752 					start_mp = nmp;
753 				} else {
754 					linkb(end_mp, nmp);
755 				}
756 				end_mp = nmp;
757 				*total += msglen + extra;
758 				dprint(2, ("sctp_find_fast_rexmit_mblks: "
759 				    "tsn %x\n", sdh->sdh_tsn));
760 			}
761 		}
762 	}
763 	/* Clear the flag as there is no more message to be fast rexmitted. */
764 	sctp->sctp_chk_fast_rexmit = B_FALSE;
765 	return (start_mp);
766 }
767 
/*
 * DEBUG-only sanity check: walking b_next from 'head' must reach 'tail'.
 * A NULL head or tail is treated as trivially consistent.
 */
#ifdef	DEBUG
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
	mblk_t	*cur;

	if (head == NULL || tail == NULL)
		return (B_TRUE);
	for (cur = head; cur != NULL; cur = cur->b_next) {
		if (cur == tail)
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif
785 
786 /*
787  * Gets the next unsent chunk to transmit. Messages that are abandoned are
788  * skipped. A message can be abandoned if it has a non-zero timetolive and
789  * transmission has not yet started or if it is a partially reliable
790  * message and its time is up (assuming we are PR-SCTP aware).
791  * 'cansend' is used to determine if need to try and chunkify messages from
792  * the unsent list, if any, and also as an input to sctp_chunkify() if so.
793  *
794  * firstseg indicates the space already used, cansend represents remaining
795  * space in the window, ((sfa_pmss - firstseg) can therefore reasonably
796  * be used to compute the cansend arg).
797  */
798 mblk_t *
799 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
800     int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp)
801 {
802 	mblk_t		*mp1;
803 	sctp_msg_hdr_t	*msg_hdr;
804 	mblk_t		*tmp_meta;
805 	sctp_faddr_t	*fp1;
806 
807 	ASSERT(error != NULL && mp != NULL);
808 	*error = 0;
809 
810 	ASSERT(sctp->sctp_current != NULL);
811 
812 chunkified:
813 	while (meta != NULL) {
814 		tmp_meta = meta->b_next;
815 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
816 		mp1 = meta->b_cont;
817 		if (SCTP_IS_MSG_ABANDONED(meta))
818 			goto next_msg;
819 		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
820 			while (mp1 != NULL) {
821 				if (SCTP_CHUNK_CANSEND(mp1)) {
822 					*mp = mp1;
823 #ifdef	DEBUG
824 					ASSERT(sctp_verify_chain(
825 					    sctp->sctp_xmit_head, meta));
826 #endif
827 					return (meta);
828 				}
829 				mp1 = mp1->b_next;
830 			}
831 			goto next_msg;
832 		}
833 		/*
834 		 * If we come here and the first chunk is sent, then we
835 		 * we are PR-SCTP aware, in which case if the cumulative
836 		 * TSN has moved upto or beyond the first chunk (which
837 		 * means all the previous messages have been cumulative
838 		 * SACK'd), then we send a Forward TSN with the last
839 		 * chunk that was sent in this message. If we can't send
840 		 * a Forward TSN because previous non-abandoned messages
841 		 * have not been acked then we will defer the Forward TSN
842 		 * to sctp_rexmit() or sctp_cumack().
843 		 */
844 		if (SCTP_CHUNK_ISSENT(mp1)) {
845 			*error = sctp_check_abandoned_msg(sctp, meta);
846 			if (*error != 0) {
847 #ifdef	DEBUG
848 				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
849 				    sctp->sctp_xmit_tail));
850 #endif
851 				return (NULL);
852 			}
853 			goto next_msg;
854 		}
855 		BUMP_LOCAL(sctp->sctp_prsctpdrop);
856 		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
857 		if (meta->b_prev == NULL) {
858 			ASSERT(sctp->sctp_xmit_head == meta);
859 			sctp->sctp_xmit_head = tmp_meta;
860 			if (sctp->sctp_xmit_tail == meta)
861 				sctp->sctp_xmit_tail = tmp_meta;
862 			meta->b_next = NULL;
863 			if (tmp_meta != NULL)
864 				tmp_meta->b_prev = NULL;
865 		} else if (meta->b_next == NULL) {
866 			if (sctp->sctp_xmit_tail == meta)
867 				sctp->sctp_xmit_tail = meta->b_prev;
868 			meta->b_prev->b_next = NULL;
869 			meta->b_prev = NULL;
870 		} else {
871 			meta->b_prev->b_next = tmp_meta;
872 			tmp_meta->b_prev = meta->b_prev;
873 			if (sctp->sctp_xmit_tail == meta)
874 				sctp->sctp_xmit_tail = tmp_meta;
875 			meta->b_prev = NULL;
876 			meta->b_next = NULL;
877 		}
878 		sctp->sctp_unsent -= msg_hdr->smh_msglen;
879 		/*
880 		 * Update ULP the amount of queued data, which is
881 		 * sent-unack'ed + unsent.
882 		 */
883 		if (!SCTP_IS_DETACHED(sctp))
884 			SCTP_TXQ_UPDATE(sctp);
885 		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
886 next_msg:
887 		meta = tmp_meta;
888 	}
889 	/* chunkify, if needed */
890 	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
891 		ASSERT(sctp->sctp_unsent > 0);
892 		if (fp == NULL) {
893 			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
894 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
895 				fp = sctp->sctp_current;
896 		} else {
897 			/*
898 			 * If user specified destination, try to honor that.
899 			 */
900 			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
901 			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
902 			    fp1 != fp) {
903 				goto chunk_done;
904 			}
905 		}
906 		sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend);
907 		if ((meta = sctp->sctp_xmit_tail) == NULL)
908 			goto chunk_done;
909 		/*
910 		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
911 		 * new chunk(s) to the tail, so we need to skip the
912 		 * sctp_xmit_tail, which would have already been processed.
913 		 * This could happen when there is unacked chunks, but
914 		 * nothing new to send.
915 		 * When sctp_chunkify() is called when the transmit queue
916 		 * is empty then we need to start from sctp_xmit_tail.
917 		 */
918 		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
919 #ifdef	DEBUG
920 			mp1 = sctp->sctp_xmit_tail->b_cont;
921 			while (mp1 != NULL) {
922 				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
923 				mp1 = mp1->b_next;
924 			}
925 #endif
926 			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
927 				goto chunk_done;
928 		}
929 		goto chunkified;
930 	}
931 chunk_done:
932 #ifdef	DEBUG
933 	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
934 #endif
935 	return (NULL);
936 }
937 
938 void
939 sctp_fast_rexmit(sctp_t *sctp)
940 {
941 	mblk_t		*mp, *head;
942 	int		pktlen = 0;
943 	sctp_faddr_t	*fp = NULL;
944 	sctp_stack_t	*sctps = sctp->sctp_sctps;
945 
946 	ASSERT(sctp->sctp_xmit_head != NULL);
947 	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
948 	if (mp == NULL) {
949 		SCTP_KSTAT(sctps, sctp_fr_not_found);
950 		return;
951 	}
952 	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
953 		freemsg(mp);
954 		SCTP_KSTAT(sctps, sctp_fr_add_hdr);
955 		return;
956 	}
957 	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
958 		ipha_t *iph = (ipha_t *)head->b_rptr;
959 
960 		iph->ipha_fragment_offset_and_flags = 0;
961 	}
962 
963 	sctp_set_iplen(sctp, head);
964 	sctp_add_sendq(sctp, head);
965 	sctp->sctp_active = fp->lastactive = lbolt64;
966 }
967 
/*
 * Build and queue up to num_pkt packets of data chunks that have not been
 * sent yet.  Chunks are taken from the message at sctp_xmit_tail; when that
 * message has no more sendable chunks, sctp_get_msg_to_send() is asked for
 * the next one.
 *
 * The total amount of data is bounded by the smaller of sctp_frwnd and
 * sctp_unsent.  The first chunk of a packet must fit in the destination's
 * cwnd - suna; further chunks are bundled while the packet stays within
 * that value capped at sfa_pmss.  If a SACK is pending (sctp_ftsn is not
 * sctp_lastacked + 1), room for it is requested from sctp_add_proto_hdr()
 * in a packet addressed to sctp_lastdata (or sctp_current when that address
 * is not alive), provided the data chunk and the SACK fit together.
 *
 * Every early exit goes through unsent_data, which arms the timer of the
 * selected peer address if it is not running and frees any packet that was
 * built but not queued.
 */
void
sctp_output(sctp_t *sctp, uint_t num_pkt)
{
	mblk_t			*mp = NULL;
	mblk_t			*nmp;
	mblk_t			*head;
	mblk_t			*meta = sctp->sctp_xmit_tail;
	mblk_t			*fill = NULL;
	uint16_t 		chunklen;
	uint32_t 		cansend;
	int32_t			seglen;
	int32_t			xtralen;
	int32_t			sacklen;
	int32_t			pad = 0;
	int32_t			pathmax;
	int			extra;
	int64_t			now = lbolt64;
	sctp_faddr_t		*fp;
	sctp_faddr_t		*lfp;
	sctp_data_hdr_t		*sdc;
	int			error;
	boolean_t		notsent = B_TRUE;
	sctp_stack_t		*sctps = sctp->sctp_sctps;

	/*
	 * lfp is only assigned when a SACK is pending, and is only read
	 * below when sacklen > 0.
	 */
	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
		sacklen = 0;
	} else {
		/* send a SACK chunk */
		sacklen = sizeof (sctp_chunk_hdr_t) +
		    sizeof (sctp_sack_chunk_t) +
		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
		lfp = sctp->sctp_lastdata;
		ASSERT(lfp != NULL);
		if (lfp->state != SCTP_FADDRS_ALIVE)
			lfp = sctp->sctp_current;
	}

	/* Data budget for this call: min(sctp_frwnd, sctp_unsent). */
	cansend = sctp->sctp_frwnd;
	if (sctp->sctp_unsent < cansend)
		cansend = sctp->sctp_unsent;

	/*
	 * Start persist timer if unable to send or when
	 * trying to send into a zero window. This timer
	 * ensures the blocked send attempt is retried.
	 *
	 * Note that && binds tighter than ||, so the test below is
	 * (small send && data in flight && !sctp_ndelay) ||
	 * (zero budget with nothing in flight but data waiting).
	 */
	if ((cansend < sctp->sctp_current->sfa_pmss / 2) &&
	    (sctp->sctp_unacked != 0) &&
	    (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) &&
	    !sctp->sctp_ndelay ||
	    (cansend == 0 && sctp->sctp_unacked == 0 &&
	    sctp->sctp_unsent != 0)) {
		head = NULL;
		fp = sctp->sctp_current;
		goto unsent_data;
	}
	if (meta != NULL)
		mp = meta->b_cont;
	/* One iteration of this loop builds and queues one packet. */
	while (cansend > 0 && num_pkt-- != 0) {
		pad = 0;

		/*
		 * Find first segment eligible for transmit.
		 */
		while (mp != NULL) {
			if (SCTP_CHUNK_CANSEND(mp))
				break;
			mp = mp->b_next;
		}
		if (mp == NULL) {
			/* Current message is exhausted, fetch the next one. */
			meta = sctp_get_msg_to_send(sctp, &mp,
			    meta == NULL ? NULL : meta->b_next, &error, sacklen,
			    cansend, NULL);
			if (error != 0 || meta == NULL) {
				head = NULL;
				fp = sctp->sctp_current;
				goto unsent_data;
			}
			sctp->sctp_xmit_tail =  meta;
		}

		/*
		 * seglen is the chunk length from the data header, xtralen
		 * the header overhead counted so far and chunklen the
		 * payload bytes of this chunk.
		 */
		sdc = (sctp_data_hdr_t *)mp->b_rptr;
		seglen = ntohs(sdc->sdh_len);
		xtralen = sizeof (*sdc);
		chunklen = seglen - xtralen;

		/*
		 * Check rwnd.
		 */
		if (chunklen > cansend) {
			head = NULL;
			fp = SCTP_CHUNK_DEST(meta);
			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
				fp = sctp->sctp_current;
			goto unsent_data;
		}
		/* extra = bytes needed to round seglen up to SCTP_ALIGN. */
		if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
			extra = SCTP_ALIGN - extra;

		/*
		 * Pick destination address, and check cwnd.
		 */
		if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) &&
		    (seglen + sacklen + extra <= lfp->sfa_pmss)) {
			/*
			 * Only include SACK chunk if it can be bundled
			 * with a data chunk, and sent to sctp_lastdata.
			 */
			pathmax = lfp->cwnd - lfp->suna;

			fp = lfp;
			if ((nmp = dupmsg(mp)) == NULL) {
				head = NULL;
				goto unsent_data;
			}
			SCTP_CHUNK_CLEAR_FLAGS(nmp);
			head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
			    &error);
			if (head == NULL) {
				/*
				 * If none of the source addresses are
				 * available (i.e error == EHOSTUNREACH),
				 * pretend we have sent the data. We will
				 * eventually time out trying to retransmit
				 * the data if the interface never comes up.
				 * If we have already sent some stuff (i.e.,
				 * notsent is B_FALSE) then we are fine, else
				 * just mark this packet as sent.
				 */
				if (notsent && error == EHOSTUNREACH) {
					SCTP_CHUNK_SENT(sctp, mp, sdc,
					    fp, chunklen, meta);
				}
				freemsg(nmp);
				SCTP_KSTAT(sctps, sctp_output_failed);
				goto unsent_data;
			}
			/* The SACK is accounted for once; later packets get none. */
			seglen += sacklen;
			xtralen += sacklen;
			sacklen = 0;
		} else {
			fp = SCTP_CHUNK_DEST(meta);
			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
				fp = sctp->sctp_current;
			/*
			 * If we haven't sent data to this destination for
			 * a while, do slow start again.
			 */
			if (now - fp->lastactive > fp->rto) {
				SET_CWND(fp, fp->sfa_pmss,
				    sctps->sctps_slow_start_after_idle);
			}

			pathmax = fp->cwnd - fp->suna;
			if (seglen + extra > pathmax) {
				head = NULL;
				goto unsent_data;
			}
			if ((nmp = dupmsg(mp)) == NULL) {
				head = NULL;
				goto unsent_data;
			}
			SCTP_CHUNK_CLEAR_FLAGS(nmp);
			head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
			if (head == NULL) {
				/*
				 * If none of the source addresses are
				 * available (i.e error == EHOSTUNREACH),
				 * pretend we have sent the data. We will
				 * eventually time out trying to retransmit
				 * the data if the interface never comes up.
				 * If we have already sent some stuff (i.e.,
				 * notsent is B_FALSE) then we are fine, else
				 * just mark this packet as sent.
				 */
				if (notsent && error == EHOSTUNREACH) {
					SCTP_CHUNK_SENT(sctp, mp, sdc,
					    fp, chunklen, meta);
				}
				freemsg(nmp);
				SCTP_KSTAT(sctps, sctp_output_failed);
				goto unsent_data;
			}
		}
		fp->lastactive = now;
		/* From here on the packet may not grow beyond sfa_pmss. */
		if (pathmax > fp->sfa_pmss)
			pathmax = fp->sfa_pmss;
		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
		mp = mp->b_next;

		/* Use this chunk to measure RTT? */
		if (sctp->sctp_out_time == 0) {
			sctp->sctp_out_time = now;
			sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1;
			ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn));
		}
		if (extra > 0) {
			fill = sctp_get_padding(sctp, extra);
			if (fill != NULL) {
				linkb(head, fill);
				pad = extra;
				seglen += extra;
			} else {
				/*
				 * No padding available.  The chunk has
				 * already been marked sent above; head is
				 * freed at unsent_data.
				 */
				goto unsent_data;
			}
		}
		/* See if we can bundle more. */
		while (seglen < pathmax) {
			int32_t		new_len;
			int32_t		new_xtralen;

			while (mp != NULL) {
				if (SCTP_CHUNK_CANSEND(mp))
					break;
				mp = mp->b_next;
			}
			if (mp == NULL) {
				/*
				 * NOTE(review): cansend is unsigned, so
				 * cansend - seglen would wrap if seglen
				 * exceeds cansend while seglen - xtralen
				 * does not -- confirm how
				 * sctp_get_msg_to_send() treats that value.
				 */
				meta = sctp_get_msg_to_send(sctp, &mp,
				    meta->b_next, &error, seglen,
				    (seglen - xtralen) >= cansend ? 0 :
				    cansend - seglen, fp);
				if (error != 0 || meta == NULL)
					break;
				sctp->sctp_xmit_tail =  meta;
			}
			ASSERT(mp != NULL);
			/*
			 * Do not bundle an unsent chunk whose message is
			 * bound to a destination other than fp.
			 */
			if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
			    fp != SCTP_CHUNK_DEST(meta)) {
				break;
			}
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			chunklen = ntohs(sdc->sdh_len);
			if ((extra = chunklen  & (SCTP_ALIGN - 1)) != 0)
				extra = SCTP_ALIGN - extra;

			new_len = seglen + chunklen;
			new_xtralen = xtralen + sizeof (*sdc);
			chunklen -= sizeof (*sdc);

			/* Stop if the window or the packet limit is exceeded. */
			if (new_len - new_xtralen > cansend ||
			    new_len + extra > pathmax) {
				break;
			}
			if ((nmp = dupmsg(mp)) == NULL)
				break;
			if (extra > 0) {
				fill = sctp_get_padding(sctp, extra);
				if (fill != NULL) {
					pad += extra;
					new_len += extra;
					linkb(nmp, fill);
				} else {
					freemsg(nmp);
					break;
				}
			}
			/* Commit the chunk to the packet. */
			seglen = new_len;
			xtralen = new_xtralen;
			SCTP_CHUNK_CLEAR_FLAGS(nmp);
			SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
			linkb(head, nmp);
			mp = mp->b_next;
		}
		if ((seglen > fp->sfa_pmss) && fp->isv4) {
			ipha_t *iph = (ipha_t *)head->b_rptr;

			/*
			 * Path MTU is different from what we thought it would
			 * be when we created chunks, or IP headers have grown.
			 * Need to clear the DF bit.
			 */
			iph->ipha_fragment_offset_and_flags = 0;
		}
		/* xmit segment */
		/* Only payload bytes (no headers, no padding) use up cansend. */
		ASSERT(cansend >= seglen - pad - xtralen);
		cansend -= (seglen - pad - xtralen);
		dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
		    "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
		    seglen - xtralen, ntohl(sdc->sdh_tsn),
		    ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
		    cansend, sctp->sctp_lastack_rxd));
		sctp_set_iplen(sctp, head);
		sctp_add_sendq(sctp, head);
		/* arm rto timer (if not set) */
		if (!fp->timer_running)
			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
		notsent = B_FALSE;
	}
	sctp->sctp_active = now;
	return;
unsent_data:
	/* arm persist timer (if rto timer not set) */
	if (!fp->timer_running)
		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
	if (head != NULL)
		freemsg(head);
}
1265 
1266 /*
1267  * The following two functions initialize and destroy the cache
1268  * associated with the sets used for PR-SCTP.
1269  */
1270 void
1271 sctp_ftsn_sets_init(void)
1272 {
1273 	sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
1274 	    sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
1275 	    NULL, 0);
1276 }
1277 
1278 void
1279 sctp_ftsn_sets_fini(void)
1280 {
1281 	kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
1282 }
1283 
1284 
1285 /* Free PR-SCTP sets */
1286 void
1287 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
1288 {
1289 	sctp_ftsn_set_t *p;
1290 
1291 	while (s != NULL) {
1292 		p = s->next;
1293 		s->next = NULL;
1294 		kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
1295 		s = p;
1296 	}
1297 }
1298 
1299 /*
1300  * Given a message meta block, meta, this routine creates or modifies
1301  * the set that will be used to generate a Forward TSN chunk. If the
1302  * entry for stream id, sid, for this message already exists, the
1303  * sequence number, ssn, is updated if it is greater than the existing
1304  * one. If an entry for this sid does not exist, one is created if
1305  * the size does not exceed fp->sfa_pmss. We return false in case
1306  * or an error.
1307  */
1308 boolean_t
1309 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
1310     uint_t *nsets, uint32_t *slen)
1311 {
1312 	sctp_ftsn_set_t		*p;
1313 	sctp_msg_hdr_t		*msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1314 	uint16_t		sid = htons(msg_hdr->smh_sid);
1315 	/* msg_hdr->smh_ssn is already in NBO */
1316 	uint16_t		ssn = msg_hdr->smh_ssn;
1317 
1318 	ASSERT(s != NULL && nsets != NULL);
1319 	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
1320 
1321 	if (*s == NULL) {
1322 		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
1323 		*s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
1324 		if (*s == NULL)
1325 			return (B_FALSE);
1326 		(*s)->ftsn_entries.ftsn_sid = sid;
1327 		(*s)->ftsn_entries.ftsn_ssn = ssn;
1328 		(*s)->next = NULL;
1329 		*nsets = 1;
1330 		*slen += sizeof (uint32_t);
1331 		return (B_TRUE);
1332 	}
1333 	for (p = *s; p->next != NULL; p = p->next) {
1334 		if (p->ftsn_entries.ftsn_sid == sid) {
1335 			if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1336 				p->ftsn_entries.ftsn_ssn = ssn;
1337 			return (B_TRUE);
1338 		}
1339 	}
1340 	/* the last one */
1341 	if (p->ftsn_entries.ftsn_sid == sid) {
1342 		if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1343 			p->ftsn_entries.ftsn_ssn = ssn;
1344 	} else {
1345 		if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss)
1346 			return (B_FALSE);
1347 		p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
1348 		    KM_NOSLEEP);
1349 		if (p->next == NULL)
1350 			return (B_FALSE);
1351 		p = p->next;
1352 		p->ftsn_entries.ftsn_sid = sid;
1353 		p->ftsn_entries.ftsn_ssn = ssn;
1354 		p->next = NULL;
1355 		(*nsets)++;
1356 		*slen += sizeof (uint32_t);
1357 	}
1358 	return (B_TRUE);
1359 }
1360 
1361 /*
1362  * Given a set of stream id - sequence number pairs, this routing creates
1363  * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
1364  * for the chunk is obtained from sctp->sctp_adv_pap. The caller
1365  * will add the IP/SCTP header.
1366  */
1367 mblk_t *
1368 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
1369     uint_t nsets, uint32_t seglen)
1370 {
1371 	mblk_t			*ftsn_mp;
1372 	sctp_chunk_hdr_t	*ch_hdr;
1373 	uint32_t		*advtsn;
1374 	uint16_t		schlen;
1375 	size_t			xtralen;
1376 	ftsn_entry_t		*ftsn_entry;
1377 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1378 
1379 	seglen += sizeof (sctp_chunk_hdr_t);
1380 	if (fp->isv4)
1381 		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra;
1382 	else
1383 		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra;
1384 	ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp));
1385 	if (ftsn_mp == NULL)
1386 		return (NULL);
1387 	ftsn_mp->b_rptr += xtralen;
1388 	ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
1389 
1390 	ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
1391 	ch_hdr->sch_id = CHUNK_FORWARD_TSN;
1392 	ch_hdr->sch_flags = 0;
1393 	/*
1394 	 * The cast here should not be an issue since seglen is
1395 	 * the length of the Forward TSN chunk.
1396 	 */
1397 	schlen = (uint16_t)seglen;
1398 	U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
1399 
1400 	advtsn = (uint32_t *)(ch_hdr + 1);
1401 	U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
1402 	ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
1403 	while (nsets > 0) {
1404 		ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
1405 		ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
1406 		ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
1407 		ftsn_entry++;
1408 		sets = sets->next;
1409 		nsets--;
1410 	}
1411 	return (ftsn_mp);
1412 }
1413 
/*
 * Given a starting message, the routine steps through all the
 * messages whose TSN is less than sctp->sctp_adv_pap and creates
 * ftsn sets. The ftsn sets are then used to create a Forward TSN
 * chunk. All the messages, that have chunks that are included in the
 * ftsn sets, are flagged abandoned. If a message is partially sent
 * and is deemed abandoned, all remaining unsent chunks are marked
 * abandoned and are deducted from sctp_unsent.
 *
 * sctp   - the association.
 * meta   - first message to examine.
 * mp     - chunk within meta at which to start.
 * nmp    - out: set to the packet (Forward TSN chunk with protocol
 *	    headers) on success, to NULL if the headers could not be added.
 *	    It is not written at all when no message qualified, so the
 *	    caller must initialize it.
 * fp     - destination for the packet; replaced locally by sctp_lastdata
 *	    when a SACK is piggybacked.
 * seglen - out: size of the Forward TSN payload (cumulative TSN plus one
 *	    uint32_t per set) plus the SACK length if one is piggybacked.
 *	    The chunk header is added by sctp_make_ftsn_chunk() to its own
 *	    copy only and is therefore not included here.
 */
void
sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
    sctp_faddr_t *fp, uint32_t *seglen)
{
	mblk_t		*mp1 = mp;
	mblk_t		*mp_head = mp;
	mblk_t		*meta_head = meta;
	mblk_t		*head;
	sctp_ftsn_set_t	*sets = NULL;
	uint_t		nsets = 0;
	uint16_t	clen;
	sctp_data_hdr_t	*sdc;
	uint32_t	sacklen;
	uint32_t	adv_pap = sctp->sctp_adv_pap;
	uint32_t	unsent = 0;
	boolean_t	ubit;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/* Room for the new cumulative TSN. */
	*seglen = sizeof (uint32_t);

	/*
	 * First pass: collect the sets.  Nothing in the transmit list is
	 * modified yet.  adv_pap tracks the TSN of the last sent chunk
	 * walked over, so that sctp_adv_pap can be pulled back if a set
	 * cannot be added.
	 */
	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		/*
		 * Skip adding FTSN sets for un-ordered messages as they do
		 * not have SSNs.
		 */
		ubit = SCTP_DATA_GET_UBIT(sdc);
		if (!ubit &&
		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
			/*
			 * Could not add this message.  Limit the advanced
			 * peer ack point to what has been covered so far;
			 * meta = NULL makes the test after ftsn_done fall
			 * through to building the chunk.
			 */
			meta = NULL;
			sctp->sctp_adv_pap = adv_pap;
			goto ftsn_done;
		}
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			adv_pap = ntohl(sdc->sdh_tsn);
			mp1 = mp1->b_next;
		}
		meta = meta->b_next;
		if (meta != NULL) {
			mp1 = meta->b_cont;
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
ftsn_done:
	/*
	 * Can't compare with sets == NULL, since we don't add any
	 * sets for un-ordered messages.
	 */
	if (meta == meta_head)
		return;
	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
	/* The sets have been copied into the chunk (or are of no use). */
	sctp_free_ftsn_set(sets);
	if (*nmp == NULL)
		return;
	/* Piggyback a SACK if one is pending and it fits. */
	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
		sacklen = 0;
	} else {
		sacklen = sizeof (sctp_chunk_hdr_t) +
		    sizeof (sctp_sack_chunk_t) +
		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
			/* piggybacked SACK doesn't fit */
			sacklen = 0;
		} else {
			fp = sctp->sctp_lastdata;
		}
	}
	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
	if (head == NULL) {
		freemsg(*nmp);
		*nmp = NULL;
		SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
		return;
	}
	*seglen += sacklen;
	*nmp = head;

	/*
	 * XXXNeed to optimise this, the reason it is done here is so
	 * that we don't have to undo in case of failure.
	 */
	/*
	 * Second pass, over the same messages as the first one: flag the
	 * messages abandoned and account for their chunks.
	 */
	mp1 = mp_head;
	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta_head != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		if (!SCTP_IS_MSG_ABANDONED(meta_head))
			SCTP_MSG_SET_ABANDONED(meta_head);
		/* Chunks that were sent but not acked. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ISACKED(mp1)) {
				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
				    meta_head);
			}
			mp1 = mp1->b_next;
		}
		/* Remaining chunks were never sent: abandon and count them. */
		while (mp1 != NULL) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ABANDONED(mp1)) {
				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_ABANDON_CHUNK(mp1);
			}
			mp1 = mp1->b_next;
		}
		meta_head = meta_head->b_next;
		if (meta_head != NULL) {
			mp1 = meta_head->b_cont;
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
	if (unsent > 0) {
		ASSERT(sctp->sctp_unsent >= unsent);
		sctp->sctp_unsent -= unsent;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);
	}
}
1551 
1552 /*
1553  * This function steps through messages starting at meta and checks if
1554  * the message is abandoned. It stops when it hits an unsent chunk or
1555  * a message that has all its chunk acked. This is the only place
1556  * where the sctp_adv_pap is moved forward to indicated abandoned
1557  * messages.
1558  */
1559 void
1560 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
1561 {
1562 	uint32_t	tsn = sctp->sctp_adv_pap;
1563 	sctp_data_hdr_t	*sdc;
1564 	sctp_msg_hdr_t	*msg_hdr;
1565 
1566 	ASSERT(mp != NULL);
1567 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1568 	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
1569 	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1570 	if (!SCTP_IS_MSG_ABANDONED(meta) &&
1571 	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1572 		return;
1573 	}
1574 	while (meta != NULL) {
1575 		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
1576 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1577 			tsn = ntohl(sdc->sdh_tsn);
1578 			mp = mp->b_next;
1579 		}
1580 		if (mp != NULL)
1581 			break;
1582 		/*
1583 		 * We continue checking for successive messages only if there
1584 		 * is a chunk marked for retransmission. Else, we might
1585 		 * end up sending FTSN prematurely for chunks that have been
1586 		 * sent, but not yet acked.
1587 		 */
1588 		if ((meta = meta->b_next) != NULL) {
1589 			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1590 			if (!SCTP_IS_MSG_ABANDONED(meta) &&
1591 			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1592 				break;
1593 			}
1594 			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1595 				if (!SCTP_CHUNK_ISSENT(mp)) {
1596 					sctp->sctp_adv_pap = tsn;
1597 					return;
1598 				}
1599 				if (SCTP_CHUNK_WANT_REXMIT(mp))
1600 					break;
1601 			}
1602 			if (mp == NULL)
1603 				break;
1604 		}
1605 	}
1606 	sctp->sctp_adv_pap = tsn;
1607 }
1608 
1609 
/*
 * Tell whether data chunk mp may be bundled into a packet that is being
 * retransmitted.  An abandoned chunk never qualifies.  Otherwise the chunk
 * qualifies if either
 *
 * - it has been sent to destination fp and is not yet acked,
 *
 * or
 *
 * - its flags are anything but "sent without retransmit request", which
 *   covers chunks that have not been sent at all, i.e. new data.
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED(mp) &&					\
	((SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(mp) == (fp) &&	\
	!SCTP_CHUNK_ISACKED(mp)) ||					\
	((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT | SCTP_CHUNK_FLAG_SENT)) != \
	SCTP_CHUNK_FLAG_SENT))
1626 
1627 /*
1628  * Retransmit first segment which hasn't been acked with cumtsn or send
1629  * a Forward TSN chunk, if appropriate.
1630  */
1631 void
1632 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
1633 {
1634 	mblk_t		*mp;
1635 	mblk_t		*nmp = NULL;
1636 	mblk_t		*head;
1637 	mblk_t		*meta = sctp->sctp_xmit_head;
1638 	mblk_t		*fill;
1639 	uint32_t	seglen = 0;
1640 	uint32_t	sacklen;
1641 	uint16_t	chunklen;
1642 	int		extra;
1643 	sctp_data_hdr_t	*sdc;
1644 	sctp_faddr_t	*fp;
1645 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1646 	boolean_t	do_ftsn = B_FALSE;
1647 	boolean_t	ftsn_check = B_TRUE;
1648 	uint32_t	first_ua_tsn;
1649 	sctp_msg_hdr_t	*mhdr;
1650 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1651 	int		error;
1652 
1653 	while (meta != NULL) {
1654 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1655 			uint32_t	tsn;
1656 
1657 			if (!SCTP_CHUNK_ISSENT(mp))
1658 				goto window_probe;
1659 			/*
1660 			 * We break in the following cases -
1661 			 *
1662 			 *	if the advanced peer ack point includes the next
1663 			 *	chunk to be retransmited - possibly the Forward
1664 			 * 	TSN was lost.
1665 			 *
1666 			 *	if we are PRSCTP aware and the next chunk to be
1667 			 *	retransmitted is now abandoned
1668 			 *
1669 			 *	if the next chunk to be retransmitted is for
1670 			 *	the dest on which the timer went off. (this
1671 			 *	message is not abandoned).
1672 			 *
1673 			 * We check for Forward TSN only for the first
1674 			 * eligible chunk to be retransmitted. The reason
1675 			 * being if the first eligible chunk is skipped (say
1676 			 * it was sent to a destination other than oldfp)
1677 			 * then we cannot advance the cum TSN via Forward
1678 			 * TSN chunk.
1679 			 *
1680 			 * Also, ftsn_check is B_TRUE only for the first
1681 			 * eligible chunk, it  will be B_FALSE for all
1682 			 * subsequent candidate messages for retransmission.
1683 			 */
1684 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1685 			tsn = ntohl(sdc->sdh_tsn);
1686 			if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
1687 				if (sctp->sctp_prsctp_aware && ftsn_check) {
1688 					if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
1689 						ASSERT(sctp->sctp_prsctp_aware);
1690 						do_ftsn = B_TRUE;
1691 						goto out;
1692 					} else {
1693 						sctp_check_adv_ack_pt(sctp,
1694 						    meta, mp);
1695 						if (SEQ_GT(sctp->sctp_adv_pap,
1696 						    adv_pap)) {
1697 							do_ftsn = B_TRUE;
1698 							goto out;
1699 						}
1700 					}
1701 					ftsn_check = B_FALSE;
1702 				}
1703 				if (SCTP_CHUNK_DEST(mp) == oldfp)
1704 					goto out;
1705 			}
1706 		}
1707 		meta = meta->b_next;
1708 		if (meta != NULL && sctp->sctp_prsctp_aware) {
1709 			mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1710 
1711 			while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
1712 			    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
1713 				meta = meta->b_next;
1714 			}
1715 		}
1716 	}
1717 window_probe:
1718 	/*
1719 	 * Retransmit fired for a destination which didn't have
1720 	 * any unacked data pending.
1721 	 */
1722 	if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) {
1723 		/*
1724 		 * Send a window probe. Inflate frwnd to allow
1725 		 * sending one segment.
1726 		 */
1727 		if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc)))
1728 			sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc);
1729 
1730 		/* next TSN to send */
1731 		sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
1732 
1733 		/*
1734 		 * The above sctp_frwnd adjustment is coarse.  The "changed"
1735 		 * sctp_frwnd may allow us to send more than 1 packet.  So
1736 		 * tell sctp_output() to send only 1 packet.
1737 		 */
1738 		sctp_output(sctp, 1);
1739 
1740 		/* Last sent TSN */
1741 		sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1742 		ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
1743 		sctp->sctp_zero_win_probe = B_TRUE;
1744 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
1745 	}
1746 	return;
1747 out:
1748 	/*
1749 	 * After a time out, assume that everything has left the network.  So
1750 	 * we can clear rxt_unacked for the original peer address.
1751 	 */
1752 	oldfp->rxt_unacked = 0;
1753 
1754 	/*
1755 	 * If we were probing for zero window, don't adjust retransmission
1756 	 * variables, but the timer is still backed off.
1757 	 */
1758 	if (sctp->sctp_zero_win_probe) {
1759 		mblk_t	*pkt;
1760 		uint_t	pkt_len;
1761 
1762 		/*
1763 		 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn
1764 		 * and sctp_rxt_maxtsn will specify the ZWP packet.
1765 		 */
1766 		fp = oldfp;
1767 		if (oldfp->state != SCTP_FADDRS_ALIVE)
1768 			fp = sctp_rotate_faddr(sctp, oldfp);
1769 		pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
1770 		if (pkt != NULL) {
1771 			ASSERT(pkt_len <= fp->sfa_pmss);
1772 			sctp_set_iplen(sctp, pkt);
1773 			sctp_add_sendq(sctp, pkt);
1774 		} else {
1775 			SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
1776 		}
1777 
1778 		/*
1779 		 * The strikes will be clear by sctp_faddr_alive() when the
1780 		 * other side sends us an ack.
1781 		 */
1782 		oldfp->strikes++;
1783 		sctp->sctp_strikes++;
1784 
1785 		SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
1786 		if (oldfp != fp && oldfp->suna != 0)
1787 			SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto);
1788 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1789 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
1790 		return;
1791 	}
1792 
1793 	/*
1794 	 * Enter slowstart for this destination
1795 	 */
1796 	oldfp->ssthresh = oldfp->cwnd / 2;
1797 	if (oldfp->ssthresh < 2 * oldfp->sfa_pmss)
1798 		oldfp->ssthresh = 2 * oldfp->sfa_pmss;
1799 	oldfp->cwnd = oldfp->sfa_pmss;
1800 	oldfp->pba = 0;
1801 	fp = sctp_rotate_faddr(sctp, oldfp);
1802 	ASSERT(fp != NULL);
1803 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1804 
1805 	first_ua_tsn = ntohl(sdc->sdh_tsn);
1806 	if (do_ftsn) {
1807 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
1808 		if (nmp == NULL) {
1809 			sctp->sctp_adv_pap = adv_pap;
1810 			goto restart_timer;
1811 		}
1812 		head = nmp;
1813 		/*
1814 		 * Move to the next unabandoned chunk. XXXCheck if meta will
1815 		 * always be marked abandoned.
1816 		 */
1817 		while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta))
1818 			meta = meta->b_next;
1819 		if (meta != NULL)
1820 			mp = mp->b_cont;
1821 		else
1822 			mp = NULL;
1823 		goto try_bundle;
1824 	}
1825 	seglen = ntohs(sdc->sdh_len);
1826 	chunklen = seglen - sizeof (*sdc);
1827 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1828 		extra = SCTP_ALIGN - extra;
1829 
1830 	/* Find out if we need to piggyback SACK. */
1831 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1832 		sacklen = 0;
1833 	} else {
1834 		sacklen = sizeof (sctp_chunk_hdr_t) +
1835 		    sizeof (sctp_sack_chunk_t) +
1836 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1837 		if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
1838 			/* piggybacked SACK doesn't fit */
1839 			sacklen = 0;
1840 		} else {
1841 			/*
1842 			 * OK, we have room to send SACK back.  But we
1843 			 * should send it back to the last fp where we
1844 			 * receive data from, unless sctp_lastdata equals
1845 			 * oldfp, then we should probably not send it
1846 			 * back to that fp.  Also we should check that
1847 			 * the fp is alive.
1848 			 */
1849 			if (sctp->sctp_lastdata != oldfp &&
1850 			    sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) {
1851 				fp = sctp->sctp_lastdata;
1852 			}
1853 		}
1854 	}
1855 
1856 	/*
1857 	 * Cancel RTT measurement if the retransmitted TSN is before the
1858 	 * TSN used for timimg.
1859 	 */
1860 	if (sctp->sctp_out_time != 0 &&
1861 	    SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
1862 		sctp->sctp_out_time = 0;
1863 	}
1864 	/* Clear the counter as the RTT calculation may be off. */
1865 	fp->rtt_updates = 0;
1866 	oldfp->rtt_updates = 0;
1867 
1868 	/*
1869 	 * After a timeout, we should change the current faddr so that
1870 	 * new chunks will be sent to the alternate address.
1871 	 */
1872 	sctp_set_faddr_current(sctp, fp);
1873 
1874 	nmp = dupmsg(mp);
1875 	if (nmp == NULL)
1876 		goto restart_timer;
1877 	if (extra > 0) {
1878 		fill = sctp_get_padding(sctp, extra);
1879 		if (fill != NULL) {
1880 			linkb(nmp, fill);
1881 			seglen += extra;
1882 		} else {
1883 			freemsg(nmp);
1884 			goto restart_timer;
1885 		}
1886 	}
1887 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
1888 	head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
1889 	if (head == NULL) {
1890 		freemsg(nmp);
1891 		SCTP_KSTAT(sctps, sctp_rexmit_failed);
1892 		goto restart_timer;
1893 	}
1894 	seglen += sacklen;
1895 
1896 	SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1897 
1898 	mp = mp->b_next;
1899 
1900 try_bundle:
1901 	/* We can at least and at most send 1 packet at timeout. */
1902 	while (seglen < fp->sfa_pmss) {
1903 		int32_t new_len;
1904 
1905 		/* Go through the list to find more chunks to be bundled. */
1906 		while (mp != NULL) {
1907 			/* Check if the chunk can be bundled. */
1908 			if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp))
1909 				break;
1910 			mp = mp->b_next;
1911 		}
1912 		/* Go to the next message. */
1913 		if (mp == NULL) {
1914 			for (meta = meta->b_next; meta != NULL;
1915 			    meta = meta->b_next) {
1916 				mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1917 
1918 				if (SCTP_IS_MSG_ABANDONED(meta) ||
1919 				    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
1920 				    sctp)) {
1921 					continue;
1922 				}
1923 
1924 				mp = meta->b_cont;
1925 				goto try_bundle;
1926 			}
1927 			/*
1928 			 * Check if there is a new message which potentially
1929 			 * could be bundled with this retransmission.
1930 			 */
1931 			meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error,
1932 			    seglen, fp->sfa_pmss - seglen, NULL);
1933 			if (error != 0 || meta == NULL) {
1934 				/* No more chunk to be bundled. */
1935 				break;
1936 			} else {
1937 				goto try_bundle;
1938 			}
1939 		}
1940 
1941 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1942 		new_len = ntohs(sdc->sdh_len);
1943 		chunklen = new_len - sizeof (*sdc);
1944 
1945 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
1946 			extra = SCTP_ALIGN - extra;
1947 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
1948 			break;
1949 		if ((nmp = dupmsg(mp)) == NULL)
1950 			break;
1951 
1952 		if (extra > 0) {
1953 			fill = sctp_get_padding(sctp, extra);
1954 			if (fill != NULL) {
1955 				linkb(nmp, fill);
1956 			} else {
1957 				freemsg(nmp);
1958 				break;
1959 			}
1960 		}
1961 		linkb(head, nmp);
1962 
1963 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
1964 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1965 
1966 		seglen = new_len;
1967 		mp = mp->b_next;
1968 	}
1969 done_bundle:
1970 	if ((seglen > fp->sfa_pmss) && fp->isv4) {
1971 		ipha_t *iph = (ipha_t *)head->b_rptr;
1972 
1973 		/*
1974 		 * Path MTU is different from path we thought it would
1975 		 * be when we created chunks, or IP headers have grown.
1976 		 * Need to clear the DF bit.
1977 		 */
1978 		iph->ipha_fragment_offset_and_flags = 0;
1979 	}
1980 	fp->rxt_unacked += seglen;
1981 
1982 	dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
1983 	    "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
1984 	    seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
1985 	    (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
1986 
1987 	sctp->sctp_rexmitting = B_TRUE;
1988 	sctp->sctp_rxt_nxttsn = first_ua_tsn;
1989 	sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1990 	sctp_set_iplen(sctp, head);
1991 	sctp_add_sendq(sctp, head);
1992 
1993 	/*
1994 	 * Restart the oldfp timer with exponential backoff and
1995 	 * the new fp timer for the retransmitted chunks.
1996 	 */
1997 restart_timer:
1998 	oldfp->strikes++;
1999 	sctp->sctp_strikes++;
2000 	SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
2001 	/*
2002 	 * If there is still some data in the oldfp, restart the
2003 	 * retransmission timer.  If there is no data, the heartbeat will
2004 	 * continue to run so it will do its job in checking the reachability
2005 	 * of the oldfp.
2006 	 */
2007 	if (oldfp != fp && oldfp->suna != 0)
2008 		SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto);
2009 
2010 	/*
2011 	 * Should we restart the timer of the new fp?  If there is
2012 	 * outstanding data to the new fp, the timer should be
2013 	 * running already.  So restarting it means that the timer
2014 	 * will fire later for those outstanding data.  But if
2015 	 * we don't restart it, the timer will fire too early for the
2016 	 * just retransmitted chunks to the new fp.  The reason is that we
2017 	 * don't keep a timestamp on when a chunk is retransmitted.
2018 	 * So when the timer fires, it will just search for the
2019 	 * chunk with the earliest TSN sent to new fp.  This probably
2020 	 * is the chunk we just retransmitted.  So for now, let's
2021 	 * be conservative and restart the timer of the new fp.
2022 	 */
2023 	SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
2024 
2025 	sctp->sctp_active = lbolt64;
2026 }
2027 
2028 /*
2029  * This function is called by sctp_ss_rexmit() to create a packet
2030  * to be retransmitted to the given fp.  The given meta and mp
2031  * parameters are respectively the sctp_msg_hdr_t and the mblk of the
2032  * first chunk to be retransmitted.  This is also called when we want
2033  * to retransmit a zero window probe from sctp_rexmit() or when we
2034  * want to retransmit the zero window probe after the window has
2035  * opened from sctp_got_sack().
2036  */
2037 mblk_t *
2038 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
2039     uint_t *packet_len)
2040 {
2041 	uint32_t	seglen = 0;
2042 	uint16_t	chunklen;
2043 	int		extra;
2044 	mblk_t		*nmp;
2045 	mblk_t		*head;
2046 	mblk_t		*fill;
2047 	sctp_data_hdr_t	*sdc;
2048 	sctp_msg_hdr_t	*mhdr;
2049 
2050 	sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2051 	seglen = ntohs(sdc->sdh_len);
2052 	chunklen = seglen - sizeof (*sdc);
2053 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
2054 		extra = SCTP_ALIGN - extra;
2055 
2056 	nmp = dupmsg(*mp);
2057 	if (nmp == NULL)
2058 		return (NULL);
2059 	if (extra > 0) {
2060 		fill = sctp_get_padding(sctp, extra);
2061 		if (fill != NULL) {
2062 			linkb(nmp, fill);
2063 			seglen += extra;
2064 		} else {
2065 			freemsg(nmp);
2066 			return (NULL);
2067 		}
2068 	}
2069 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
2070 	head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
2071 	if (head == NULL) {
2072 		freemsg(nmp);
2073 		return (NULL);
2074 	}
2075 	SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2076 	/*
2077 	 * Don't update the TSN if we are doing a Zero Win Probe.
2078 	 */
2079 	if (!sctp->sctp_zero_win_probe)
2080 		sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2081 	*mp = (*mp)->b_next;
2082 
2083 try_bundle:
2084 	while (seglen < fp->sfa_pmss) {
2085 		int32_t new_len;
2086 
2087 		/*
2088 		 * Go through the list to find more chunks to be bundled.
2089 		 * We should only retransmit sent by unack'ed chunks.  Since
2090 		 * they were sent before, the peer's receive window should
2091 		 * be able to receive them.
2092 		 */
2093 		while (*mp != NULL) {
2094 			/* Check if the chunk can be bundled. */
2095 			if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
2096 				break;
2097 			*mp = (*mp)->b_next;
2098 		}
2099 		/* Go to the next message. */
2100 		if (*mp == NULL) {
2101 			for (*meta = (*meta)->b_next; *meta != NULL;
2102 			    *meta = (*meta)->b_next) {
2103 				mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;
2104 
2105 				if (SCTP_IS_MSG_ABANDONED(*meta) ||
2106 				    SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
2107 				    sctp)) {
2108 					continue;
2109 				}
2110 
2111 				*mp = (*meta)->b_cont;
2112 				goto try_bundle;
2113 			}
2114 			/* No more chunk to be bundled. */
2115 			break;
2116 		}
2117 
2118 		sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2119 		/* Don't bundle chunks beyond sctp_rxt_maxtsn. */
2120 		if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
2121 			break;
2122 		new_len = ntohs(sdc->sdh_len);
2123 		chunklen = new_len - sizeof (*sdc);
2124 
2125 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
2126 			extra = SCTP_ALIGN - extra;
2127 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
2128 			break;
2129 		if ((nmp = dupmsg(*mp)) == NULL)
2130 			break;
2131 
2132 		if (extra > 0) {
2133 			fill = sctp_get_padding(sctp, extra);
2134 			if (fill != NULL) {
2135 				linkb(nmp, fill);
2136 			} else {
2137 				freemsg(nmp);
2138 				break;
2139 			}
2140 		}
2141 		linkb(head, nmp);
2142 
2143 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
2144 		SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2145 		/*
2146 		 * Don't update the TSN if we are doing a Zero Win Probe.
2147 		 */
2148 		if (!sctp->sctp_zero_win_probe)
2149 			sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2150 
2151 		seglen = new_len;
2152 		*mp = (*mp)->b_next;
2153 	}
2154 	*packet_len = seglen;
2155 	fp->rxt_unacked += seglen;
2156 	return (head);
2157 }
2158 
2159 /*
2160  * sctp_ss_rexmit() is called when we get a SACK after a timeout which
2161  * advances the cum_tsn but the cum_tsn is still less than what we have sent
2162  * (sctp_rxt_maxtsn) at the time of the timeout.  This SACK is a "partial"
2163  * SACK.  We retransmit unacked chunks without having to wait for another
2164  * timeout.  The rationale is that the SACK should not be "partial" if all the
2165  * lost chunks have been retransmitted.  Since the SACK is "partial,"
2166  * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
2167  * be missing.  It is better for us to retransmit them now instead
2168  * of waiting for a timeout.
2169  */
2170 void
2171 sctp_ss_rexmit(sctp_t *sctp)
2172 {
2173 	mblk_t		*meta;
2174 	mblk_t		*mp;
2175 	mblk_t		*pkt;
2176 	sctp_faddr_t	*fp;
2177 	uint_t		pkt_len;
2178 	uint32_t	tot_wnd;
2179 	sctp_data_hdr_t	*sdc;
2180 	int		burst;
2181 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2182 
2183 	ASSERT(!sctp->sctp_zero_win_probe);
2184 
2185 	/*
2186 	 * If the last cum ack is smaller than what we have just
2187 	 * retransmitted, simply return.
2188 	 */
2189 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
2190 		sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
2191 	else
2192 		return;
2193 	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));
2194 
2195 	/*
2196 	 * After a timer fires, sctp_current should be set to the new
2197 	 * fp where the retransmitted chunks are sent.
2198 	 */
2199 	fp = sctp->sctp_current;
2200 
2201 	/*
2202 	 * Since we are retransmitting, we only need to use cwnd to determine
2203 	 * how much we can send as we were allowed (by peer's receive window)
2204 	 * to send those retransmitted chunks previously when they are first
2205 	 * sent.  If we record how much we have retransmitted but
2206 	 * unacknowledged using rxt_unacked, then the amount we can now send
2207 	 * is equal to cwnd minus rxt_unacked.
2208 	 *
2209 	 * The field rxt_unacked is incremented when we retransmit a packet
2210 	 * and decremented when we got a SACK acknowledging something.  And
2211 	 * it is reset when the retransmission timer fires as we assume that
2212 	 * all packets have left the network after a timeout.  If this
2213 	 * assumption is not true, it means that after a timeout, we can
2214 	 * get a SACK acknowledging more than rxt_unacked (its value only
2215 	 * contains what is retransmitted when the timer fires).  So
2216 	 * rxt_unacked will become very big (it is an unsiged int so going
2217 	 * negative means that the value is huge).  This is the reason we
2218 	 * always send at least 1 MSS bytes.
2219 	 *
2220 	 * The reason why we do not have an accurate count is that we
2221 	 * only know how many packets are outstanding (using the TSN numbers).
2222 	 * But we do not know how many bytes those packets contain.  To
2223 	 * have an accurate count, we need to walk through the send list.
2224 	 * As it is not really important to have an accurate count during
2225 	 * retransmission, we skip this walk to save some time.  This should
2226 	 * not make the retransmission too aggressive to cause congestion.
2227 	 */
2228 	if (fp->cwnd <= fp->rxt_unacked)
2229 		tot_wnd = fp->sfa_pmss;
2230 	else
2231 		tot_wnd = fp->cwnd - fp->rxt_unacked;
2232 
2233 	/* Find the first unack'ed chunk */
2234 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
2235 		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
2236 
2237 		if (SCTP_IS_MSG_ABANDONED(meta) ||
2238 		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
2239 			continue;
2240 		}
2241 
2242 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2243 			/* Again, this may not be possible */
2244 			if (!SCTP_CHUNK_ISSENT(mp))
2245 				return;
2246 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2247 			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
2248 				goto found_msg;
2249 		}
2250 	}
2251 
2252 	/* Everything is abandoned... */
2253 	return;
2254 
2255 found_msg:
2256 	if (!fp->timer_running)
2257 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
2258 	pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
2259 	if (pkt == NULL) {
2260 		SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2261 		return;
2262 	}
2263 	if ((pkt_len > fp->sfa_pmss) && fp->isv4) {
2264 		ipha_t	*iph = (ipha_t *)pkt->b_rptr;
2265 
2266 		/*
2267 		 * Path MTU is different from path we thought it would
2268 		 * be when we created chunks, or IP headers have grown.
2269 		 *  Need to clear the DF bit.
2270 		 */
2271 		iph->ipha_fragment_offset_and_flags = 0;
2272 	}
2273 	sctp_set_iplen(sctp, pkt);
2274 	sctp_add_sendq(sctp, pkt);
2275 
2276 	/* Check and see if there is more chunk to be retransmitted. */
2277 	if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss ||
2278 	    meta == NULL)
2279 		return;
2280 	if (mp == NULL)
2281 		meta = meta->b_next;
2282 	if (meta == NULL)
2283 		return;
2284 
2285 	/* Retransmit another packet if the window allows. */
2286 	for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1;
2287 	    meta != NULL && burst > 0; meta = meta->b_next, burst--) {
2288 		if (mp == NULL)
2289 			mp = meta->b_cont;
2290 		for (; mp != NULL; mp = mp->b_next) {
2291 			/* Again, this may not be possible */
2292 			if (!SCTP_CHUNK_ISSENT(mp))
2293 				return;
2294 			if (!SCTP_CHUNK_ISACKED(mp))
2295 				goto found_msg;
2296 		}
2297 	}
2298 }
2299