xref: /titanic_50/usr/src/uts/common/inet/sctp/sctp_output.c (revision 981012ac7f476eef9dea020257262f756da2fe4b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/cmn_err.h>
33 #define	_SUN_TPI_VERSION 2
34 #include <sys/tihdr.h>
35 #include <sys/socket.h>
36 #include <sys/stropts.h>
37 #include <sys/strsun.h>
38 #include <sys/strsubr.h>
39 #include <sys/socketvar.h>
40 /* swilly code in sys/socketvar.h turns off DEBUG */
41 #ifdef __lint
42 #define	DEBUG
43 #endif
44 
45 #include <inet/common.h>
46 #include <inet/mi.h>
47 #include <inet/ip.h>
48 #include <inet/ip6.h>
49 #include <inet/sctp_ip.h>
50 #include <inet/ipclassifier.h>
51 
52 /*
53  * PR-SCTP comments.
54  *
55  * A message can expire before it gets to the transmit list (i.e. it is still
56  * in the unsent list - unchunked), after it gets to the transmit list, but
57  * before transmission has actually started, or after transmission has begun.
58  * Accordingly, we check for the status of a message in sctp_chunkify() when
59  * the message is being transferred from the unsent list to the transmit list;
60  * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
61  * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
62  * When we nuke a message in sctp_chunkify(), all we need to do is take it
63  * out of the unsent list and update sctp_unsent; when a message is deemed
64  * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
65  * list, update sctp_unsent IFF transmission for the message has not yet begun
66  * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
67  * message has started, then we cannot just take it out of the list, we need
68  * to send Forward TSN chunk to the peer so that the peer can clear its
69  * fragment list for this message. However, we cannot just send the Forward
70  * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
71  * messages preceding this abandoned message. So, we send a Forward TSN
72  * IFF all messages prior to this abandoned message have been SACK'd; if not,
73  * we defer sending the Forward TSN to sctp_cumack(), which will check for
74  * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
75  * sctp_rexmit() when we check for retransmissions, we need to determine if
76  * the advanced peer ack point can be moved ahead, and if so, send a Forward
77  * TSN to the peer instead of retransmitting the chunk. Note that when
78  * we send a Forward TSN for a message, there may be yet unsent chunks for
79  * this message; we need to mark all such chunks as abandoned, so that
80  * sctp_cumack() can take the message out of the transmit list, additionally
81  * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
82  * decremented when a message/chunk is deemed abandoned), sockfs needs to
83  * be notified so that it can adjust its idea of the queued message.
84  */
85 
86 #include "sctp_impl.h"
87 
/*
 * kmem cache handle.  NOTE(review): judging by its name this presumably
 * backs Forward TSN set allocations; nothing in the part of the file
 * reviewed here creates or uses it -- confirm against its users.
 */
88 static struct kmem_cache	*sctp_kmem_ftsn_set_cache;
89 
/*
 * Forward declaration of the DEBUG-only b_next chain checker that is
 * defined further down in this file.
 */
90 #ifdef	DEBUG
91 static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
92 #endif
93 
94 /*
95  * Called to allocate a header mblk when sending data to SCTP.
96  * Data will follow in b_cont of this mblk.
97  */
98 mblk_t *
99 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
100     int flags)
101 {
102 	mblk_t *mp;
103 	struct T_unitdata_req *tudr;
104 	size_t size;
105 	int error;
106 
107 	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
108 	size = MAX(size, sizeof (sctp_msg_hdr_t));
109 	if (flags & SCTP_CAN_BLOCK) {
110 		mp = allocb_wait(size, BPRI_MED, 0, &error);
111 	} else {
112 		mp = allocb(size, BPRI_MED);
113 	}
114 	if (mp) {
115 		tudr = (struct T_unitdata_req *)mp->b_rptr;
116 		tudr->PRIM_type = T_UNITDATA_REQ;
117 		tudr->DEST_length = nlen;
118 		tudr->DEST_offset = sizeof (*tudr);
119 		tudr->OPT_length = clen;
120 		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
121 		    _TPI_ALIGN_TOPT(nlen));
122 		if (nlen > 0)
123 			bcopy(name, tudr + 1, nlen);
124 		if (clen > 0)
125 			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
126 		mp->b_wptr += (tudr ->OPT_offset + clen);
127 		mp->b_datap->db_type = M_PROTO;
128 	}
129 	return (mp);
130 }
131 
132 /*ARGSUSED2*/
133 int
134 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
135 {
136 	sctp_faddr_t	*fp = NULL;
137 	struct T_unitdata_req	*tudr;
138 	int		error = 0;
139 	mblk_t		*mproto = mp;
140 	in6_addr_t	*addr;
141 	in6_addr_t	tmpaddr;
142 	uint16_t	sid = sctp->sctp_def_stream;
143 	uint32_t	ppid = sctp->sctp_def_ppid;
144 	uint32_t	context = sctp->sctp_def_context;
145 	uint16_t	msg_flags = sctp->sctp_def_flags;
146 	sctp_msg_hdr_t	*sctp_msg_hdr;
147 	uint32_t	msg_len = 0;
148 	uint32_t	timetolive = sctp->sctp_def_timetolive;
149 
150 	ASSERT(DB_TYPE(mproto) == M_PROTO);
151 
152 	mp = mp->b_cont;
153 	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);
154 
155 	tudr = (struct T_unitdata_req *)mproto->b_rptr;
156 	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
157 
158 	/* Get destination address, if specified */
159 	if (tudr->DEST_length > 0) {
160 		sin_t *sin;
161 		sin6_t *sin6;
162 
163 		sin = (struct sockaddr_in *)
164 		    (mproto->b_rptr + tudr->DEST_offset);
165 		switch (sin->sin_family) {
166 		case AF_INET:
167 			if (tudr->DEST_length < sizeof (*sin)) {
168 				return (EINVAL);
169 			}
170 			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
171 			addr = &tmpaddr;
172 			break;
173 		case AF_INET6:
174 			if (tudr->DEST_length < sizeof (*sin6)) {
175 				return (EINVAL);
176 			}
177 			sin6 = (struct sockaddr_in6 *)
178 			    (mproto->b_rptr + tudr->DEST_offset);
179 			addr = &sin6->sin6_addr;
180 			break;
181 		default:
182 			return (EAFNOSUPPORT);
183 		}
184 		fp = sctp_lookup_faddr(sctp, addr);
185 		if (fp == NULL) {
186 			return (EINVAL);
187 		}
188 	}
189 	/* Ancillary Data? */
190 	if (tudr->OPT_length > 0) {
191 		struct cmsghdr		*cmsg;
192 		char			*cend;
193 		struct sctp_sndrcvinfo	*sndrcv;
194 
195 		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
196 		cend = ((char *)cmsg + tudr->OPT_length);
197 		ASSERT(cend <= (char *)mproto->b_wptr);
198 
199 		for (;;) {
200 			if ((char *)(cmsg + 1) > cend ||
201 			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
202 				break;
203 			}
204 			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
205 			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
206 				if (cmsg->cmsg_len <
207 				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
208 					return (EINVAL);
209 				}
210 				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
211 				sid = sndrcv->sinfo_stream;
212 				msg_flags = sndrcv->sinfo_flags;
213 				ppid = sndrcv->sinfo_ppid;
214 				context = sndrcv->sinfo_context;
215 				timetolive = sndrcv->sinfo_timetolive;
216 				break;
217 			}
218 			if (cmsg->cmsg_len > 0)
219 				cmsg = CMSG_NEXT(cmsg);
220 			else
221 				break;
222 		}
223 	}
224 	if (msg_flags & MSG_ABORT) {
225 		if (mp && mp->b_cont) {
226 			mblk_t *pump = msgpullup(mp, -1);
227 			if (!pump) {
228 				return (ENOMEM);
229 			}
230 			freemsg(mp);
231 			mp = pump;
232 			mproto->b_cont = mp;
233 		}
234 		RUN_SCTP(sctp);
235 		sctp_user_abort(sctp, mp, B_TRUE);
236 		sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL);
237 		sctp_clean_death(sctp, ECONNRESET);
238 		freemsg(mproto);
239 		goto process_sendq;
240 	}
241 	if (mp == NULL)
242 		goto done;
243 
244 	RUN_SCTP(sctp);
245 
246 	/* Reject any new data requests if we are shutting down */
247 	if (sctp->sctp_state > SCTPS_ESTABLISHED) {
248 		error = EPIPE;
249 		goto unlock_done;
250 	}
251 
252 	/* Re-use the mproto to store relevant info. */
253 	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));
254 
255 	mproto->b_rptr = mproto->b_datap->db_base;
256 	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);
257 
258 	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
259 	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
260 	sctp_msg_hdr->smh_context = context;
261 	sctp_msg_hdr->smh_sid = sid;
262 	sctp_msg_hdr->smh_ppid = ppid;
263 	sctp_msg_hdr->smh_flags = msg_flags;
264 	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
265 	sctp_msg_hdr->smh_tob = lbolt64;
266 	for (; mp != NULL; mp = mp->b_cont)
267 		msg_len += MBLKL(mp);
268 	sctp_msg_hdr->smh_msglen = msg_len;
269 
270 	/* User requested specific destination */
271 	SCTP_SET_CHUNK_DEST(mproto, fp);
272 
273 	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
274 	    sid >= sctp->sctp_num_ostr) {
275 		/* Send sendfail event */
276 		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
277 		    B_FALSE);
278 		error = EINVAL;
279 		goto unlock_done;
280 	}
281 
282 	/* no data */
283 	if (msg_len == 0) {
284 		sctp_sendfail_event(sctp, dupmsg(mproto),
285 		    SCTP_ERR_NO_USR_DATA, B_FALSE);
286 		error = EINVAL;
287 		goto unlock_done;
288 	}
289 
290 	/* Add it to the unsent list */
291 	if (sctp->sctp_xmit_unsent == NULL) {
292 		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
293 	} else {
294 		sctp->sctp_xmit_unsent_tail->b_next = mproto;
295 		sctp->sctp_xmit_unsent_tail = mproto;
296 	}
297 	sctp->sctp_unsent += msg_len;
298 	BUMP_LOCAL(sctp->sctp_msgcount);
299 	if (sctp->sctp_state == SCTPS_ESTABLISHED)
300 		sctp_output(sctp);
301 process_sendq:
302 	WAKE_SCTP(sctp);
303 	sctp_process_sendq(sctp);
304 	return (0);
305 unlock_done:
306 	WAKE_SCTP(sctp);
307 done:
308 	return (error);
309 }
310 
/*
 * Move messages from the unsent list (sctp_xmit_unsent) onto the transmit
 * list, cutting the data mblks of each message into DATA chunks on the way.
 *
 * Every chunk gets a sctp_data_hdr_t in front of its payload and the chunks
 * of one message are strung together through b_next, hanging off the
 * message header mblk's b_cont.
 *
 * first_len is the room available for the first chunk, data chunk header
 * included; later chunks are sized from the destination's sfa_pmss.
 * bytes_to_send is reduced by the payload of every chunk that is built and
 * another message is started only while it is still positive.
 *
 * If dupb() or allocb() fails part-way through a message, the data that has
 * not been chunked yet is put back at the front of the message's b_cont
 * list, the message is flagged with SCTP_MSG_SET_CHUNKED() when some chunks
 * already exist, and the function returns with the message still at the
 * head of the unsent list, so that a later call can pick up where this one
 * stopped.
 */
311 void
312 sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send)
313 {
314 	mblk_t			*mp;
315 	mblk_t			*chunk_mp;
316 	mblk_t			*chunk_head;
317 	mblk_t			*chunk_hdr;
318 	mblk_t			*chunk_tail = NULL;
319 	int			count;
320 	int			chunksize;
321 	sctp_data_hdr_t		*sdc;
322 	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
323 	sctp_faddr_t		*fp;
324 	sctp_faddr_t		*fp1;
325 	size_t			xtralen;
326 	sctp_msg_hdr_t		*msg_hdr;
327 	sctp_stack_t	*sctps = sctp->sctp_sctps;
328 
	/* Destination of the first message; default is the current peer. */
329 	fp = SCTP_CHUNK_DEST(mdblk);
330 	if (fp == NULL)
331 		fp = sctp->sctp_current;
	/*
	 * xtralen is the size used when a separate chunk header mblk has
	 * to be allocated: header template length for the address family,
	 * plus the write-offset slack, plus the data chunk header itself.
	 */
332 	if (fp->isv4)
333 		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
334 		    sizeof (*sdc);
335 	else
336 		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
337 		    sizeof (*sdc);
	/*
	 * chunksize is the payload budget of the chunk being built; count
	 * is what is left of that budget while mblks are being gathered.
	 */
338 	count = chunksize = first_len - sizeof (*sdc);
339 nextmsg:
340 	chunk_mp = mdblk->b_cont;
341 
342 	/*
343 	 * If this partially chunked, we ignore the first_len for now
344 	 * and use the one already present. For the unchunked bits, we
345 	 * use the length of the last chunk.
346 	 */
347 	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
348 		int	chunk_len;
349 
		/*
		 * The first entry on b_cont is the data that an earlier,
		 * failed pass left un-chunked; the finished chunks follow
		 * it on b_next.  Detach the data and keep the chunks.
		 */
350 		ASSERT(chunk_mp->b_next != NULL);
351 		mdblk->b_cont = chunk_mp->b_next;
352 		chunk_mp->b_next = NULL;
353 		SCTP_MSG_CLEAR_CHUNKED(mdblk);
		/* Find the last chunk that was built and read its length. */
354 		mp = mdblk->b_cont;
355 		while (mp->b_next != NULL)
356 			mp = mp->b_next;
357 		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
		/*
		 * NOTE(review): sizeof yields an unsigned type, so this
		 * comparison may be carried out unsigned; if chunk_len can
		 * exceed sfa_pmss the difference would look large rather
		 * than negative -- confirm the type of sfa_pmss.
		 */
358 		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
359 			count = chunksize = fp->sfa_pmss - chunk_len;
360 		else
361 			count = chunksize = fp->sfa_pmss;
362 		count = chunksize = count - sizeof (*sdc);
363 	} else {
364 		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
365 		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
			/*
			 * The message is to be abandoned before any of it
			 * was chunked: unlink it from the unsent list and
			 * give its bytes back.
			 */
366 			sctp->sctp_xmit_unsent = mdblk->b_next;
367 			if (sctp->sctp_xmit_unsent == NULL)
368 				sctp->sctp_xmit_unsent_tail = NULL;
369 			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
370 			sctp->sctp_unsent -= msg_hdr->smh_msglen;
371 			mdblk->b_next = NULL;
372 			BUMP_LOCAL(sctp->sctp_prsctpdrop);
373 			/*
374 			 * Update ULP the amount of queued data, which is
375 			 * sent-unack'ed + unsent.
376 			 */
377 			if (!SCTP_IS_DETACHED(sctp)) {
378 				sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
379 				    sctp->sctp_unacked + sctp->sctp_unsent);
380 			}
381 			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
382 			goto try_next;
383 		}
		/* Chunks are hung back on b_cont as they are built. */
384 		mdblk->b_cont = NULL;
385 	}
386 	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
387 nextchunk:
388 	chunk_head = chunk_mp;
389 	chunk_tail = NULL;
390 
391 	/* Skip as many mblk's as we need */
392 	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
393 		count -= MBLKL(chunk_mp);
394 		chunk_tail = chunk_mp;
395 		chunk_mp = chunk_mp->b_cont;
396 	}
397 	/* Split the chain, if needed */
398 	if (chunk_mp != NULL) {
399 		if (count > 0) {
			/*
			 * Room is left but the next mblk does not fit as a
			 * whole: share its data block through dupb() and
			 * let this chunk take the first 'count' bytes.
			 */
400 			mblk_t	*split_mp = dupb(chunk_mp);
401 
402 			if (split_mp == NULL) {
				/*
				 * Out of memory: put the remaining data
				 * back, in front of any finished chunks.
				 */
403 				if (mdblk->b_cont == NULL) {
404 					mdblk->b_cont = chunk_head;
405 				} else  {
406 					SCTP_MSG_SET_CHUNKED(mdblk);
407 					ASSERT(chunk_head->b_next == NULL);
408 					chunk_head->b_next = mdblk->b_cont;
409 					mdblk->b_cont = chunk_head;
410 				}
411 				return;
412 			}
413 			if (chunk_tail != NULL) {
414 				chunk_tail->b_cont = split_mp;
415 				chunk_tail = chunk_tail->b_cont;
416 			} else {
417 				chunk_head = chunk_tail = split_mp;
418 			}
			/* The copy ends where the original now begins. */
419 			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
420 			chunk_mp->b_rptr = chunk_tail->b_wptr;
421 			count = 0;
422 		} else if (chunk_tail == NULL) {
			/*
			 * No budget left and nothing gathered: start over
			 * with a budget derived from sfa_pmss.
			 */
423 			goto next;
424 		} else {
			/* Exact fit: cut the chain after the last mblk. */
425 			chunk_tail->b_cont = NULL;
426 		}
427 	}
428 	/* Alloc chunk hdr, if needed */
	/*
	 * The data chunk header is written into the head mblk's own
	 * headroom unless its data block is shared, its read pointer is
	 * not SCTP_ALIGN aligned, or the headroom is too small.
	 */
429 	if (DB_REF(chunk_head) > 1 ||
430 	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
431 	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
432 		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/*
			 * Out of memory: rejoin what was gathered with the
			 * rest of the data and put it back as above.
			 */
433 			if (mdblk->b_cont == NULL) {
434 				if (chunk_mp != NULL)
435 					linkb(chunk_head, chunk_mp);
436 				mdblk->b_cont = chunk_head;
437 			} else {
438 				SCTP_MSG_SET_CHUNKED(mdblk);
439 				if (chunk_mp != NULL)
440 					linkb(chunk_head, chunk_mp);
441 				ASSERT(chunk_head->b_next == NULL);
442 				chunk_head->b_next = mdblk->b_cont;
443 				mdblk->b_cont = chunk_head;
444 			}
445 			return;
446 		}
		/* Header goes at the very end of the new buffer. */
447 		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
448 		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
449 		chunk_hdr->b_cont = chunk_head;
450 	} else {
451 		chunk_hdr = chunk_head;
452 		chunk_hdr->b_rptr -= sizeof (*sdc);
453 	}
454 	ASSERT(chunk_hdr->b_datap->db_ref == 1);
455 	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
456 	sdc->sdh_id = CHUNK_DATA;
457 	sdc->sdh_flags = 0;
	/* Chunk length: header plus the payload actually gathered. */
458 	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
459 	ASSERT(sdc->sdh_len);
460 	sdc->sdh_sid = htons(msg_hdr->smh_sid);
461 	/*
462 	 * We defer assigning the SSN just before sending the chunk, else
463 	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
464 	 * to send a Forward TSN to let the peer know. Some more comments
465 	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
466 	 */
467 	sdc->sdh_payload_id = msg_hdr->smh_ppid;
468 
	/*
	 * The first chunk of a message gets the B bit; any other chunk is
	 * appended at the end of the message's b_next list.
	 */
469 	if (mdblk->b_cont == NULL) {
470 		mdblk->b_cont = chunk_hdr;
471 		SCTP_DATA_SET_BBIT(sdc);
472 	} else {
473 		mp = mdblk->b_cont;
474 		while (mp->b_next != NULL)
475 			mp = mp->b_next;
476 		mp->b_next = chunk_hdr;
477 	}
478 
479 	bytes_to_send -= (chunksize - count);
480 	if (chunk_mp != NULL) {
481 next:
		/* More data in this message: another chunk, sfa_pmss based. */
482 		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
483 		goto nextchunk;
484 	}
	/*
	 * That was the last chunk: set the E bit and take the message off
	 * the unsent list.
	 */
485 	SCTP_DATA_SET_EBIT(sdc);
486 	sctp->sctp_xmit_unsent = mdblk->b_next;
487 	if (mdblk->b_next == NULL) {
488 		sctp->sctp_xmit_unsent_tail = NULL;
489 	}
490 	mdblk->b_next = NULL;
491 
	/*
	 * Link the message in after the last message that can be reached
	 * from sctp_xmit_tail.  sctp_xmit_tail itself is only set here
	 * when the transmit list was empty.
	 */
492 	if (sctp->sctp_xmit_tail == NULL) {
493 		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
494 	} else {
495 		mp = sctp->sctp_xmit_tail;
496 		while (mp->b_next != NULL)
497 			mp = mp->b_next;
498 		mp->b_next = mdblk;
499 		mdblk->b_prev = mp;
500 	}
501 try_next:
502 	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
503 		mdblk = sctp->sctp_xmit_unsent;
504 		fp1 = SCTP_CHUNK_DEST(mdblk);
505 		if (fp1 == NULL)
506 			fp1 = sctp->sctp_current;
507 		if (fp == fp1) {
			/*
			 * Same destination as the previous message.  The
			 * budget left over from the last chunk (count) is
			 * reused for the first chunk of this message when
			 * the message's first mblk either fits into it or
			 * is larger than a full chunk anyway; one data
			 * chunk header is deducted and the result rounded
			 * down to a multiple of four.  Otherwise a full
			 * sfa_pmss based budget is used.
			 */
508 			size_t len = MBLKL(mdblk->b_cont);
509 			if ((count > 0) &&
510 			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
511 				(len <= count))) {
512 				count -= sizeof (*sdc);
513 				count = chunksize = count - (count & 0x3);
514 			} else {
515 				count = chunksize = fp->sfa_pmss -
516 				    sizeof (*sdc);
517 			}
518 		} else {
			/*
			 * Different destination: recompute xtralen for its
			 * address family and use its sfa_pmss.
			 */
519 			if (fp1->isv4)
520 				xtralen = sctp->sctp_hdr_len;
521 			else
522 				xtralen = sctp->sctp_hdr6_len;
523 			xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
524 			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
525 			fp = fp1;
526 		}
527 		goto nextmsg;
528 	}
529 }
530 
531 void
532 sctp_free_msg(mblk_t *ump)
533 {
534 	mblk_t *mp, *nmp;
535 
536 	for (mp = ump->b_cont; mp; mp = nmp) {
537 		nmp = mp->b_next;
538 		mp->b_next = mp->b_prev = NULL;
539 		freemsg(mp);
540 	}
541 	ASSERT(!ump->b_prev);
542 	ump->b_next = NULL;
543 	freeb(ump);
544 }
545 
546 mblk_t *
547 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
548     int *error)
549 {
550 	int hdrlen;
551 	char *hdr;
552 	int isv4 = fp->isv4;
553 	sctp_stack_t	*sctps = sctp->sctp_sctps;
554 
555 	if (error != NULL)
556 		*error = 0;
557 
558 	if (isv4) {
559 		hdrlen = sctp->sctp_hdr_len;
560 		hdr = sctp->sctp_iphc;
561 	} else {
562 		hdrlen = sctp->sctp_hdr6_len;
563 		hdr = sctp->sctp_iphc6;
564 	}
565 	/*
566 	 * A null fp->ire could mean that the address is 'down'. Similarly,
567 	 * it is possible that the address went down, we tried to send an
568 	 * heartbeat and ended up setting fp->saddr as unspec because we
569 	 * didn't have any usable source address.  In either case
570 	 * sctp_get_ire() will try find an IRE, if available, and set
571 	 * the source address, if needed.  If we still don't have any
572 	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
573 	 * we return EHOSTUNREACH.
574 	 */
575 	if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
576 		sctp_get_ire(sctp, fp);
577 		if (fp->state == SCTP_FADDRS_UNREACH) {
578 			if (error != NULL)
579 				*error = EHOSTUNREACH;
580 			return (NULL);
581 		}
582 	}
583 	/* Copy in IP header. */
584 	if ((mp->b_rptr - mp->b_datap->db_base) <
585 	    (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 ||
586 	    !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) {
587 		mblk_t *nmp;
588 
589 		/*
590 		 * This can happen if IP headers are adjusted after
591 		 * data was moved into chunks, or during retransmission,
592 		 * or things like snoop is running.
593 		 */
594 		nmp = allocb_cred(sctps->sctps_wroff_xtra + hdrlen + sacklen,
595 		    CONN_CRED(sctp->sctp_connp));
596 		if (nmp == NULL) {
597 			if (error !=  NULL)
598 				*error = ENOMEM;
599 			return (NULL);
600 		}
601 		nmp->b_rptr += sctps->sctps_wroff_xtra;
602 		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
603 		nmp->b_cont = mp;
604 		mp = nmp;
605 	} else {
606 		mp->b_rptr -= (hdrlen + sacklen);
607 		mblk_setcred(mp, CONN_CRED(sctp->sctp_connp));
608 	}
609 	bcopy(hdr, mp->b_rptr, hdrlen);
610 	if (sacklen) {
611 		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
612 	}
613 	if (fp != sctp->sctp_current) {
614 		/* change addresses in header */
615 		if (isv4) {
616 			ipha_t *iph = (ipha_t *)mp->b_rptr;
617 
618 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
619 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
620 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
621 				    iph->ipha_src);
622 			} else if (sctp->sctp_bound_to_all) {
623 				iph->ipha_src = INADDR_ANY;
624 			}
625 		} else {
626 			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
627 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
628 				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
629 			} else if (sctp->sctp_bound_to_all) {
630 				V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src);
631 			}
632 		}
633 	}
634 	/*
635 	 * IP will not free this IRE if it is condemned.  SCTP needs to
636 	 * free it.
637 	 */
638 	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
639 		IRE_REFRELE_NOTR(fp->ire);
640 		fp->ire = NULL;
641 	}
642 
643 	/* Stash the conn and ire ptr info for IP */
644 	SCTP_STASH_IPINFO(mp, fp->ire);
645 
646 	return (mp);
647 }
648 
649 /*
650  * SCTP requires every chunk to be padded so that the total length
651  * is a multiple of SCTP_ALIGN.  This function returns a mblk with
652  * the specified pad length.
653  */
654 static mblk_t *
655 sctp_get_padding(int pad, sctp_stack_t *sctps)
656 {
657 	mblk_t *fill;
658 
659 	ASSERT(pad < SCTP_ALIGN);
660 	if ((fill = dupb(sctps->sctps_pad_mp)) != NULL) {
661 		fill->b_wptr += pad;
662 		return (fill);
663 	}
664 
665 	/*
666 	 * The memory saving path of reusing the sctp_pad_mp
667 	 * fails may be because it has been dupb() too
668 	 * many times (DBLK_REFMAX).  Use the memory consuming
669 	 * path of allocating the pad mblk.
670 	 */
671 	if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
672 		/* Zero it out.  SCTP_ALIGN is sizeof (int32_t) */
673 		*(int32_t *)fill->b_rptr = 0;
674 		fill->b_wptr += pad;
675 	}
676 	return (fill);
677 }
678 
679 static mblk_t *
680 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
681 {
682 	mblk_t		*meta;
683 	mblk_t		*start_mp = NULL;
684 	mblk_t		*end_mp = NULL;
685 	mblk_t		*mp, *nmp;
686 	mblk_t		*fill;
687 	sctp_data_hdr_t	*sdh;
688 	int		msglen;
689 	int		extra;
690 	sctp_msg_hdr_t	*msg_hdr;
691 	sctp_faddr_t	*old_fp = NULL;
692 	sctp_faddr_t	*chunk_fp;
693 	sctp_stack_t	*sctps = sctp->sctp_sctps;
694 
695 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
696 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
697 		if (SCTP_IS_MSG_ABANDONED(meta) ||
698 		    SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
699 			continue;
700 		}
701 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
702 			if (SCTP_CHUNK_WANT_REXMIT(mp)) {
703 				/*
704 				 * Use the same peer address to do fast
705 				 * retransmission.  If the original peer
706 				 * address is dead, switch to the current
707 				 * one.  Record the old one so that we
708 				 * will pick the chunks sent to the old
709 				 * one for fast retransmission.
710 				 */
711 				chunk_fp = SCTP_CHUNK_DEST(mp);
712 				if (*fp == NULL) {
713 					*fp = chunk_fp;
714 					if ((*fp)->state != SCTP_FADDRS_ALIVE) {
715 						old_fp = *fp;
716 						*fp = sctp->sctp_current;
717 					}
718 				} else if (old_fp == NULL && *fp != chunk_fp) {
719 					continue;
720 				} else if (old_fp != NULL &&
721 				    old_fp != chunk_fp) {
722 					continue;
723 				}
724 
725 				sdh = (sctp_data_hdr_t *)mp->b_rptr;
726 				msglen = ntohs(sdh->sdh_len);
727 				if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
728 					extra = SCTP_ALIGN - extra;
729 				}
730 
731 				/*
732 				 * We still return at least the first message
733 				 * even if that message cannot fit in as
734 				 * PMTU may have changed.
735 				 */
736 				if (*total + msglen + extra >
737 				    (*fp)->sfa_pmss && start_mp != NULL) {
738 					return (start_mp);
739 				}
740 				if ((nmp = dupmsg(mp)) == NULL)
741 					return (start_mp);
742 				if (extra > 0) {
743 					fill = sctp_get_padding(extra, sctps);
744 					if (fill != NULL) {
745 						linkb(nmp, fill);
746 					} else {
747 						return (start_mp);
748 					}
749 				}
750 				BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans);
751 				BUMP_LOCAL(sctp->sctp_rxtchunks);
752 				SCTP_CHUNK_CLEAR_REXMIT(mp);
753 				if (start_mp == NULL) {
754 					start_mp = nmp;
755 				} else {
756 					linkb(end_mp, nmp);
757 				}
758 				end_mp = nmp;
759 				*total += msglen + extra;
760 				dprint(2, ("sctp_find_fast_rexmit_mblks: "
761 				    "tsn %x\n", sdh->sdh_tsn));
762 			}
763 		}
764 	}
765 	/* Clear the flag as there is no more message to be fast rexmitted. */
766 	sctp->sctp_chk_fast_rexmit = B_FALSE;
767 	return (start_mp);
768 }
769 
/*
 * Debug-only sanity check that a b_next chain is not broken: returns B_TRUE
 * if 'tail' can be reached from 'head' by following b_next, or if either
 * argument is NULL; B_FALSE otherwise.
 */
#ifdef	DEBUG
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
	mblk_t	*cur;

	/* Nothing to verify without both end points. */
	if (head == NULL || tail == NULL)
		return (B_TRUE);

	for (cur = head; cur != NULL; cur = cur->b_next) {
		if (cur == tail)
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif
787 
788 /*
789  * Gets the next unsent chunk to transmit. Messages that are abandoned are
790  * skipped. A message can be abandoned if it has a non-zero timetolive and
791  * transmission has not yet started or if it is a partially reliable
792  * message and its time is up (assuming we are PR-SCTP aware).
793  * 'cansend' is used to determine if need to try and chunkify messages from
794  * the unsent list, if any, and also as an input to sctp_chunkify() if so.
795  * When called from sctp_rexmit(), we don't want to chunkify, so 'cansend'
796  * will be set to 0.
 *
 * The search starts at 'meta' on the transmit list.  On success the message
 * that owns the chunk is returned and *mp is set to the chunk.  NULL is
 * returned when there is nothing to send, or when
 * sctp_check_abandoned_msg() failed, in which case *error holds its return
 * value; *error is 0 otherwise.
 *
 * 'firstseg' is subtracted from the destination's sfa_pmss to give the
 * first chunk length passed to sctp_chunkify().  'fp', when not NULL, is
 * the destination the caller is sending to: nothing is chunkified if the
 * first unsent message names a different destination that is alive.  When
 * 'fp' is NULL the destination of the first unsent message is used, or
 * sctp_current if that message names none or it is not alive.
797  */
798 mblk_t *
799 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
800     int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp)
801 {
802 	mblk_t		*mp1;
803 	sctp_msg_hdr_t	*msg_hdr;
804 	mblk_t		*tmp_meta;
805 	sctp_faddr_t	*fp1;
806 
807 	ASSERT(error != NULL && mp != NULL);
808 	*error = 0;
809 
810 	ASSERT(sctp->sctp_current != NULL);
811 
812 chunkified:
813 	while (meta != NULL) {
		/* Remember the successor; meta may be unlinked below. */
814 		tmp_meta = meta->b_next;
815 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
816 		mp1 = meta->b_cont;
817 		if (SCTP_IS_MSG_ABANDONED(meta))
818 			goto next_msg;
819 		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			/* Live message: hand out its first sendable chunk. */
820 			while (mp1 != NULL) {
821 				if (SCTP_CHUNK_CANSEND(mp1)) {
822 					*mp = mp1;
823 #ifdef	DEBUG
824 					ASSERT(sctp_verify_chain(
825 					    sctp->sctp_xmit_head, meta));
826 #endif
827 					return (meta);
828 				}
829 				mp1 = mp1->b_next;
830 			}
831 			goto next_msg;
832 		}
833 		/*
834 		 * If we come here and the first chunk is sent, then
835 		 * we are PR-SCTP aware, in which case if the cumulative
836 		 * TSN has moved upto or beyond the first chunk (which
837 		 * means all the previous messages have been cumulative
838 		 * SACK'd), then we send a Forward TSN with the last
839 		 * chunk that was sent in this message. If we can't send
840 		 * a Forward TSN because previous non-abandoned messages
841 		 * have not been acked then we will defer the Forward TSN
842 		 * to sctp_rexmit() or sctp_cumack().
843 		 */
844 		if (SCTP_CHUNK_ISSENT(mp1)) {
845 			*error = sctp_check_abandoned_msg(sctp, meta);
846 			if (*error != 0) {
847 #ifdef	DEBUG
848 				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
849 				    sctp->sctp_xmit_tail));
850 #endif
851 				return (NULL);
852 			}
853 			goto next_msg;
854 		}
		/*
		 * Nothing of this message has been sent: take it off the
		 * transmit list.  The three cases are head of the list,
		 * end of the list and somewhere in between;
		 * sctp_xmit_tail is moved if it pointed at this message.
		 */
855 		BUMP_LOCAL(sctp->sctp_prsctpdrop);
856 		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
857 		if (meta->b_prev == NULL) {
858 			ASSERT(sctp->sctp_xmit_head == meta);
859 			sctp->sctp_xmit_head = tmp_meta;
860 			if (sctp->sctp_xmit_tail == meta)
861 				sctp->sctp_xmit_tail = tmp_meta;
862 			meta->b_next = NULL;
863 			if (tmp_meta != NULL)
864 				tmp_meta->b_prev = NULL;
865 		} else if (meta->b_next == NULL) {
866 			if (sctp->sctp_xmit_tail == meta)
867 				sctp->sctp_xmit_tail = meta->b_prev;
868 			meta->b_prev->b_next = NULL;
869 			meta->b_prev = NULL;
870 		} else {
871 			meta->b_prev->b_next = tmp_meta;
872 			tmp_meta->b_prev = meta->b_prev;
873 			if (sctp->sctp_xmit_tail == meta)
874 				sctp->sctp_xmit_tail = tmp_meta;
875 			meta->b_prev = NULL;
876 			meta->b_next = NULL;
877 		}
878 		sctp->sctp_unsent -= msg_hdr->smh_msglen;
879 		/*
880 		 * Update ULP the amount of queued data, which is
881 		 * sent-unack'ed + unsent.
882 		 */
883 		if (!SCTP_IS_DETACHED(sctp)) {
884 			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
885 			    sctp->sctp_unacked + sctp->sctp_unsent);
886 		}
887 		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
888 next_msg:
889 		meta = tmp_meta;
890 	}
891 	/* chunkify, if needed */
892 	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
893 		ASSERT(sctp->sctp_unsent > 0);
894 		if (fp == NULL) {
895 			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
896 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
897 				fp = sctp->sctp_current;
898 		} else {
899 			/*
900 			 * If user specified destination, try to honor that.
901 			 */
902 			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
903 			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
904 			    fp1 != fp) {
905 				goto chunk_done;
906 			}
907 		}
908 		sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend);
909 		if ((meta = sctp->sctp_xmit_tail) == NULL)
910 			goto chunk_done;
911 		/*
912 		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
913 		 * new chunk(s) to the tail, so we need to skip the
914 		 * sctp_xmit_tail, which would have already been processed.
915 		 * This could happen when there is unacked chunks, but
916 		 * nothing new to send.
917 		 * When sctp_chunkify() is called when the transmit queue
918 		 * is empty then we need to start from sctp_xmit_tail.
919 		 */
920 		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
921 #ifdef	DEBUG
922 			mp1 = sctp->sctp_xmit_tail->b_cont;
923 			while (mp1 != NULL) {
924 				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
925 				mp1 = mp1->b_next;
926 			}
927 #endif
928 			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
929 				goto chunk_done;
930 		}
		/* Scan the messages that were just put on the list. */
931 		goto chunkified;
932 	}
933 chunk_done:
934 #ifdef	DEBUG
935 	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
936 #endif
937 	return (NULL);
938 }
939 
940 void
941 sctp_fast_rexmit(sctp_t *sctp)
942 {
943 	mblk_t		*mp, *head;
944 	int		pktlen = 0;
945 	sctp_faddr_t	*fp = NULL;
946 	sctp_stack_t	*sctps = sctp->sctp_sctps;
947 
948 	ASSERT(sctp->sctp_xmit_head != NULL);
949 	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
950 	if (mp == NULL) {
951 		SCTP_KSTAT(sctps, sctp_fr_not_found);
952 		return;
953 	}
954 	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
955 		freemsg(mp);
956 		SCTP_KSTAT(sctps, sctp_fr_add_hdr);
957 		return;
958 	}
959 	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
960 		ipha_t *iph = (ipha_t *)head->b_rptr;
961 
962 		iph->ipha_fragment_offset_and_flags = 0;
963 	}
964 
965 	sctp_set_iplen(sctp, head);
966 	sctp_add_sendq(sctp, head);
967 	sctp->sctp_active = fp->lastactive = lbolt64;
968 }
969 
/*
 * Transmit new data, starting with the message at sctp_xmit_tail.
 *
 * The total amount sent is bounded by cansend, the smaller of sctp_frwnd
 * and sctp_unsent.  Each packet is further bounded by the chosen
 * destination's cwnd - suna, capped at its sfa_pmss.  Every pass of the
 * outer loop builds one packet: a first data chunk (with a SACK when one
 * is pending and it fits), followed by as many further chunks as can be
 * bundled.  Chunks are put into the packet as dupmsg() copies; the
 * originals stay on the transmit list and are marked via SCTP_CHUNK_SENT().
 *
 * When nothing (more) can be sent, control goes to unsent_data, which arms
 * the timer of the destination last considered if it is not running and
 * frees any partially built packet.
 */
970 void
971 sctp_output(sctp_t *sctp)
972 {
973 	mblk_t			*mp = NULL;
974 	mblk_t			*nmp;
975 	mblk_t			*head;
976 	mblk_t			*meta = sctp->sctp_xmit_tail;
977 	mblk_t			*fill = NULL;
978 	uint16_t 		chunklen;
979 	uint32_t 		cansend;
980 	int32_t			seglen;
981 	int32_t			xtralen;
982 	int32_t			sacklen;
983 	int32_t			pad = 0;
984 	int32_t			pathmax;
985 	int			extra;
986 	int64_t			now = lbolt64;
987 	sctp_faddr_t		*fp;
988 	sctp_faddr_t		*lfp;
989 	sctp_data_hdr_t		*sdc;
990 	int			error;
991 	boolean_t		notsent = B_TRUE;
992 	sctp_stack_t	*sctps = sctp->sctp_sctps;
993 
	/* lfp is only assigned, and only used, when a SACK is pending. */
994 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
995 		sacklen = 0;
996 	} else {
997 		/* send a SACK chunk */
998 		sacklen = sizeof (sctp_chunk_hdr_t) +
999 		    sizeof (sctp_sack_chunk_t) +
1000 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1001 		lfp = sctp->sctp_lastdata;
1002 		ASSERT(lfp != NULL);
1003 		if (lfp->state != SCTP_FADDRS_ALIVE)
1004 			lfp = sctp->sctp_current;
1005 	}
1006 
1007 	cansend = sctp->sctp_frwnd;
1008 	if (sctp->sctp_unsent < cansend)
1009 		cansend = sctp->sctp_unsent;
	/*
	 * Hold back a send of less than half a path MSS while some, but
	 * less than one path MSS of, data is unacknowledged, unless
	 * sctp_ndelay is set.
	 */
1010 	if ((cansend < sctp->sctp_current->sfa_pmss / 2) &&
1011 	    sctp->sctp_unacked &&
1012 	    (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) &&
1013 	    !sctp->sctp_ndelay) {
1014 		head = NULL;
1015 		fp = sctp->sctp_current;
1016 		goto unsent_data;
1017 	}
1018 	if (meta != NULL)
1019 		mp = meta->b_cont;
1020 	while (cansend > 0) {
1021 		pad = 0;
1022 
1023 		/*
1024 		 * Find first segment eligible for transmit.
1025 		 */
1026 		while (mp != NULL) {
1027 			if (SCTP_CHUNK_CANSEND(mp))
1028 				break;
1029 			mp = mp->b_next;
1030 		}
1031 		if (mp == NULL) {
1032 			meta = sctp_get_msg_to_send(sctp, &mp,
1033 			    meta == NULL ? NULL : meta->b_next, &error, sacklen,
1034 			    cansend, NULL);
1035 			if (error != 0 || meta == NULL) {
1036 				head = NULL;
1037 				fp = sctp->sctp_current;
1038 				goto unsent_data;
1039 			}
1040 			sctp->sctp_xmit_tail =  meta;
1041 		}
1042 
		/*
		 * seglen is the chunk length including the data chunk header,
		 * chunklen the payload alone.  xtralen accumulates the
		 * non-payload bytes (chunk headers, SACK) of the packet.
		 */
1043 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1044 		seglen = ntohs(sdc->sdh_len);
1045 		xtralen = sizeof (*sdc);
1046 		chunklen = seglen - xtralen;
1047 
1048 		/*
1049 		 * Check rwnd.
1050 		 */
1051 		if (chunklen > cansend) {
1052 			head = NULL;
1053 			fp = SCTP_CHUNK_DEST(meta);
1054 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
1055 				fp = sctp->sctp_current;
1056 			goto unsent_data;
1057 		}
		/* extra: pad bytes needed to reach a multiple of SCTP_ALIGN. */
1058 		if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1059 			extra = SCTP_ALIGN - extra;
1060 
1061 		/*
1062 		 * Pick destination address, and check cwnd.
1063 		 */
1064 		if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) &&
1065 		    (seglen + sacklen + extra <= lfp->sfa_pmss)) {
1066 			/*
1067 			 * Only include SACK chunk if it can be bundled
1068 			 * with a data chunk, and sent to sctp_lastdata.
1069 			 */
1070 			pathmax = lfp->cwnd - lfp->suna;
1071 
1072 			fp = lfp;
1073 			if ((nmp = dupmsg(mp)) == NULL) {
1074 				head = NULL;
1075 				goto unsent_data;
1076 			}
1077 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1078 			head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
1079 			    &error);
1080 			if (head == NULL) {
1081 				/*
1082 				 * If none of the source addresses are
1083 				 * available (i.e error == EHOSTUNREACH),
1084 				 * pretend we have sent the data. We will
1085 				 * eventually time out trying to retransmit
1086 				 * the data if the interface never comes up.
1087 				 * If we have already sent some stuff (i.e.,
1088 				 * notsent is B_FALSE) then we are fine, else
1089 				 * just mark this packet as sent.
1090 				 */
1091 				if (notsent && error == EHOSTUNREACH) {
1092 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1093 					    fp, chunklen, meta);
1094 				}
1095 				freemsg(nmp);
1096 				SCTP_KSTAT(sctps, sctp_output_failed);
1097 				goto unsent_data;
1098 			}
			/* The SACK went out with this packet; send it once. */
1099 			seglen += sacklen;
1100 			xtralen += sacklen;
1101 			sacklen = 0;
1102 		} else {
1103 			fp = SCTP_CHUNK_DEST(meta);
1104 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
1105 				fp = sctp->sctp_current;
1106 			/*
1107 			 * If we haven't sent data to this destination for
1108 			 * a while, do slow start again.
1109 			 */
1110 			if (now - fp->lastactive > fp->rto) {
1111 				fp->cwnd = sctps->sctps_slow_start_after_idle *
1112 				    fp->sfa_pmss;
1113 			}
1114 
1115 			pathmax = fp->cwnd - fp->suna;
1116 			if (seglen + extra > pathmax) {
1117 				head = NULL;
1118 				goto unsent_data;
1119 			}
1120 			if ((nmp = dupmsg(mp)) == NULL) {
1121 				head = NULL;
1122 				goto unsent_data;
1123 			}
1124 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1125 			head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
1126 			if (head == NULL) {
1127 				/*
1128 				 * If none of the source addresses are
1129 				 * available (i.e error == EHOSTUNREACH),
1130 				 * pretend we have sent the data. We will
1131 				 * eventually time out trying to retransmit
1132 				 * the data if the interface never comes up.
1133 				 * If we have already sent some stuff (i.e.,
1134 				 * notsent is B_FALSE) then we are fine, else
1135 				 * just mark this packet as sent.
1136 				 */
1137 				if (notsent && error == EHOSTUNREACH) {
1138 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1139 					    fp, chunklen, meta);
1140 				}
1141 				freemsg(nmp);
1142 				SCTP_KSTAT(sctps, sctp_output_failed);
1143 				goto unsent_data;
1144 			}
1145 		}
1146 		fp->lastactive = now;
1147 		if (pathmax > fp->sfa_pmss)
1148 			pathmax = fp->sfa_pmss;
1149 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1150 		mp = mp->b_next;
1151 
1152 		/* Use this chunk to measure RTT? */
1153 		if (sctp->sctp_out_time == 0) {
1154 			sctp->sctp_out_time = now;
1155 			sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1;
1156 			ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn));
1157 		}
1158 		if (extra > 0) {
1159 			fill = sctp_get_padding(extra, sctps);
1160 			if (fill != NULL) {
1161 				linkb(head, fill);
1162 				pad = extra;
1163 				seglen += extra;
1164 			} else {
				/*
				 * The chunk was already marked sent above;
				 * the packet itself is freed at unsent_data.
				 */
1165 				goto unsent_data;
1166 			}
1167 		}
1168 		/* See if we can bundle more. */
1169 		while (seglen < pathmax) {
1170 			int32_t		new_len;
1171 			int32_t		new_xtralen;
1172 
1173 			while (mp != NULL) {
1174 				if (SCTP_CHUNK_CANSEND(mp))
1175 					break;
1176 				mp = mp->b_next;
1177 			}
1178 			if (mp == NULL) {
1179 				meta = sctp_get_msg_to_send(sctp, &mp,
1180 				    meta->b_next, &error, seglen,
1181 				    (seglen - xtralen) >= cansend ? 0 :
1182 				    cansend - seglen, fp);
1183 				if (error != 0 || meta == NULL)
1184 					break;
1185 				sctp->sctp_xmit_tail =  meta;
1186 			}
1187 			ASSERT(mp != NULL);
			/*
			 * Do not bundle an unsent chunk whose message names
			 * a destination other than the one of this packet.
			 */
1188 			if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
1189 			    fp != SCTP_CHUNK_DEST(meta)) {
1190 				break;
1191 			}
1192 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1193 			chunklen = ntohs(sdc->sdh_len);
1194 			if ((extra = chunklen  & (SCTP_ALIGN - 1)) != 0)
1195 				extra = SCTP_ALIGN - extra;
1196 
1197 			new_len = seglen + chunklen;
1198 			new_xtralen = xtralen + sizeof (*sdc);
1199 			chunklen -= sizeof (*sdc);
1200 
			/* Payload must fit cansend, the packet must fit pathmax. */
1201 			if (new_len - new_xtralen > cansend ||
1202 			    new_len + extra > pathmax) {
1203 				break;
1204 			}
1205 			if ((nmp = dupmsg(mp)) == NULL)
1206 				break;
1207 			if (extra > 0) {
1208 				fill = sctp_get_padding(extra, sctps);
1209 				if (fill != NULL) {
1210 					pad += extra;
1211 					new_len += extra;
1212 					linkb(nmp, fill);
1213 				} else {
1214 					freemsg(nmp);
1215 					break;
1216 				}
1217 			}
1218 			seglen = new_len;
1219 			xtralen = new_xtralen;
1220 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1221 			SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1222 			linkb(head, nmp);
1223 			mp = mp->b_next;
1224 		}
1225 		if ((seglen > fp->sfa_pmss) && fp->isv4) {
1226 			ipha_t *iph = (ipha_t *)head->b_rptr;
1227 
1228 			/*
1229 			 * Path MTU is different from what we thought it would
1230 			 * be when we created chunks, or IP headers have grown.
1231 			 * Need to clear the DF bit.
1232 			 */
1233 			iph->ipha_fragment_offset_and_flags = 0;
1234 		}
1235 		/* xmit segment */
		/* Only payload counts against cansend, not padding or headers. */
1236 		ASSERT(cansend >= seglen - pad - xtralen);
1237 		cansend -= (seglen - pad - xtralen);
1238 		dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
1239 		    "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
1240 		    seglen - xtralen, ntohl(sdc->sdh_tsn),
1241 		    ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
1242 		    cansend, sctp->sctp_lastack_rxd));
1243 		sctp_set_iplen(sctp, head);
1244 		sctp_add_sendq(sctp, head);
1245 		/* arm rto timer (if not set) */
1246 		if (!fp->timer_running)
1247 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1248 		notsent = B_FALSE;
1249 	}
1250 	sctp->sctp_active = now;
1251 	return;
1252 unsent_data:
1253 	/* arm persist timer (if rto timer not set) */
1254 	if (!fp->timer_running)
1255 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1256 	if (head != NULL)
1257 		freemsg(head);
1258 }
1259 
1260 /*
1261  * sctp_ftsn_sets_init() and sctp_ftsn_sets_fini() below create and
1262  * destroy the kmem cache that backs the sets used for PR-SCTP.
1263  */
1264 void
1265 sctp_ftsn_sets_init(void)
1266 {
1267 	sctp_kmem_ftsn_set_cache =
1268 	    kmem_cache_create("sctp_ftsn_set_cache", sizeof (sctp_ftsn_set_t),
1269 	    0, NULL, NULL, NULL, NULL, NULL, 0);
1270 }
1271 
/* Destroy the kmem cache that sctp_ftsn_sets_init() created. */
1272 void
1273 sctp_ftsn_sets_fini(void)
1274 {
1275 	kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
1276 }
1277 
1278 
1279 /* Return every node of the PR-SCTP set list s to the cache (s may be NULL) */
1280 void
1281 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
1282 {
1283 	sctp_ftsn_set_t *following;
1284 
1285 	for (; s != NULL; s = following) {
1286 		/* Remember the link; the node is unlinked before it is freed. */
1287 		following = s->next;
1288 		s->next = NULL;
1289 		kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
1290 	}
1291 }
1292 
1293 /*
1294  * Fold the message headed by meta into the list of stream id / ssn
1295  * entries (*s) from which a Forward TSN chunk is built.  When the
1296  * message's stream id is already listed, its ssn is raised if the new
1297  * one is greater; otherwise an entry is appended, as long as *slen plus
1298  * one more entry does not exceed fp->sfa_pmss.  *nsets and *slen are
1299  * updated for each entry added.  Returns B_FALSE if the entry does not
1300  * fit or cannot be allocated, B_TRUE otherwise.
1301  */
1302 boolean_t
1303 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
1304     uint_t *nsets, uint32_t *slen)
1305 {
1306 	sctp_msg_hdr_t		*hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1307 	sctp_ftsn_set_t		*cur;
1308 	sctp_ftsn_set_t		*fresh;
1309 	/* smh_ssn is already in NBO; smh_sid is converted here. */
1310 	uint16_t		ssn = hdr->smh_ssn;
1311 	uint16_t		sid = htons(hdr->smh_sid);
1312 
1313 	ASSERT(s != NULL && nsets != NULL);
1314 	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
1315 
1316 	if (*s != NULL) {
1317 		/* Search for the stream id; cur ends on the tail if absent. */
1318 		for (cur = *s; ; cur = cur->next) {
1319 			if (cur->ftsn_entries.ftsn_sid == sid) {
1320 				if (SSN_GT(ssn, cur->ftsn_entries.ftsn_ssn))
1321 					cur->ftsn_entries.ftsn_ssn = ssn;
1322 				return (B_TRUE);
1323 			}
1324 			if (cur->next == NULL)
1325 				break;
1326 		}
1327 		if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss)
1328 			return (B_FALSE);
1329 	} else {
1330 		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
1331 		cur = NULL;
1332 	}
1333 
1334 	fresh = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
1335 	if (fresh == NULL)
1336 		return (B_FALSE);
1337 	fresh->ftsn_entries.ftsn_sid = sid;
1338 	fresh->ftsn_entries.ftsn_ssn = ssn;
1339 	fresh->next = NULL;
1340 	if (cur == NULL) {
1341 		*s = fresh;
1342 		*nsets = 1;
1343 	} else {
1344 		cur->next = fresh;
1345 		(*nsets)++;
1346 	}
1347 	*slen += sizeof (uint32_t);
1348 	return (B_TRUE);
1349 }
1354 
1355 /*
1356  * Build a Forward TSN chunk out of a list of stream id - sequence number
1357  * entries.  The cumulative TSN (advanced peer ack point) placed in the
1358  * chunk is sctp->sctp_adv_pap.  Room for the IP/SCTP header is left in
1359  * front of the chunk, but adding that header is the caller's job.
1360  * Returns NULL if the mblk cannot be allocated.
1361  */
1362 mblk_t *
1363 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
1364     uint_t nsets, uint32_t seglen)
1365 {
1366 	sctp_stack_t		*sctps = sctp->sctp_sctps;
1367 	mblk_t			*chunk_mp;
1368 	sctp_chunk_hdr_t	*hdr;
1369 	uint32_t		*cum_tsn;
1370 	ftsn_entry_t		*entry;
1371 	size_t			headroom;
1372 	uint16_t		wire_len;
1373 	uint_t			i;
1374 
1375 	/* seglen comes in without the chunk header; add it here. */
1376 	seglen += sizeof (sctp_chunk_hdr_t);
1377 	headroom = (fp->isv4 ? sctp->sctp_hdr_len : sctp->sctp_hdr6_len) +
1378 	    sctps->sctps_wroff_xtra;
1379 	chunk_mp = allocb_cred(headroom + seglen,
1380 	    CONN_CRED(sctp->sctp_connp));
1381 	if (chunk_mp == NULL)
1382 		return (NULL);
1383 	chunk_mp->b_rptr += headroom;
1384 	chunk_mp->b_wptr = chunk_mp->b_rptr + seglen;
1385 
1386 	hdr = (sctp_chunk_hdr_t *)chunk_mp->b_rptr;
1387 	hdr->sch_id = CHUNK_FORWARD_TSN;
1388 	hdr->sch_flags = 0;
1389 	/* seglen is the length of this one chunk, so the cast is harmless. */
1390 	wire_len = (uint16_t)seglen;
1391 	U16_TO_ABE16(wire_len, &(hdr->sch_len));
1392 
1393 	cum_tsn = (uint32_t *)(hdr + 1);
1394 	U32_TO_ABE32(sctp->sctp_adv_pap, cum_tsn);
1395 
1396 	/* The entries follow the cumulative TSN, one per list node. */
1397 	entry = (ftsn_entry_t *)(cum_tsn + 1);
1398 	for (i = 0; i < nsets; i++, entry++, sets = sets->next) {
1399 		ASSERT((uchar_t *)&entry[1] <= chunk_mp->b_wptr);
1400 		entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
1401 		entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
1402 	}
1403 	return (chunk_mp);
1404 }
1407 
1408 /*
1409  * Given a starting message, the routine steps through all the
1410  * messages whose TSN is not beyond sctp->sctp_adv_pap and creates
1411  * ftsn sets. The ftsn sets are then used to create a Forward TSN
1412  * chunk. All the messages that have chunks that are included in the
1413  * ftsn sets are flagged abandoned. If a message is partially sent
1414  * and is deemed abandoned, all remaining unsent chunks are marked
1415  * abandoned and are deducted from sctp_unsent.
 *
 * meta/mp are the first message and chunk to cover.  On success *nmp
 * is the Forward TSN packet with its protocol header (and a SACK when
 * one is pending and fits) and *seglen its length.  *nmp is left
 * untouched when no message qualifies, and is NULL when the chunk or
 * its header could not be built.  No message is marked before the
 * packet exists.
1416  */
1417 void
1418 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
1419     sctp_faddr_t *fp, uint32_t *seglen)
1420 {
1421 	mblk_t		*mp1 = mp;
1422 	mblk_t		*mp_head = mp;
1423 	mblk_t		*meta_head = meta;
1424 	mblk_t		*head;
1425 	sctp_ftsn_set_t	*sets = NULL;
1426 	uint_t		nsets = 0;
1427 	uint16_t	clen;
1428 	sctp_data_hdr_t	*sdc;
1429 	uint32_t	sacklen;
1430 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1431 	uint32_t	unsent = 0;
1432 	boolean_t	ubit;
1433 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1434 
	/* The chunk body starts with the 32-bit cumulative TSN. */
1435 	*seglen = sizeof (uint32_t);
1436 
	/*
	 * First pass: collect the sets.  The local adv_pap follows the
	 * TSN of the last sent chunk walked so far.
	 */
1437 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1438 	while (meta != NULL &&
1439 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1440 		/*
1441 		 * Skip adding FTSN sets for un-ordered messages as they do
1442 		 * not have SSNs.
1443 		 */
1444 		ubit = SCTP_DATA_GET_UBIT(sdc);
1445 		if (!ubit &&
1446 		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
			/*
			 * This message could not be recorded: pull the
			 * advanced ack point back to the last TSN walked.
			 */
1447 			meta = NULL;
1448 			sctp->sctp_adv_pap = adv_pap;
1449 			goto ftsn_done;
1450 		}
1451 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1452 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1453 			adv_pap = ntohl(sdc->sdh_tsn);
1454 			mp1 = mp1->b_next;
1455 		}
1456 		meta = meta->b_next;
1457 		if (meta != NULL) {
1458 			mp1 = meta->b_cont;
1459 			if (!SCTP_CHUNK_ISSENT(mp1))
1460 				break;
1461 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1462 		}
1463 	}
1464 ftsn_done:
1465 	/*
1466 	 * Can't compare with sets == NULL, since we don't add any
1467 	 * sets for un-ordered messages.
1468 	 */
1469 	if (meta == meta_head)
1470 		return;
1471 	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
1472 	sctp_free_ftsn_set(sets);
1473 	if (*nmp == NULL)
1474 		return;
	/* Piggyback a pending SACK if it fits; it then goes to sctp_lastdata. */
1475 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1476 		sacklen = 0;
1477 	} else {
1478 		sacklen = sizeof (sctp_chunk_hdr_t) +
1479 		    sizeof (sctp_sack_chunk_t) +
1480 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1481 		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
1482 			/* piggybacked SACK doesn't fit */
1483 			sacklen = 0;
1484 		} else {
1485 			fp = sctp->sctp_lastdata;
1486 		}
1487 	}
1488 	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
1489 	if (head == NULL) {
1490 		freemsg(*nmp);
1491 		*nmp = NULL;
1492 		SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
1493 		return;
1494 	}
1495 	*seglen += sacklen;
1496 	*nmp = head;
1497 
1498 	/*
1499 	 * XXXNeed to optimise this, the reason it is done here is so
1500 	 * that we don't have to undo in case of failure.
1501 	 */
	/*
	 * Second pass: walk the same messages again, this time marking
	 * them (and their unsent chunks) abandoned.
	 */
1502 	mp1 = mp_head;
1503 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1504 	while (meta_head != NULL &&
1505 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1506 		if (!SCTP_IS_MSG_ABANDONED(meta_head))
1507 			SCTP_MSG_SET_ABANDONED(meta_head);
		/* Sent but unacked chunks go through SCTP_CHUNK_SENT() again. */
1508 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1509 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1510 			if (!SCTP_CHUNK_ISACKED(mp1)) {
1511 				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
1512 				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
1513 				    meta_head);
1514 			}
1515 			mp1 = mp1->b_next;
1516 		}
		/* The rest was never sent: abandon it and total its payload. */
1517 		while (mp1 != NULL) {
1518 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1519 			if (!SCTP_CHUNK_ABANDONED(mp1)) {
1520 				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
1521 				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
1522 				SCTP_ABANDON_CHUNK(mp1);
1523 			}
1524 			mp1 = mp1->b_next;
1525 		}
1526 		meta_head = meta_head->b_next;
1527 		if (meta_head != NULL) {
1528 			mp1 = meta_head->b_cont;
1529 			if (!SCTP_CHUNK_ISSENT(mp1))
1530 				break;
1531 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1532 		}
1533 	}
1534 	if (unsent > 0) {
1535 		ASSERT(sctp->sctp_unsent >= unsent);
1536 		sctp->sctp_unsent -= unsent;
1537 		/*
1538 		 * Update ULP the amount of queued data, which is
1539 		 * sent-unack'ed + unsent.
1540 		 */
1541 		if (!SCTP_IS_DETACHED(sctp)) {
1542 			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
1543 			    sctp->sctp_unacked + sctp->sctp_unsent);
1544 		}
1545 	}
1546 }
1547 
1548 /*
1549  * This function steps through messages starting at meta and checks if
1550  * the message is abandoned. It stops when it hits an unsent chunk or
1551  * a message that has all its chunk acked. This is the only place
1552  * where the sctp_adv_pap is moved forward to indicate abandoned
1553  * messages.
 *
 * mp is the chunk of meta at which to start.  Nothing changes unless
 * meta is abandoned or due to be abandoned.  Otherwise sctp_adv_pap is
 * set to the TSN of the last sent chunk walked over.
1554  */
1555 void
1556 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
1557 {
1558 	uint32_t	tsn = sctp->sctp_adv_pap;
1559 	sctp_data_hdr_t	*sdc;
1560 	sctp_msg_hdr_t	*msg_hdr;
1561 
1562 	ASSERT(mp != NULL);
1563 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1564 	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
1565 	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1566 	if (!SCTP_IS_MSG_ABANDONED(meta) &&
1567 	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1568 		return;
1569 	}
1570 	while (meta != NULL) {
		/* Walk the sent chunks, remembering the last TSN seen. */
1571 		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
1572 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1573 			tsn = ntohl(sdc->sdh_tsn);
1574 			mp = mp->b_next;
1575 		}
		/* Stopped on an unsent chunk: go no further. */
1576 		if (mp != NULL)
1577 			break;
1578 		/*
1579 		 * We continue checking for successive messages only if there
1580 		 * is a chunk marked for retransmission. Else, we might
1581 		 * end up sending FTSN prematurely for chunks that have been
1582 		 * sent, but not yet acked.
1583 		 */
1584 		if ((meta = meta->b_next) != NULL) {
1585 			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1586 			if (!SCTP_IS_MSG_ABANDONED(meta) &&
1587 			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1588 				break;
1589 			}
			/*
			 * Leaves mp on the first chunk wanting retransmission,
			 * or NULL if the message has none.
			 */
1590 			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1591 				if (!SCTP_CHUNK_ISSENT(mp)) {
1592 					sctp->sctp_adv_pap = tsn;
1593 					return;
1594 				}
1595 				if (SCTP_CHUNK_WANT_REXMIT(mp))
1596 					break;
1597 			}
1598 			if (mp == NULL)
1599 				break;
1600 		}
1601 	}
1602 	sctp->sctp_adv_pap = tsn;
1603 }
1604 
1605 
1606 /*
1607  * Determine if we should bundle a data chunk with the chunk being
1608  * retransmitted.  An abandoned chunk is never bundled.  Otherwise we
1609  * bundle if
1610  * - the chunk is sent to the same destination and unack'ed.
1611  *
1612  * OR
1613  *
1614  * - the chunk's flags are anything other than "SENT without REXMIT",
 *   i.e. it is unsent (new data) or it is flagged for retransmission.
1615  */
1616 #define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
1617 	(!SCTP_CHUNK_ABANDONED((mp)) && 				\
1618 	((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) &&	\
1619 	!SCTP_CHUNK_ISACKED(mp))) ||					\
1620 	(((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \
1621 	SCTP_CHUNK_FLAG_SENT)))
1622 
1623 /*
1624  * Retransmit first segment which hasn't been acked with cumtsn or send
1625  * a Forward TSN chunk, if appropriate.
 *
 * oldfp is the destination whose timer went off.  The transmit list is
 * searched for the chunk to retransmit, and then one of these happens:
 *
 *  - Nothing sent is found (window_probe): if there is unsent data and
 *    nothing unacked, a window probe is sent through sctp_output().
 *  - A zero window probe is outstanding: the probe is rebuilt with
 *    sctp_rexmit_packet() and resent; congestion state is not touched.
 *  - Otherwise oldfp goes back to slow start, a destination is chosen
 *    with sctp_rotate_faddr(), and either a Forward TSN chunk or the
 *    chunk found is sent, with further chunks bundled while they fit.
 *
 * In the last two cases the strike counters are bumped and the timers
 * are restarted with the backed-off RTO.
1626  */
1627 void
1628 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
1629 {
1630 	mblk_t		*mp;
1631 	mblk_t		*nmp = NULL;
1632 	mblk_t		*head;
1633 	mblk_t		*meta = sctp->sctp_xmit_head;
1634 	mblk_t		*fill;
1635 	uint32_t	seglen = 0;
1636 	uint32_t	sacklen;
1637 	uint16_t	chunklen;
1638 	int		extra;
1639 	sctp_data_hdr_t	*sdc;
1640 	sctp_faddr_t	*fp;
1641 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1642 	boolean_t	do_ftsn = B_FALSE;
1643 	boolean_t	ftsn_check = B_TRUE;
1644 	uint32_t	first_ua_tsn;
1645 	sctp_msg_hdr_t	*mhdr;
1646 	uint32_t	tot_wnd;
1647 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1648 
1649 	while (meta != NULL) {
1650 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1651 			uint32_t	tsn;
1652 
1653 			if (!SCTP_CHUNK_ISSENT(mp))
1654 				goto window_probe;
1655 			/*
1656 			 * We break in the following cases -
1657 			 *
1658 			 *	if the advanced peer ack point includes the next
1659 			 *	chunk to be retransmitted - possibly the Forward
1660 			 *	TSN was lost.
1661 			 *
1662 			 *	if we are PRSCTP aware and the next chunk to be
1663 			 *	retransmitted is now abandoned
1664 			 *
1665 			 *	if the next chunk to be retransmitted is for
1666 			 *	the dest on which the timer went off. (this
1667 			 *	message is not abandoned).
1668 			 *
1669 			 * We check for Forward TSN only for the first
1670 			 * eligible chunk to be retransmitted. The reason
1671 			 * being if the first eligible chunk is skipped (say
1672 			 * it was sent to a destination other than oldfp)
1673 			 * then we cannot advance the cum TSN via Forward
1674 			 * TSN chunk.
1675 			 *
1676 			 * Also, ftsn_check is B_TRUE only for the first
1677 			 * eligible chunk, it will be B_FALSE for all
1678 			 * subsequent candidate messages for retransmission.
1679 			 */
1680 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1681 			tsn = ntohl(sdc->sdh_tsn);
1682 			if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
1683 				if (sctp->sctp_prsctp_aware && ftsn_check) {
1684 					if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
1685 						ASSERT(sctp->sctp_prsctp_aware);
1686 						do_ftsn = B_TRUE;
1687 						goto out;
1688 					} else {
1689 						sctp_check_adv_ack_pt(sctp,
1690 						    meta, mp);
1691 						if (SEQ_GT(sctp->sctp_adv_pap,
1692 						    adv_pap)) {
1693 							do_ftsn = B_TRUE;
1694 							goto out;
1695 						}
1696 					}
1697 					ftsn_check = B_FALSE;
1698 				}
1699 				if (SCTP_CHUNK_DEST(mp) == oldfp)
1700 					goto out;
1701 			}
1702 		}
1703 		meta = meta->b_next;
1704 		if (meta != NULL && sctp->sctp_prsctp_aware) {
			/*
			 * Skip abandoned messages.  The message header is
			 * re-read for every message tested, so that the
			 * check looks at that message and not the first one.
			 */
1705 			while (meta != NULL) {
1706 				mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1707 				if (!SCTP_IS_MSG_ABANDONED(meta) &&
1708 				    !SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
				    sctp)) {
					break;
				}
1709 				meta = meta->b_next;
1710 			}
1711 		}
1712 	}
1713 window_probe:
1714 	/*
1715 	 * Retransmit fired for a destination which didn't have
1716 	 * any unacked data pending.
1717 	 */
1718 	if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) {
1719 		/*
1720 		 * Send a window probe. Inflate frwnd to allow
1721 		 * sending one segment.
1722 		 */
1723 		if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc)))
1724 			sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc);
1725 		/* next TSN to send */
1726 		sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
1727 		sctp_output(sctp);
1728 		/* Last sent TSN */
1729 		sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1730 		ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
1731 		sctp->sctp_zero_win_probe = B_TRUE;
1732 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
1733 	}
1734 	return;
1735 out:
1736 	/*
1737 	 * If we are probing for zero window, don't adjust retransmission
1738 	 * variables, but the timer is still backed off.
1739 	 */
1740 	if (sctp->sctp_zero_win_probe) {
1741 		mblk_t	*pkt;
1742 		uint_t	pkt_len;
1743 
1744 		/*
1745 		 * Get the Zero Win Probe for retransmission, sctp_rxt_nxttsn
1746 		 * and sctp_rxt_maxtsn will specify the ZWP packet.
1747 		 */
1748 		fp = oldfp;
1749 		if (oldfp->state != SCTP_FADDRS_ALIVE)
1750 			fp = sctp_rotate_faddr(sctp, oldfp);
1751 		pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
1752 		if (pkt != NULL) {
1753 			ASSERT(pkt_len <= fp->sfa_pmss);
1754 			sctp_set_iplen(sctp, pkt);
1755 			sctp_add_sendq(sctp, pkt);
1756 		} else {
1757 			SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
1758 		}
1759 		oldfp->strikes++;
1760 		sctp->sctp_strikes++;
1761 		SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
		/*
		 * NOTE(review): oldfp's timer is restarted with fp->rto
		 * here, while restart_timer below uses oldfp->rto.  Left
		 * as is; confirm which one is intended.
		 */
1762 		if (oldfp != fp && oldfp->suna != 0)
1763 			SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto);
1764 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1765 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
1766 		return;
1767 	}
1768 
1769 	/*
1770 	 * Enter slowstart for this destination
1771 	 */
1772 	oldfp->ssthresh = oldfp->cwnd / 2;
1773 	if (oldfp->ssthresh < 2 * oldfp->sfa_pmss)
1774 		oldfp->ssthresh = 2 * oldfp->sfa_pmss;
1775 	oldfp->cwnd = oldfp->sfa_pmss;
1776 	oldfp->pba = 0;
1777 	fp = sctp_rotate_faddr(sctp, oldfp);
1778 	ASSERT(fp != NULL);
1779 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1780 
	/* TSN of the first chunk to retransmit, in host byte order. */
1781 	first_ua_tsn = ntohl(sdc->sdh_tsn);
1782 	if (do_ftsn) {
1783 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
1784 		if (nmp == NULL) {
1785 			sctp->sctp_adv_pap = adv_pap;
1786 			goto restart_timer;
1787 		}
1788 		head = nmp;
1789 		/*
1790 		 * Move to the next unabandoned chunk. XXXCheck if meta will
1791 		 * always be marked abandoned.
1792 		 */
1793 		while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta))
1794 			meta = meta->b_next;
		/* Bundling resumes at the first chunk of that message. */
1795 		if (meta != NULL)
1796 			mp = meta->b_cont;
1797 		else
1798 			mp = NULL;
		/*
		 * No data chunk has been charged to the window on this
		 * path, so the bundling loop starts from the full window.
		 */
		tot_wnd = MIN(fp->cwnd, sctp->sctp_frwnd);
1799 		goto try_bundle;
1800 	}
1801 	seglen = ntohs(sdc->sdh_len);
1802 	chunklen = seglen - sizeof (*sdc);
1803 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1804 		extra = SCTP_ALIGN - extra;
1805 
1806 	/* Find out if we need to piggyback SACK. */
1807 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1808 		sacklen = 0;
1809 	} else {
1810 		sacklen = sizeof (sctp_chunk_hdr_t) +
1811 		    sizeof (sctp_sack_chunk_t) +
1812 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1813 		if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
1814 			/* piggybacked SACK doesn't fit */
1815 			sacklen = 0;
1816 		} else {
1817 			/*
1818 			 * OK, we have room to send SACK back.  But we
1819 			 * should send it back to the last fp where we
1820 			 * receive data from, unless sctp_lastdata equals
1821 			 * oldfp, then we should probably not send it
1822 			 * back to that fp.  Also we should check that
1823 			 * the fp is alive.
1824 			 */
1825 			if (sctp->sctp_lastdata != oldfp &&
1826 			    sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) {
1827 				fp = sctp->sctp_lastdata;
1828 			}
1829 		}
1830 	}
1831 
1832 	/*
1833 	 * Cancel RTT measurement if the retransmitted TSN is before the
1834 	 * TSN used for timing.  sctp_rtt_tsn is kept in host byte order,
	 * so it is compared with the converted TSN, not the wire field.
1835 	 */
1836 	if (sctp->sctp_out_time != 0 &&
1837 	    SEQ_GEQ(sctp->sctp_rtt_tsn, first_ua_tsn)) {
1838 		sctp->sctp_out_time = 0;
1839 	}
1840 	/* Clear the counter as the RTT calculation may be off. */
1841 	fp->rtt_updates = 0;
1842 	oldfp->rtt_updates = 0;
1843 
1844 	/*
1845 	 * After a timeout, we should change the current faddr so that
1846 	 * new chunks will be sent to the alternate address.
1847 	 */
1848 	sctp_set_faddr_current(sctp, fp);
1849 
1850 	nmp = dupmsg(mp);
1851 	if (nmp == NULL)
1852 		goto restart_timer;
1853 	if (extra > 0) {
1854 		fill = sctp_get_padding(extra, sctps);
1855 		if (fill != NULL) {
1856 			linkb(nmp, fill);
1857 			seglen += extra;
1858 		} else {
1859 			freemsg(nmp);
1860 			goto restart_timer;
1861 		}
1862 	}
1863 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
1864 	head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
1865 	if (head == NULL) {
1866 		freemsg(nmp);
1867 		SCTP_KSTAT(sctps, sctp_rexmit_failed);
1868 		goto restart_timer;
1869 	}
1870 	seglen += sacklen;
1871 
1872 	SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1873 
1874 	mp = mp->b_next;
1875 
1876 	/* Check how much more we can send. */
1877 	tot_wnd = MIN(fp->cwnd, sctp->sctp_frwnd);
1878 	/*
1879 	 * If the number of outstanding bytes is more than what we are
1880 	 * allowed to send, stop.
1881 	 */
1882 	if (tot_wnd <= chunklen || tot_wnd < fp->suna + chunklen)
1883 		goto done_bundle;
1884 	else
1885 		tot_wnd -= chunklen;
1886 
1887 try_bundle:
1888 	while (seglen < fp->sfa_pmss) {
1889 		int32_t new_len;
1890 
1891 		/* Go through the list to find more chunks to be bundled. */
1892 		while (mp != NULL) {
1893 			/* Check if the chunk can be bundled. */
1894 			if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp))
1895 				break;
1896 			mp = mp->b_next;
1897 		}
1898 		/* Go to the next message. */
1899 		if (mp == NULL) {
			/*
			 * The Forward TSN path arrives here with meta NULL
			 * when every remaining message was abandoned.
			 */
			if (meta == NULL)
				break;
1900 			for (meta = meta->b_next; meta != NULL;
1901 			    meta = meta->b_next) {
1902 				mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1903 
1904 				if (SCTP_IS_MSG_ABANDONED(meta) ||
1905 				    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
1906 				    sctp)) {
1907 					continue;
1908 				}
1909 
1910 				mp = meta->b_cont;
1911 				goto try_bundle;
1912 			}
1913 			/* No more chunk to be bundled. */
1914 			break;
1915 		}
1916 
1917 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1918 		new_len = ntohs(sdc->sdh_len);
1919 		chunklen = new_len - sizeof (*sdc);
1920 		if (chunklen > tot_wnd)
1921 			break;
1922 
1923 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
1924 			extra = SCTP_ALIGN - extra;
1925 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
1926 			break;
1927 		if ((nmp = dupmsg(mp)) == NULL)
1928 			break;
1929 
1930 		if (extra > 0) {
1931 			fill = sctp_get_padding(extra, sctps);
1932 			if (fill != NULL) {
1933 				linkb(nmp, fill);
1934 			} else {
1935 				freemsg(nmp);
1936 				break;
1937 			}
1938 		}
1939 		linkb(head, nmp);
1940 
1941 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
1942 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1943 
1944 		seglen = new_len;
1945 		tot_wnd -= chunklen;
1946 		mp = mp->b_next;
1947 	}
1948 done_bundle:
1949 	if ((seglen > fp->sfa_pmss) && fp->isv4) {
1950 		ipha_t *iph = (ipha_t *)head->b_rptr;
1951 
1952 		/*
1953 		 * Path MTU is different from what we thought it would
1954 		 * be when we created chunks, or IP headers have grown.
1955 		 * Need to clear the DF bit.
1956 		 */
1957 		iph->ipha_fragment_offset_and_flags = 0;
1958 	}
1959 	dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
1960 	    "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
1961 	    seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
1962 	    (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
1963 
1964 	sctp->sctp_rexmitting = B_TRUE;
1965 	sctp->sctp_rxt_nxttsn = first_ua_tsn;
1966 	sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1967 	sctp_set_iplen(sctp, head);
1968 	sctp_add_sendq(sctp, head);
1969 
1970 	/*
1971 	 * Restart the oldfp timer with exponential backoff and
1972 	 * the new fp timer for the retransmitted chunks.
1973 	 */
1974 restart_timer:
1975 	oldfp->strikes++;
1976 	sctp->sctp_strikes++;
1977 	SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
1978 	if (oldfp->suna != 0)
1979 		SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto);
1980 	sctp->sctp_active = lbolt64;
1981 
1982 	/*
1983 	 * Should we restart the timer of the new fp?  If there is
1984 	 * outstanding data to the new fp, the timer should be
1985 	 * running already.  So restarting it means that the timer
1986 	 * will fire later for those outstanding data.  But if
1987 	 * we don't restart it, the timer will fire too early for the
1988 	 * just retransmitted chunks to the new fp.  The reason is that we
1989 	 * don't keep a timestamp on when a chunk is retransmitted.
1990 	 * So when the timer fires, it will just search for the
1991 	 * chunk with the earliest TSN sent to new fp.  This probably
1992 	 * is the chunk we just retransmitted.  So for now, let's
1993 	 * be conservative and restart the timer of the new fp.
1994 	 */
1995 	SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1996 }
1997 
1998 /*
1999  * The SCTP write put procedure called from IP.
2000  */
2001 void
2002 sctp_wput(queue_t *q, mblk_t *mp)
2003 {
2004 	uchar_t		*rptr;
2005 	t_scalar_t	type;
2006 
2007 	switch (mp->b_datap->db_type) {
2008 	case M_IOCTL:
2009 		sctp_wput_ioctl(q, mp);
2010 		break;
2011 	case M_DATA:
2012 		/* Should be handled in sctp_output() */
2013 		ASSERT(0);
2014 		freemsg(mp);
2015 		break;
2016 	case M_PROTO:
2017 	case M_PCPROTO:
2018 		rptr = mp->b_rptr;
2019 		if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) {
2020 			type = ((union T_primitives *)rptr)->type;
2021 			/*
2022 			 * There is no "standard" way on how to respond
2023 			 * to T_CAPABILITY_REQ if a module does not
2024 			 * understand it.  And the current TI mod
2025 			 * has problems handling an error ack.  So we
2026 			 * catch the request here and reply with a response
2027 			 * which the TI mod knows how to respond to.
2028 			 */
2029 			switch (type) {
2030 			case T_CAPABILITY_REQ:
2031 				(void) putnextctl1(RD(q), M_ERROR, EPROTO);
2032 				break;
2033 			default:
2034 				if ((mp = mi_tpi_err_ack_alloc(mp,
2035 				    TNOTSUPPORT, 0)) != NULL) {
2036 					qreply(q, mp);
2037 					return;
2038 				}
2039 			}
2040 		}
2041 		/* FALLTHRU */
2042 	default:
2043 		freemsg(mp);
2044 		return;
2045 	}
2046 }
2047 
2048 /*
2049  * This function is called by sctp_ss_rexmit() to create a packet
2050  * to be retransmitted to the given fp.  The given meta and mp
2051  * parameters are respectively the sctp_msg_hdr_t and the mblk of the
2052  * first chunk to be retransmitted. This is also called when we want
2053  * to retransmit a zero window probe from sctp_rexmit() or when we
2054  * want to retransmit the zero window probe after the window has
2055  * opened from sctp_got_sack().
2056  */
2057 mblk_t *
2058 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
2059     uint_t *packet_len)
2060 {
2061 	uint32_t	seglen = 0;
2062 	uint16_t	chunklen;
2063 	int		extra;
2064 	mblk_t		*nmp;
2065 	mblk_t		*head;
2066 	mblk_t		*fill;
2067 	sctp_data_hdr_t	*sdc;
2068 	sctp_msg_hdr_t	*mhdr;
2069 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2070 
2071 	sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2072 	seglen = ntohs(sdc->sdh_len);
2073 	chunklen = seglen - sizeof (*sdc);
2074 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
2075 		extra = SCTP_ALIGN - extra;
2076 
2077 	nmp = dupmsg(*mp);
2078 	if (nmp == NULL)
2079 		return (NULL);
2080 	if (extra > 0) {
2081 		fill = sctp_get_padding(extra, sctps);
2082 		if (fill != NULL) {
2083 			linkb(nmp, fill);
2084 			seglen += extra;
2085 		} else {
2086 			freemsg(nmp);
2087 			return (NULL);
2088 		}
2089 	}
2090 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
2091 	head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
2092 	if (head == NULL) {
2093 		freemsg(nmp);
2094 		return (NULL);
2095 	}
2096 	SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2097 	/*
2098 	 * Don't update the TSN if we are doing a Zero Win Probe.
2099 	 */
2100 	if (!sctp->sctp_zero_win_probe)
2101 		sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2102 	*mp = (*mp)->b_next;
2103 
2104 try_bundle:
2105 	while (seglen < fp->sfa_pmss) {
2106 		int32_t new_len;
2107 
2108 		/*
2109 		 * Go through the list to find more chunks to be bundled.
2110 		 * We should only retransmit sent by unack'ed chunks.  Since
2111 		 * they were sent before, the peer's receive window should
2112 		 * be able to receive them.
2113 		 */
2114 		while (*mp != NULL) {
2115 			/* Check if the chunk can be bundled. */
2116 			if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
2117 				break;
2118 			*mp = (*mp)->b_next;
2119 		}
2120 		/* Go to the next message. */
2121 		if (*mp == NULL) {
2122 			for (*meta = (*meta)->b_next; *meta != NULL;
2123 			    *meta = (*meta)->b_next) {
2124 				mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;
2125 
2126 				if (SCTP_IS_MSG_ABANDONED(*meta) ||
2127 				    SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
2128 				    sctp)) {
2129 					continue;
2130 				}
2131 
2132 				*mp = (*meta)->b_cont;
2133 				goto try_bundle;
2134 			}
2135 			/* No more chunk to be bundled. */
2136 			break;
2137 		}
2138 
2139 		sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2140 		/* Don't bundle chunks beyond sctp_rxt_maxtsn. */
2141 		if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
2142 			break;
2143 		new_len = ntohs(sdc->sdh_len);
2144 		chunklen = new_len - sizeof (*sdc);
2145 
2146 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
2147 			extra = SCTP_ALIGN - extra;
2148 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
2149 			break;
2150 		if ((nmp = dupmsg(*mp)) == NULL)
2151 			break;
2152 
2153 		if (extra > 0) {
2154 			fill = sctp_get_padding(extra, sctps);
2155 			if (fill != NULL) {
2156 				linkb(nmp, fill);
2157 			} else {
2158 				freemsg(nmp);
2159 				break;
2160 			}
2161 		}
2162 		linkb(head, nmp);
2163 
2164 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
2165 		SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2166 		/*
2167 		 * Don't update the TSN if we are doing a Zero Win Probe.
2168 		 */
2169 		if (!sctp->sctp_zero_win_probe)
2170 			sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2171 
2172 		seglen = new_len;
2173 		*mp = (*mp)->b_next;
2174 	}
2175 	*packet_len = seglen;
2176 	return (head);
2177 }
2178 
2179 /*
2180  * sctp_ss_rexmit() is called when we get a SACK after a timeout which
2181  * advances the cum_tsn but the cum_tsn is still less than what we have sent
2182  * (sctp_rxt_maxtsn) at the time of the timeout.  This SACK is a "partial"
2183  * SACK.  We retransmit unacked chunks without having to wait for another
2184  * timeout.  The rationale is that the SACK should not be "partial" if all the
2185  * lost chunks have been retransmitted.  Since the SACK is "partial,"
2186  * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
2187  * be missing.  It is better for us to retransmit them now instead
2188  * of waiting for a timeout.
2189  */
2190 void
2191 sctp_ss_rexmit(sctp_t *sctp)
2192 {
2193 	mblk_t		*meta;
2194 	mblk_t		*mp;
2195 	mblk_t		*pkt;
2196 	sctp_faddr_t	*fp;
2197 	uint_t		pkt_len;
2198 	uint32_t	tot_wnd;
2199 	sctp_data_hdr_t	*sdc;
2200 	int		burst;
2201 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2202 
2203 	ASSERT(!sctp->sctp_zero_win_probe);
2204 
2205 	/*
2206 	 * If the last cum ack is smaller than what we have just
2207 	 * retransmitted, simply return.
2208 	 */
2209 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
2210 		sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
2211 	else
2212 		return;
2213 	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));
2214 
2215 	/*
2216 	 * After a timer fires, sctp_current should be set to the new
2217 	 * fp where the retransmitted chunks are sent.
2218 	 */
2219 	fp = sctp->sctp_current;
2220 
2221 	/*
2222 	 * Since we are retransmitting, we can only use cwnd to determine
2223 	 * how much we can send as we were allowed to send those chunks
2224 	 * previously.
2225 	 */
2226 	tot_wnd = fp->cwnd;
2227 	/* So we have sent more than we can, just return. */
2228 	if (tot_wnd < fp->suna || tot_wnd - fp->suna < fp->sfa_pmss)
2229 		return;
2230 	else
2231 		tot_wnd -= fp->suna;
2232 
2233 	/* Find the first unack'ed chunk */
2234 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
2235 		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
2236 
2237 		if (SCTP_IS_MSG_ABANDONED(meta) ||
2238 		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
2239 			continue;
2240 		}
2241 
2242 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2243 			/* Again, this may not be possible */
2244 			if (!SCTP_CHUNK_ISSENT(mp))
2245 				return;
2246 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2247 			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
2248 				goto found_msg;
2249 		}
2250 	}
2251 
2252 	/* Everything is abandoned... */
2253 	return;
2254 
2255 found_msg:
2256 	if (!fp->timer_running)
2257 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
2258 	pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
2259 	if (pkt == NULL) {
2260 		SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2261 		return;
2262 	}
2263 	if ((pkt_len > fp->sfa_pmss) && fp->isv4) {
2264 		ipha_t	*iph = (ipha_t *)pkt->b_rptr;
2265 
2266 		/*
2267 		 * Path MTU is different from path we thought it would
2268 		 * be when we created chunks, or IP headers have grown.
2269 		 *  Need to clear the DF bit.
2270 		 */
2271 		iph->ipha_fragment_offset_and_flags = 0;
2272 	}
2273 	sctp_set_iplen(sctp, pkt);
2274 	sctp_add_sendq(sctp, pkt);
2275 
2276 	/* Check and see if there is more chunk to be retransmitted. */
2277 	if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss ||
2278 	    meta == NULL)
2279 		return;
2280 	if (mp == NULL)
2281 		meta = meta->b_next;
2282 	if (meta == NULL)
2283 		return;
2284 
2285 	/* Retransmit another packet if the window allows. */
2286 	for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1;
2287 	    meta != NULL && burst > 0; meta = meta->b_next, burst--) {
2288 		if (mp == NULL)
2289 			mp = meta->b_cont;
2290 		for (; mp != NULL; mp = mp->b_next) {
2291 			/* Again, this may not be possible */
2292 			if (!SCTP_CHUNK_ISSENT(mp))
2293 				return;
2294 			if (!SCTP_CHUNK_ISACKED(mp))
2295 				goto found_msg;
2296 		}
2297 	}
2298 }
2299