xref: /illumos-gate/usr/src/uts/common/inet/sctp/sctp_output.c (revision 8af2c5b9bdbf69a55f079d7ad9483d38fae9f023)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/cmn_err.h>
33 #define	_SUN_TPI_VERSION 2
34 #include <sys/tihdr.h>
35 #include <sys/socket.h>
36 #include <sys/stropts.h>
37 #include <sys/strsun.h>
38 #include <sys/strsubr.h>
39 #include <sys/socketvar.h>
40 /* swilly code in sys/socketvar.h turns off DEBUG */
41 #ifdef __lint
42 #define	DEBUG
43 #endif
44 
45 #include <inet/common.h>
46 #include <inet/mi.h>
47 #include <inet/ip.h>
48 #include <inet/ip6.h>
49 #include <inet/sctp_ip.h>
50 #include <inet/ipclassifier.h>
51 
52 /*
53  * PR-SCTP comments.
54  *
55  * A message can expire before it gets to the transmit list (i.e. it is still
56  * in the unsent list - unchunked), after it gets to the transmit list, but
57  * before transmission has actually started, or after transmission has begun.
58  * Accordingly, we check for the status of a message in sctp_chunkify() when
59  * the message is being transferred from the unsent list to the transmit list;
60  * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
61  * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
62  * When we nuke a message in sctp_chunkify(), all we need to do is take it
63  * out of the unsent list and update sctp_unsent; when a message is deemed
64  * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
65  * list, update sctp_unsent IFF transmission for the message has not yet begun
66  * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
67  * message has started, then we cannot just take it out of the list, we need
68  * to send Forward TSN chunk to the peer so that the peer can clear its
69  * fragment list for this message. However, we cannot just send the Forward
70  * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
71  * messages preceding this abandoned message. So, we send a Forward TSN
72  * IFF all messages prior to this abandoned message have been SACKed; if not,
73  * we defer sending the Forward TSN to sctp_cumack(), which will check for
74  * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
75  * sctp_rexmit() when we check for retransmissions, we need to determine if
76  * the advanced peer ack point can be moved ahead, and if so, send a Forward
77  * TSN to the peer instead of retransmitting the chunk. Note that when
78  * we send a Forward TSN for a message, there may be yet unsent chunks for
79  * this message; we need to mark all such chunks as abandoned, so that
80  * sctp_cumack() can take the message out of the transmit list, additionally
81  * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
82  * decremented when a message/chunk is deemed abandoned), sockfs needs to
83  * be notified so that it can adjust its idea of the queued message.
84  */
85 
86 #include "sctp_impl.h"
87 
/*
 * kmem cache handle.  Judging by the name it backs Forward TSN set
 * allocations, but nothing in this part of the file creates or uses it -
 * TODO confirm against the code that does.
 */
88 static struct kmem_cache	*sctp_kmem_ftsn_set_cache;
89 
/*
 * Forward declaration of the chain sanity checker defined further down;
 * like the definition, it only exists when DEBUG is defined.
 */
90 #ifdef	DEBUG
91 static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
92 #endif
93 
94 /*
95  * Called to allocate a header mblk when sending data to SCTP.
96  * Data will follow in b_cont of this mblk.
97  */
98 mblk_t *
99 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
100     int flags)
101 {
102 	mblk_t *mp;
103 	struct T_unitdata_req *tudr;
104 	size_t size;
105 	int error;
106 
107 	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
108 	size = MAX(size, sizeof (sctp_msg_hdr_t));
109 	if (flags & SCTP_CAN_BLOCK) {
110 		mp = allocb_wait(size, BPRI_MED, 0, &error);
111 	} else {
112 		mp = allocb(size, BPRI_MED);
113 	}
114 	if (mp) {
115 		tudr = (struct T_unitdata_req *)mp->b_rptr;
116 		tudr->PRIM_type = T_UNITDATA_REQ;
117 		tudr->DEST_length = nlen;
118 		tudr->DEST_offset = sizeof (*tudr);
119 		tudr->OPT_length = clen;
120 		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
121 		    _TPI_ALIGN_TOPT(nlen));
122 		if (nlen > 0)
123 			bcopy(name, tudr + 1, nlen);
124 		if (clen > 0)
125 			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
126 		mp->b_wptr += (tudr ->OPT_offset + clen);
127 		mp->b_datap->db_type = M_PROTO;
128 	}
129 	return (mp);
130 }
131 
/*
 * Queue one user message for transmission on the association 'sctp'.
 *
 * 'mp' is an M_PROTO mblk holding a T_UNITDATA_REQ (the layout built by
 * sctp_alloc_hdr()), optionally followed through b_cont by the M_DATA
 * payload.  The request may carry a destination address and ancillary
 * data; an SCTP_SNDRCV cmsg overrides the association's default stream id,
 * flags, ppid, context and time-to-live.  'flags' is unused.
 *
 * On success (return 0) the M_PROTO mblk has been consumed: it is either
 * overwritten in place with a sctp_msg_hdr_t and appended to the unsent
 * list, or freed (MSG_ABORT path).
 *
 * Error returns, as produced below:
 *	EINVAL		destination address too short or not a peer address,
 *			SCTP_SNDRCV cmsg too short, stream id out of range,
 *			or zero-length payload
 *	EAFNOSUPPORT	destination address family is neither AF_INET nor
 *			AF_INET6
 *	ENOMEM		msgpullup() failed while handling MSG_ABORT
 *	EPIPE		association is past ESTABLISHED or the conn is closing
 * None of the error paths frees 'mp'; presumably the caller does - confirm.
 */
132 /*ARGSUSED2*/
133 int
134 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
135 {
136 	sctp_faddr_t	*fp = NULL;
137 	struct T_unitdata_req	*tudr;
138 	int		error = 0;
139 	mblk_t		*mproto = mp;
140 	in6_addr_t	*addr;
141 	in6_addr_t	tmpaddr;
	/* Per-message attributes start out as the association defaults. */
142 	uint16_t	sid = sctp->sctp_def_stream;
143 	uint32_t	ppid = sctp->sctp_def_ppid;
144 	uint32_t	context = sctp->sctp_def_context;
145 	uint16_t	msg_flags = sctp->sctp_def_flags;
146 	sctp_msg_hdr_t	*sctp_msg_hdr;
147 	uint32_t	msg_len = 0;
148 	uint32_t	timetolive = sctp->sctp_def_timetolive;
149 
150 	ASSERT(DB_TYPE(mproto) == M_PROTO);
151 
	/* From here on 'mp' is the payload, 'mproto' the TPI header. */
152 	mp = mp->b_cont;
153 	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);
154 
155 	tudr = (struct T_unitdata_req *)mproto->b_rptr;
156 	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
157 
158 	/* Get destination address, if specified */
159 	if (tudr->DEST_length > 0) {
160 		sin_t *sin;
161 		sin6_t *sin6;
162 
163 		sin = (struct sockaddr_in *)
164 		    (mproto->b_rptr + tudr->DEST_offset);
		/*
		 * The family is read before the length is compared against
		 * the full sockaddr size for that family.
		 */
165 		switch (sin->sin_family) {
166 		case AF_INET:
167 			if (tudr->DEST_length < sizeof (*sin)) {
168 				return (EINVAL);
169 			}
			/* Peer addresses are looked up as IPv6; map v4. */
170 			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
171 			addr = &tmpaddr;
172 			break;
173 		case AF_INET6:
174 			if (tudr->DEST_length < sizeof (*sin6)) {
175 				return (EINVAL);
176 			}
177 			sin6 = (struct sockaddr_in6 *)
178 			    (mproto->b_rptr + tudr->DEST_offset);
179 			addr = &sin6->sin6_addr;
180 			break;
181 		default:
182 			return (EAFNOSUPPORT);
183 		}
		/* The address must be one of this association's peers. */
184 		fp = sctp_lookup_faddr(sctp, addr);
185 		if (fp == NULL) {
186 			return (EINVAL);
187 		}
188 	}
189 	/* Ancillary Data? */
190 	if (tudr->OPT_length > 0) {
191 		struct cmsghdr		*cmsg;
192 		char			*cend;
193 		struct sctp_sndrcvinfo	*sndrcv;
194 
195 		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
196 		cend = ((char *)cmsg + tudr->OPT_length);
197 		ASSERT(cend <= (char *)mproto->b_wptr);
198 
		/*
		 * Walk the cmsg list until it runs past 'cend', a zero
		 * cmsg_len is seen, or the first SCTP_SNDRCV is found; only
		 * that first SCTP_SNDRCV is honoured.
		 */
199 		for (;;) {
200 			if ((char *)(cmsg + 1) > cend ||
201 			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
202 				break;
203 			}
204 			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
205 			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
206 				if (cmsg->cmsg_len <
207 				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
208 					return (EINVAL);
209 				}
210 				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
211 				sid = sndrcv->sinfo_stream;
212 				msg_flags = sndrcv->sinfo_flags;
213 				ppid = sndrcv->sinfo_ppid;
214 				context = sndrcv->sinfo_context;
215 				timetolive = sndrcv->sinfo_timetolive;
216 				break;
217 			}
218 			if (cmsg->cmsg_len > 0)
219 				cmsg = CMSG_NEXT(cmsg);
220 			else
221 				break;
222 		}
223 	}
	/*
	 * User-initiated abort.  Any multi-mblk payload is first flattened
	 * into a single mblk before being passed to sctp_user_abort().  The
	 * association is then torn down and the whole request freed here.
	 */
224 	if (msg_flags & MSG_ABORT) {
225 		if (mp && mp->b_cont) {
226 			mblk_t *pump = msgpullup(mp, -1);
227 			if (!pump) {
228 				return (ENOMEM);
229 			}
230 			freemsg(mp);
231 			mp = pump;
232 			mproto->b_cont = mp;
233 		}
234 		RUN_SCTP(sctp);
235 		sctp_user_abort(sctp, mp, B_TRUE);
236 		sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL);
237 		sctp_clean_death(sctp, ECONNRESET);
238 		freemsg(mproto);
239 		goto process_sendq;
240 	}
	/*
	 * No payload mblk at all: return 0 without queueing anything.
	 * NOTE(review): mproto is neither queued nor freed on this path even
	 * though success is reported - confirm the caller copes with that.
	 */
241 	if (mp == NULL)
242 		goto done;
243 
244 	RUN_SCTP(sctp);
245 
246 	/* Reject any new data requests if we are shutting down */
247 	if (sctp->sctp_state > SCTPS_ESTABLISHED ||
248 	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
249 		error = EPIPE;
250 		goto unlock_done;
251 	}
252 
253 	/* Re-use the mproto to store relevant info. */
254 	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));
255 
	/*
	 * Everything needed from the T_UNITDATA_REQ has been extracted by
	 * now, so the buffer can be overwritten from its base.
	 */
256 	mproto->b_rptr = mproto->b_datap->db_base;
257 	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);
258 
259 	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
260 	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
261 	sctp_msg_hdr->smh_context = context;
262 	sctp_msg_hdr->smh_sid = sid;
263 	sctp_msg_hdr->smh_ppid = ppid;
264 	sctp_msg_hdr->smh_flags = msg_flags;
	/* The time-to-live is stored in ticks, next to the current tick. */
265 	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
266 	sctp_msg_hdr->smh_tob = lbolt64;
	/* Total payload length; note that 'mp' is NULL after this loop. */
267 	for (; mp != NULL; mp = mp->b_cont)
268 		msg_len += MBLKL(mp);
269 	sctp_msg_hdr->smh_msglen = msg_len;
270 
271 	/* User requested specific destination */
272 	SCTP_SET_CHUNK_DEST(mproto, fp);
273 
	/*
	 * The stream id is only range-checked once the association has
	 * reached COOKIE_ECHOED; before that the message is queued as is.
	 */
274 	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
275 	    sid >= sctp->sctp_num_ostr) {
276 		/* Send sendfail event */
277 		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
278 		    B_FALSE);
279 		error = EINVAL;
280 		goto unlock_done;
281 	}
282 
283 	/* no data */
284 	if (msg_len == 0) {
285 		sctp_sendfail_event(sctp, dupmsg(mproto),
286 		    SCTP_ERR_NO_USR_DATA, B_FALSE);
287 		error = EINVAL;
288 		goto unlock_done;
289 	}
290 
291 	/* Add it to the unsent list */
292 	if (sctp->sctp_xmit_unsent == NULL) {
293 		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
294 	} else {
295 		sctp->sctp_xmit_unsent_tail->b_next = mproto;
296 		sctp->sctp_xmit_unsent_tail = mproto;
297 	}
298 	sctp->sctp_unsent += msg_len;
299 	BUMP_LOCAL(sctp->sctp_msgcount);
	/* Transmission is only attempted right away when ESTABLISHED. */
300 	if (sctp->sctp_state == SCTPS_ESTABLISHED)
301 		sctp_output(sctp, UINT_MAX);
	/* Success exit: the send queue is processed after WAKE_SCTP(). */
302 process_sendq:
303 	WAKE_SCTP(sctp);
304 	sctp_process_sendq(sctp);
305 	return (0);
	/* Error exit for failures detected after RUN_SCTP(). */
306 unlock_done:
307 	WAKE_SCTP(sctp);
308 done:
309 	return (error);
310 }
311 
/*
 * Turn messages on the unsent list into DATA chunks and move each fully
 * chunked message to the end of the transmit list.
 *
 * Processing starts with the message at the head of sctp_xmit_unsent, which
 * must be non-NULL (it is dereferenced unconditionally).  'first_len' is the
 * room available for the first chunk, DATA chunk header included; later
 * chunks are sized from the destination's sfa_pmss.  'bytes_to_send' is a
 * budget that is reduced by the payload of every chunk built; once a message
 * is done, the next one is taken from the unsent list only while the budget
 * is still positive.
 *
 * Each chunk is an mblk chain whose first mblk begins with a
 * sctp_data_hdr_t.  The chunks of one message hang off the message header's
 * b_cont and are linked to each other through b_next.
 *
 * Messages for which SCTP_MSG_TO_BE_ABANDONED() is true are dropped from the
 * unsent list instead of being chunked, with a send-failure event raised.
 *
 * If dupb() or allocb() fails midway, the function returns early and leaves
 * the message on the unsent list.  When chunks had already been built, the
 * message is flagged via SCTP_MSG_SET_CHUNKED() and the still unchunked
 * data is placed first in b_cont with the finished chunks following through
 * b_next, which is the layout the SCTP_IS_MSG_CHUNKED() branch below undoes
 * when a later call resumes the work.
 */
312 void
313 sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send)
314 {
315 	mblk_t			*mp;
316 	mblk_t			*chunk_mp;
317 	mblk_t			*chunk_head;
318 	mblk_t			*chunk_hdr;
319 	mblk_t			*chunk_tail = NULL;
320 	int			count;
321 	int			chunksize;
322 	sctp_data_hdr_t		*sdc;
323 	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
324 	sctp_faddr_t		*fp;
325 	sctp_faddr_t		*fp1;
326 	size_t			xtralen;
327 	sctp_msg_hdr_t		*msg_hdr;
328 	sctp_stack_t	*sctps = sctp->sctp_sctps;
329 
	/* Destination: the one stored with the message, else the current. */
330 	fp = SCTP_CHUNK_DEST(mdblk);
331 	if (fp == NULL)
332 		fp = sctp->sctp_current;
	/*
	 * xtralen is the size of a separately allocated chunk header mblk:
	 * IP/SCTP header template, write offset slack and DATA chunk header.
	 */
333 	if (fp->isv4)
334 		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
335 		    sizeof (*sdc);
336 	else
337 		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
338 		    sizeof (*sdc);
	/*
	 * chunksize is the payload capacity of the chunk being built and
	 * count is what remains of it; chunksize - count is the payload
	 * gathered so far.
	 */
339 	count = chunksize = first_len - sizeof (*sdc);
340 nextmsg:
341 	chunk_mp = mdblk->b_cont;
342 
343 	/*
344 	 * If this message is partially chunked, we ignore the first_len for
345 	 * now and use the one already present. For the unchunked bits, we
346 	 * use the length of the last chunk.
347 	 */
348 	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
349 		int	chunk_len;
350 
		/*
		 * Detach the unchunked data (first in b_cont) from the
		 * chunks built earlier, which follow it through b_next.
		 */
351 		ASSERT(chunk_mp->b_next != NULL);
352 		mdblk->b_cont = chunk_mp->b_next;
353 		chunk_mp->b_next = NULL;
354 		SCTP_MSG_CLEAR_CHUNKED(mdblk);
		/* Find the last chunk built so far to learn its length. */
355 		mp = mdblk->b_cont;
356 		while (mp->b_next != NULL)
357 			mp = mp->b_next;
358 		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
359 		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
360 			count = chunksize = fp->sfa_pmss - chunk_len;
361 		else
362 			count = chunksize = fp->sfa_pmss;
363 		count = chunksize = count - sizeof (*sdc);
364 	} else {
365 		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
366 		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
			/* Drop the message from the head of the unsent list. */
367 			sctp->sctp_xmit_unsent = mdblk->b_next;
368 			if (sctp->sctp_xmit_unsent == NULL)
369 				sctp->sctp_xmit_unsent_tail = NULL;
370 			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
371 			sctp->sctp_unsent -= msg_hdr->smh_msglen;
372 			mdblk->b_next = NULL;
373 			BUMP_LOCAL(sctp->sctp_prsctpdrop);
374 			/*
375 			 * Update ULP the amount of queued data, which is
376 			 * sent-unack'ed + unsent.
377 			 */
378 			if (!SCTP_IS_DETACHED(sctp)) {
379 				sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
380 				    sctp->sctp_unacked + sctp->sctp_unsent);
381 			}
382 			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
383 			goto try_next;
384 		}
		/*
		 * b_cont is rebuilt as the chunk list below; the raw data is
		 * still reachable through chunk_mp.
		 */
385 		mdblk->b_cont = NULL;
386 	}
387 	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
388 nextchunk:
389 	chunk_head = chunk_mp;
390 	chunk_tail = NULL;
391 
392 	/* Skip as many mblk's as we need */
393 	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
394 		count -= MBLKL(chunk_mp);
395 		chunk_tail = chunk_mp;
396 		chunk_mp = chunk_mp->b_cont;
397 	}
398 	/* Split the chain, if needed */
399 	if (chunk_mp != NULL) {
400 		if (count > 0) {
			/*
			 * Room is left but chunk_mp does not fit entirely:
			 * a dupb() copy takes the first 'count' bytes and
			 * chunk_mp keeps the rest.
			 */
401 			mblk_t	*split_mp = dupb(chunk_mp);
402 
403 			if (split_mp == NULL) {
				/*
				 * Put the unprocessed data back (the chain
				 * has not been cut yet) and give up.
				 */
404 				if (mdblk->b_cont == NULL) {
405 					mdblk->b_cont = chunk_head;
406 				} else  {
407 					SCTP_MSG_SET_CHUNKED(mdblk);
408 					ASSERT(chunk_head->b_next == NULL);
409 					chunk_head->b_next = mdblk->b_cont;
410 					mdblk->b_cont = chunk_head;
411 				}
412 				return;
413 			}
414 			if (chunk_tail != NULL) {
415 				chunk_tail->b_cont = split_mp;
416 				chunk_tail = chunk_tail->b_cont;
417 			} else {
418 				chunk_head = chunk_tail = split_mp;
419 			}
420 			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
421 			chunk_mp->b_rptr = chunk_tail->b_wptr;
422 			count = 0;
423 		} else if (chunk_tail == NULL) {
			/*
			 * No room for any data at this chunk size; retry
			 * with a full-sized chunk.
			 */
424 			goto next;
425 		} else {
			/* The chunk ends exactly on an mblk boundary. */
426 			chunk_tail->b_cont = NULL;
427 		}
428 	}
429 	/* Alloc chunk hdr, if needed */
	/*
	 * The DATA header is prepended in place (else branch) unless the
	 * data block has more than one reference, b_rptr is not SCTP_ALIGN
	 * aligned, or MBLKHEAD() reports less than a header's worth of
	 * space.
	 */
430 	if (DB_REF(chunk_head) > 1 ||
431 	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
432 	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
433 		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/*
			 * Re-join the chunk being built with the remaining
			 * data and put everything back, as for a dupb()
			 * failure above.
			 */
434 			if (mdblk->b_cont == NULL) {
435 				if (chunk_mp != NULL)
436 					linkb(chunk_head, chunk_mp);
437 				mdblk->b_cont = chunk_head;
438 			} else {
439 				SCTP_MSG_SET_CHUNKED(mdblk);
440 				if (chunk_mp != NULL)
441 					linkb(chunk_head, chunk_mp);
442 				ASSERT(chunk_head->b_next == NULL);
443 				chunk_head->b_next = mdblk->b_cont;
444 				mdblk->b_cont = chunk_head;
445 			}
446 			return;
447 		}
		/* The DATA header goes at the very end of the new buffer. */
448 		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
449 		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
450 		chunk_hdr->b_cont = chunk_head;
451 	} else {
452 		chunk_hdr = chunk_head;
453 		chunk_hdr->b_rptr -= sizeof (*sdc);
454 	}
455 	ASSERT(chunk_hdr->b_datap->db_ref == 1);
456 	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
457 	sdc->sdh_id = CHUNK_DATA;
458 	sdc->sdh_flags = 0;
	/* Chunk length = header + payload gathered for this chunk. */
459 	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
460 	ASSERT(sdc->sdh_len);
461 	sdc->sdh_sid = htons(msg_hdr->smh_sid);
462 	/*
463 	 * We defer assigning the SSN just before sending the chunk, else
464 	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
465 	 * to send a Forward TSN to let the peer know. Some more comments
466 	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
467 	 */
468 	sdc->sdh_payload_id = msg_hdr->smh_ppid;
469 
	/*
	 * Append the chunk to the message's chunk list; the first chunk of
	 * a message gets SCTP_DATA_SET_BBIT().
	 */
470 	if (mdblk->b_cont == NULL) {
471 		mdblk->b_cont = chunk_hdr;
472 		SCTP_DATA_SET_BBIT(sdc);
473 	} else {
474 		mp = mdblk->b_cont;
475 		while (mp->b_next != NULL)
476 			mp = mp->b_next;
477 		mp->b_next = chunk_hdr;
478 	}
479 
480 	bytes_to_send -= (chunksize - count);
481 	if (chunk_mp != NULL) {
		/* More data in this message: the next chunk is full-sized. */
482 next:
483 		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
484 		goto nextchunk;
485 	}
	/*
	 * The message is completely chunked: the last chunk gets
	 * SCTP_DATA_SET_EBIT() and the message moves from the unsent list
	 * to the transmit list.
	 */
486 	SCTP_DATA_SET_EBIT(sdc);
487 	sctp->sctp_xmit_unsent = mdblk->b_next;
488 	if (mdblk->b_next == NULL) {
489 		sctp->sctp_xmit_unsent_tail = NULL;
490 	}
491 	mdblk->b_next = NULL;
492 
493 	if (sctp->sctp_xmit_tail == NULL) {
494 		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
495 	} else {
		/*
		 * sctp_xmit_tail is not necessarily the last message in the
		 * list, so walk to the real end before appending; it is not
		 * advanced here.
		 */
496 		mp = sctp->sctp_xmit_tail;
497 		while (mp->b_next != NULL)
498 			mp = mp->b_next;
499 		mp->b_next = mdblk;
500 		mdblk->b_prev = mp;
501 	}
502 try_next:
503 	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
504 		mdblk = sctp->sctp_xmit_unsent;
505 		fp1 = SCTP_CHUNK_DEST(mdblk);
506 		if (fp1 == NULL)
507 			fp1 = sctp->sctp_current;
508 		if (fp == fp1) {
			/*
			 * Same destination as the previous message: when
			 * room was left over in the last chunk and the next
			 * message's first mblk is either larger than a full
			 * chunk payload or fits in that room, size the next
			 * chunk from the leftover (minus a header, rounded
			 * down to a multiple of 4).  Otherwise start with a
			 * full-sized chunk.
			 */
509 			size_t len = MBLKL(mdblk->b_cont);
510 			if ((count > 0) &&
511 			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
512 			    (len <= count))) {
513 				count -= sizeof (*sdc);
514 				count = chunksize = count - (count & 0x3);
515 			} else {
516 				count = chunksize = fp->sfa_pmss -
517 				    sizeof (*sdc);
518 			}
519 		} else {
			/*
			 * Different destination: recompute the header space
			 * and chunk size for it.
			 */
520 			if (fp1->isv4)
521 				xtralen = sctp->sctp_hdr_len;
522 			else
523 				xtralen = sctp->sctp_hdr6_len;
524 			xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
525 			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
526 			fp = fp1;
527 		}
528 		goto nextmsg;
529 	}
530 }
531 
532 void
533 sctp_free_msg(mblk_t *ump)
534 {
535 	mblk_t *mp, *nmp;
536 
537 	for (mp = ump->b_cont; mp; mp = nmp) {
538 		nmp = mp->b_next;
539 		mp->b_next = mp->b_prev = NULL;
540 		freemsg(mp);
541 	}
542 	ASSERT(!ump->b_prev);
543 	ump->b_next = NULL;
544 	freeb(ump);
545 }
546 
547 mblk_t *
548 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
549     int *error)
550 {
551 	int hdrlen;
552 	char *hdr;
553 	int isv4 = fp->isv4;
554 	sctp_stack_t	*sctps = sctp->sctp_sctps;
555 
556 	if (error != NULL)
557 		*error = 0;
558 
559 	if (isv4) {
560 		hdrlen = sctp->sctp_hdr_len;
561 		hdr = sctp->sctp_iphc;
562 	} else {
563 		hdrlen = sctp->sctp_hdr6_len;
564 		hdr = sctp->sctp_iphc6;
565 	}
566 	/*
567 	 * A null fp->ire could mean that the address is 'down'. Similarly,
568 	 * it is possible that the address went down, we tried to send an
569 	 * heartbeat and ended up setting fp->saddr as unspec because we
570 	 * didn't have any usable source address.  In either case
571 	 * sctp_get_ire() will try find an IRE, if available, and set
572 	 * the source address, if needed.  If we still don't have any
573 	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
574 	 * we return EHOSTUNREACH.
575 	 */
576 	if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
577 		sctp_get_ire(sctp, fp);
578 		if (fp->state == SCTP_FADDRS_UNREACH) {
579 			if (error != NULL)
580 				*error = EHOSTUNREACH;
581 			return (NULL);
582 		}
583 	}
584 	/* Copy in IP header. */
585 	if ((mp->b_rptr - mp->b_datap->db_base) <
586 	    (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 ||
587 	    !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) {
588 		mblk_t *nmp;
589 
590 		/*
591 		 * This can happen if IP headers are adjusted after
592 		 * data was moved into chunks, or during retransmission,
593 		 * or things like snoop is running.
594 		 */
595 		nmp = allocb_cred(sctps->sctps_wroff_xtra + hdrlen + sacklen,
596 		    CONN_CRED(sctp->sctp_connp));
597 		if (nmp == NULL) {
598 			if (error !=  NULL)
599 				*error = ENOMEM;
600 			return (NULL);
601 		}
602 		nmp->b_rptr += sctps->sctps_wroff_xtra;
603 		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
604 		nmp->b_cont = mp;
605 		mp = nmp;
606 	} else {
607 		mp->b_rptr -= (hdrlen + sacklen);
608 		mblk_setcred(mp, CONN_CRED(sctp->sctp_connp));
609 	}
610 	bcopy(hdr, mp->b_rptr, hdrlen);
611 	if (sacklen) {
612 		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
613 	}
614 	if (fp != sctp->sctp_current) {
615 		/* change addresses in header */
616 		if (isv4) {
617 			ipha_t *iph = (ipha_t *)mp->b_rptr;
618 
619 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
620 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
621 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
622 				    iph->ipha_src);
623 			} else if (sctp->sctp_bound_to_all) {
624 				iph->ipha_src = INADDR_ANY;
625 			}
626 		} else {
627 			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
628 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
629 				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
630 			} else if (sctp->sctp_bound_to_all) {
631 				V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src);
632 			}
633 		}
634 	}
635 	/*
636 	 * IP will not free this IRE if it is condemned.  SCTP needs to
637 	 * free it.
638 	 */
639 	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
640 		IRE_REFRELE_NOTR(fp->ire);
641 		fp->ire = NULL;
642 	}
643 
644 	/* Stash the conn and ire ptr info for IP */
645 	SCTP_STASH_IPINFO(mp, fp->ire);
646 
647 	return (mp);
648 }
649 
650 /*
651  * SCTP requires every chunk to be padded so that the total length
652  * is a multiple of SCTP_ALIGN.  This function returns a mblk with
653  * the specified pad length.
654  */
655 static mblk_t *
656 sctp_get_padding(sctp_t *sctp, int pad)
657 {
658 	mblk_t *fill;
659 
660 	ASSERT(pad < SCTP_ALIGN);
661 	ASSERT(sctp->sctp_pad_mp != NULL);
662 	if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) {
663 		fill->b_wptr += pad;
664 		return (fill);
665 	}
666 
667 	/*
668 	 * The memory saving path of reusing the sctp_pad_mp
669 	 * fails may be because it has been dupb() too
670 	 * many times (DBLK_REFMAX).  Use the memory consuming
671 	 * path of allocating the pad mblk.
672 	 */
673 	if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
674 		/* Zero it out.  SCTP_ALIGN is sizeof (int32_t) */
675 		*(int32_t *)fill->b_rptr = 0;
676 		fill->b_wptr += pad;
677 	}
678 	return (fill);
679 }
680 
681 static mblk_t *
682 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
683 {
684 	mblk_t		*meta;
685 	mblk_t		*start_mp = NULL;
686 	mblk_t		*end_mp = NULL;
687 	mblk_t		*mp, *nmp;
688 	mblk_t		*fill;
689 	sctp_data_hdr_t	*sdh;
690 	int		msglen;
691 	int		extra;
692 	sctp_msg_hdr_t	*msg_hdr;
693 	sctp_faddr_t	*old_fp = NULL;
694 	sctp_faddr_t	*chunk_fp;
695 	sctp_stack_t	*sctps = sctp->sctp_sctps;
696 
697 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
698 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
699 		if (SCTP_IS_MSG_ABANDONED(meta) ||
700 		    SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
701 			continue;
702 		}
703 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
704 			if (SCTP_CHUNK_WANT_REXMIT(mp)) {
705 				/*
706 				 * Use the same peer address to do fast
707 				 * retransmission.  If the original peer
708 				 * address is dead, switch to the current
709 				 * one.  Record the old one so that we
710 				 * will pick the chunks sent to the old
711 				 * one for fast retransmission.
712 				 */
713 				chunk_fp = SCTP_CHUNK_DEST(mp);
714 				if (*fp == NULL) {
715 					*fp = chunk_fp;
716 					if ((*fp)->state != SCTP_FADDRS_ALIVE) {
717 						old_fp = *fp;
718 						*fp = sctp->sctp_current;
719 					}
720 				} else if (old_fp == NULL && *fp != chunk_fp) {
721 					continue;
722 				} else if (old_fp != NULL &&
723 				    old_fp != chunk_fp) {
724 					continue;
725 				}
726 
727 				sdh = (sctp_data_hdr_t *)mp->b_rptr;
728 				msglen = ntohs(sdh->sdh_len);
729 				if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
730 					extra = SCTP_ALIGN - extra;
731 				}
732 
733 				/*
734 				 * We still return at least the first message
735 				 * even if that message cannot fit in as
736 				 * PMTU may have changed.
737 				 */
738 				if (*total + msglen + extra >
739 				    (*fp)->sfa_pmss && start_mp != NULL) {
740 					return (start_mp);
741 				}
742 				if ((nmp = dupmsg(mp)) == NULL)
743 					return (start_mp);
744 				if (extra > 0) {
745 					fill = sctp_get_padding(sctp, extra);
746 					if (fill != NULL) {
747 						linkb(nmp, fill);
748 					} else {
749 						return (start_mp);
750 					}
751 				}
752 				BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans);
753 				BUMP_LOCAL(sctp->sctp_rxtchunks);
754 				SCTP_CHUNK_CLEAR_REXMIT(mp);
755 				if (start_mp == NULL) {
756 					start_mp = nmp;
757 				} else {
758 					linkb(end_mp, nmp);
759 				}
760 				end_mp = nmp;
761 				*total += msglen + extra;
762 				dprint(2, ("sctp_find_fast_rexmit_mblks: "
763 				    "tsn %x\n", sdh->sdh_tsn));
764 			}
765 		}
766 	}
767 	/* Clear the flag as there is no more message to be fast rexmitted. */
768 	sctp->sctp_chk_fast_rexmit = B_FALSE;
769 	return (start_mp);
770 }
771 
/*
 * DEBUG-only sanity check that an mblk list is not broken: reports whether
 * 'tail' can be reached from 'head' by following b_next.  A NULL 'head' or
 * 'tail' is treated as trivially consistent.
 */
#ifdef	DEBUG
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
	mblk_t	*cur;

	if (head == NULL || tail == NULL)
		return (B_TRUE);

	for (cur = head; cur != NULL; cur = cur->b_next) {
		if (cur == tail)
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif
789 
790 /*
791  * Gets the next unsent chunk to transmit. Messages that are abandoned are
792  * skipped. A message can be abandoned if it has a non-zero timetolive and
793  * transmission has not yet started or if it is a partially reliable
794  * message and its time is up (assuming we are PR-SCTP aware).
795  * 'cansend' is used to determine if need to try and chunkify messages from
796  * the unsent list, if any, and also as an input to sctp_chunkify() if so.
797  * When called from sctp_rexmit(), we don't want to chunkify, so 'cansend'
798  * will be set to 0.
799  */
/*
 * Arguments:
 *	mp	 out; set to the first chunk of the returned message for
 *		 which SCTP_CHUNK_CANSEND() is true.  Only written when a
 *		 message is returned.
 *	meta	 message in the transmit list at which to start looking;
 *		 may be NULL.
 *	error	 out; 0, or the error from sctp_check_abandoned_msg().
 *	firstseg space already taken in the first packet; the first chunk
 *		 built by sctp_chunkify() is limited to sfa_pmss - firstseg.
 *	cansend	 see above.
 *	fp	 destination the caller wants to use, or NULL to derive it
 *		 from the head of the unsent list / sctp_current.
 *
 * Returns the message (header mblk) that contains *mp, or NULL when there
 * is nothing to send or an error occurred (check *error).
 */
800 mblk_t *
801 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
802     int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp)
803 {
804 	mblk_t		*mp1;
805 	sctp_msg_hdr_t	*msg_hdr;
806 	mblk_t		*tmp_meta;
807 	sctp_faddr_t	*fp1;
808 
809 	ASSERT(error != NULL && mp != NULL);
810 	*error = 0;
811 
812 	ASSERT(sctp->sctp_current != NULL);
813 
814 chunkified:
815 	while (meta != NULL) {
		/* Remember the successor; 'meta' may be unlinked below. */
816 		tmp_meta = meta->b_next;
817 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
818 		mp1 = meta->b_cont;
819 		if (SCTP_IS_MSG_ABANDONED(meta))
820 			goto next_msg;
821 		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			/* Live message: hand back its first sendable chunk. */
822 			while (mp1 != NULL) {
823 				if (SCTP_CHUNK_CANSEND(mp1)) {
824 					*mp = mp1;
825 #ifdef	DEBUG
826 					ASSERT(sctp_verify_chain(
827 					    sctp->sctp_xmit_head, meta));
828 #endif
829 					return (meta);
830 				}
831 				mp1 = mp1->b_next;
832 			}
833 			goto next_msg;
834 		}
835 		/*
836 		 * If we come here and the first chunk is sent, then we
837 		 * are PR-SCTP aware, in which case if the cumulative
838 		 * TSN has moved up to or beyond the first chunk (which
839 		 * means all the previous messages have been cumulative
840 		 * SACK'd), then we send a Forward TSN with the last
841 		 * chunk that was sent in this message. If we can't send
842 		 * a Forward TSN because previous non-abandoned messages
843 		 * have not been acked then we will defer the Forward TSN
844 		 * to sctp_rexmit() or sctp_cumack().
845 		 */
846 		if (SCTP_CHUNK_ISSENT(mp1)) {
847 			*error = sctp_check_abandoned_msg(sctp, meta);
848 			if (*error != 0) {
849 #ifdef	DEBUG
850 				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
851 				    sctp->sctp_xmit_tail));
852 #endif
853 				return (NULL);
854 			}
855 			goto next_msg;
856 		}
		/*
		 * The message expired before any of it was sent: unlink it
		 * from the transmit list.  The three cases are head of the
		 * list, end of the list, and in between; sctp_xmit_tail is
		 * moved off 'meta' if it pointed there.
		 */
857 		BUMP_LOCAL(sctp->sctp_prsctpdrop);
858 		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
859 		if (meta->b_prev == NULL) {
860 			ASSERT(sctp->sctp_xmit_head == meta);
861 			sctp->sctp_xmit_head = tmp_meta;
862 			if (sctp->sctp_xmit_tail == meta)
863 				sctp->sctp_xmit_tail = tmp_meta;
864 			meta->b_next = NULL;
865 			if (tmp_meta != NULL)
866 				tmp_meta->b_prev = NULL;
867 		} else if (meta->b_next == NULL) {
868 			if (sctp->sctp_xmit_tail == meta)
869 				sctp->sctp_xmit_tail = meta->b_prev;
870 			meta->b_prev->b_next = NULL;
871 			meta->b_prev = NULL;
872 		} else {
873 			meta->b_prev->b_next = tmp_meta;
874 			tmp_meta->b_prev = meta->b_prev;
875 			if (sctp->sctp_xmit_tail == meta)
876 				sctp->sctp_xmit_tail = tmp_meta;
877 			meta->b_prev = NULL;
878 			meta->b_next = NULL;
879 		}
880 		sctp->sctp_unsent -= msg_hdr->smh_msglen;
881 		/*
882 		 * Update ULP the amount of queued data, which is
883 		 * sent-unack'ed + unsent.
884 		 */
885 		if (!SCTP_IS_DETACHED(sctp)) {
886 			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
887 			    sctp->sctp_unacked + sctp->sctp_unsent);
888 		}
889 		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
890 next_msg:
891 		meta = tmp_meta;
892 	}
893 	/* chunkify, if needed */
894 	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
895 		ASSERT(sctp->sctp_unsent > 0);
896 		if (fp == NULL) {
			/*
			 * No preference from the caller: use the destination
			 * stored with the first unsent message if it is
			 * alive, else the current one.
			 */
897 			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
898 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
899 				fp = sctp->sctp_current;
900 		} else {
901 			/*
902 			 * If user specified destination, try to honor that.
903 			 */
			/*
			 * That is, do not chunkify now if the message asks
			 * for a different, live destination than 'fp'.
			 */
904 			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
905 			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
906 			    fp1 != fp) {
907 				goto chunk_done;
908 			}
909 		}
910 		sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend);
911 		if ((meta = sctp->sctp_xmit_tail) == NULL)
912 			goto chunk_done;
913 		/*
914 		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
915 		 * new chunk(s) to the tail, so we need to skip the
916 		 * sctp_xmit_tail, which would have already been processed.
917 		 * This could happen when there is unacked chunks, but
918 		 * nothing new to send.
919 		 * When sctp_chunkify() is called when the transmit queue
920 		 * is empty then we need to start from sctp_xmit_tail.
921 		 */
922 		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
923 #ifdef	DEBUG
924 			mp1 = sctp->sctp_xmit_tail->b_cont;
925 			while (mp1 != NULL) {
926 				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
927 				mp1 = mp1->b_next;
928 			}
929 #endif
930 			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
931 				goto chunk_done;
932 		}
		/* Rescan, starting at the newly chunked message(s). */
933 		goto chunkified;
934 	}
935 chunk_done:
936 #ifdef	DEBUG
937 	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
938 #endif
939 	return (NULL);
940 }
941 
942 void
943 sctp_fast_rexmit(sctp_t *sctp)
944 {
945 	mblk_t		*mp, *head;
946 	int		pktlen = 0;
947 	sctp_faddr_t	*fp = NULL;
948 	sctp_stack_t	*sctps = sctp->sctp_sctps;
949 
950 	ASSERT(sctp->sctp_xmit_head != NULL);
951 	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
952 	if (mp == NULL) {
953 		SCTP_KSTAT(sctps, sctp_fr_not_found);
954 		return;
955 	}
956 	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
957 		freemsg(mp);
958 		SCTP_KSTAT(sctps, sctp_fr_add_hdr);
959 		return;
960 	}
961 	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
962 		ipha_t *iph = (ipha_t *)head->b_rptr;
963 
964 		iph->ipha_fragment_offset_and_flags = 0;
965 	}
966 
967 	sctp_set_iplen(sctp, head);
968 	sctp_add_sendq(sctp, head);
969 	sctp->sctp_active = fp->lastactive = lbolt64;
970 }
971 
/*
 * Transmit queued DATA chunks, building at most num_pkt packets.
 *
 * Each pass of the outer loop finds the next chunk that can be sent
 * (asking sctp_get_msg_to_send() for a new message when the current one
 * has no more sendable chunks), picks a destination, prepends headers
 * with sctp_add_proto_hdr(), bundles further chunks while they fit in
 * both pathmax and cansend, and hands the packet to sctp_add_sendq().
 *
 * If sctp_ftsn != sctp_lastacked + 1, room for a SACK chunk (sacklen) is
 * reserved in the first packet, provided the data fits and the packet
 * can go to sctp_lastdata (or sctp_current if sctp_lastdata is not
 * alive).
 *
 * Whenever something prevents sending, control jumps to unsent_data,
 * which makes sure the timer of the chosen destination is running and
 * frees any partially built packet.
 */
void
sctp_output(sctp_t *sctp, uint_t num_pkt)
{
	mblk_t			*mp = NULL;
	mblk_t			*nmp;
	mblk_t			*head;
	mblk_t			*meta = sctp->sctp_xmit_tail;
	mblk_t			*fill = NULL;
	uint16_t 		chunklen;
	uint32_t 		cansend;
	int32_t			seglen;
	int32_t			xtralen;
	int32_t			sacklen;
	int32_t			pad = 0;
	int32_t			pathmax;
	int			extra;
	int64_t			now = lbolt64;
	sctp_faddr_t		*fp;
	sctp_faddr_t		*lfp;
	sctp_data_hdr_t		*sdc;
	int			error;
	boolean_t		notsent = B_TRUE;
	sctp_stack_t		*sctps = sctp->sctp_sctps;

	/* lfp is only assigned, and only used, when sacklen is non-zero. */
	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
		sacklen = 0;
	} else {
		/* send a SACK chunk */
		sacklen = sizeof (sctp_chunk_hdr_t) +
		    sizeof (sctp_sack_chunk_t) +
		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
		lfp = sctp->sctp_lastdata;
		ASSERT(lfp != NULL);
		if (lfp->state != SCTP_FADDRS_ALIVE)
			lfp = sctp->sctp_current;
	}

	/* We may send the smaller of the peer's rwnd and what is unsent. */
	cansend = sctp->sctp_frwnd;
	if (sctp->sctp_unsent < cansend)
		cansend = sctp->sctp_unsent;
	/*
	 * Hold back a small send (less than half a path MSS) while a small
	 * amount of data (less than one path MSS) is still unacked, unless
	 * sctp_ndelay is set.
	 */
	if ((cansend < sctp->sctp_current->sfa_pmss / 2) &&
	    sctp->sctp_unacked &&
	    (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) &&
	    !sctp->sctp_ndelay) {
		head = NULL;
		fp = sctp->sctp_current;
		goto unsent_data;
	}
	if (meta != NULL)
		mp = meta->b_cont;
	while (cansend > 0 && num_pkt-- != 0) {
		pad = 0;

		/*
		 * Find first segment eligible for transmit.
		 */
		while (mp != NULL) {
			if (SCTP_CHUNK_CANSEND(mp))
				break;
			mp = mp->b_next;
		}
		if (mp == NULL) {
			/* Current message exhausted; fetch the next one. */
			meta = sctp_get_msg_to_send(sctp, &mp,
			    meta == NULL ? NULL : meta->b_next, &error, sacklen,
			    cansend, NULL);
			if (error != 0 || meta == NULL) {
				head = NULL;
				fp = sctp->sctp_current;
				goto unsent_data;
			}
			sctp->sctp_xmit_tail =  meta;
		}

		/*
		 * seglen is the chunk length including the data chunk
		 * header; chunklen is the user data length only.
		 */
		sdc = (sctp_data_hdr_t *)mp->b_rptr;
		seglen = ntohs(sdc->sdh_len);
		xtralen = sizeof (*sdc);
		chunklen = seglen - xtralen;

		/*
		 * Check rwnd.
		 */
		if (chunklen > cansend) {
			head = NULL;
			fp = SCTP_CHUNK_DEST(meta);
			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
				fp = sctp->sctp_current;
			goto unsent_data;
		}
		/* extra is the padding needed to reach SCTP_ALIGN. */
		if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
			extra = SCTP_ALIGN - extra;

		/*
		 * Pick destination address, and check cwnd.
		 */
		if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) &&
		    (seglen + sacklen + extra <= lfp->sfa_pmss)) {
			/*
			 * Only include SACK chunk if it can be bundled
			 * with a data chunk, and sent to sctp_lastdata.
			 */
			pathmax = lfp->cwnd - lfp->suna;

			fp = lfp;
			if ((nmp = dupmsg(mp)) == NULL) {
				head = NULL;
				goto unsent_data;
			}
			SCTP_CHUNK_CLEAR_FLAGS(nmp);
			head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
			    &error);
			if (head == NULL) {
				/*
				 * If none of the source addresses are
				 * available (i.e error == EHOSTUNREACH),
				 * pretend we have sent the data. We will
				 * eventually time out trying to retransmit
				 * the data if the interface never comes up.
				 * If we have already sent some stuff (i.e.,
				 * notsent is B_FALSE) then we are fine, else
				 * just mark this packet as sent.
				 */
				if (notsent && error == EHOSTUNREACH) {
					SCTP_CHUNK_SENT(sctp, mp, sdc,
					    fp, chunklen, meta);
				}
				freemsg(nmp);
				SCTP_KSTAT(sctps, sctp_output_failed);
				goto unsent_data;
			}
			/* The SACK is accounted for; only bundle it once. */
			seglen += sacklen;
			xtralen += sacklen;
			sacklen = 0;
		} else {
			fp = SCTP_CHUNK_DEST(meta);
			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
				fp = sctp->sctp_current;
			/*
			 * If we haven't sent data to this destination for
			 * a while, do slow start again.
			 */
			if (now - fp->lastactive > fp->rto) {
				SET_CWND(fp, fp->sfa_pmss,
				    sctps->sctps_slow_start_after_idle);
			}

			pathmax = fp->cwnd - fp->suna;
			if (seglen + extra > pathmax) {
				head = NULL;
				goto unsent_data;
			}
			if ((nmp = dupmsg(mp)) == NULL) {
				head = NULL;
				goto unsent_data;
			}
			SCTP_CHUNK_CLEAR_FLAGS(nmp);
			head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
			if (head == NULL) {
				/*
				 * If none of the source addresses are
				 * available (i.e error == EHOSTUNREACH),
				 * pretend we have sent the data. We will
				 * eventually time out trying to retransmit
				 * the data if the interface never comes up.
				 * If we have already sent some stuff (i.e.,
				 * notsent is B_FALSE) then we are fine, else
				 * just mark this packet as sent.
				 */
				if (notsent && error == EHOSTUNREACH) {
					SCTP_CHUNK_SENT(sctp, mp, sdc,
					    fp, chunklen, meta);
				}
				freemsg(nmp);
				SCTP_KSTAT(sctps, sctp_output_failed);
				goto unsent_data;
			}
		}
		fp->lastactive = now;
		/* A single packet never carries more than one path MSS. */
		if (pathmax > fp->sfa_pmss)
			pathmax = fp->sfa_pmss;
		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
		mp = mp->b_next;

		/* Use this chunk to measure RTT? */
		if (sctp->sctp_out_time == 0) {
			sctp->sctp_out_time = now;
			sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1;
			ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn));
		}
		if (extra > 0) {
			fill = sctp_get_padding(sctp, extra);
			if (fill != NULL) {
				linkb(head, fill);
				pad = extra;
				seglen += extra;
			} else {
				/*
				 * No padding available: head is freed at
				 * unsent_data, but the chunk has already
				 * been marked sent above.
				 */
				goto unsent_data;
			}
		}
		/* See if we can bundle more. */
		while (seglen < pathmax) {
			int32_t		new_len;
			int32_t		new_xtralen;

			while (mp != NULL) {
				if (SCTP_CHUNK_CANSEND(mp))
					break;
				mp = mp->b_next;
			}
			if (mp == NULL) {
				meta = sctp_get_msg_to_send(sctp, &mp,
				    meta->b_next, &error, seglen,
				    (seglen - xtralen) >= cansend ? 0 :
				    cansend - seglen, fp);
				if (error != 0 || meta == NULL)
					break;
				sctp->sctp_xmit_tail =  meta;
			}
			ASSERT(mp != NULL);
			/*
			 * Don't bundle an unsent chunk whose message is
			 * bound to a different destination.
			 */
			if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
			    fp != SCTP_CHUNK_DEST(meta)) {
				break;
			}
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			chunklen = ntohs(sdc->sdh_len);
			if ((extra = chunklen  & (SCTP_ALIGN - 1)) != 0)
				extra = SCTP_ALIGN - extra;

			new_len = seglen + chunklen;
			new_xtralen = xtralen + sizeof (*sdc);
			chunklen -= sizeof (*sdc);

			/* Stop if rwnd or the per-packet limit is exceeded. */
			if (new_len - new_xtralen > cansend ||
			    new_len + extra > pathmax) {
				break;
			}
			if ((nmp = dupmsg(mp)) == NULL)
				break;
			if (extra > 0) {
				fill = sctp_get_padding(sctp, extra);
				if (fill != NULL) {
					pad += extra;
					new_len += extra;
					linkb(nmp, fill);
				} else {
					freemsg(nmp);
					break;
				}
			}
			seglen = new_len;
			xtralen = new_xtralen;
			SCTP_CHUNK_CLEAR_FLAGS(nmp);
			SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
			linkb(head, nmp);
			mp = mp->b_next;
		}
		if ((seglen > fp->sfa_pmss) && fp->isv4) {
			ipha_t *iph = (ipha_t *)head->b_rptr;

			/*
			 * Path MTU is different from what we thought it would
			 * be when we created chunks, or IP headers have grown.
			 * Need to clear the DF bit.
			 */
			iph->ipha_fragment_offset_and_flags = 0;
		}
		/* xmit segment */
		/* Only user data (no headers, SACK or padding) uses rwnd. */
		ASSERT(cansend >= seglen - pad - xtralen);
		cansend -= (seglen - pad - xtralen);
		dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
		    "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
		    seglen - xtralen, ntohl(sdc->sdh_tsn),
		    ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
		    cansend, sctp->sctp_lastack_rxd));
		sctp_set_iplen(sctp, head);
		sctp_add_sendq(sctp, head);
		/* arm rto timer (if not set) */
		if (!fp->timer_running)
			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
		notsent = B_FALSE;
	}
	sctp->sctp_active = now;
	return;
unsent_data:
	/* arm persist timer (if rto timer not set) */
	if (!fp->timer_running)
		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
	if (head != NULL)
		freemsg(head);
}
1261 
/*
 * The following two functions initialize and destroy the cache
 * associated with the sets used for PR-SCTP.
 *
 * sctp_ftsn_sets_init() creates the "sctp_ftsn_set_cache" kmem cache of
 * sctp_ftsn_set_t objects and stores it in sctp_kmem_ftsn_set_cache.
 */
void
sctp_ftsn_sets_init(void)
{
	sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
	    sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
	    NULL, 0);
}
1273 
/* Destroy the kmem cache that PR-SCTP ftsn sets are allocated from. */
void
sctp_ftsn_sets_fini(void)
{
	kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
}
1279 
1280 
1281 /* Free PR-SCTP sets */
1282 void
1283 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
1284 {
1285 	sctp_ftsn_set_t *p;
1286 
1287 	while (s != NULL) {
1288 		p = s->next;
1289 		s->next = NULL;
1290 		kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
1291 		s = p;
1292 	}
1293 }
1294 
1295 /*
1296  * Given a message meta block, meta, this routine creates or modifies
1297  * the set that will be used to generate a Forward TSN chunk. If the
1298  * entry for stream id, sid, for this message already exists, the
1299  * sequence number, ssn, is updated if it is greater than the existing
1300  * one. If an entry for this sid does not exist, one is created if
1301  * the size does not exceed fp->sfa_pmss. We return false in case
1302  * or an error.
1303  */
1304 boolean_t
1305 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
1306     uint_t *nsets, uint32_t *slen)
1307 {
1308 	sctp_ftsn_set_t		*p;
1309 	sctp_msg_hdr_t		*msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1310 	uint16_t		sid = htons(msg_hdr->smh_sid);
1311 	/* msg_hdr->smh_ssn is already in NBO */
1312 	uint16_t		ssn = msg_hdr->smh_ssn;
1313 
1314 	ASSERT(s != NULL && nsets != NULL);
1315 	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
1316 
1317 	if (*s == NULL) {
1318 		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
1319 		*s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
1320 		if (*s == NULL)
1321 			return (B_FALSE);
1322 		(*s)->ftsn_entries.ftsn_sid = sid;
1323 		(*s)->ftsn_entries.ftsn_ssn = ssn;
1324 		(*s)->next = NULL;
1325 		*nsets = 1;
1326 		*slen += sizeof (uint32_t);
1327 		return (B_TRUE);
1328 	}
1329 	for (p = *s; p->next != NULL; p = p->next) {
1330 		if (p->ftsn_entries.ftsn_sid == sid) {
1331 			if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1332 				p->ftsn_entries.ftsn_ssn = ssn;
1333 			return (B_TRUE);
1334 		}
1335 	}
1336 	/* the last one */
1337 	if (p->ftsn_entries.ftsn_sid == sid) {
1338 		if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1339 			p->ftsn_entries.ftsn_ssn = ssn;
1340 	} else {
1341 		if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss)
1342 			return (B_FALSE);
1343 		p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
1344 		    KM_NOSLEEP);
1345 		if (p->next == NULL)
1346 			return (B_FALSE);
1347 		p = p->next;
1348 		p->ftsn_entries.ftsn_sid = sid;
1349 		p->ftsn_entries.ftsn_ssn = ssn;
1350 		p->next = NULL;
1351 		(*nsets)++;
1352 		*slen += sizeof (uint32_t);
1353 	}
1354 	return (B_TRUE);
1355 }
1356 
1357 /*
1358  * Given a set of stream id - sequence number pairs, this routing creates
1359  * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
1360  * for the chunk is obtained from sctp->sctp_adv_pap. The caller
1361  * will add the IP/SCTP header.
1362  */
1363 mblk_t *
1364 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
1365     uint_t nsets, uint32_t seglen)
1366 {
1367 	mblk_t			*ftsn_mp;
1368 	sctp_chunk_hdr_t	*ch_hdr;
1369 	uint32_t		*advtsn;
1370 	uint16_t		schlen;
1371 	size_t			xtralen;
1372 	ftsn_entry_t		*ftsn_entry;
1373 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1374 
1375 	seglen += sizeof (sctp_chunk_hdr_t);
1376 	if (fp->isv4)
1377 		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra;
1378 	else
1379 		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra;
1380 	ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp));
1381 	if (ftsn_mp == NULL)
1382 		return (NULL);
1383 	ftsn_mp->b_rptr += xtralen;
1384 	ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
1385 
1386 	ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
1387 	ch_hdr->sch_id = CHUNK_FORWARD_TSN;
1388 	ch_hdr->sch_flags = 0;
1389 	/*
1390 	 * The cast here should not be an issue since seglen is
1391 	 * the length of the Forward TSN chunk.
1392 	 */
1393 	schlen = (uint16_t)seglen;
1394 	U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
1395 
1396 	advtsn = (uint32_t *)(ch_hdr + 1);
1397 	U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
1398 	ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
1399 	while (nsets > 0) {
1400 		ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
1401 		ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
1402 		ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
1403 		ftsn_entry++;
1404 		sets = sets->next;
1405 		nsets--;
1406 	}
1407 	return (ftsn_mp);
1408 }
1409 
/*
 * Given a starting message, the routine steps through all the
 * messages whose first examined chunk has a TSN at or before
 * sctp->sctp_adv_pap and creates ftsn sets. The ftsn sets are then
 * used to create a Forward TSN chunk. All the messages that have chunks
 * included in the ftsn sets are flagged abandoned. If a message is
 * partially sent and is deemed abandoned, all remaining unsent chunks
 * are marked abandoned and are deducted from sctp_unsent.
 *
 *	meta, mp	first message and the chunk in it to start from.
 *	nmp		out: the packet (Forward TSN chunk with protocol
 *			header prepended), or NULL if building it failed.
 *			*nmp is left untouched if no message was walked.
 *	fp		destination; replaced locally by sctp_lastdata
 *			when a SACK is piggybacked.
 *	seglen		out: starts at sizeof (uint32_t), grows with each
 *			set added and finally by the piggybacked SACK
 *			length.
 *
 * The work is done in two passes: the first pass only collects the
 * sets, the second one (after the packet has been built successfully)
 * updates the chunk and message state.
 */
void
sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
    sctp_faddr_t *fp, uint32_t *seglen)
{
	mblk_t		*mp1 = mp;
	mblk_t		*mp_head = mp;
	mblk_t		*meta_head = meta;
	mblk_t		*head;
	sctp_ftsn_set_t	*sets = NULL;
	uint_t		nsets = 0;
	uint16_t	clen;
	sctp_data_hdr_t	*sdc;
	uint32_t	sacklen;
	uint32_t	adv_pap = sctp->sctp_adv_pap;
	uint32_t	unsent = 0;
	boolean_t	ubit;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	*seglen = sizeof (uint32_t);

	/* First pass: collect one sid/ssn set per ordered stream. */
	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		/*
		 * Skip adding FTSN sets for un-ordered messages as they do
		 * not have SSNs.
		 */
		ubit = SCTP_DATA_GET_UBIT(sdc);
		if (!ubit &&
		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
			/*
			 * Could not add this message.  Pull the advanced
			 * peer ack point back to the last TSN that was
			 * covered and send what has been collected so far.
			 */
			meta = NULL;
			sctp->sctp_adv_pap = adv_pap;
			goto ftsn_done;
		}
		/* Track the last sent TSN of the messages covered so far. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			adv_pap = ntohl(sdc->sdh_tsn);
			mp1 = mp1->b_next;
		}
		meta = meta->b_next;
		if (meta != NULL) {
			mp1 = meta->b_cont;
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
ftsn_done:
	/*
	 * Can't compare with sets == NULL, since we don't add any
	 * sets for un-ordered messages.
	 */
	if (meta == meta_head)
		return;
	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
	sctp_free_ftsn_set(sets);
	if (*nmp == NULL)
		return;
	/* Find out if a SACK should be piggybacked on the Forward TSN. */
	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
		sacklen = 0;
	} else {
		sacklen = sizeof (sctp_chunk_hdr_t) +
		    sizeof (sctp_sack_chunk_t) +
		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
			/* piggybacked SACK doesn't fit */
			sacklen = 0;
		} else {
			fp = sctp->sctp_lastdata;
		}
	}
	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
	if (head == NULL) {
		freemsg(*nmp);
		*nmp = NULL;
		SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
		return;
	}
	*seglen += sacklen;
	*nmp = head;

	/*
	 * XXXNeed to optimise this, the reason it is done here is so
	 * that we don't have to undo in case of failure.
	 */
	/* Second pass: flag the covered messages and chunks. */
	mp1 = mp_head;
	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
	while (meta_head != NULL &&
	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
		if (!SCTP_IS_MSG_ABANDONED(meta_head))
			SCTP_MSG_SET_ABANDONED(meta_head);
		/* Sent but unacked chunks go through SCTP_CHUNK_SENT again. */
		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ISACKED(mp1)) {
				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
				    meta_head);
			}
			mp1 = mp1->b_next;
		}
		/* The rest of the message was never sent: abandon it. */
		while (mp1 != NULL) {
			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
			if (!SCTP_CHUNK_ABANDONED(mp1)) {
				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
				SCTP_ABANDON_CHUNK(mp1);
			}
			mp1 = mp1->b_next;
		}
		meta_head = meta_head->b_next;
		if (meta_head != NULL) {
			mp1 = meta_head->b_cont;
			if (!SCTP_CHUNK_ISSENT(mp1))
				break;
			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
		}
	}
	if (unsent > 0) {
		ASSERT(sctp->sctp_unsent >= unsent);
		sctp->sctp_unsent -= unsent;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp)) {
			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
			    sctp->sctp_unacked + sctp->sctp_unsent);
		}
	}
}
1549 
/*
 * This function steps through messages starting at meta and checks if
 * the message is abandoned. It stops when it hits an unsent chunk or
 * a message that has all its chunks acked. This is the only place
 * where the sctp_adv_pap is moved forward to indicate abandoned
 * messages.
 *
 *	meta	message to start from.
 *	mp	chunk within meta to start from; must not be NULL and
 *		its TSN must be beyond sctp_lastack_rxd.
 *
 * On return sctp->sctp_adv_pap holds the TSN of the last sent chunk that
 * was walked, or is unchanged if the first message is neither abandoned
 * nor due to be abandoned.
 */
void
sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
{
	uint32_t	tsn = sctp->sctp_adv_pap;
	sctp_data_hdr_t	*sdc;
	sctp_msg_hdr_t	*msg_hdr;

	ASSERT(mp != NULL);
	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
	/* Nothing to advance over if the first message is still live. */
	if (!SCTP_IS_MSG_ABANDONED(meta) &&
	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
		return;
	}
	while (meta != NULL) {
		/* Advance tsn over the sent chunks of this message. */
		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			tsn = ntohl(sdc->sdh_tsn);
			mp = mp->b_next;
		}
		/* Stopped on an unsent chunk: go no further. */
		if (mp != NULL)
			break;
		/*
		 * We continue checking for successive messages only if there
		 * is a chunk marked for retransmission. Else, we might
		 * end up sending FTSN prematurely for chunks that have been
		 * sent, but not yet acked.
		 */
		if ((meta = meta->b_next) != NULL) {
			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
			if (!SCTP_IS_MSG_ABANDONED(meta) &&
			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
				break;
			}
			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
				if (!SCTP_CHUNK_ISSENT(mp)) {
					sctp->sctp_adv_pap = tsn;
					return;
				}
				if (SCTP_CHUNK_WANT_REXMIT(mp))
					break;
			}
			/* No chunk wants retransmission; stop here. */
			if (mp == NULL)
				break;
		}
	}
	sctp->sctp_adv_pap = tsn;
}
1606 
1607 
/*
 * Determine if we should bundle a data chunk with the chunk being
 * retransmitted.  The chunk must not be abandoned, and we bundle if
 *
 * - the chunk is sent to the same destination and unack'ed.
 *
 * OR
 *
 * - the chunk is unsent, i.e. new data, or has
 *   SCTP_CHUNK_FLAG_REXMIT set (the flag test below is true for
 *   anything but "SENT without REXMIT").
 *
 * Note that mp is evaluated several times.
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED((mp)) && 				\
	((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) &&	\
	!SCTP_CHUNK_ISACKED(mp))) ||					\
	(((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \
	SCTP_CHUNK_FLAG_SENT)))
1624 
1625 /*
1626  * Retransmit first segment which hasn't been acked with cumtsn or send
1627  * a Forward TSN chunk, if appropriate.
1628  */
1629 void
1630 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
1631 {
1632 	mblk_t		*mp;
1633 	mblk_t		*nmp = NULL;
1634 	mblk_t		*head;
1635 	mblk_t		*meta = sctp->sctp_xmit_head;
1636 	mblk_t		*fill;
1637 	uint32_t	seglen = 0;
1638 	uint32_t	sacklen;
1639 	uint16_t	chunklen;
1640 	int		extra;
1641 	sctp_data_hdr_t	*sdc;
1642 	sctp_faddr_t	*fp;
1643 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1644 	boolean_t	do_ftsn = B_FALSE;
1645 	boolean_t	ftsn_check = B_TRUE;
1646 	uint32_t	first_ua_tsn;
1647 	sctp_msg_hdr_t	*mhdr;
1648 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1649 
1650 	while (meta != NULL) {
1651 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1652 			uint32_t	tsn;
1653 
1654 			if (!SCTP_CHUNK_ISSENT(mp))
1655 				goto window_probe;
1656 			/*
1657 			 * We break in the following cases -
1658 			 *
1659 			 *	if the advanced peer ack point includes the next
1660 			 *	chunk to be retransmited - possibly the Forward
1661 			 * 	TSN was lost.
1662 			 *
1663 			 *	if we are PRSCTP aware and the next chunk to be
1664 			 *	retransmitted is now abandoned
1665 			 *
1666 			 *	if the next chunk to be retransmitted is for
1667 			 *	the dest on which the timer went off. (this
1668 			 *	message is not abandoned).
1669 			 *
1670 			 * We check for Forward TSN only for the first
1671 			 * eligible chunk to be retransmitted. The reason
1672 			 * being if the first eligible chunk is skipped (say
1673 			 * it was sent to a destination other than oldfp)
1674 			 * then we cannot advance the cum TSN via Forward
1675 			 * TSN chunk.
1676 			 *
1677 			 * Also, ftsn_check is B_TRUE only for the first
1678 			 * eligible chunk, it  will be B_FALSE for all
1679 			 * subsequent candidate messages for retransmission.
1680 			 */
1681 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1682 			tsn = ntohl(sdc->sdh_tsn);
1683 			if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
1684 				if (sctp->sctp_prsctp_aware && ftsn_check) {
1685 					if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
1686 						ASSERT(sctp->sctp_prsctp_aware);
1687 						do_ftsn = B_TRUE;
1688 						goto out;
1689 					} else {
1690 						sctp_check_adv_ack_pt(sctp,
1691 						    meta, mp);
1692 						if (SEQ_GT(sctp->sctp_adv_pap,
1693 						    adv_pap)) {
1694 							do_ftsn = B_TRUE;
1695 							goto out;
1696 						}
1697 					}
1698 					ftsn_check = B_FALSE;
1699 				}
1700 				if (SCTP_CHUNK_DEST(mp) == oldfp)
1701 					goto out;
1702 			}
1703 		}
1704 		meta = meta->b_next;
1705 		if (meta != NULL && sctp->sctp_prsctp_aware) {
1706 			mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1707 
1708 			while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
1709 			    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
1710 				meta = meta->b_next;
1711 			}
1712 		}
1713 	}
1714 window_probe:
1715 	/*
1716 	 * Retransmit fired for a destination which didn't have
1717 	 * any unacked data pending.
1718 	 */
1719 	if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) {
1720 		/*
1721 		 * Send a window probe. Inflate frwnd to allow
1722 		 * sending one segment.
1723 		 */
1724 		if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc)))
1725 			sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc);
1726 
1727 		/* next TSN to send */
1728 		sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
1729 
1730 		/*
1731 		 * The above sctp_frwnd adjustment is coarse.  The "changed"
1732 		 * sctp_frwnd may allow us to send more than 1 packet.  So
1733 		 * tell sctp_output() to send only 1 packet.
1734 		 */
1735 		sctp_output(sctp, 1);
1736 
1737 		/* Last sent TSN */
1738 		sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1739 		ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
1740 		sctp->sctp_zero_win_probe = B_TRUE;
1741 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
1742 	}
1743 	return;
1744 out:
1745 	/*
1746 	 * After a time out, assume that everything has left the network.  So
1747 	 * we can clear rxt_unacked for the original peer address.
1748 	 */
1749 	oldfp->rxt_unacked = 0;
1750 
1751 	/*
1752 	 * If we were probing for zero window, don't adjust retransmission
1753 	 * variables, but the timer is still backed off.
1754 	 */
1755 	if (sctp->sctp_zero_win_probe) {
1756 		mblk_t	*pkt;
1757 		uint_t	pkt_len;
1758 
1759 		/*
1760 		 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn
1761 		 * and sctp_rxt_maxtsn will specify the ZWP packet.
1762 		 */
1763 		fp = oldfp;
1764 		if (oldfp->state != SCTP_FADDRS_ALIVE)
1765 			fp = sctp_rotate_faddr(sctp, oldfp);
1766 		pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
1767 		if (pkt != NULL) {
1768 			ASSERT(pkt_len <= fp->sfa_pmss);
1769 			sctp_set_iplen(sctp, pkt);
1770 			sctp_add_sendq(sctp, pkt);
1771 		} else {
1772 			SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
1773 		}
1774 
1775 		/*
1776 		 * The strikes will be clear by sctp_faddr_alive() when the
1777 		 * other side sends us an ack.
1778 		 */
1779 		oldfp->strikes++;
1780 		sctp->sctp_strikes++;
1781 
1782 		SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
1783 		if (oldfp != fp && oldfp->suna != 0)
1784 			SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto);
1785 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1786 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
1787 		return;
1788 	}
1789 
1790 	/*
1791 	 * Enter slowstart for this destination
1792 	 */
1793 	oldfp->ssthresh = oldfp->cwnd / 2;
1794 	if (oldfp->ssthresh < 2 * oldfp->sfa_pmss)
1795 		oldfp->ssthresh = 2 * oldfp->sfa_pmss;
1796 	oldfp->cwnd = oldfp->sfa_pmss;
1797 	oldfp->pba = 0;
1798 	fp = sctp_rotate_faddr(sctp, oldfp);
1799 	ASSERT(fp != NULL);
1800 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1801 
1802 	first_ua_tsn = ntohl(sdc->sdh_tsn);
1803 	if (do_ftsn) {
1804 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
1805 		if (nmp == NULL) {
1806 			sctp->sctp_adv_pap = adv_pap;
1807 			goto restart_timer;
1808 		}
1809 		head = nmp;
1810 		/*
1811 		 * Move to the next unabandoned chunk. XXXCheck if meta will
1812 		 * always be marked abandoned.
1813 		 */
1814 		while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta))
1815 			meta = meta->b_next;
1816 		if (meta != NULL)
1817 			mp = mp->b_cont;
1818 		else
1819 			mp = NULL;
1820 		goto try_bundle;
1821 	}
1822 	seglen = ntohs(sdc->sdh_len);
1823 	chunklen = seglen - sizeof (*sdc);
1824 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1825 		extra = SCTP_ALIGN - extra;
1826 
1827 	/* Find out if we need to piggyback SACK. */
1828 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1829 		sacklen = 0;
1830 	} else {
1831 		sacklen = sizeof (sctp_chunk_hdr_t) +
1832 		    sizeof (sctp_sack_chunk_t) +
1833 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1834 		if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
1835 			/* piggybacked SACK doesn't fit */
1836 			sacklen = 0;
1837 		} else {
1838 			/*
1839 			 * OK, we have room to send SACK back.  But we
1840 			 * should send it back to the last fp where we
1841 			 * receive data from, unless sctp_lastdata equals
1842 			 * oldfp, then we should probably not send it
1843 			 * back to that fp.  Also we should check that
1844 			 * the fp is alive.
1845 			 */
1846 			if (sctp->sctp_lastdata != oldfp &&
1847 			    sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) {
1848 				fp = sctp->sctp_lastdata;
1849 			}
1850 		}
1851 	}
1852 
1853 	/*
1854 	 * Cancel RTT measurement if the retransmitted TSN is before the
1855 	 * TSN used for timimg.
1856 	 */
1857 	if (sctp->sctp_out_time != 0 &&
1858 	    SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
1859 		sctp->sctp_out_time = 0;
1860 	}
1861 	/* Clear the counter as the RTT calculation may be off. */
1862 	fp->rtt_updates = 0;
1863 	oldfp->rtt_updates = 0;
1864 
1865 	/*
1866 	 * After a timeout, we should change the current faddr so that
1867 	 * new chunks will be sent to the alternate address.
1868 	 */
1869 	sctp_set_faddr_current(sctp, fp);
1870 
1871 	nmp = dupmsg(mp);
1872 	if (nmp == NULL)
1873 		goto restart_timer;
1874 	if (extra > 0) {
1875 		fill = sctp_get_padding(sctp, extra);
1876 		if (fill != NULL) {
1877 			linkb(nmp, fill);
1878 			seglen += extra;
1879 		} else {
1880 			freemsg(nmp);
1881 			goto restart_timer;
1882 		}
1883 	}
1884 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
1885 	head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
1886 	if (head == NULL) {
1887 		freemsg(nmp);
1888 		SCTP_KSTAT(sctps, sctp_rexmit_failed);
1889 		goto restart_timer;
1890 	}
1891 	seglen += sacklen;
1892 
1893 	SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1894 
1895 	mp = mp->b_next;
1896 
1897 try_bundle:
1898 	/* We can at least and at most send 1 packet at timeout. */
1899 	while (seglen < fp->sfa_pmss) {
1900 		int32_t new_len;
1901 
1902 		/* Go through the list to find more chunks to be bundled. */
1903 		while (mp != NULL) {
1904 			/* Check if the chunk can be bundled. */
1905 			if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp))
1906 				break;
1907 			mp = mp->b_next;
1908 		}
1909 		/* Go to the next message. */
1910 		if (mp == NULL) {
1911 			for (meta = meta->b_next; meta != NULL;
1912 			    meta = meta->b_next) {
1913 				mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1914 
1915 				if (SCTP_IS_MSG_ABANDONED(meta) ||
1916 				    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
1917 				    sctp)) {
1918 					continue;
1919 				}
1920 
1921 				mp = meta->b_cont;
1922 				goto try_bundle;
1923 			}
1924 			/* No more chunk to be bundled. */
1925 			break;
1926 		}
1927 
1928 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1929 		new_len = ntohs(sdc->sdh_len);
1930 		chunklen = new_len - sizeof (*sdc);
1931 
1932 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
1933 			extra = SCTP_ALIGN - extra;
1934 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
1935 			break;
1936 		if ((nmp = dupmsg(mp)) == NULL)
1937 			break;
1938 
1939 		if (extra > 0) {
1940 			fill = sctp_get_padding(sctp, extra);
1941 			if (fill != NULL) {
1942 				linkb(nmp, fill);
1943 			} else {
1944 				freemsg(nmp);
1945 				break;
1946 			}
1947 		}
1948 		linkb(head, nmp);
1949 
1950 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
1951 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1952 
1953 		seglen = new_len;
1954 		mp = mp->b_next;
1955 	}
1956 done_bundle:
1957 	if ((seglen > fp->sfa_pmss) && fp->isv4) {
1958 		ipha_t *iph = (ipha_t *)head->b_rptr;
1959 
1960 		/*
1961 		 * Path MTU is different from path we thought it would
1962 		 * be when we created chunks, or IP headers have grown.
1963 		 * Need to clear the DF bit.
1964 		 */
1965 		iph->ipha_fragment_offset_and_flags = 0;
1966 	}
1967 	fp->rxt_unacked += seglen;
1968 
1969 	dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
1970 	    "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
1971 	    seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
1972 	    (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
1973 
1974 	sctp->sctp_rexmitting = B_TRUE;
1975 	sctp->sctp_rxt_nxttsn = first_ua_tsn;
1976 	sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1977 	sctp_set_iplen(sctp, head);
1978 	sctp_add_sendq(sctp, head);
1979 
1980 	/*
1981 	 * Restart the oldfp timer with exponential backoff and
1982 	 * the new fp timer for the retransmitted chunks.
1983 	 */
1984 restart_timer:
1985 	oldfp->strikes++;
1986 	sctp->sctp_strikes++;
1987 	SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
1988 	/*
1989 	 * If there is still some data in the oldfp, restart the
1990 	 * retransmission timer.  If there is no data, the heartbeat will
1991 	 * continue to run so it will do its job in checking the reachability
1992 	 * of the oldfp.
1993 	 */
1994 	if (oldfp != fp && oldfp->suna != 0)
1995 		SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto);
1996 
1997 	/*
1998 	 * Should we restart the timer of the new fp?  If there is
1999 	 * outstanding data to the new fp, the timer should be
2000 	 * running already.  So restarting it means that the timer
2001 	 * will fire later for those outstanding data.  But if
2002 	 * we don't restart it, the timer will fire too early for the
2003 	 * just retransmitted chunks to the new fp.  The reason is that we
2004 	 * don't keep a timestamp on when a chunk is retransmitted.
2005 	 * So when the timer fires, it will just search for the
2006 	 * chunk with the earliest TSN sent to new fp.  This probably
2007 	 * is the chunk we just retransmitted.  So for now, let's
2008 	 * be conservative and restart the timer of the new fp.
2009 	 */
2010 	SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
2011 
2012 	sctp->sctp_active = lbolt64;
2013 }
2014 
2015 /*
2016  * The SCTP write put procedure called from IP.
2017  */
2018 void
2019 sctp_wput(queue_t *q, mblk_t *mp)
2020 {
2021 	uchar_t		*rptr;
2022 	t_scalar_t	type;
2023 
2024 	switch (mp->b_datap->db_type) {
2025 	case M_IOCTL:
2026 		sctp_wput_ioctl(q, mp);
2027 		break;
2028 	case M_DATA:
2029 		/* Should be handled in sctp_output() */
2030 		ASSERT(0);
2031 		freemsg(mp);
2032 		break;
2033 	case M_PROTO:
2034 	case M_PCPROTO:
2035 		rptr = mp->b_rptr;
2036 		if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) {
2037 			type = ((union T_primitives *)rptr)->type;
2038 			/*
2039 			 * There is no "standard" way on how to respond
2040 			 * to T_CAPABILITY_REQ if a module does not
2041 			 * understand it.  And the current TI mod
2042 			 * has problems handling an error ack.  So we
2043 			 * catch the request here and reply with a response
2044 			 * which the TI mod knows how to respond to.
2045 			 */
2046 			switch (type) {
2047 			case T_CAPABILITY_REQ:
2048 				(void) putnextctl1(RD(q), M_ERROR, EPROTO);
2049 				break;
2050 			default:
2051 				if ((mp = mi_tpi_err_ack_alloc(mp,
2052 				    TNOTSUPPORT, 0)) != NULL) {
2053 					qreply(q, mp);
2054 					return;
2055 				}
2056 			}
2057 		}
2058 		/* FALLTHRU */
2059 	default:
2060 		freemsg(mp);
2061 		return;
2062 	}
2063 }
2064 
2065 /*
2066  * This function is called by sctp_ss_rexmit() to create a packet
2067  * to be retransmitted to the given fp.  The given meta and mp
2068  * parameters are respectively the sctp_msg_hdr_t and the mblk of the
2069  * first chunk to be retransmitted.  This is also called when we want
2070  * to retransmit a zero window probe from sctp_rexmit() or when we
2071  * want to retransmit the zero window probe after the window has
2072  * opened from sctp_got_sack().
2073  */
2074 mblk_t *
2075 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
2076     uint_t *packet_len)
2077 {
2078 	uint32_t	seglen = 0;
2079 	uint16_t	chunklen;
2080 	int		extra;
2081 	mblk_t		*nmp;
2082 	mblk_t		*head;
2083 	mblk_t		*fill;
2084 	sctp_data_hdr_t	*sdc;
2085 	sctp_msg_hdr_t	*mhdr;
2086 
2087 	sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2088 	seglen = ntohs(sdc->sdh_len);
2089 	chunklen = seglen - sizeof (*sdc);
2090 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
2091 		extra = SCTP_ALIGN - extra;
2092 
2093 	nmp = dupmsg(*mp);
2094 	if (nmp == NULL)
2095 		return (NULL);
2096 	if (extra > 0) {
2097 		fill = sctp_get_padding(sctp, extra);
2098 		if (fill != NULL) {
2099 			linkb(nmp, fill);
2100 			seglen += extra;
2101 		} else {
2102 			freemsg(nmp);
2103 			return (NULL);
2104 		}
2105 	}
2106 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
2107 	head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
2108 	if (head == NULL) {
2109 		freemsg(nmp);
2110 		return (NULL);
2111 	}
2112 	SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2113 	/*
2114 	 * Don't update the TSN if we are doing a Zero Win Probe.
2115 	 */
2116 	if (!sctp->sctp_zero_win_probe)
2117 		sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2118 	*mp = (*mp)->b_next;
2119 
2120 try_bundle:
2121 	while (seglen < fp->sfa_pmss) {
2122 		int32_t new_len;
2123 
2124 		/*
2125 		 * Go through the list to find more chunks to be bundled.
2126 		 * We should only retransmit sent by unack'ed chunks.  Since
2127 		 * they were sent before, the peer's receive window should
2128 		 * be able to receive them.
2129 		 */
2130 		while (*mp != NULL) {
2131 			/* Check if the chunk can be bundled. */
2132 			if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
2133 				break;
2134 			*mp = (*mp)->b_next;
2135 		}
2136 		/* Go to the next message. */
2137 		if (*mp == NULL) {
2138 			for (*meta = (*meta)->b_next; *meta != NULL;
2139 			    *meta = (*meta)->b_next) {
2140 				mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;
2141 
2142 				if (SCTP_IS_MSG_ABANDONED(*meta) ||
2143 				    SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
2144 				    sctp)) {
2145 					continue;
2146 				}
2147 
2148 				*mp = (*meta)->b_cont;
2149 				goto try_bundle;
2150 			}
2151 			/* No more chunk to be bundled. */
2152 			break;
2153 		}
2154 
2155 		sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2156 		/* Don't bundle chunks beyond sctp_rxt_maxtsn. */
2157 		if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
2158 			break;
2159 		new_len = ntohs(sdc->sdh_len);
2160 		chunklen = new_len - sizeof (*sdc);
2161 
2162 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
2163 			extra = SCTP_ALIGN - extra;
2164 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
2165 			break;
2166 		if ((nmp = dupmsg(*mp)) == NULL)
2167 			break;
2168 
2169 		if (extra > 0) {
2170 			fill = sctp_get_padding(sctp, extra);
2171 			if (fill != NULL) {
2172 				linkb(nmp, fill);
2173 			} else {
2174 				freemsg(nmp);
2175 				break;
2176 			}
2177 		}
2178 		linkb(head, nmp);
2179 
2180 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
2181 		SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2182 		/*
2183 		 * Don't update the TSN if we are doing a Zero Win Probe.
2184 		 */
2185 		if (!sctp->sctp_zero_win_probe)
2186 			sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2187 
2188 		seglen = new_len;
2189 		*mp = (*mp)->b_next;
2190 	}
2191 	*packet_len = seglen;
2192 	fp->rxt_unacked += seglen;
2193 	return (head);
2194 }
2195 
2196 /*
2197  * sctp_ss_rexmit() is called when we get a SACK after a timeout which
2198  * advances the cum_tsn but the cum_tsn is still less than what we have sent
2199  * (sctp_rxt_maxtsn) at the time of the timeout.  This SACK is a "partial"
2200  * SACK.  We retransmit unacked chunks without having to wait for another
2201  * timeout.  The rationale is that the SACK should not be "partial" if all the
2202  * lost chunks have been retransmitted.  Since the SACK is "partial,"
2203  * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
2204  * be missing.  It is better for us to retransmit them now instead
2205  * of waiting for a timeout.
2206  */
2207 void
2208 sctp_ss_rexmit(sctp_t *sctp)
2209 {
2210 	mblk_t		*meta;
2211 	mblk_t		*mp;
2212 	mblk_t		*pkt;
2213 	sctp_faddr_t	*fp;
2214 	uint_t		pkt_len;
2215 	uint32_t	tot_wnd;
2216 	sctp_data_hdr_t	*sdc;
2217 	int		burst;
2218 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2219 
2220 	ASSERT(!sctp->sctp_zero_win_probe);
2221 
2222 	/*
2223 	 * If the last cum ack is smaller than what we have just
2224 	 * retransmitted, simply return.
2225 	 */
2226 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
2227 		sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
2228 	else
2229 		return;
2230 	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));
2231 
2232 	/*
2233 	 * After a timer fires, sctp_current should be set to the new
2234 	 * fp where the retransmitted chunks are sent.
2235 	 */
2236 	fp = sctp->sctp_current;
2237 
2238 	/*
2239 	 * Since we are retransmitting, we only need to use cwnd to determine
2240 	 * how much we can send as we were allowed (by peer's receive window)
2241 	 * to send those retransmitted chunks previously when they are first
2242 	 * sent.  If we record how much we have retransmitted but
2243 	 * unacknowledged using rxt_unacked, then the amount we can now send
2244 	 * is equal to cwnd minus rxt_unacked.
2245 	 *
2246 	 * The field rxt_unacked is incremented when we retransmit a packet
2247 	 * and decremented when we got a SACK acknowledging something.  And
2248 	 * it is reset when the retransmission timer fires as we assume that
2249 	 * all packets have left the network after a timeout.  If this
2250 	 * assumption is not true, it means that after a timeout, we can
2251 	 * get a SACK acknowledging more than rxt_unacked (its value only
2252 	 * contains what is retransmitted when the timer fires).  So
2253 	 * rxt_unacked will become very big (it is an unsiged int so going
2254 	 * negative means that the value is huge).  This is the reason we
2255 	 * always send at least 1 MSS bytes.
2256 	 *
2257 	 * The reason why we do not have an accurate count is that we
2258 	 * only know how many packets are outstanding (using the TSN numbers).
2259 	 * But we do not know how many bytes those packets contain.  To
2260 	 * have an accurate count, we need to walk through the send list.
2261 	 * As it is not really important to have an accurate count during
2262 	 * retransmission, we skip this walk to save some time.  This should
2263 	 * not make the retransmission too aggressive to cause congestion.
2264 	 */
2265 	if (fp->cwnd <= fp->rxt_unacked)
2266 		tot_wnd = fp->sfa_pmss;
2267 	else
2268 		tot_wnd = fp->cwnd - fp->rxt_unacked;
2269 
2270 	/* Find the first unack'ed chunk */
2271 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
2272 		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
2273 
2274 		if (SCTP_IS_MSG_ABANDONED(meta) ||
2275 		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
2276 			continue;
2277 		}
2278 
2279 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2280 			/* Again, this may not be possible */
2281 			if (!SCTP_CHUNK_ISSENT(mp))
2282 				return;
2283 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2284 			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
2285 				goto found_msg;
2286 		}
2287 	}
2288 
2289 	/* Everything is abandoned... */
2290 	return;
2291 
2292 found_msg:
2293 	if (!fp->timer_running)
2294 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
2295 	pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
2296 	if (pkt == NULL) {
2297 		SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2298 		return;
2299 	}
2300 	if ((pkt_len > fp->sfa_pmss) && fp->isv4) {
2301 		ipha_t	*iph = (ipha_t *)pkt->b_rptr;
2302 
2303 		/*
2304 		 * Path MTU is different from path we thought it would
2305 		 * be when we created chunks, or IP headers have grown.
2306 		 *  Need to clear the DF bit.
2307 		 */
2308 		iph->ipha_fragment_offset_and_flags = 0;
2309 	}
2310 	sctp_set_iplen(sctp, pkt);
2311 	sctp_add_sendq(sctp, pkt);
2312 
2313 	/* Check and see if there is more chunk to be retransmitted. */
2314 	if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss ||
2315 	    meta == NULL)
2316 		return;
2317 	if (mp == NULL)
2318 		meta = meta->b_next;
2319 	if (meta == NULL)
2320 		return;
2321 
2322 	/* Retransmit another packet if the window allows. */
2323 	for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1;
2324 	    meta != NULL && burst > 0; meta = meta->b_next, burst--) {
2325 		if (mp == NULL)
2326 			mp = meta->b_cont;
2327 		for (; mp != NULL; mp = mp->b_next) {
2328 			/* Again, this may not be possible */
2329 			if (!SCTP_CHUNK_ISSENT(mp))
2330 				return;
2331 			if (!SCTP_CHUNK_ISACKED(mp))
2332 				goto found_msg;
2333 		}
2334 	}
2335 }
2336