xref: /titanic_51/usr/src/uts/common/inet/sctp/sctp_output.c (revision 45916cd2fec6e79bca5dee0421bd39e3c2910d1e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/stream.h>
31 #include <sys/cmn_err.h>
32 #define	_SUN_TPI_VERSION 2
33 #include <sys/tihdr.h>
34 #include <sys/socket.h>
35 #include <sys/stropts.h>
36 #include <sys/strsun.h>
37 #include <sys/strsubr.h>
38 #include <sys/socketvar.h>
39 /* swilly code in sys/socketvar.h turns off DEBUG */
40 #ifdef __lint
41 #define	DEBUG
42 #endif
43 
44 #include <inet/common.h>
45 #include <inet/mi.h>
46 #include <inet/ip.h>
47 #include <inet/ip6.h>
48 #include <inet/sctp_ip.h>
49 #include <inet/ipclassifier.h>
50 
51 /*
52  * PR-SCTP comments.
53  *
54  * A message can expire before it gets to the transmit list (i.e. it is still
55  * in the unsent list - unchunked), after it gets to the transmit list, but
56  * before transmission has actually started, or after transmission has begun.
57  * Accordingly, we check for the status of a message in sctp_chunkify() when
58  * the message is being transferred from the unsent list to the transmit list;
59  * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
60  * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
61  * When we nuke a message in sctp_chunkify(), all we need to do is take it
62  * out of the unsent list and update sctp_unsent; when a message is deemed
63  * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
64  * list, update sctp_unsent IFF transmission for the message has not yet begun
65  * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
66  * message has started, then we cannot just take it out of the list, we need
67  * to send Forward TSN chunk to the peer so that the peer can clear its
68  * fragment list for this message. However, we cannot just send the Forward
69  * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
70  * messages preceding this abandoned message. So, we send a Forward TSN
71  * IFF all messages prior to this abandoned message have been SACKd; if not,
72  * we defer sending the Forward TSN to sctp_cumack(), which will check for
73  * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
74  * sctp_rexmit() when we check for retransmissions, we need to determine if
75  * the advanced peer ack point can be moved ahead, and if so, send a Forward
76  * TSN to the peer instead of retransmitting the chunk. Note that when
77  * we send a Forward TSN for a message, there may be yet unsent chunks for
78  * this message; we need to mark all such chunks as abandoned, so that
79  * sctp_cumack() can take the message out of the transmit list, additionally
80  * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
81  * decremented when a message/chunk is deemed abandoned), sockfs needs to
82  * be notified so that it can adjust its idea of the queued message.
83  */
84 
85 #include "sctp_impl.h"
86 
/*
 * kmem cache handle.
 * NOTE(review): going by the name this presumably backs Forward TSN set
 * allocations; it is not referenced in the visible part of this file, so
 * confirm against the rest of the file.
 */
87 static struct kmem_cache	*sctp_kmem_ftsn_set_cache;
88 
89 /*
 * Padding mblk for SCTP chunks.  sctp_get_padding() dupb()s this block
 * and exposes 'pad' bytes of it to round a chunk up to SCTP_ALIGN.
 */
90 mblk_t *sctp_pad_mp;
91 
92 #ifdef	DEBUG
/* DEBUG-only check that 'tail' is reachable from 'head' through b_next. */
93 static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
94 #endif
95 
96 /*
97  * Called to allocate a header mblk when sending data to SCTP.
98  * Data will follow in b_cont of this mblk.
99  */
100 mblk_t *
101 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
102     int flags)
103 {
104 	mblk_t *mp;
105 	struct T_unitdata_req *tudr;
106 	size_t size;
107 	int error;
108 
109 	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
110 	size = MAX(size, sizeof (sctp_msg_hdr_t));
111 	if (flags & SCTP_CAN_BLOCK) {
112 		mp = allocb_wait(size, BPRI_MED, 0, &error);
113 	} else {
114 		mp = allocb(size, BPRI_MED);
115 	}
116 	if (mp) {
117 		tudr = (struct T_unitdata_req *)mp->b_rptr;
118 		tudr->PRIM_type = T_UNITDATA_REQ;
119 		tudr->DEST_length = nlen;
120 		tudr->DEST_offset = sizeof (*tudr);
121 		tudr->OPT_length = clen;
122 		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
123 		    _TPI_ALIGN_TOPT(nlen));
124 		if (nlen > 0)
125 			bcopy(name, tudr + 1, nlen);
126 		if (clen > 0)
127 			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
128 		mp->b_wptr += (tudr ->OPT_offset + clen);
129 		mp->b_datap->db_type = M_PROTO;
130 	}
131 	return (mp);
132 }
133 
/*
 * Queue one user message for transmission.
 *
 * 'mp' is the M_PROTO T_UNITDATA_REQ header (see sctp_alloc_hdr()) with
 * the user data, if any, chained on b_cont.  An optional destination
 * address and an optional SCTP_SNDRCV ancillary object carried in the
 * header override the association defaults (stream id, ppid, context,
 * flags and timetolive).
 *
 * With MSG_ABORT set the association is aborted and the header is freed.
 * Otherwise the header mblk is rewritten in place as a sctp_msg_hdr_t,
 * appended to the unsent list, and sctp_output() is called when the
 * association is in SCTPS_ESTABLISHED.
 *
 * Returns 0 on success, else EINVAL, EAFNOSUPPORT, ENOMEM or EPIPE.  The
 * non-zero return paths do not free 'mp' in this function.
 * NOTE(review): a request with no data and no MSG_ABORT returns 0 without
 * queueing or freeing the header -- confirm the caller's ownership rules.
 */
134 /*ARGSUSED2*/
135 int
136 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
137 {
138 	sctp_faddr_t	*fp = NULL;
139 	struct T_unitdata_req	*tudr;
140 	int		error = 0;
141 	mblk_t		*mproto = mp;
142 	in6_addr_t	*addr;
143 	in6_addr_t	tmpaddr;
	/* Per-message attributes start out as the association defaults. */
144 	uint16_t	sid = sctp->sctp_def_stream;
145 	uint32_t	ppid = sctp->sctp_def_ppid;
146 	uint32_t	context = sctp->sctp_def_context;
147 	uint16_t	msg_flags = sctp->sctp_def_flags;
148 	sctp_msg_hdr_t	*sctp_msg_hdr;
149 	uint32_t	msg_len = 0;
150 	uint32_t	timetolive = sctp->sctp_def_timetolive;
151 
152 	ASSERT(DB_TYPE(mproto) == M_PROTO);
153 
	/* From here on 'mp' is the data chain; 'mproto' is the header. */
154 	mp = mp->b_cont;
155 	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);
156 
157 	tudr = (struct T_unitdata_req *)mproto->b_rptr;
158 	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
159 
160 	/* Get destination address, if specified */
161 	if (tudr->DEST_length > 0) {
162 		sin_t *sin;
163 		sin6_t *sin6;
164 
165 		sin = (struct sockaddr_in *)
166 		    (mproto->b_rptr + tudr->DEST_offset);
		/*
		 * NOTE(review): sin_family is read before DEST_length is
		 * compared against the sockaddr size below.
		 */
167 		switch (sin->sin_family) {
168 		case AF_INET:
169 			if (tudr->DEST_length < sizeof (*sin)) {
170 				return (EINVAL);
171 			}
			/* Peer addresses are looked up in v4-mapped form. */
172 			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
173 			addr = &tmpaddr;
174 			break;
175 		case AF_INET6:
176 			if (tudr->DEST_length < sizeof (*sin6)) {
177 				return (EINVAL);
178 			}
179 			sin6 = (struct sockaddr_in6 *)
180 			    (mproto->b_rptr + tudr->DEST_offset);
181 			addr = &sin6->sin6_addr;
182 			break;
183 		default:
184 			return (EAFNOSUPPORT);
185 		}
		/* The address must match a peer address of this association. */
186 		fp = sctp_lookup_faddr(sctp, addr);
187 		if (fp == NULL) {
188 			return (EINVAL);
189 		}
190 	}
191 	/* Ancillary Data? */
192 	if (tudr->OPT_length > 0) {
193 		struct cmsghdr		*cmsg;
194 		char			*cend;
195 		struct sctp_sndrcvinfo	*sndrcv;
196 
197 		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
198 		cend = ((char *)cmsg + tudr->OPT_length);
199 		ASSERT(cend <= (char *)mproto->b_wptr);
200 
		/*
		 * Walk the cmsg objects.  The walk stops when a header or
		 * its claimed length would run past the option area, when
		 * cmsg_len is zero, or at the first SCTP_SNDRCV object,
		 * which is the only one acted upon.
		 */
201 		for (;;) {
202 			if ((char *)(cmsg + 1) > cend ||
203 			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
204 				break;
205 			}
206 			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
207 			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
208 				if (cmsg->cmsg_len <
209 				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
210 					return (EINVAL);
211 				}
212 				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
213 				sid = sndrcv->sinfo_stream;
214 				msg_flags = sndrcv->sinfo_flags;
215 				ppid = sndrcv->sinfo_ppid;
216 				context = sndrcv->sinfo_context;
217 				timetolive = sndrcv->sinfo_timetolive;
218 				break;
219 			}
220 			if (cmsg->cmsg_len > 0)
221 				cmsg = CMSG_NEXT(cmsg);
222 			else
223 				break;
224 		}
225 	}
226 	if (msg_flags & MSG_ABORT) {
		/*
		 * Collapse a multi-mblk data chain into a single mblk
		 * before handing it to sctp_user_abort().
		 */
227 		if (mp && mp->b_cont) {
228 			mblk_t *pump = msgpullup(mp, -1);
229 			if (!pump) {
230 				return (ENOMEM);
231 			}
232 			freemsg(mp);
233 			mp = pump;
234 			mproto->b_cont = mp;
235 		}
236 		RUN_SCTP(sctp);
237 		sctp_user_abort(sctp, mp, B_TRUE);
238 		sctp_clean_death(sctp, ECONNRESET);
		/* Frees the header and whatever is still chained on b_cont. */
239 		freemsg(mproto);
240 		goto process_sendq;
241 	}
	/* Nothing to queue; this path returns 0 (error is still 0). */
242 	if (mp == NULL)
243 		goto done;
244 
245 	RUN_SCTP(sctp);
246 
247 	/* Reject any new data requests if we are shutting down */
248 	if (sctp->sctp_state > SCTPS_ESTABLISHED) {
249 		error = EPIPE;
250 		goto unlock_done;
251 	}
252 
253 	/* Re-use the mproto to store relevant info. */
254 	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));
255 
	/*
	 * The T_UNITDATA_REQ contents are overwritten from here on;
	 * everything needed from it was copied into locals above.
	 */
256 	mproto->b_rptr = mproto->b_datap->db_base;
257 	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);
258 
259 	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
260 	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
261 	sctp_msg_hdr->smh_context = context;
262 	sctp_msg_hdr->smh_sid = sid;
263 	sctp_msg_hdr->smh_ppid = ppid;
264 	sctp_msg_hdr->smh_flags = msg_flags;
	/* Lifetime is stored in ticks, next to the lbolt64 time of queueing. */
265 	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
266 	sctp_msg_hdr->smh_tob = lbolt64;
	/* Total user data length; 'mp' is NULL once this loop finishes. */
267 	for (; mp != NULL; mp = mp->b_cont)
268 		msg_len += MBLKL(mp);
269 	sctp_msg_hdr->smh_msglen = msg_len;
270 
271 	/* User requested specific destination */
272 	SCTP_SET_CHUNK_DEST(mproto, fp);
273 
	/*
	 * The stream id is only checked against sctp_num_ostr once the
	 * state is at or past SCTPS_COOKIE_ECHOED.
	 */
274 	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
275 	    sid >= sctp->sctp_num_ostr) {
276 		/* Send sendfail event */
277 		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
278 		    B_FALSE);
279 		error = EINVAL;
280 		goto unlock_done;
281 	}
282 
283 	/* no data */
284 	if (msg_len == 0) {
285 		sctp_sendfail_event(sctp, dupmsg(mproto),
286 		    SCTP_ERR_NO_USR_DATA, B_FALSE);
287 		error = EINVAL;
288 		goto unlock_done;
289 	}
290 
291 	/* Add it to the unsent list */
292 	if (sctp->sctp_xmit_unsent == NULL) {
293 		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
294 	} else {
295 		sctp->sctp_xmit_unsent_tail->b_next = mproto;
296 		sctp->sctp_xmit_unsent_tail = mproto;
297 	}
298 	sctp->sctp_unsent += msg_len;
299 	BUMP_LOCAL(sctp->sctp_msgcount);
	/* In any other state the message just stays on the unsent list. */
300 	if (sctp->sctp_state == SCTPS_ESTABLISHED)
301 		sctp_output(sctp);
	/* Success and abort paths: WAKE_SCTP(), process the send queue, return 0. */
302 process_sendq:
303 	WAKE_SCTP(sctp);
304 	sctp_process_sendq(sctp);
305 	return (0);
	/* Error after RUN_SCTP(): undo it with WAKE_SCTP() and report 'error'. */
306 unlock_done:
307 	WAKE_SCTP(sctp);
308 done:
309 	return (error);
310 }
311 
/*
 * Convert messages on the unsent list into DATA chunks and move each fully
 * chunked message to the end of the transmit list.
 *
 * 'first_len' is the room (data header included) for the first chunk that
 * is built; later chunks are sized from the destination's sfa_pmss.
 * 'bytes_to_send' is a budget: once a message is finished, the next unsent
 * message is only started while the budget is still positive.
 *
 * Within a message (mdblk), finished chunks hang off mdblk->b_cont and are
 * linked to each other through b_next.  If dupb() or allocb() fails part
 * way through, the data not yet chunked is put back at the front of
 * b_cont; if finished chunks already exist they are linked behind it via
 * b_next and the message is flagged with SCTP_MSG_SET_CHUNKED so that a
 * later call can resume.  The message stays on the unsent list.
 *
 * A not-yet-chunked message for which SCTP_MSG_TO_BE_ABANDONED() is true
 * is dropped here: it is unlinked, sctp_unsent is reduced, the ULP is told
 * the new queued amount (unless detached) and a sendfail event is raised.
 */
312 void
313 sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send)
314 {
315 	mblk_t			*mp;
316 	mblk_t			*chunk_mp;
317 	mblk_t			*chunk_head;
318 	mblk_t			*chunk_hdr;
319 	mblk_t			*chunk_tail = NULL;
320 	int			count;
321 	int			chunksize;
322 	sctp_data_hdr_t		*sdc;
323 	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
324 	sctp_faddr_t		*fp;
325 	sctp_faddr_t		*fp1;
326 	size_t			xtralen;
327 	sctp_msg_hdr_t		*msg_hdr;
328 
	/* Destination: the one recorded on the message, else the current path. */
329 	fp = SCTP_CHUNK_DEST(mdblk);
330 	if (fp == NULL)
331 		fp = sctp->sctp_current;
	/* Size of a separately allocated chunk header mblk (see allocb below). */
332 	if (fp->isv4)
333 		xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra + sizeof (*sdc);
334 	else
335 		xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra + sizeof (*sdc);
	/* Payload room in the first chunk: first_len less the data header. */
336 	count = chunksize = first_len - sizeof (*sdc);
337 nextmsg:
338 	chunk_mp = mdblk->b_cont;
339 
340 	/*
341 	 * If this partially chunked, we ignore the first_len for now
342 	 * and use the one already present. For the unchunked bits, we
343 	 * use the length of the last chunk.
344 	 */
345 	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
346 		int	chunk_len;
347 
		/*
		 * b_cont heads the unchunked remainder; the chunks built by
		 * the earlier call follow it on b_next.  Split them apart.
		 */
348 		ASSERT(chunk_mp->b_next != NULL);
349 		mdblk->b_cont = chunk_mp->b_next;
350 		chunk_mp->b_next = NULL;
351 		SCTP_MSG_CLEAR_CHUNKED(mdblk);
		/* Find the last chunk that was built. */
352 		mp = mdblk->b_cont;
353 		while (mp->b_next != NULL)
354 			mp = mp->b_next;
355 		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
		/*
		 * Use the room left over after that last chunk if it
		 * exceeds a data header, otherwise a full sfa_pmss; the
		 * data header is deducted in either case.
		 */
356 		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
357 			count = chunksize = fp->sfa_pmss - chunk_len;
358 		else
359 			count = chunksize = fp->sfa_pmss;
360 		count = chunksize = count - sizeof (*sdc);
361 	} else {
362 		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
363 		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
			/* Unlink the head of the unsent list and drop it. */
364 			sctp->sctp_xmit_unsent = mdblk->b_next;
365 			if (sctp->sctp_xmit_unsent == NULL)
366 				sctp->sctp_xmit_unsent_tail = NULL;
367 			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
368 			sctp->sctp_unsent -= msg_hdr->smh_msglen;
369 			mdblk->b_next = NULL;
370 			BUMP_LOCAL(sctp->sctp_prsctpdrop);
371 			/*
372 			 * Update ULP the amount of queued data, which is
373 			 * sent-unack'ed + unsent.
374 			 */
375 			if (!SCTP_IS_DETACHED(sctp)) {
376 				sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
377 				    sctp->sctp_unacked + sctp->sctp_unsent);
378 			}
379 			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
380 			goto try_next;
381 		}
		/* Detach the data; chunks are hung on b_cont as they are built. */
382 		mdblk->b_cont = NULL;
383 	}
384 	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
385 nextchunk:
386 	chunk_head = chunk_mp;
387 	chunk_tail = NULL;
388 
389 	/* Skip as many mblk's as we need */
390 	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
391 		count -= MBLKL(chunk_mp);
392 		chunk_tail = chunk_mp;
393 		chunk_mp = chunk_mp->b_cont;
394 	}
395 	/* Split the chain, if needed */
396 	if (chunk_mp != NULL) {
397 		if (count > 0) {
			/*
			 * The chunk boundary falls inside chunk_mp: dupb()
			 * it, trim the duplicate to 'count' bytes for this
			 * chunk and advance the original past those bytes.
			 */
398 			mblk_t	*split_mp = dupb(chunk_mp);
399 
400 			if (split_mp == NULL) {
				/*
				 * Put the unprocessed data back; flag the
				 * message only if chunks already exist.
				 */
401 				if (mdblk->b_cont == NULL) {
402 					mdblk->b_cont = chunk_head;
403 				} else  {
404 					SCTP_MSG_SET_CHUNKED(mdblk);
405 					ASSERT(chunk_head->b_next == NULL);
406 					chunk_head->b_next = mdblk->b_cont;
407 					mdblk->b_cont = chunk_head;
408 				}
409 				return;
410 			}
411 			if (chunk_tail != NULL) {
412 				chunk_tail->b_cont = split_mp;
413 				chunk_tail = chunk_tail->b_cont;
414 			} else {
415 				chunk_head = chunk_tail = split_mp;
416 			}
417 			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
418 			chunk_mp->b_rptr = chunk_tail->b_wptr;
419 			count = 0;
420 		} else if (chunk_tail == NULL) {
			/*
			 * No payload room at all and nothing gathered:
			 * start over with a full-sized chunk.
			 */
421 			goto next;
422 		} else {
			/* Chunk ends exactly on an mblk boundary; cut there. */
423 			chunk_tail->b_cont = NULL;
424 		}
425 	}
426 	/* Alloc chunk hdr, if needed */
	/*
	 * A separate header mblk is used when the first data block is
	 * shared (DB_REF > 1), not SCTP_ALIGN aligned, or lacks headroom
	 * for the data header; otherwise the header is written in place
	 * in front of the data.
	 */
427 	if (DB_REF(chunk_head) > 1 ||
428 	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
429 	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
430 		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/*
			 * Rejoin this chunk's data with the remainder and
			 * put it back, as for the dupb() failure above.
			 */
431 			if (mdblk->b_cont == NULL) {
432 				if (chunk_mp != NULL)
433 					linkb(chunk_head, chunk_mp);
434 				mdblk->b_cont = chunk_head;
435 			} else {
436 				SCTP_MSG_SET_CHUNKED(mdblk);
437 				if (chunk_mp != NULL)
438 					linkb(chunk_head, chunk_mp);
439 				ASSERT(chunk_head->b_next == NULL);
440 				chunk_head->b_next = mdblk->b_cont;
441 				mdblk->b_cont = chunk_head;
442 			}
443 			return;
444 		}
		/*
		 * Place the data header at the end of the new block, leaving
		 * xtralen - sizeof (*sdc) bytes of headroom in front of it.
		 */
445 		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
446 		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
447 		chunk_hdr->b_cont = chunk_head;
448 	} else {
449 		chunk_hdr = chunk_head;
450 		chunk_hdr->b_rptr -= sizeof (*sdc);
451 	}
452 	ASSERT(chunk_hdr->b_datap->db_ref == 1);
453 	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
454 	sdc->sdh_id = CHUNK_DATA;
455 	sdc->sdh_flags = 0;
	/* Chunk length = data header + payload placed (chunksize - count). */
456 	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
457 	ASSERT(sdc->sdh_len);
458 	sdc->sdh_sid = htons(msg_hdr->smh_sid);
459 	/*
460 	 * We defer assigning the SSN just before sending the chunk, else
461 	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
462 	 * to send a Forward TSN to let the peer know. Some more comments
463 	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
464 	 */
465 	sdc->sdh_payload_id = msg_hdr->smh_ppid;
466 
	/* The first chunk of a message gets the B bit; others are appended. */
467 	if (mdblk->b_cont == NULL) {
468 		mdblk->b_cont = chunk_hdr;
469 		SCTP_DATA_SET_BBIT(sdc);
470 	} else {
471 		mp = mdblk->b_cont;
472 		while (mp->b_next != NULL)
473 			mp = mp->b_next;
474 		mp->b_next = chunk_hdr;
475 	}
476 
477 	bytes_to_send -= (chunksize - count);
478 	if (chunk_mp != NULL) {
479 next:
		/* More data in this message: every further chunk is full-sized. */
480 		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
481 		goto nextchunk;
482 	}
	/* Message fully chunked: mark the last chunk with the E bit. */
483 	SCTP_DATA_SET_EBIT(sdc);
484 	sctp->sctp_xmit_unsent = mdblk->b_next;
485 	if (mdblk->b_next == NULL) {
486 		sctp->sctp_xmit_unsent_tail = NULL;
487 	}
488 	mdblk->b_next = NULL;
489 
	/*
	 * Append to the transmit list.  When the list is non-empty the
	 * message goes after the last entry reachable from sctp_xmit_tail
	 * and sctp_xmit_tail itself is left where it was.
	 */
490 	if (sctp->sctp_xmit_tail == NULL) {
491 		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
492 	} else {
493 		mp = sctp->sctp_xmit_tail;
494 		while (mp->b_next != NULL)
495 			mp = mp->b_next;
496 		mp->b_next = mdblk;
497 		mdblk->b_prev = mp;
498 	}
499 try_next:
	/* Carry on with the next unsent message while budget remains. */
500 	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
501 		mdblk = sctp->sctp_xmit_unsent;
502 		fp1 = SCTP_CHUNK_DEST(mdblk);
503 		if (fp1 == NULL)
504 			fp1 = sctp->sctp_current;
505 		if (fp == fp1) {
			/*
			 * Same destination.  If room is left over from the
			 * previous chunk and the next message's first mblk
			 * either exceeds a full chunk payload or fits in
			 * that room, size the next chunk from the leftover
			 * (less a data header, rounded down to a multiple
			 * of 4); otherwise use a full-sized chunk.
			 */
506 			size_t len = MBLKL(mdblk->b_cont);
507 			if ((count > 0) &&
508 			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
509 				(len <= count))) {
510 				count -= sizeof (*sdc);
511 				count = chunksize = count - (count & 0x3);
512 			} else {
513 				count = chunksize = fp->sfa_pmss -
514 				    sizeof (*sdc);
515 			}
516 		} else {
			/* New destination: recompute header room and chunk size. */
517 			if (fp1->isv4)
518 				xtralen = sctp->sctp_hdr_len;
519 			else
520 				xtralen = sctp->sctp_hdr6_len;
521 			xtralen += sctp_wroff_xtra + sizeof (*sdc);
522 			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
523 			fp = fp1;
524 		}
525 		goto nextmsg;
526 	}
527 }
528 
529 void
530 sctp_free_msg(mblk_t *ump)
531 {
532 	mblk_t *mp, *nmp;
533 
534 	for (mp = ump->b_cont; mp; mp = nmp) {
535 		nmp = mp->b_next;
536 		mp->b_next = mp->b_prev = NULL;
537 		freemsg(mp);
538 	}
539 	ASSERT(!ump->b_prev);
540 	ump->b_next = NULL;
541 	freeb(ump);
542 }
543 
544 mblk_t *
545 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
546     int *error)
547 {
548 	int hdrlen;
549 	char *hdr;
550 	int isv4 = fp->isv4;
551 
552 	if (error != NULL)
553 		*error = 0;
554 
555 	if (isv4) {
556 		hdrlen = sctp->sctp_hdr_len;
557 		hdr = sctp->sctp_iphc;
558 	} else {
559 		hdrlen = sctp->sctp_hdr6_len;
560 		hdr = sctp->sctp_iphc6;
561 	}
562 	/*
563 	 * A null fp->ire could mean that the address is 'down'. Similarly,
564 	 * it is possible that the address went down, we tried to send an
565 	 * heartbeat and ended up setting fp->saddr as unspec because we
566 	 * didn't have any usable source address. In either case
567 	 * sctp_ire2faddr() will try find an IRE, if available, and set
568 	 * the source address, if needed. If we still don't have any
569 	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
570 	 * we return EHOSTUNREACH.
571 	 */
572 	if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
573 		sctp_ire2faddr(sctp, fp);
574 		if (fp->state == SCTP_FADDRS_UNREACH) {
575 			if (error != NULL)
576 				*error = EHOSTUNREACH;
577 			return (NULL);
578 		}
579 	}
580 	/* Copy in IP header. */
581 	if ((mp->b_rptr - mp->b_datap->db_base) <
582 	    (sctp_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) {
583 		mblk_t *nmp;
584 		/*
585 		 * This can happen if IP headers are adjusted after
586 		 * data was moved into chunks, or during retransmission,
587 		 * or things like snoop is running.
588 		 */
589 		nmp = allocb_cred(sctp_wroff_xtra + hdrlen + sacklen,
590 		    CONN_CRED(sctp->sctp_connp));
591 		if (nmp == NULL) {
592 			if (error !=  NULL)
593 				*error = ENOMEM;
594 			return (NULL);
595 		}
596 		nmp->b_rptr += sctp_wroff_xtra;
597 		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
598 		nmp->b_cont = mp;
599 		mp = nmp;
600 	} else {
601 		mp->b_rptr -= (hdrlen + sacklen);
602 		mblk_setcred(mp, CONN_CRED(sctp->sctp_connp));
603 	}
604 	bcopy(hdr, mp->b_rptr, hdrlen);
605 	if (sacklen) {
606 		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
607 	}
608 	if (fp != sctp->sctp_current) {
609 		/* change addresses in header */
610 		if (isv4) {
611 			ipha_t *iph = (ipha_t *)mp->b_rptr;
612 
613 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
614 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
615 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
616 				    iph->ipha_src);
617 			} else if (sctp->sctp_bound_to_all) {
618 				iph->ipha_src = INADDR_ANY;
619 			}
620 		} else {
621 			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
622 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
623 				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
624 			} else if (sctp->sctp_bound_to_all) {
625 				V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src);
626 			}
627 		}
628 	}
629 	/*
630 	 * IP will not free this IRE if it is condemned.  SCTP needs to
631 	 * free it.
632 	 */
633 	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
634 		IRE_REFRELE_NOTR(fp->ire);
635 		fp->ire = NULL;
636 	}
637 
638 	/* Stash the conn and ire ptr info for IP */
639 	SCTP_STASH_IPINFO(mp, fp->ire);
640 
641 	return (mp);
642 }
643 
644 /*
645  * SCTP requires every chunk to be padded so that the total length
646  * is a multiple of SCTP_ALIGN.  This function returns a mblk with
647  * the specified pad length.
648  */
649 static mblk_t *
650 sctp_get_padding(int pad)
651 {
652 	mblk_t *fill;
653 
654 	ASSERT(pad < SCTP_ALIGN);
655 	if ((fill = dupb(sctp_pad_mp)) != NULL) {
656 		fill->b_wptr += pad;
657 		return (fill);
658 	}
659 
660 	/*
661 	 * The memory saving path of reusing the sctp_pad_mp
662 	 * fails may be because it has been dupb() too
663 	 * many times (DBLK_REFMAX).  Use the memory consuming
664 	 * path of allocating the pad mblk.
665 	 */
666 	if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
667 		/* Zero it out.  SCTP_ALIGN is sizeof (int32_t) */
668 		*(int32_t *)fill->b_rptr = 0;
669 		fill->b_wptr += pad;
670 	}
671 	return (fill);
672 }
673 
674 static mblk_t *
675 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
676 {
677 	mblk_t		*meta;
678 	mblk_t		*start_mp = NULL;
679 	mblk_t		*end_mp = NULL;
680 	mblk_t		*mp, *nmp;
681 	mblk_t		*fill;
682 	sctp_data_hdr_t	*sdh;
683 	int		msglen;
684 	int		extra;
685 	sctp_msg_hdr_t	*msg_hdr;
686 
687 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
688 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
689 		if (SCTP_IS_MSG_ABANDONED(meta) ||
690 		    SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
691 			continue;
692 		}
693 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
694 			if (SCTP_CHUNK_WANT_REXMIT(mp)) {
695 				/*
696 				 * Use the same peer address to do fast
697 				 * retransmission.
698 				 */
699 				if (*fp == NULL) {
700 					*fp = SCTP_CHUNK_DEST(mp);
701 					if ((*fp)->state != SCTP_FADDRS_ALIVE)
702 						*fp = sctp->sctp_current;
703 				} else if (*fp != SCTP_CHUNK_DEST(mp)) {
704 					continue;
705 				}
706 
707 				sdh = (sctp_data_hdr_t *)mp->b_rptr;
708 				msglen = ntohs(sdh->sdh_len);
709 				if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
710 					extra = SCTP_ALIGN - extra;
711 				}
712 
713 				/*
714 				 * We still return at least the first message
715 				 * even if that message cannot fit in as
716 				 * PMTU may have changed.
717 				 */
718 				if (*total + msglen + extra >
719 				    (*fp)->sfa_pmss && start_mp != NULL) {
720 					return (start_mp);
721 				}
722 				if ((nmp = dupmsg(mp)) == NULL)
723 					return (start_mp);
724 				if (extra > 0) {
725 					fill = sctp_get_padding(extra);
726 					if (fill != NULL) {
727 						linkb(nmp, fill);
728 					} else {
729 						return (start_mp);
730 					}
731 				}
732 				BUMP_MIB(&sctp_mib, sctpOutFastRetrans);
733 				SCTP_CHUNK_CLEAR_REXMIT(mp);
734 				if (start_mp == NULL) {
735 					start_mp = nmp;
736 				} else {
737 					linkb(end_mp, nmp);
738 				}
739 				end_mp = nmp;
740 				*total += msglen + extra;
741 				dprint(2, ("sctp_find_fast_rexmit_mblks: "
742 				    "tsn %x\n", sdh->sdh_tsn));
743 			}
744 		}
745 	}
746 	/* Clear the flag as there is no more message to be fast rexmitted. */
747 	sctp->sctp_chk_fast_rexmit = B_FALSE;
748 	return (start_mp);
749 }
750 
/*
 * Debug-only sanity check that an mblk chain is not broken: walks b_next
 * from 'head' and reports whether 'tail' is reached.  A NULL 'head' or
 * 'tail' counts as a valid (empty) chain.
 */
#ifdef	DEBUG
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
	mblk_t	*cur;

	if (head == NULL || tail == NULL)
		return (B_TRUE);

	for (cur = head; cur != NULL; cur = cur->b_next) {
		if (cur == tail)
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif
768 
769 /*
770  * Gets the next unsent chunk to transmit. Messages that are abandoned are
771  * skipped. A message can be abandoned if it has a non-zero timetolive and
772  * transmission has not yet started or if it is a partially reliable
773  * message and its time is up (assuming we are PR-SCTP aware).
774  * 'cansend' is used to determine if need to try and chunkify messages from
775  * the unsent list, if any, and also as an input to sctp_chunkify() if so.
776  * When called from sctp_rexmit(), we don't want to chunkify, so 'cansend'
777  * will be set to 0.
 *
 * Arguments:
 *	mp	 - out: set to the chunk to send when a message is returned.
 *	meta	 - message on the transmit list at which to start looking.
 *	error	 - out: 0, or the error from sctp_check_abandoned_msg().
 *	firstseg - bytes deducted from the destination's sfa_pmss to get
 *		   the first chunk length handed to sctp_chunkify().
 *	cansend	 - see above; passed to sctp_chunkify() as its byte budget.
 *	fp	 - destination the caller wants, or NULL to use the one on
 *		   the first unsent message (falling back to sctp_current).
 *
 * Returns the message (meta) that contains *mp, or NULL if there is
 * nothing to send or an error occurred (*error non-zero).
778  */
779 mblk_t *
780 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
781     int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp)
782 {
783 	mblk_t		*mp1;
784 	sctp_msg_hdr_t	*msg_hdr;
785 	mblk_t		*tmp_meta;
786 	sctp_faddr_t	*fp1;
787 
788 	ASSERT(error != NULL && mp != NULL);
789 	*error = 0;
790 
791 	ASSERT(sctp->sctp_current != NULL);
792 
793 chunkified:
794 	while (meta != NULL) {
		/* Remember the successor now; 'meta' may be unlinked below. */
795 		tmp_meta = meta->b_next;
796 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
797 		mp1 = meta->b_cont;
798 		if (SCTP_IS_MSG_ABANDONED(meta))
799 			goto next_msg;
800 		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			/* Live message: hand back its first sendable chunk. */
801 			while (mp1 != NULL) {
802 				if (SCTP_CHUNK_CANSEND(mp1)) {
803 					*mp = mp1;
804 #ifdef	DEBUG
805 					ASSERT(sctp_verify_chain(
806 					    sctp->sctp_xmit_head, meta));
807 #endif
808 					return (meta);
809 				}
810 				mp1 = mp1->b_next;
811 			}
812 			goto next_msg;
813 		}
814 		/*
815 		 * If we come here and the first chunk is sent, then we
816 		 * are PR-SCTP aware, in which case if the cumulative
817 		 * TSN has moved upto or beyond the first chunk (which
818 		 * means all the previous messages have been cumulative
819 		 * SACK'd), then we send a Forward TSN with the last
820 		 * chunk that was sent in this message. If we can't send
821 		 * a Forward TSN because previous non-abandoned messages
822 		 * have not been acked then we will defer the Forward TSN
823 		 * to sctp_rexmit() or sctp_cumack().
824 		 */
825 		if (SCTP_CHUNK_ISSENT(mp1)) {
826 			*error = sctp_check_abandoned_msg(sctp, meta);
827 			if (*error != 0) {
828 #ifdef	DEBUG
829 				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
830 				    sctp->sctp_xmit_tail));
831 #endif
832 				return (NULL);
833 			}
834 			goto next_msg;
835 		}
		/*
		 * Expired before any chunk was sent: unlink the message from
		 * the transmit list.  The three cases are head of list, end
		 * of list, and interior; sctp_xmit_tail is moved off 'meta'
		 * if it pointed there.
		 */
836 		BUMP_LOCAL(sctp->sctp_prsctpdrop);
837 		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
838 		if (meta->b_prev == NULL) {
839 			ASSERT(sctp->sctp_xmit_head == meta);
840 			sctp->sctp_xmit_head = tmp_meta;
841 			if (sctp->sctp_xmit_tail == meta)
842 				sctp->sctp_xmit_tail = tmp_meta;
843 			meta->b_next = NULL;
844 			if (tmp_meta != NULL)
845 				tmp_meta->b_prev = NULL;
846 		} else if (meta->b_next == NULL) {
847 			if (sctp->sctp_xmit_tail == meta)
848 				sctp->sctp_xmit_tail = meta->b_prev;
849 			meta->b_prev->b_next = NULL;
850 			meta->b_prev = NULL;
851 		} else {
852 			meta->b_prev->b_next = tmp_meta;
853 			tmp_meta->b_prev = meta->b_prev;
854 			if (sctp->sctp_xmit_tail == meta)
855 				sctp->sctp_xmit_tail = tmp_meta;
856 			meta->b_prev = NULL;
857 			meta->b_next = NULL;
858 		}
859 		sctp->sctp_unsent -= msg_hdr->smh_msglen;
860 		/*
861 		 * Update ULP the amount of queued data, which is
862 		 * sent-unack'ed + unsent.
863 		 */
864 		if (!SCTP_IS_DETACHED(sctp)) {
865 			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
866 			    sctp->sctp_unacked + sctp->sctp_unsent);
867 		}
868 		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
869 next_msg:
870 		meta = tmp_meta;
871 	}
872 	/* chunkify, if needed */
873 	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
874 		ASSERT(sctp->sctp_unsent > 0);
875 		if (fp == NULL) {
			/*
			 * No destination given: use the one recorded on the
			 * first unsent message if it is alive, else
			 * sctp_current.
			 */
876 			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
877 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
878 				fp = sctp->sctp_current;
879 		} else {
880 			/*
881 			 * If user specified destination, try to honor that.
882 			 */
			/*
			 * The first unsent message names a different, alive
			 * destination than the caller's: do not chunkify.
			 */
883 			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
884 			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
885 			    fp1 != fp) {
886 				goto chunk_done;
887 			}
888 		}
889 		sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend);
890 		if ((meta = sctp->sctp_xmit_tail) == NULL)
891 			goto chunk_done;
892 		/*
893 		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
894 		 * new chunk(s) to the tail, so we need to skip the
895 		 * sctp_xmit_tail, which would have already been processed.
896 		 * This could happen when there are unacked chunks, but
897 		 * nothing new to send.
898 		 * When sctp_chunkify() is called when the transmit queue
899 		 * is empty then we need to start from sctp_xmit_tail.
900 		 */
901 		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
902 #ifdef	DEBUG
903 			mp1 = sctp->sctp_xmit_tail->b_cont;
904 			while (mp1 != NULL) {
905 				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
906 				mp1 = mp1->b_next;
907 			}
908 #endif
909 			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
910 				goto chunk_done;
911 		}
		/* Rescan, starting at the newly chunkified message(s). */
912 		goto chunkified;
913 	}
914 chunk_done:
915 #ifdef	DEBUG
916 	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
917 #endif
918 	return (NULL);
919 }
920 
921 void
922 sctp_fast_rexmit(sctp_t *sctp)
923 {
924 	mblk_t		*mp, *head;
925 	int		pktlen = 0;
926 	sctp_faddr_t	*fp = NULL;
927 
928 	ASSERT(sctp->sctp_xmit_head != NULL);
929 	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
930 	if (mp == NULL)
931 		return;
932 	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
933 		freemsg(mp);
934 		return;
935 	}
936 	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
937 		ipha_t *iph = (ipha_t *)head->b_rptr;
938 
939 		iph->ipha_fragment_offset_and_flags = 0;
940 	}
941 
942 	sctp_set_iplen(sctp, head);
943 	sctp_add_sendq(sctp, head);
944 	sctp->sctp_active = fp->lastactive = lbolt64;
945 }
946 
/*
 * Transmit as much queued data as the peer's receive window (sctp_frwnd)
 * and the congestion window of the chosen destination allow.
 *
 * Each pass of the outer loop builds one packet:
 *   - find the next chunk accepted by SCTP_CHUNK_CANSEND(), pulling a new
 *     message through sctp_get_msg_to_send() when the current one has none;
 *   - pick the destination and prepend the headers with
 *     sctp_add_proto_hdr(), including a pending SACK when it fits and can
 *     go to sctp_lastdata;
 *   - bundle further chunks while they fit in pathmax;
 *   - hand the packet to sctp_add_sendq().
 *
 * Whenever sending has to stop early, control goes to unsent_data, which
 * restarts the timer of the selected destination if it is not already
 * running and frees a partially built packet, if any.
 */
947 void
948 sctp_output(sctp_t *sctp)
949 {
950 	mblk_t			*mp = NULL;
951 	mblk_t			*nmp;
952 	mblk_t			*head;
953 	mblk_t			*meta = sctp->sctp_xmit_tail;
954 	mblk_t			*fill = NULL;
955 	uint16_t 		chunklen;
956 	uint32_t 		cansend;
957 	int32_t			seglen;
958 	int32_t			xtralen;
959 	int32_t			sacklen;
960 	int32_t			pad = 0;
961 	int32_t			pathmax;
962 	int			extra;
963 	int64_t			now = lbolt64;
964 	sctp_faddr_t		*fp;
965 	sctp_faddr_t		*lfp;
966 	sctp_data_hdr_t		*sdc;
967 	int			error;
968 	boolean_t		notsent = B_TRUE;
969 
	/*
	 * sacklen stays 0 when no SACK is pending. lfp is only assigned,
	 * and only read later, when sacklen is non-zero.
	 */
970 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
971 		sacklen = 0;
972 	} else {
973 		/* send a SACK chunk */
974 		sacklen = sizeof (sctp_chunk_hdr_t) +
975 		    sizeof (sctp_sack_chunk_t) +
976 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
977 		lfp = sctp->sctp_lastdata;
978 		ASSERT(lfp != NULL);
979 		if (lfp->state != SCTP_FADDRS_ALIVE)
980 			lfp = sctp->sctp_current;
981 	}
982 
	/* cansend is the smaller of the peer's window and the unsent data. */
983 	cansend = sctp->sctp_frwnd;
984 	if (sctp->sctp_unsent < cansend)
985 		cansend = sctp->sctp_unsent;
	/*
	 * Hold back a send of less than half a PMSS while some, but less
	 * than one PMSS of, data is still unacked and sctp_ndelay is clear.
	 */
986 	if ((cansend < sctp->sctp_current->sfa_pmss / 2) &&
987 	    sctp->sctp_unacked &&
988 	    (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) &&
989 	    !sctp->sctp_ndelay) {
990 		head = NULL;
991 		fp = sctp->sctp_current;
992 		goto unsent_data;
993 	}
994 	if (meta != NULL)
995 		mp = meta->b_cont;
996 	while (cansend > 0) {
997 		pad = 0;
998 
999 		/*
1000 		 * Find first segment eligible for transmit.
1001 		 */
1002 		while (mp != NULL) {
1003 			if (SCTP_CHUNK_CANSEND(mp))
1004 				break;
1005 			mp = mp->b_next;
1006 		}
1007 		if (mp == NULL) {
1008 			meta = sctp_get_msg_to_send(sctp, &mp,
1009 			    meta == NULL ? NULL : meta->b_next, &error, sacklen,
1010 			    cansend, NULL);
1011 			if (error != 0 || meta == NULL) {
1012 				head = NULL;
1013 				fp = sctp->sctp_current;
1014 				goto unsent_data;
1015 			}
1016 			sctp->sctp_xmit_tail =  meta;
1017 		}
1018 
		/* seglen covers the whole chunk, chunklen only its user data. */
1019 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1020 		seglen = ntohs(sdc->sdh_len);
1021 		xtralen = sizeof (*sdc);
1022 		chunklen = seglen - xtralen;
1023 
1024 		/*
1025 		 * Check rwnd.
1026 		 */
1027 		if (chunklen > cansend) {
1028 			head = NULL;
1029 			fp = SCTP_CHUNK_DEST(meta);
1030 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
1031 				fp = sctp->sctp_current;
1032 			goto unsent_data;
1033 		}
		/* extra is the padding needed to round seglen up to SCTP_ALIGN. */
1034 		if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1035 			extra = SCTP_ALIGN - extra;
1036 
1037 		/*
1038 		 * Pick destination address, and check cwnd.
1039 		 */
1040 		if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) &&
1041 		    (seglen + sacklen + extra <= lfp->sfa_pmss)) {
1042 			/*
1043 			 * Only include SACK chunk if it can be bundled
1044 			 * with a data chunk, and sent to sctp_lastdata.
1045 			 */
1046 			pathmax = lfp->cwnd - lfp->suna;
1047 
1048 			fp = lfp;
1049 			if ((nmp = dupmsg(mp)) == NULL) {
1050 				head = NULL;
1051 				goto unsent_data;
1052 			}
1053 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1054 			head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
1055 			    &error);
1056 			if (head == NULL) {
1057 				/*
1058 				 * If none of the source addresses are
1059 				 * available (i.e error == EHOSTUNREACH),
1060 				 * pretend we have sent the data. We will
1061 				 * eventually time out trying to retransmit
1062 				 * the data if the interface never comes up.
1063 				 * If we have already sent some stuff (i.e.,
1064 				 * notsent is B_FALSE) then we are fine, else
1065 				 * just mark this packet as sent.
1066 				 */
1067 				if (notsent && error == EHOSTUNREACH) {
1068 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1069 					    fp, chunklen, meta);
1070 				}
1071 				freemsg(nmp);
1072 				goto unsent_data;
1073 			}
			/* The SACK now rides in this packet; don't add another. */
1074 			seglen += sacklen;
1075 			xtralen += sacklen;
1076 			sacklen = 0;
1077 		} else {
1078 			fp = SCTP_CHUNK_DEST(meta);
1079 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
1080 				fp = sctp->sctp_current;
1081 			/*
1082 			 * If we haven't sent data to this destination for
1083 			 * a while, do slow start again.
1084 			 */
1085 			if (now - fp->lastactive > fp->rto) {
1086 				fp->cwnd = sctp_slow_start_after_idle *
1087 				    fp->sfa_pmss;
1088 			}
1089 			fp->lastactive = now;
1090 
1091 			pathmax = fp->cwnd - fp->suna;
1092 			if (seglen + extra > pathmax) {
1093 				head = NULL;
1094 				goto unsent_data;
1095 			}
1096 			if ((nmp = dupmsg(mp)) == NULL) {
1097 				head = NULL;
1098 				goto unsent_data;
1099 			}
1100 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1101 			head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
1102 			if (head == NULL) {
1103 				/*
1104 				 * If none of the source addresses are
1105 				 * available (i.e error == EHOSTUNREACH),
1106 				 * pretend we have sent the data. We will
1107 				 * eventually time out trying to retransmit
1108 				 * the data if the interface never comes up.
1109 				 * If we have already sent some stuff (i.e.,
1110 				 * notsent is B_FALSE) then we are fine, else
1111 				 * just mark this packet as sent.
1112 				 */
1113 				if (notsent && error == EHOSTUNREACH) {
1114 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1115 					    fp, chunklen, meta);
1116 				}
1117 				freemsg(nmp);
1118 				goto unsent_data;
1119 			}
1120 		}
		/* One packet never carries more than the path's PMSS. */
1121 		if (pathmax > fp->sfa_pmss)
1122 			pathmax = fp->sfa_pmss;
1123 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1124 		mp = mp->b_next;
1125 
1126 		/* Use this chunk to measure RTT? */
1127 		if (sctp->sctp_out_time == 0) {
1128 			sctp->sctp_out_time = now;
1129 			sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1;
1130 		}
1131 		if (extra > 0) {
1132 			fill = sctp_get_padding(extra);
1133 			if (fill != NULL) {
1134 				linkb(head, fill);
1135 				pad = extra;
1136 				seglen += extra;
1137 			} else {
				/* head is non-NULL here; unsent_data frees it. */
1138 				goto unsent_data;
1139 			}
1140 		}
1141 		/* See if we can bundle more. */
1142 		while (seglen < pathmax) {
1143 			int32_t		new_len;
1144 			int32_t		new_xtralen;
1145 
1146 			while (mp != NULL) {
1147 				if (SCTP_CHUNK_CANSEND(mp))
1148 					break;
1149 				mp = mp->b_next;
1150 			}
1151 			if (mp == NULL) {
1152 				meta = sctp_get_msg_to_send(sctp, &mp,
1153 				    meta->b_next, &error, seglen,
1154 				    (seglen - xtralen) >= cansend ? 0 :
1155 				    cansend - seglen, fp);
1156 				if (error != 0 || meta == NULL)
1157 					break;
1158 				sctp->sctp_xmit_tail =  meta;
1159 			}
1160 			ASSERT(mp != NULL);
			/*
			 * Don't bundle a not-yet-sent chunk whose message
			 * names a destination other than fp.
			 */
1161 			if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
1162 			    fp != SCTP_CHUNK_DEST(meta)) {
1163 				break;
1164 			}
1165 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1166 			chunklen = ntohs(sdc->sdh_len);
1167 			if ((extra = chunklen  & (SCTP_ALIGN - 1)) != 0)
1168 				extra = SCTP_ALIGN - extra;
1169 
			/*
			 * Work on tentative lengths; seglen and xtralen are
			 * only updated once the chunk is known to fit and
			 * has been duplicated.
			 */
1170 			new_len = seglen + chunklen;
1171 			new_xtralen = xtralen + sizeof (*sdc);
1172 			chunklen -= sizeof (*sdc);
1173 
1174 			if (new_len - new_xtralen > cansend ||
1175 			    new_len + extra > pathmax) {
1176 				break;
1177 			}
1178 			if ((nmp = dupmsg(mp)) == NULL)
1179 				break;
1180 			if (extra > 0) {
1181 				fill = sctp_get_padding(extra);
1182 				if (fill != NULL) {
1183 					pad += extra;
1184 					new_len += extra;
1185 					linkb(nmp, fill);
1186 				} else {
1187 					freemsg(nmp);
1188 					break;
1189 				}
1190 			}
1191 			seglen = new_len;
1192 			xtralen = new_xtralen;
1193 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1194 			SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1195 			linkb(head, nmp);
1196 			mp = mp->b_next;
1197 		}
1198 		if ((seglen > fp->sfa_pmss) && fp->isv4) {
1199 			ipha_t *iph = (ipha_t *)head->b_rptr;
1200 
1201 			/*
1202 			 * Path MTU is different from what we thought it would
1203 			 * be when we created chunks, or IP headers have grown.
1204 			 * Need to clear the DF bit.
1205 			 */
1206 			iph->ipha_fragment_offset_and_flags = 0;
1207 		}
1208 		/* xmit segment */
		/*
		 * Only user data is charged against cansend: padding and
		 * the chunk/SACK header bytes in xtralen are left out.
		 */
1209 		ASSERT(cansend >= seglen - pad - xtralen);
1210 		cansend -= (seglen - pad - xtralen);
1211 		dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
1212 		    "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
1213 		    seglen - xtralen, ntohl(sdc->sdh_tsn),
1214 		    ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
1215 		    cansend, sctp->sctp_lastack_rxd));
1216 		sctp_set_iplen(sctp, head);
1217 		sctp_add_sendq(sctp, head);
1218 		/* arm rto timer (if not set) */
1219 		if (!fp->timer_running)
1220 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1221 		notsent = B_FALSE;
1222 	}
1223 	sctp->sctp_active = now;
1224 	return;
1225 unsent_data:
1226 	/* arm persist timer (if rto timer not set) */
1227 	if (!fp->timer_running)
1228 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1229 	if (head != NULL)
1230 		freemsg(head);
1231 }
1232 
1233 /*
1234  * The following two functions initialize and destroy the cache
1235  * associated with the sets used for PR-SCTP.
1236  */
1237 void
1238 sctp_ftsn_sets_init(void)
1239 {
1240 	sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
1241 	    sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
1242 	    NULL, 0);
1243 }
1244 
/*
 * Destroy sctp_kmem_ftsn_set_cache, the kmem cache that
 * sctp_ftsn_sets_init() creates for the PR-SCTP sets.
 */
1245 void
1246 sctp_ftsn_sets_fini(void)
1247 {
1248 	kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
1249 }
1250 
1251 
1252 /* Free PR-SCTP sets */
1253 void
1254 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
1255 {
1256 	sctp_ftsn_set_t *p;
1257 
1258 	while (s != NULL) {
1259 		p = s->next;
1260 		s->next = NULL;
1261 		kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
1262 		s = p;
1263 	}
1264 }
1265 
1266 /*
1267  * Given a message meta block, meta, this routine creates or modifies
1268  * the set that will be used to generate a Forward TSN chunk. If the
1269  * entry for stream id, sid, for this message already exists, the
1270  * sequence number, ssn, is updated if it is greater than the existing
1271  * one. If an entry for this sid does not exist, one is created if
1272  * the size does not exceed fp->sfa_pmss. We return false in case
1273  * or an error.
1274  */
1275 boolean_t
1276 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
1277     uint_t *nsets, uint32_t *slen)
1278 {
1279 	sctp_ftsn_set_t		*p;
1280 	sctp_msg_hdr_t		*msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1281 	uint16_t		sid = htons(msg_hdr->smh_sid);
1282 	/* msg_hdr->smh_ssn is already in NBO */
1283 	uint16_t		ssn = msg_hdr->smh_ssn;
1284 
1285 	ASSERT(s != NULL && nsets != NULL);
1286 	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
1287 
1288 	if (*s == NULL) {
1289 		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
1290 		*s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
1291 		if (*s == NULL)
1292 			return (B_FALSE);
1293 		(*s)->ftsn_entries.ftsn_sid = sid;
1294 		(*s)->ftsn_entries.ftsn_ssn = ssn;
1295 		(*s)->next = NULL;
1296 		*nsets = 1;
1297 		*slen += sizeof (uint32_t);
1298 		return (B_TRUE);
1299 	}
1300 	for (p = *s; p->next != NULL; p = p->next) {
1301 		if (p->ftsn_entries.ftsn_sid == sid) {
1302 			if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1303 				p->ftsn_entries.ftsn_ssn = ssn;
1304 			return (B_TRUE);
1305 		}
1306 	}
1307 	/* the last one */
1308 	if (p->ftsn_entries.ftsn_sid == sid) {
1309 		if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1310 			p->ftsn_entries.ftsn_ssn = ssn;
1311 	} else {
1312 		if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss)
1313 			return (B_FALSE);
1314 		p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
1315 		    KM_NOSLEEP);
1316 		if (p->next == NULL)
1317 			return (B_FALSE);
1318 		p = p->next;
1319 		p->ftsn_entries.ftsn_sid = sid;
1320 		p->ftsn_entries.ftsn_ssn = ssn;
1321 		p->next = NULL;
1322 		(*nsets)++;
1323 		*slen += sizeof (uint32_t);
1324 	}
1325 	return (B_TRUE);
1326 }
1327 
1328 /*
1329  * Given a set of stream id - sequence number pairs, this routing creates
1330  * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
1331  * for the chunk is obtained from sctp->sctp_adv_pap. The caller
1332  * will add the IP/SCTP header.
1333  */
1334 mblk_t *
1335 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
1336     uint_t nsets, uint32_t seglen)
1337 {
1338 	mblk_t			*ftsn_mp;
1339 	sctp_chunk_hdr_t	*ch_hdr;
1340 	uint32_t		*advtsn;
1341 	uint16_t		schlen;
1342 	size_t			xtralen;
1343 	ftsn_entry_t		*ftsn_entry;
1344 
1345 	seglen += sizeof (sctp_chunk_hdr_t);
1346 	if (fp->isv4)
1347 		xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra;
1348 	else
1349 		xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra;
1350 	ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp));
1351 	if (ftsn_mp == NULL)
1352 		return (NULL);
1353 	ftsn_mp->b_rptr += xtralen;
1354 	ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
1355 
1356 	ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
1357 	ch_hdr->sch_id = CHUNK_FORWARD_TSN;
1358 	ch_hdr->sch_flags = 0;
1359 	/*
1360 	 * The cast here should not be an issue since seglen is
1361 	 * the length of the Forward TSN chunk.
1362 	 */
1363 	schlen = (uint16_t)seglen;
1364 	U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
1365 
1366 	advtsn = (uint32_t *)(ch_hdr + 1);
1367 	U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
1368 	ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
1369 	while (nsets > 0) {
1370 		ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
1371 		ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
1372 		ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
1373 		ftsn_entry++;
1374 		sets = sets->next;
1375 		nsets--;
1376 	}
1377 	return (ftsn_mp);
1378 }
1379 
1380 /*
1381  * Given a starting message, the routine steps through all the
1382  * messages whose TSN is less than sctp->sctp_adv_pap and creates
1383  * ftsn sets. The ftsn sets are then used to create a Forward TSN
1384  * chunk. All the messages, that have chunks that are included in the
1385  * ftsn sets, are flagged abandoned. If a message is partially sent
1386  * and is deemed abandoned, all remaining unsent chunks are marked
1387  * abandoned and are deducted from sctp_unsent.
 *
 *	meta, mp	message to start from and the chunk in it whose TSN
 *			is examined first.
 *	nmp		out: the packet (headers plus Forward TSN chunk) on
 *			success; set to NULL when the chunk or its headers
 *			cannot be built; left untouched when no message
 *			was stepped over.
 *	fp		destination for the packet; replaced locally by
 *			sctp_lastdata when a SACK is piggybacked.
 *	seglen		out: starts at sizeof (uint32_t), is grown by
 *			sctp_add_ftsn_set() and, once the headers are in
 *			place, by the length of the piggybacked SACK.
 *
 * Message state is only modified after the packet has been built, so a
 * failure needs no undo here.
1388  */
1389 void
1390 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
1391     sctp_faddr_t *fp, uint32_t *seglen)
1392 {
1393 	mblk_t		*mp1 = mp;
1394 	mblk_t		*mp_head = mp;
1395 	mblk_t		*meta_head = meta;
1396 	mblk_t		*head;
1397 	sctp_ftsn_set_t	*sets = NULL;
1398 	uint_t		nsets = 0;
1399 	uint16_t	clen;
1400 	sctp_data_hdr_t	*sdc;
1401 	uint32_t	sacklen;
1402 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1403 	uint32_t	unsent = 0;
1404 	boolean_t	ubit;
1405 
1406 	*seglen = sizeof (uint32_t);
1407 
	/*
	 * First pass: collect the sets. adv_pap trails the walk and holds
	 * the TSN of the last sent chunk that has been stepped over.
	 */
1408 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1409 	while (meta != NULL &&
1410 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1411 		/*
1412 		 * Skip adding FTSN sets for un-ordered messages as they do
1413 		 * not have SSNs.
1414 		 */
1415 		ubit = SCTP_DATA_GET_UBIT(sdc);
1416 		if (!ubit &&
1417 		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
			/*
			 * No room or no memory for this message: pull
			 * sctp_adv_pap back to what has been covered so
			 * far. Clearing meta makes the meta == meta_head
			 * test below fail, so the chunk is still built.
			 */
1418 			meta = NULL;
1419 			sctp->sctp_adv_pap = adv_pap;
1420 			goto ftsn_done;
1421 		}
1422 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1423 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1424 			adv_pap = ntohl(sdc->sdh_tsn);
1425 			mp1 = mp1->b_next;
1426 		}
1427 		meta = meta->b_next;
1428 		if (meta != NULL) {
1429 			mp1 = meta->b_cont;
1430 			if (!SCTP_CHUNK_ISSENT(mp1))
1431 				break;
1432 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1433 		}
1434 	}
1435 ftsn_done:
1436 	/*
1437 	 * Can't compare with sets == NULL, since we don't add any
1438 	 * sets for un-ordered messages.
1439 	 */
1440 	if (meta == meta_head)
1441 		return;
	/* The sets are copied into the chunk and are not needed afterwards. */
1442 	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
1443 	sctp_free_ftsn_set(sets);
1444 	if (*nmp == NULL)
1445 		return;
	/* Piggyback a pending SACK if it fits; it then goes to sctp_lastdata. */
1446 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1447 		sacklen = 0;
1448 	} else {
1449 		sacklen = sizeof (sctp_chunk_hdr_t) +
1450 		    sizeof (sctp_sack_chunk_t) +
1451 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1452 		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
1453 			/* piggybacked SACK doesn't fit */
1454 			sacklen = 0;
1455 		} else {
1456 			fp = sctp->sctp_lastdata;
1457 		}
1458 	}
1459 	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
1460 	if (head == NULL) {
1461 		freemsg(*nmp);
1462 		*nmp = NULL;
1463 		return;
1464 	}
1465 	*seglen += sacklen;
1466 	*nmp = head;
1467 
1468 	/*
1469 	 * XXXNeed to optimise this, the reason it is done here is so
1470 	 * that we don't have to undo in case of failure.
	 *
	 * Second pass over the same messages, starting again from
	 * meta_head/mp_head: flag them abandoned and abandon their
	 * unsent chunks.
1471 	 */
1472 	mp1 = mp_head;
1473 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1474 	while (meta_head != NULL &&
1475 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1476 		if (!SCTP_IS_MSG_ABANDONED(meta_head))
1477 			SCTP_MSG_SET_ABANDONED(meta_head);
1478 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1479 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1480 			if (!SCTP_CHUNK_ISACKED(mp1)) {
1481 				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
1482 				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
1483 				    meta_head);
1484 			}
1485 			mp1 = mp1->b_next;
1486 		}
		/* What is left of this message was never sent; abandon it. */
1487 		while (mp1 != NULL) {
1488 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1489 			if (!SCTP_CHUNK_ABANDONED(mp1)) {
1490 				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
1491 				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
1492 				SCTP_ABANDON_CHUNK(mp1);
1493 			}
1494 			mp1 = mp1->b_next;
1495 		}
1496 		meta_head = meta_head->b_next;
1497 		if (meta_head != NULL) {
1498 			mp1 = meta_head->b_cont;
1499 			if (!SCTP_CHUNK_ISSENT(mp1))
1500 				break;
1501 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1502 		}
1503 	}
1504 	if (unsent > 0) {
1505 		ASSERT(sctp->sctp_unsent >= unsent);
1506 		sctp->sctp_unsent -= unsent;
1507 		/*
1508 		 * Update ULP the amount of queued data, which is
1509 		 * sent-unack'ed + unsent.
1510 		 */
1511 		if (!SCTP_IS_DETACHED(sctp)) {
1512 			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
1513 			    sctp->sctp_unacked + sctp->sctp_unsent);
1514 		}
1515 	}
1516 }
1517 
1518 /*
1519  * This function steps through messages starting at meta and checks if
1520  * the message is abandoned. It stops when it hits an unsent chunk or
1521  * a message that has all its chunk acked. This is the only place
1522  * where the sctp_adv_pap is moved forward to indicated abandoned
1523  * messages.
1524  */
1525 void
1526 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
1527 {
1528 	uint32_t	tsn = sctp->sctp_adv_pap;
1529 	sctp_data_hdr_t	*sdc;
1530 	sctp_msg_hdr_t	*msg_hdr;
1531 
1532 	ASSERT(mp != NULL);
1533 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1534 	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
1535 	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1536 	if (!SCTP_IS_MSG_ABANDONED(meta) &&
1537 	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1538 		return;
1539 	}
1540 	while (meta != NULL) {
1541 		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
1542 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1543 			tsn = ntohl(sdc->sdh_tsn);
1544 			mp = mp->b_next;
1545 		}
1546 		if (mp != NULL)
1547 			break;
1548 		/*
1549 		 * We continue checking for successive messages only if there
1550 		 * is a chunk marked for retransmission. Else, we might
1551 		 * end up sending FTSN prematurely for chunks that have been
1552 		 * sent, but not yet acked.
1553 		 */
1554 		if ((meta = meta->b_next) != NULL) {
1555 			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1556 			if (!SCTP_IS_MSG_ABANDONED(meta) &&
1557 			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1558 				break;
1559 			}
1560 			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1561 				if (!SCTP_CHUNK_ISSENT(mp)) {
1562 					sctp->sctp_adv_pap = tsn;
1563 					return;
1564 				}
1565 				if (SCTP_CHUNK_WANT_REXMIT(mp))
1566 					break;
1567 			}
1568 			if (mp == NULL)
1569 				break;
1570 		}
1571 	}
1572 	sctp->sctp_adv_pap = tsn;
1573 }
1574 
1575 /*
1576  * Retransmit first segment which hasn't been acked with cumtsn or send
1577  * a Forward TSN chunk, if appropriate.
1578  */
1579 void
1580 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
1581 {
1582 	mblk_t		*mp;
1583 	mblk_t		*nmp = NULL;
1584 	mblk_t		*head;
1585 	mblk_t		*meta = sctp->sctp_xmit_head;
1586 	mblk_t		*fill;
1587 	uint32_t	seglen = 0;
1588 	uint32_t	sacklen;
1589 	uint16_t	chunklen;
1590 	int		extra;
1591 	sctp_data_hdr_t	*sdc;
1592 	sctp_faddr_t	*fp;
1593 	int		error;
1594 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1595 	boolean_t	do_ftsn = B_FALSE;
1596 	boolean_t	ftsn_check = B_TRUE;
1597 
1598 	while (meta != NULL) {
1599 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1600 			uint32_t	tsn;
1601 
1602 			if (!SCTP_CHUNK_ISSENT(mp))
1603 				goto window_probe;
1604 			/*
1605 			 * We break in the following cases -
1606 			 *
1607 			 *	if the advanced peer ack point includes the next
1608 			 *	chunk to be retransmited - possibly the Forward
1609 			 * 	TSN was lost.
1610 			 *
1611 			 *	if we are PRSCTP aware and the next chunk to be
1612 			 *	retransmitted is now abandoned
1613 			 *
1614 			 *	if the next chunk to be retransmitted is for
1615 			 *	the dest on which the timer went off. (this
1616 			 *	message is not abandoned).
1617 			 *
1618 			 * We check for Forward TSN only for the first
1619 			 * eligible chunk to be retransmitted. The reason
1620 			 * being if the first eligible chunk is skipped (say
1621 			 * it was sent to a destination other than oldfp)
1622 			 * then we cannot advance the cum TSN via Forward
1623 			 * TSN chunk.
1624 			 *
1625 			 * Also, ftsn_check is B_TRUE only for the first
1626 			 * eligible chunk, it  will be B_FALSE for all
1627 			 * subsequent candidate messages for retransmission.
1628 			 */
1629 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1630 			tsn = ntohl(sdc->sdh_tsn);
1631 			if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
1632 				if (sctp->sctp_prsctp_aware && ftsn_check) {
1633 					if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
1634 						ASSERT(sctp->sctp_prsctp_aware);
1635 						do_ftsn = B_TRUE;
1636 						goto out;
1637 					} else {
1638 						sctp_check_adv_ack_pt(sctp,
1639 						    meta, mp);
1640 						if (SEQ_GT(sctp->sctp_adv_pap,
1641 						    adv_pap)) {
1642 							do_ftsn = B_TRUE;
1643 							goto out;
1644 						}
1645 					}
1646 					ftsn_check = B_FALSE;
1647 				}
1648 				if (SCTP_CHUNK_DEST(mp) == oldfp)
1649 					goto out;
1650 			}
1651 		}
1652 		meta = meta->b_next;
1653 		if (meta != NULL && sctp->sctp_prsctp_aware) {
1654 			sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1655 
1656 			while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
1657 			    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
1658 				meta = meta->b_next;
1659 			}
1660 		}
1661 	}
1662 window_probe:
1663 	/*
1664 	 * Retransmit fired for a destination which didn't have
1665 	 * any unacked data pending.
1666 	 */
1667 	if (!sctp->sctp_unacked && sctp->sctp_unsent) {
1668 		/*
1669 		 * Send a window probe. Inflate frwnd to allow
1670 		 * sending one segment.
1671 		 */
1672 		if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) {
1673 			sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc);
1674 		}
1675 		BUMP_MIB(&sctp_mib, sctpOutWinProbe);
1676 		sctp_output(sctp);
1677 	}
1678 	return;
1679 out:
1680 	/*
1681 	 * Enter slowstart for this destination
1682 	 */
1683 	oldfp->ssthresh = oldfp->cwnd / 2;
1684 	if (oldfp->ssthresh < 2 * oldfp->sfa_pmss)
1685 		oldfp->ssthresh = 2 * oldfp->sfa_pmss;
1686 	oldfp->cwnd = oldfp->sfa_pmss;
1687 	oldfp->pba = 0;
1688 	fp = sctp_rotate_faddr(sctp, oldfp);
1689 	ASSERT(fp != NULL);
1690 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1691 
1692 	if (do_ftsn) {
1693 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
1694 		if (nmp == NULL) {
1695 			sctp->sctp_adv_pap = adv_pap;
1696 			goto restart_timer;
1697 		}
1698 		head = nmp;
1699 		mp = NULL;
1700 		meta = sctp->sctp_xmit_tail;
1701 		if (meta != NULL)
1702 			mp = meta->b_cont;
1703 		goto try_bundle;
1704 	}
1705 	seglen = ntohs(sdc->sdh_len);
1706 	chunklen = seglen - sizeof (*sdc);
1707 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1708 		extra = SCTP_ALIGN - extra;
1709 
1710 	/*
1711 	 * Cancel RTT measurement if the retransmitted TSN is before the
1712 	 * TSN used for timimg.
1713 	 */
1714 	if (sctp->sctp_out_time != 0 &&
1715 	    SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
1716 		sctp->sctp_out_time = 0;
1717 	}
1718 	/* Clear the counter as the RTT calculation may be off. */
1719 	fp->rtt_updates = 0;
1720 
1721 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1722 		sacklen = 0;
1723 	} else {
1724 		sacklen = sizeof (sctp_chunk_hdr_t) +
1725 		    sizeof (sctp_sack_chunk_t) +
1726 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1727 		if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
1728 			/* piggybacked SACK doesn't fit */
1729 			sacklen = 0;
1730 		} else {
1731 			fp = sctp->sctp_lastdata;
1732 		}
1733 	}
1734 
1735 	nmp = dupmsg(mp);
1736 	if (nmp == NULL)
1737 		goto restart_timer;
1738 	if (extra > 0) {
1739 		fill = sctp_get_padding(extra);
1740 		if (fill != NULL) {
1741 			linkb(nmp, fill);
1742 			seglen += extra;
1743 		} else {
1744 			freemsg(nmp);
1745 			goto restart_timer;
1746 		}
1747 	}
1748 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
1749 	head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
1750 	if (head == NULL) {
1751 		freemsg(nmp);
1752 		goto restart_timer;
1753 	}
1754 	seglen += sacklen;
1755 
1756 	SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1757 
1758 	mp = mp->b_next;
1759 try_bundle:
1760 	while (seglen < fp->sfa_pmss) {
1761 		int32_t new_len;
1762 
1763 		while (mp != NULL) {
1764 			if (SCTP_CHUNK_CANSEND(mp))
1765 				break;
1766 			mp = mp->b_next;
1767 		}
1768 		if (mp == NULL) {
1769 			meta = sctp_get_msg_to_send(sctp, &mp, meta->b_next,
1770 			    &error, 0, 0, oldfp);
1771 			if (error != 0 || meta == NULL)
1772 				break;
1773 			ASSERT(mp != NULL);
1774 			sctp->sctp_xmit_tail = meta;
1775 		}
1776 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1777 		chunklen = ntohs(sdc->sdh_len) - sizeof (*sdc);
1778 		new_len = seglen + ntohs(sdc->sdh_len);
1779 
1780 		if (seglen & (SCTP_ALIGN - 1)) {
1781 			extra = SCTP_ALIGN - (seglen & (SCTP_ALIGN - 1));
1782 
1783 			if (new_len + extra > fp->sfa_pmss) {
1784 				break;
1785 			}
1786 			fill = sctp_get_padding(extra);
1787 			if (fill != NULL) {
1788 				new_len += extra;
1789 				linkb(head, fill);
1790 			} else {
1791 				break;
1792 			}
1793 		} else {
1794 			if (new_len > fp->sfa_pmss) {
1795 				break;
1796 			}
1797 		}
1798 		if ((nmp = dupmsg(mp)) == NULL) {
1799 			break;
1800 		}
1801 		seglen = new_len;
1802 
1803 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
1804 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1805 		linkb(head, nmp);
1806 		mp = mp->b_next;
1807 	}
1808 	if ((seglen > fp->sfa_pmss) && fp->isv4) {
1809 		ipha_t *iph = (ipha_t *)head->b_rptr;
1810 
1811 		/*
1812 		 * Path MTU is different from path we thought it would
1813 		 * be when we created chunks, or IP headers have grown.
1814 		 * Need to clear the DF bit.
1815 		 */
1816 		iph->ipha_fragment_offset_and_flags = 0;
1817 	}
1818 	dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
1819 	    "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
1820 	    seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
1821 	    (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
1822 
1823 	sctp_set_iplen(sctp, head);
1824 	sctp_add_sendq(sctp, head);
1825 
1826 	/*
1827 	 * Restart timer with exponential backoff
1828 	 */
1829 restart_timer:
1830 	oldfp->strikes++;
1831 	sctp->sctp_strikes++;
1832 	SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
1833 	SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1834 	if (oldfp->suna != 0)
1835 		SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto);
1836 	sctp->sctp_active = lbolt64;
1837 }
1838 
1839 /*
1840  * The SCTP write put procedure called from IP.
1841  */
1842 void
1843 sctp_wput(queue_t *q, mblk_t *mp)
1844 {
1845 	uchar_t		*rptr;
1846 	t_scalar_t	type;
1847 
1848 	switch (mp->b_datap->db_type) {
1849 	case M_IOCTL:
1850 		sctp_wput_ioctl(q, mp);
1851 		break;
1852 	case M_DATA:
1853 		/* Should be handled in sctp_output() */
1854 		ASSERT(0);
1855 		freemsg(mp);
1856 		break;
1857 	case M_PROTO:
1858 	case M_PCPROTO:
1859 		rptr = mp->b_rptr;
1860 		if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) {
1861 			type = ((union T_primitives *)rptr)->type;
1862 			/*
1863 			 * There is no "standard" way on how to respond
1864 			 * to T_CAPABILITY_REQ if a module does not
1865 			 * understand it.  And the current TI mod
1866 			 * has problems handling an error ack.  So we
1867 			 * catch the request here and reply with a response
1868 			 * which the TI mod knows how to respond to.
1869 			 */
1870 			switch (type) {
1871 			case T_CAPABILITY_REQ:
1872 				(void) putnextctl1(RD(q), M_ERROR, EPROTO);
1873 				break;
1874 			default:
1875 				if ((mp = mi_tpi_err_ack_alloc(mp,
1876 				    TNOTSUPPORT, 0)) != NULL) {
1877 					qreply(q, mp);
1878 					return;
1879 				}
1880 			}
1881 		}
1882 		/* FALLTHRU */
1883 	default:
1884 		freemsg(mp);
1885 		return;
1886 	}
1887 }
1888