xref: /titanic_41/usr/src/uts/common/inet/sctp/sctp_output.c (revision 70025d765b044c6d8594bb965a2247a61e991a99)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/cmn_err.h>
33 #define	_SUN_TPI_VERSION 2
34 #include <sys/tihdr.h>
35 #include <sys/socket.h>
36 #include <sys/stropts.h>
37 #include <sys/strsun.h>
38 #include <sys/strsubr.h>
39 #include <sys/socketvar.h>
40 
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43 #include <netinet/tcp_seq.h>
44 #include <netinet/sctp.h>
45 
46 #include <inet/common.h>
47 #include <inet/mi.h>
48 #include <inet/ip.h>
49 #include <inet/ip6.h>
50 #include <inet/ip_ire.h>
51 #include <inet/sctp_ip.h>
52 #include <inet/ipclassifier.h>
53 
54 /*
55  * PR-SCTP comments.
56  *
57  * A message can expire before it gets to the transmit list (i.e. it is still
58  * in the unsent list - unchunked), after it gets to the transmit list, but
59  * before transmission has actually started, or after transmission has begun.
60  * Accordingly, we check for the status of a message in sctp_chunkify() when
61  * the message is being transferred from the unsent list to the transmit list;
62  * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
63  * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
64  * When we nuke a message in sctp_chunkify(), all we need to do is take it
65  * out of the unsent list and update sctp_unsent; when a message is deemed
66  * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
67  * list, update sctp_unsent IFF transmission for the message has not yet begun
68  * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
69  * message has started, then we cannot just take it out of the list, we need
70  * to send Forward TSN chunk to the peer so that the peer can clear its
71  * fragment list for this message. However, we cannot just send the Forward
72  * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
73  * messages preceding this abandoned message. So, we send a Forward TSN
74  * IFF all messages prior to this abandoned message have been SACKd; if not,
75  * we defer sending the Forward TSN to sctp_cumack(), which will check for
76  * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
77  * sctp_rexmit() when we check for retransmissions, we need to determine if
78  * the advanced peer ack point can be moved ahead, and if so, send a Forward
79  * TSN to the peer instead of retransmitting the chunk. Note that when
80  * we send a Forward TSN for a message, there may be yet unsent chunks for
81  * this message; we need to mark all such chunks as abandoned, so that
82  * sctp_cumack() can take the message out of the transmit list, additionally
83  * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
84  * decremented when a message/chunk is deemed abandoned), sockfs needs to
85  * be notified so that it can adjust its idea of the queued message.
86  */
87 
88 #include "sctp_impl.h"
89 
/*
 * kmem cache handle private to this file.
 * NOTE(review): judging by the name this presumably backs the Forward TSN
 * set allocations -- confirm against the code that creates and uses it.
 */
90 static struct kmem_cache	*sctp_kmem_ftsn_set_cache;
91 
/*
 * sctp_get_padding() dupb()s this mblk and advances the copy's b_wptr by the
 * pad length rather than allocating a new pad block each time.
 */
92 /* Padding mblk for SCTP chunks. */
93 mblk_t *sctp_pad_mp;
94 
/* Chain sanity check, only built and only called (from ASSERTs) under DEBUG. */
95 #ifdef	DEBUG
96 static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
97 #endif
98 
99 /*
100  * Called to allocate a header mblk when sending data to SCTP.
101  * Data will follow in b_cont of this mblk.
102  */
103 mblk_t *
104 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
105     int flags)
106 {
107 	mblk_t *mp;
108 	struct T_unitdata_req *tudr;
109 	size_t size;
110 	int error;
111 
112 	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
113 	size = MAX(size, sizeof (sctp_msg_hdr_t));
114 	if (flags & SCTP_CAN_BLOCK) {
115 		mp = allocb_wait(size, BPRI_MED, 0, &error);
116 	} else {
117 		mp = allocb(size, BPRI_MED);
118 	}
119 	if (mp) {
120 		tudr = (struct T_unitdata_req *)mp->b_rptr;
121 		tudr->PRIM_type = T_UNITDATA_REQ;
122 		tudr->DEST_length = nlen;
123 		tudr->DEST_offset = sizeof (*tudr);
124 		tudr->OPT_length = clen;
125 		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
126 		    _TPI_ALIGN_TOPT(nlen));
127 		if (nlen > 0)
128 			bcopy(name, tudr + 1, nlen);
129 		if (clen > 0)
130 			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
131 		mp->b_wptr += (tudr ->OPT_offset + clen);
132 		mp->b_datap->db_type = M_PROTO;
133 	}
134 	return (mp);
135 }
136 
137 /*ARGSUSED2*/
138 int
139 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
140 {
141 	sctp_faddr_t	*fp = NULL;
142 	struct T_unitdata_req	*tudr;
143 	int		error = 0;
144 	mblk_t		*mproto = mp;
145 	in6_addr_t	*addr;
146 	in6_addr_t	tmpaddr;
147 	uint16_t	sid = sctp->sctp_def_stream;
148 	uint32_t	ppid = sctp->sctp_def_ppid;
149 	uint32_t	context = sctp->sctp_def_context;
150 	uint16_t	msg_flags = sctp->sctp_def_flags;
151 	sctp_msg_hdr_t	*sctp_msg_hdr;
152 	uint32_t	msg_len = 0;
153 	uint32_t	timetolive = sctp->sctp_def_timetolive;
154 
155 	ASSERT(DB_TYPE(mproto) == M_PROTO);
156 
157 	mp = mp->b_cont;
158 	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);
159 
160 	tudr = (struct T_unitdata_req *)mproto->b_rptr;
161 	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
162 
163 	/* Get destination address, if specified */
164 	if (tudr->DEST_length > 0) {
165 		sin_t *sin;
166 		sin6_t *sin6;
167 
168 		sin = (struct sockaddr_in *)
169 		    (mproto->b_rptr + tudr->DEST_offset);
170 		switch (sin->sin_family) {
171 		case AF_INET:
172 			if (tudr->DEST_length < sizeof (*sin)) {
173 				return (EINVAL);
174 			}
175 			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
176 			addr = &tmpaddr;
177 			break;
178 		case AF_INET6:
179 			if (tudr->DEST_length < sizeof (*sin6)) {
180 				return (EINVAL);
181 			}
182 			sin6 = (struct sockaddr_in6 *)
183 			    (mproto->b_rptr + tudr->DEST_offset);
184 			addr = &sin6->sin6_addr;
185 			break;
186 		default:
187 			return (EAFNOSUPPORT);
188 		}
189 		fp = sctp_lookup_faddr(sctp, addr);
190 		if (fp == NULL) {
191 			return (EINVAL);
192 		}
193 	}
194 	/* Ancillary Data? */
195 	if (tudr->OPT_length > 0) {
196 		struct cmsghdr		*cmsg;
197 		char			*cend;
198 		struct sctp_sndrcvinfo	*sndrcv;
199 
200 		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
201 		cend = ((char *)cmsg + tudr->OPT_length);
202 		ASSERT(cend <= (char *)mproto->b_wptr);
203 
204 		for (;;) {
205 			if ((char *)(cmsg + 1) > cend ||
206 			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
207 				break;
208 			}
209 			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
210 			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
211 				if (cmsg->cmsg_len <
212 				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
213 					return (EINVAL);
214 				}
215 				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
216 				sid = sndrcv->sinfo_stream;
217 				msg_flags = sndrcv->sinfo_flags;
218 				ppid = sndrcv->sinfo_ppid;
219 				context = sndrcv->sinfo_context;
220 				timetolive = sndrcv->sinfo_timetolive;
221 				break;
222 			}
223 			if (cmsg->cmsg_len > 0)
224 				cmsg = CMSG_NEXT(cmsg);
225 			else
226 				break;
227 		}
228 	}
229 	if (msg_flags & MSG_ABORT) {
230 		if (mp && mp->b_cont) {
231 			mblk_t *pump = msgpullup(mp, -1);
232 			if (!pump) {
233 				return (ENOMEM);
234 			}
235 			freemsg(mp);
236 			mp = pump;
237 			mproto->b_cont = mp;
238 		}
239 		RUN_SCTP(sctp);
240 		sctp_user_abort(sctp, mp, B_TRUE);
241 		sctp_clean_death(sctp, ECONNRESET);
242 		freemsg(mproto);
243 		goto process_sendq;
244 	}
245 	if (mp == NULL)
246 		goto done;
247 
248 	RUN_SCTP(sctp);
249 
250 	/* Reject any new data requests if we are shutting down */
251 	if (sctp->sctp_state > SCTPS_ESTABLISHED) {
252 		error = EPIPE;
253 		goto unlock_done;
254 	}
255 
256 	/* Re-use the mproto to store relevant info. */
257 	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));
258 
259 	mproto->b_rptr = mproto->b_datap->db_base;
260 	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);
261 
262 	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
263 	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
264 	sctp_msg_hdr->smh_context = context;
265 	sctp_msg_hdr->smh_sid = sid;
266 	sctp_msg_hdr->smh_ppid = ppid;
267 	sctp_msg_hdr->smh_flags = msg_flags;
268 	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
269 	sctp_msg_hdr->smh_tob = lbolt64;
270 	for (; mp != NULL; mp = mp->b_cont)
271 		msg_len += MBLKL(mp);
272 	sctp_msg_hdr->smh_msglen = msg_len;
273 
274 	/* User requested specific destination */
275 	SCTP_SET_CHUNK_DEST(mproto, fp);
276 
277 	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
278 	    sid >= sctp->sctp_num_ostr) {
279 		/* Send sendfail event */
280 		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
281 		    B_FALSE);
282 		error = EINVAL;
283 		goto unlock_done;
284 	}
285 
286 	/* no data */
287 	if (msg_len == 0) {
288 		sctp_sendfail_event(sctp, dupmsg(mproto),
289 		    SCTP_ERR_NO_USR_DATA, B_FALSE);
290 		error = EINVAL;
291 		goto unlock_done;
292 	}
293 
294 	/* Add it to the unsent list */
295 	if (sctp->sctp_xmit_unsent == NULL) {
296 		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
297 	} else {
298 		sctp->sctp_xmit_unsent_tail->b_next = mproto;
299 		sctp->sctp_xmit_unsent_tail = mproto;
300 	}
301 	sctp->sctp_unsent += msg_len;
302 	BUMP_LOCAL(sctp->sctp_msgcount);
303 	if (sctp->sctp_state == SCTPS_ESTABLISHED)
304 		sctp_output(sctp);
305 process_sendq:
306 	WAKE_SCTP(sctp);
307 	sctp_process_sendq(sctp);
308 	return (0);
309 unlock_done:
310 	WAKE_SCTP(sctp);
311 done:
312 	return (error);
313 }
314 
/*
 * Move messages from the head of the unsent list (sctp->sctp_xmit_unsent) to
 * the end of the transmit list, cutting each message's data into DATA chunks
 * on the way.
 *
 * first_len is the room available for the first chunk, data chunk header
 * included; later chunks of a message are sized from the destination's
 * sfa_pmss.  bytes_to_send is reduced by the payload of every chunk built,
 * and another message is only started while it is still positive.
 *
 * The unsent list must not be empty on entry: mdblk->b_cont is read
 * unconditionally.
 *
 * Layout: the chunks of a message hang off the message header's b_cont and
 * are linked to each other through b_next; the mblks that make up a single
 * chunk are linked through b_cont.
 *
 * If dupb() or allocb() fails in the middle of a message, the function
 * returns with the message still on the unsent list.  The data that has not
 * been chunked yet is put back on b_cont; if chunks had already been built
 * they follow it via b_next and the message is marked with
 * SCTP_MSG_SET_CHUNKED(), which makes a later call pick up from there.
 *
 * A not-yet-chunked message for which SCTP_MSG_TO_BE_ABANDONED() is true is
 * taken off the unsent list without being chunked and handed to
 * sctp_sendfail_event().
 */
315 void
316 sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send)
317 {
318 	mblk_t			*mp;
319 	mblk_t			*chunk_mp;
320 	mblk_t			*chunk_head;
321 	mblk_t			*chunk_hdr;
322 	mblk_t			*chunk_tail = NULL;
323 	int			count;
324 	int			chunksize;
325 	sctp_data_hdr_t		*sdc;
326 	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
327 	sctp_faddr_t		*fp;
328 	sctp_faddr_t		*fp1;
329 	size_t			xtralen;
330 	sctp_msg_hdr_t		*msg_hdr;
331 
	/* No destination recorded on the message: use the current one. */
332 	fp = SCTP_CHUNK_DEST(mdblk);
333 	if (fp == NULL)
334 		fp = sctp->sctp_current;
	/*
	 * xtralen is the size of a separately allocated chunk header mblk:
	 * room for the IP/SCTP headers and sctp_wroff_xtra in front of the
	 * data chunk header itself.
	 */
335 	if (fp->isv4)
336 		xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra + sizeof (*sdc);
337 	else
338 		xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra + sizeof (*sdc);
	/* Payload room of the first chunk. */
339 	count = chunksize = first_len - sizeof (*sdc);
340 nextmsg:
341 	chunk_mp = mdblk->b_cont;
342 
343 	/*
344 	 * If this message is partially chunked, we ignore the first_len for
345 	 * now and use the one already present. For the unchunked bits, we
346 	 * use the length of the last chunk.
347 	 */
348 	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
349 		int	chunk_len;
350 
		/*
		 * b_cont is the unchunked remainder and the chunks built
		 * earlier follow it on b_next.  Detach the remainder and
		 * walk to the last chunk that was built.
		 */
351 		ASSERT(chunk_mp->b_next != NULL);
352 		mdblk->b_cont = chunk_mp->b_next;
353 		chunk_mp->b_next = NULL;
354 		SCTP_MSG_CLEAR_CHUNKED(mdblk);
355 		mp = mdblk->b_cont;
356 		while (mp->b_next != NULL)
357 			mp = mp->b_next;
358 		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
359 		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
360 			count = chunksize = fp->sfa_pmss - chunk_len;
361 		else
362 			count = chunksize = fp->sfa_pmss;
363 		count = chunksize = count - sizeof (*sdc);
364 	} else {
365 		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
366 		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
			/* Drop the message straight from the unsent list. */
367 			sctp->sctp_xmit_unsent = mdblk->b_next;
368 			if (sctp->sctp_xmit_unsent == NULL)
369 				sctp->sctp_xmit_unsent_tail = NULL;
370 			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
371 			sctp->sctp_unsent -= msg_hdr->smh_msglen;
372 			mdblk->b_next = NULL;
373 			BUMP_LOCAL(sctp->sctp_prsctpdrop);
374 			/*
375 			 * Update ULP the amount of queued data, which is
376 			 * sent-unack'ed + unsent.
377 			 */
378 			if (!SCTP_IS_DETACHED(sctp)) {
379 				sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
380 				    sctp->sctp_unacked + sctp->sctp_unsent);
381 			}
382 			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
383 			goto try_next;
384 		}
		/*
		 * chunk_mp holds the data from here on; b_cont is cleared so
		 * that it can collect the chunks as they are built.
		 */
385 		mdblk->b_cont = NULL;
386 	}
387 	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
388 nextchunk:
389 	chunk_head = chunk_mp;
390 	chunk_tail = NULL;
391 
392 	/* Skip as many mblk's as we need */
393 	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
394 		count -= MBLKL(chunk_mp);
395 		chunk_tail = chunk_mp;
396 		chunk_mp = chunk_mp->b_cont;
397 	}
398 	/* Split the chain, if needed */
399 	if (chunk_mp != NULL) {
400 		if (count > 0) {
			/*
			 * chunk_mp straddles the chunk boundary: the dupb()
			 * copy supplies the first 'count' bytes to this
			 * chunk and chunk_mp keeps the rest.
			 */
401 			mblk_t	*split_mp = dupb(chunk_mp);
402 
403 			if (split_mp == NULL) {
				/*
				 * Nothing has been cut yet, so chunk_head
				 * still leads to all of the remaining data;
				 * put it back in front of any chunks already
				 * built and give up for now.
				 */
404 				if (mdblk->b_cont == NULL) {
405 					mdblk->b_cont = chunk_head;
406 				} else  {
407 					SCTP_MSG_SET_CHUNKED(mdblk);
408 					ASSERT(chunk_head->b_next == NULL);
409 					chunk_head->b_next = mdblk->b_cont;
410 					mdblk->b_cont = chunk_head;
411 				}
412 				return;
413 			}
414 			if (chunk_tail != NULL) {
415 				chunk_tail->b_cont = split_mp;
416 				chunk_tail = chunk_tail->b_cont;
417 			} else {
418 				chunk_head = chunk_tail = split_mp;
419 			}
420 			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
421 			chunk_mp->b_rptr = chunk_tail->b_wptr;
422 			count = 0;
423 		} else if (chunk_tail == NULL) {
			/*
			 * No room is left and no mblk was consumed: start
			 * over with a full-sized chunk.
			 */
424 			goto next;
425 		} else {
			/* The chunk ends exactly on an mblk boundary. */
426 			chunk_tail->b_cont = NULL;
427 		}
428 	}
429 	/* Alloc chunk hdr, if needed */
	/*
	 * The data chunk header is written in front of the data in place
	 * unless the data block is shared, its start is not SCTP_ALIGN
	 * aligned, or MBLKHEAD() reports less than sizeof (*sdc).
	 */
430 	if (DB_REF(chunk_head) > 1 ||
431 	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
432 	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
433 		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/*
			 * Re-attach the data that was cut off for later
			 * chunks and put everything back on the message, in
			 * front of any chunks already built.
			 */
434 			if (mdblk->b_cont == NULL) {
435 				if (chunk_mp != NULL)
436 					linkb(chunk_head, chunk_mp);
437 				mdblk->b_cont = chunk_head;
438 			} else {
439 				SCTP_MSG_SET_CHUNKED(mdblk);
440 				if (chunk_mp != NULL)
441 					linkb(chunk_head, chunk_mp);
442 				ASSERT(chunk_head->b_next == NULL);
443 				chunk_head->b_next = mdblk->b_cont;
444 				mdblk->b_cont = chunk_head;
445 			}
446 			return;
447 		}
		/* Expose only the data chunk header at the end of the mblk. */
448 		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
449 		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
450 		chunk_hdr->b_cont = chunk_head;
451 	} else {
452 		chunk_hdr = chunk_head;
453 		chunk_hdr->b_rptr -= sizeof (*sdc);
454 	}
455 	ASSERT(chunk_hdr->b_datap->db_ref == 1);
456 	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
457 	sdc->sdh_id = CHUNK_DATA;
458 	sdc->sdh_flags = 0;
	/* chunksize - count is the payload actually placed in this chunk. */
459 	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
460 	ASSERT(sdc->sdh_len);
461 	sdc->sdh_sid = htons(msg_hdr->smh_sid);
462 	/*
463 	 * We defer assigning the SSN just before sending the chunk, else
464 	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
465 	 * to send a Forward TSN to let the peer know. Some more comments
466 	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
467 	 */
468 	sdc->sdh_payload_id = msg_hdr->smh_ppid;
469 
	/* First chunk of the message gets the B bit, others are appended. */
470 	if (mdblk->b_cont == NULL) {
471 		mdblk->b_cont = chunk_hdr;
472 		SCTP_DATA_SET_BBIT(sdc);
473 	} else {
474 		mp = mdblk->b_cont;
475 		while (mp->b_next != NULL)
476 			mp = mp->b_next;
477 		mp->b_next = chunk_hdr;
478 	}
479 
480 	bytes_to_send -= (chunksize - count);
481 	if (chunk_mp != NULL) {
		/* More data left in this message: full-sized chunks from now. */
482 next:
483 		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
484 		goto nextchunk;
485 	}
	/* The message is fully chunked; sdc is its last chunk. */
486 	SCTP_DATA_SET_EBIT(sdc);
487 	sctp->sctp_xmit_unsent = mdblk->b_next;
488 	if (mdblk->b_next == NULL) {
489 		sctp->sctp_xmit_unsent_tail = NULL;
490 	}
491 	mdblk->b_next = NULL;
492 
	/*
	 * Append to the end of the transmit list.  sctp_xmit_tail is only
	 * set here when the list was empty; otherwise it is left where it
	 * is and the list is walked from it to find the end.
	 */
493 	if (sctp->sctp_xmit_tail == NULL) {
494 		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
495 	} else {
496 		mp = sctp->sctp_xmit_tail;
497 		while (mp->b_next != NULL)
498 			mp = mp->b_next;
499 		mp->b_next = mdblk;
500 		mdblk->b_prev = mp;
501 	}
502 try_next:
503 	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
504 		mdblk = sctp->sctp_xmit_unsent;
505 		fp1 = SCTP_CHUNK_DEST(mdblk);
506 		if (fp1 == NULL)
507 			fp1 = sctp->sctp_current;
508 		if (fp == fp1) {
			/*
			 * Same destination.  count is the room left over
			 * from the previous chunk: reuse it (less a chunk
			 * header, rounded down to a multiple of 4) when the
			 * next message's first mblk either fits in it or is
			 * larger than a full chunk anyway; otherwise start
			 * with a full-sized chunk.
			 */
509 			size_t len = MBLKL(mdblk->b_cont);
510 			if ((count > 0) &&
511 			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
512 				(len <= count))) {
513 				count -= sizeof (*sdc);
514 				count = chunksize = count - (count & 0x3);
515 			} else {
516 				count = chunksize = fp->sfa_pmss -
517 				    sizeof (*sdc);
518 			}
519 		} else {
			/*
			 * Destination changed: recompute the header room and
			 * the chunk size for the new address.
			 */
520 			if (fp1->isv4)
521 				xtralen = sctp->sctp_hdr_len;
522 			else
523 				xtralen = sctp->sctp_hdr6_len;
524 			xtralen += sctp_wroff_xtra + sizeof (*sdc);
525 			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
526 			fp = fp1;
527 		}
528 		goto nextmsg;
529 	}
530 }
531 
532 void
533 sctp_free_msg(mblk_t *ump)
534 {
535 	mblk_t *mp, *nmp;
536 
537 	for (mp = ump->b_cont; mp; mp = nmp) {
538 		nmp = mp->b_next;
539 		mp->b_next = mp->b_prev = NULL;
540 		freemsg(mp);
541 	}
542 	ASSERT(!ump->b_prev);
543 	ump->b_next = NULL;
544 	freeb(ump);
545 }
546 
547 mblk_t *
548 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
549     int *error)
550 {
551 	int hdrlen;
552 	char *hdr;
553 	int isv4 = fp->isv4;
554 
555 	if (error != NULL)
556 		*error = 0;
557 
558 	if (isv4) {
559 		hdrlen = sctp->sctp_hdr_len;
560 		hdr = sctp->sctp_iphc;
561 	} else {
562 		hdrlen = sctp->sctp_hdr6_len;
563 		hdr = sctp->sctp_iphc6;
564 	}
565 	/*
566 	 * A null fp->ire could mean that the address is 'down'. Similarly,
567 	 * it is possible that the address went down, we tried to send an
568 	 * heartbeat and ended up setting fp->saddr as unspec because we
569 	 * didn't have any usable source address. In either case
570 	 * sctp_ire2faddr() will try find an IRE, if available, and set
571 	 * the source address, if needed. If we still don't have any
572 	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
573 	 * we return EHOSTUNREACH.
574 	 */
575 	if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
576 		sctp_ire2faddr(sctp, fp);
577 		if (fp->state == SCTP_FADDRS_UNREACH) {
578 			if (error != NULL)
579 				*error = EHOSTUNREACH;
580 			return (NULL);
581 		}
582 	}
583 	/* Copy in IP header. */
584 	if ((mp->b_rptr - mp->b_datap->db_base) <
585 	    (sctp_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) {
586 		mblk_t *nmp;
587 		/*
588 		 * This can happen if IP headers are adjusted after
589 		 * data was moved into chunks, or during retransmission,
590 		 * or things like snoop is running.
591 		 */
592 		nmp = allocb(sctp_wroff_xtra + hdrlen + sacklen, BPRI_MED);
593 		if (nmp == NULL) {
594 			if (error !=  NULL)
595 				*error = ENOMEM;
596 			return (NULL);
597 		}
598 		nmp->b_rptr += sctp_wroff_xtra;
599 		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
600 		nmp->b_cont = mp;
601 		mp = nmp;
602 	} else {
603 		mp->b_rptr -= (hdrlen + sacklen);
604 	}
605 	bcopy(hdr, mp->b_rptr, hdrlen);
606 	if (sacklen) {
607 		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
608 	}
609 	if (fp != sctp->sctp_current) {
610 		/* change addresses in header */
611 		if (isv4) {
612 			ipha_t *iph = (ipha_t *)mp->b_rptr;
613 
614 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
615 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
616 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
617 				    iph->ipha_src);
618 			} else if (sctp->sctp_bound_to_all) {
619 				iph->ipha_src = INADDR_ANY;
620 			}
621 		} else {
622 			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
623 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
624 				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
625 			} else if (sctp->sctp_bound_to_all) {
626 				V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src);
627 			}
628 		}
629 	}
630 	/*
631 	 * IP will not free this IRE if it is condemned.  SCTP needs to
632 	 * free it.
633 	 */
634 	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
635 		IRE_REFRELE_NOTR(fp->ire);
636 		fp->ire = NULL;
637 	}
638 
639 	/* Stash the conn and ire ptr info for IP */
640 	SCTP_STASH_IPINFO(mp, fp->ire);
641 
642 	return (mp);
643 }
644 
645 /*
646  * SCTP requires every chunk to be padded so that the total length
647  * is a multiple of SCTP_ALIGN.  This function returns a mblk with
648  * the specified pad length.
649  */
650 static mblk_t *
651 sctp_get_padding(int pad)
652 {
653 	mblk_t *fill;
654 
655 	ASSERT(pad < SCTP_ALIGN);
656 	if ((fill = dupb(sctp_pad_mp)) != NULL) {
657 		fill->b_wptr += pad;
658 		return (fill);
659 	}
660 
661 	/*
662 	 * The memory saving path of reusing the sctp_pad_mp
663 	 * fails may be because it has been dupb() too
664 	 * many times (DBLK_REFMAX).  Use the memory consuming
665 	 * path of allocating the pad mblk.
666 	 */
667 	if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
668 		/* Zero it out.  SCTP_ALIGN is sizeof (int32_t) */
669 		*(int32_t *)fill->b_rptr = 0;
670 		fill->b_wptr += pad;
671 	}
672 	return (fill);
673 }
674 
675 static mblk_t *
676 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
677 {
678 	mblk_t		*meta;
679 	mblk_t		*start_mp = NULL;
680 	mblk_t		*end_mp = NULL;
681 	mblk_t		*mp, *nmp;
682 	mblk_t		*fill;
683 	sctp_data_hdr_t	*sdh;
684 	int		msglen;
685 	int		extra;
686 	sctp_msg_hdr_t	*msg_hdr;
687 
688 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
689 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
690 		if (SCTP_IS_MSG_ABANDONED(meta) ||
691 		    SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
692 			continue;
693 		}
694 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
695 			if (SCTP_CHUNK_WANT_REXMIT(mp)) {
696 				/*
697 				 * Use the same peer address to do fast
698 				 * retransmission.
699 				 */
700 				if (*fp == NULL) {
701 					*fp = SCTP_CHUNK_DEST(mp);
702 					if ((*fp)->state != SCTP_FADDRS_ALIVE)
703 						*fp = sctp->sctp_current;
704 				} else if (*fp != SCTP_CHUNK_DEST(mp)) {
705 					continue;
706 				}
707 
708 				sdh = (sctp_data_hdr_t *)mp->b_rptr;
709 				msglen = ntohs(sdh->sdh_len);
710 				if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
711 					extra = SCTP_ALIGN - extra;
712 				}
713 
714 				/*
715 				 * We still return at least the first message
716 				 * even if that message cannot fit in as
717 				 * PMTU may have changed.
718 				 */
719 				if (*total + msglen + extra >
720 				    (*fp)->sfa_pmss && start_mp != NULL) {
721 					return (start_mp);
722 				}
723 				if ((nmp = dupmsg(mp)) == NULL)
724 					return (start_mp);
725 				if (extra > 0) {
726 					fill = sctp_get_padding(extra);
727 					if (fill != NULL) {
728 						linkb(nmp, fill);
729 					} else {
730 						return (start_mp);
731 					}
732 				}
733 				BUMP_MIB(&sctp_mib, sctpOutFastRetrans);
734 				SCTP_CHUNK_CLEAR_REXMIT(mp);
735 				if (start_mp == NULL) {
736 					start_mp = nmp;
737 				} else {
738 					linkb(end_mp, nmp);
739 				}
740 				end_mp = nmp;
741 				*total += msglen + extra;
742 				dprint(2, ("sctp_find_fast_rexmit_mblks: "
743 				    "tsn %x\n", sdh->sdh_tsn));
744 			}
745 		}
746 	}
747 	/* Clear the flag as there is no more message to be fast rexmitted. */
748 	sctp->sctp_chk_fast_rexmit = B_FALSE;
749 	return (start_mp);
750 }
751 
/*
 * Debug-only sanity check on a b_next linked chain: B_TRUE if 'tail' can be
 * reached from 'head' (or if either of them is NULL, in which case there is
 * nothing to check), B_FALSE if the chain ends before 'tail' is found.
 */
#ifdef	DEBUG
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
	mblk_t	*cur;

	if (head == NULL || tail == NULL)
		return (B_TRUE);

	for (cur = head; cur != NULL; cur = cur->b_next) {
		if (cur == tail)
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif
769 
770 /*
771  * Gets the next unsent chunk to transmit. Messages that are abandoned are
772  * skipped. A message can be abandoned if it has a non-zero timetolive and
773  * transmission has not yet started or if it is a partially reliable
774  * message and its time is up (assuming we are PR-SCTP aware).
775  * 'cansend' is used to determine if need to try and chunkify messages from
776  * the unsent list, if any, and also as an input to sctp_chunkify() if so.
777  * When called from sctp_rexmit(), we don't want to chunkify, so 'cansend'
778  * will be set to 0.
 *
 * 'meta' is the message on the transmit list at which the scan starts.
 * 'firstseg' is subtracted from the destination's sfa_pmss to give the
 * first_len argument of sctp_chunkify().
 * 'fp', when not NULL, limits chunkifying: nothing is chunkified if the
 * first unsent message names a different destination that is
 * SCTP_FADDRS_ALIVE.
 *
 * Returns the message header mblk of the message that has a sendable
 * chunk, with *mp set to that chunk.  Returns NULL when there is nothing
 * to send, or when sctp_check_abandoned_msg() fails, in which case *error
 * holds its return value; *error is 0 otherwise.
779  */
780 mblk_t *
781 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
782     int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp)
783 {
784 	mblk_t		*mp1;
785 	sctp_msg_hdr_t	*msg_hdr;
786 	mblk_t		*tmp_meta;
787 	sctp_faddr_t	*fp1;
788 
789 	ASSERT(error != NULL && mp != NULL);
790 	*error = 0;
791 
792 	ASSERT(sctp->sctp_current != NULL);
793 
794 chunkified:
795 	while (meta != NULL) {
		/* Remember the successor: meta may be unlinked below. */
796 		tmp_meta = meta->b_next;
797 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
798 		mp1 = meta->b_cont;
799 		if (SCTP_IS_MSG_ABANDONED(meta))
800 			goto next_msg;
801 		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			/* Live message: hand back its first sendable chunk. */
802 			while (mp1 != NULL) {
803 				if (SCTP_CHUNK_CANSEND(mp1)) {
804 					*mp = mp1;
805 #ifdef	DEBUG
806 					ASSERT(sctp_verify_chain(
807 					    sctp->sctp_xmit_head, meta));
808 #endif
809 					return (meta);
810 				}
811 				mp1 = mp1->b_next;
812 			}
813 			goto next_msg;
814 		}
815 		/*
816 		 * If we come here and the first chunk is sent, then we
817 		 * are PR-SCTP aware, in which case if the cumulative
818 		 * TSN has moved up to or beyond the first chunk (which
819 		 * means all the previous messages have been cumulative
820 		 * SACK'd), then we send a Forward TSN with the last
821 		 * chunk that was sent in this message. If we can't send
822 		 * a Forward TSN because previous non-abandoned messages
823 		 * have not been acked then we will defer the Forward TSN
824 		 * to sctp_rexmit() or sctp_cumack().
825 		 */
826 		if (SCTP_CHUNK_ISSENT(mp1)) {
827 			*error = sctp_check_abandoned_msg(sctp, meta);
828 			if (*error != 0) {
829 #ifdef	DEBUG
830 				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
831 				    sctp->sctp_xmit_tail));
832 #endif
833 				return (NULL);
834 			}
835 			goto next_msg;
836 		}
		/*
		 * Transmission of this message has not started, so it can
		 * simply be unlinked from the transmit list.  Three cases:
		 * meta is the head, meta is the last element, or meta sits
		 * in the middle.  sctp_xmit_tail is moved off meta if it
		 * pointed there.
		 */
837 		BUMP_LOCAL(sctp->sctp_prsctpdrop);
838 		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
839 		if (meta->b_prev == NULL) {
840 			ASSERT(sctp->sctp_xmit_head == meta);
841 			sctp->sctp_xmit_head = tmp_meta;
842 			if (sctp->sctp_xmit_tail == meta)
843 				sctp->sctp_xmit_tail = tmp_meta;
844 			meta->b_next = NULL;
845 			if (tmp_meta != NULL)
846 				tmp_meta->b_prev = NULL;
847 		} else if (meta->b_next == NULL) {
848 			if (sctp->sctp_xmit_tail == meta)
849 				sctp->sctp_xmit_tail = meta->b_prev;
850 			meta->b_prev->b_next = NULL;
851 			meta->b_prev = NULL;
852 		} else {
853 			meta->b_prev->b_next = tmp_meta;
854 			tmp_meta->b_prev = meta->b_prev;
855 			if (sctp->sctp_xmit_tail == meta)
856 				sctp->sctp_xmit_tail = tmp_meta;
857 			meta->b_prev = NULL;
858 			meta->b_next = NULL;
859 		}
860 		sctp->sctp_unsent -= msg_hdr->smh_msglen;
861 		/*
862 		 * Update ULP the amount of queued data, which is
863 		 * sent-unack'ed + unsent.
864 		 */
865 		if (!SCTP_IS_DETACHED(sctp)) {
866 			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
867 			    sctp->sctp_unacked + sctp->sctp_unsent);
868 		}
		/* The unlinked message is handed over with the event. */
869 		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
870 next_msg:
871 		meta = tmp_meta;
872 	}
873 	/* chunkify, if needed */
874 	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
875 		ASSERT(sctp->sctp_unsent > 0);
876 		if (fp == NULL) {
			/*
			 * No destination given: use the one recorded on the
			 * first unsent message if it is alive, else the
			 * current one.
			 */
877 			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
878 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
879 				fp = sctp->sctp_current;
880 		} else {
881 			/*
882 			 * If user specified destination, try to honor that.
883 			 */
884 			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
885 			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
886 			    fp1 != fp) {
887 				goto chunk_done;
888 			}
889 		}
890 		sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend);
891 		if ((meta = sctp->sctp_xmit_tail) == NULL)
892 			goto chunk_done;
893 		/*
894 		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
895 		 * new chunk(s) to the tail, so we need to skip the
896 		 * sctp_xmit_tail, which would have already been processed.
897 		 * This could happen when there is unacked chunks, but
898 		 * nothing new to send.
899 		 * When sctp_chunkify() is called when the transmit queue
900 		 * is empty then we need to start from sctp_xmit_tail.
901 		 */
902 		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
903 #ifdef	DEBUG
904 			mp1 = sctp->sctp_xmit_tail->b_cont;
905 			while (mp1 != NULL) {
906 				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
907 				mp1 = mp1->b_next;
908 			}
909 #endif
910 			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
911 				goto chunk_done;
912 		}
		/* Rescan, starting at the freshly chunkified message(s). */
913 		goto chunkified;
914 	}
915 chunk_done:
916 #ifdef	DEBUG
917 	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
918 #endif
919 	return (NULL);
920 }
921 
922 void
923 sctp_fast_rexmit(sctp_t *sctp)
924 {
925 	mblk_t		*mp, *head;
926 	int		pktlen = 0;
927 	sctp_faddr_t	*fp = NULL;
928 
929 	ASSERT(sctp->sctp_xmit_head != NULL);
930 	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
931 	if (mp == NULL)
932 		return;
933 	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
934 		freemsg(mp);
935 		return;
936 	}
937 	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
938 		ipha_t *iph = (ipha_t *)head->b_rptr;
939 
940 		iph->ipha_fragment_offset_and_flags = 0;
941 	}
942 
943 	sctp_set_iplen(sctp, head);
944 	sctp_add_sendq(sctp, head);
945 	sctp->sctp_active = fp->lastactive = lbolt64;
946 }
947 
948 void
949 sctp_output(sctp_t *sctp)
950 {
951 	mblk_t			*mp = NULL;
952 	mblk_t			*nmp;
953 	mblk_t			*head;
954 	mblk_t			*meta = sctp->sctp_xmit_tail;
955 	mblk_t			*fill = NULL;
956 	uint16_t 		chunklen;
957 	uint32_t 		cansend;
958 	int32_t			seglen;
959 	int32_t			xtralen;
960 	int32_t			sacklen;
961 	int32_t			pad = 0;
962 	int32_t			pathmax;
963 	int			extra;
964 	int64_t			now = lbolt64;
965 	sctp_faddr_t		*fp;
966 	sctp_faddr_t		*lfp;
967 	sctp_data_hdr_t		*sdc;
968 	int			error;
969 	boolean_t		notsent = B_TRUE;
970 
971 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
972 		sacklen = 0;
973 	} else {
974 		/* send a SACK chunk */
975 		sacklen = sizeof (sctp_chunk_hdr_t) +
976 		    sizeof (sctp_sack_chunk_t) +
977 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
978 		lfp = sctp->sctp_lastdata;
979 		ASSERT(lfp != NULL);
980 		if (lfp->state != SCTP_FADDRS_ALIVE)
981 			lfp = sctp->sctp_current;
982 	}
983 
984 	cansend = sctp->sctp_frwnd;
985 	if (sctp->sctp_unsent < cansend)
986 		cansend = sctp->sctp_unsent;
987 	if ((cansend < sctp->sctp_current->sfa_pmss / 2) &&
988 	    sctp->sctp_unacked &&
989 	    (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) &&
990 	    !sctp->sctp_ndelay) {
991 		head = NULL;
992 		fp = sctp->sctp_current;
993 		goto unsent_data;
994 	}
995 	if (meta != NULL)
996 		mp = meta->b_cont;
997 	while (cansend > 0) {
998 		pad = 0;
999 
1000 		/*
1001 		 * Find first segment eligible for transmit.
1002 		 */
1003 		while (mp != NULL) {
1004 			if (SCTP_CHUNK_CANSEND(mp))
1005 				break;
1006 			mp = mp->b_next;
1007 		}
1008 		if (mp == NULL) {
1009 			meta = sctp_get_msg_to_send(sctp, &mp,
1010 			    meta == NULL ? NULL : meta->b_next, &error, sacklen,
1011 			    cansend, NULL);
1012 			if (error != 0 || meta == NULL) {
1013 				head = NULL;
1014 				fp = sctp->sctp_current;
1015 				goto unsent_data;
1016 			}
1017 			sctp->sctp_xmit_tail =  meta;
1018 		}
1019 
1020 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1021 		seglen = ntohs(sdc->sdh_len);
1022 		xtralen = sizeof (*sdc);
1023 		chunklen = seglen - xtralen;
1024 
1025 		/*
1026 		 * Check rwnd.
1027 		 */
1028 		if (chunklen > cansend) {
1029 			head = NULL;
1030 			fp = SCTP_CHUNK_DEST(meta);
1031 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
1032 				fp = sctp->sctp_current;
1033 			goto unsent_data;
1034 		}
1035 		if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1036 			extra = SCTP_ALIGN - extra;
1037 
1038 		/*
1039 		 * Pick destination address, and check cwnd.
1040 		 */
1041 		if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) &&
1042 		    (seglen + sacklen + extra <= lfp->sfa_pmss)) {
1043 			/*
1044 			 * Only include SACK chunk if it can be bundled
1045 			 * with a data chunk, and sent to sctp_lastdata.
1046 			 */
1047 			pathmax = lfp->cwnd - lfp->suna;
1048 
1049 			fp = lfp;
1050 			if ((nmp = dupmsg(mp)) == NULL) {
1051 				head = NULL;
1052 				goto unsent_data;
1053 			}
1054 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1055 			head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
1056 			    &error);
1057 			if (head == NULL) {
1058 				/*
1059 				 * If none of the source addresses are
1060 				 * available (i.e error == EHOSTUNREACH),
1061 				 * pretend we have sent the data. We will
1062 				 * eventually time out trying to retramsmit
1063 				 * the data if the interface never comes up.
1064 				 * If we have already sent some stuff (i.e.,
1065 				 * notsent is B_FALSE) then we are fine, else
1066 				 * just mark this packet as sent.
1067 				 */
1068 				if (notsent && error == EHOSTUNREACH) {
1069 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1070 					    fp, chunklen, meta);
1071 				}
1072 				freemsg(nmp);
1073 				goto unsent_data;
1074 			}
1075 			seglen += sacklen;
1076 			xtralen += sacklen;
1077 			sacklen = 0;
1078 		} else {
1079 			fp = SCTP_CHUNK_DEST(meta);
1080 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
1081 				fp = sctp->sctp_current;
1082 			/*
1083 			 * If we haven't sent data to this destination for
1084 			 * a while, do slow start again.
1085 			 */
1086 			if (now - fp->lastactive > fp->rto) {
1087 				fp->cwnd = sctp_slow_start_after_idle *
1088 				    fp->sfa_pmss;
1089 			}
1090 			fp->lastactive = now;
1091 
1092 			pathmax = fp->cwnd - fp->suna;
1093 			if (seglen + extra > pathmax) {
1094 				head = NULL;
1095 				goto unsent_data;
1096 			}
1097 			if ((nmp = dupmsg(mp)) == NULL) {
1098 				head = NULL;
1099 				goto unsent_data;
1100 			}
1101 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1102 			head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
1103 			if (head == NULL) {
1104 				/*
1105 				 * If none of the source addresses are
1106 				 * available (i.e error == EHOSTUNREACH),
1107 				 * pretend we have sent the data. We will
1108 				 * eventually time out trying to retramsmit
1109 				 * the data if the interface never comes up.
1110 				 * If we have already sent some stuff (i.e.,
1111 				 * notsent is B_FALSE) then we are fine, else
1112 				 * just mark this packet as sent.
1113 				 */
1114 				if (notsent && error == EHOSTUNREACH) {
1115 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1116 					    fp, chunklen, meta);
1117 				}
1118 				freemsg(nmp);
1119 				goto unsent_data;
1120 			}
1121 		}
1122 		if (pathmax > fp->sfa_pmss)
1123 			pathmax = fp->sfa_pmss;
1124 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1125 		mp = mp->b_next;
1126 
1127 		/* Use this chunk to measure RTT? */
1128 		if (sctp->sctp_out_time == 0) {
1129 			sctp->sctp_out_time = now;
1130 			sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1;
1131 		}
1132 		if (extra > 0) {
1133 			fill = sctp_get_padding(extra);
1134 			if (fill != NULL) {
1135 				linkb(head, fill);
1136 				pad = extra;
1137 				seglen += extra;
1138 			} else {
1139 				goto unsent_data;
1140 			}
1141 		}
1142 		/* See if we can bundle more. */
1143 		while (seglen < pathmax) {
1144 			int32_t		new_len;
1145 			int32_t		new_xtralen;
1146 
1147 			while (mp != NULL) {
1148 				if (SCTP_CHUNK_CANSEND(mp))
1149 					break;
1150 				mp = mp->b_next;
1151 			}
1152 			if (mp == NULL) {
1153 				meta = sctp_get_msg_to_send(sctp, &mp,
1154 				    meta->b_next, &error, seglen,
1155 				    (seglen - xtralen) >= cansend ? 0 :
1156 				    cansend - seglen, fp);
1157 				if (error != 0 || meta == NULL)
1158 					break;
1159 				sctp->sctp_xmit_tail =  meta;
1160 			}
1161 			ASSERT(mp != NULL);
1162 			if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
1163 			    fp != SCTP_CHUNK_DEST(meta)) {
1164 				break;
1165 			}
1166 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1167 			chunklen = ntohs(sdc->sdh_len);
1168 			if ((extra = chunklen  & (SCTP_ALIGN - 1)) != 0)
1169 				extra = SCTP_ALIGN - extra;
1170 
1171 			new_len = seglen + chunklen;
1172 			new_xtralen = xtralen + sizeof (*sdc);
1173 			chunklen -= sizeof (*sdc);
1174 
1175 			if (new_len - new_xtralen > cansend ||
1176 			    new_len + extra > pathmax) {
1177 				break;
1178 			}
1179 			if ((nmp = dupmsg(mp)) == NULL)
1180 				break;
1181 			if (extra > 0) {
1182 				fill = sctp_get_padding(extra);
1183 				if (fill != NULL) {
1184 					pad += extra;
1185 					new_len += extra;
1186 					linkb(nmp, fill);
1187 				} else {
1188 					freemsg(nmp);
1189 					break;
1190 				}
1191 			}
1192 			seglen = new_len;
1193 			xtralen = new_xtralen;
1194 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1195 			SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1196 			linkb(head, nmp);
1197 			mp = mp->b_next;
1198 		}
1199 		if ((seglen > fp->sfa_pmss) && fp->isv4) {
1200 			ipha_t *iph = (ipha_t *)head->b_rptr;
1201 
1202 			/*
1203 			 * Path MTU is different from what we thought it would
1204 			 * be when we created chunks, or IP headers have grown.
1205 			 * Need to clear the DF bit.
1206 			 */
1207 			iph->ipha_fragment_offset_and_flags = 0;
1208 		}
1209 		/* xmit segment */
1210 		ASSERT(cansend >= seglen - pad - xtralen);
1211 		cansend -= (seglen - pad - xtralen);
1212 		dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
1213 			"ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
1214 			seglen - xtralen, ntohl(sdc->sdh_tsn),
1215 			ntohs(sdc->sdh_ssn), fp, sctp->sctp_frwnd, cansend,
1216 			sctp->sctp_lastack_rxd));
1217 		sctp_set_iplen(sctp, head);
1218 		sctp_add_sendq(sctp, head);
1219 		/* arm rto timer (if not set) */
1220 		if (!fp->timer_running)
1221 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1222 		notsent = B_FALSE;
1223 	}
1224 	sctp->sctp_active = now;
1225 	return;
1226 unsent_data:
1227 	/* arm persist timer (if rto timer not set) */
1228 	if (!fp->timer_running)
1229 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1230 	if (head != NULL)
1231 		freemsg(head);
1232 }
1233 
1234 /*
1235  * The following two functions initialize and destroy the cache
1236  * associated with the sets used for PR-SCTP.
1237  */
1238 void
1239 sctp_ftsn_sets_init(void)
1240 {
1241 	sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
1242 	    sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
1243 	    NULL, 0);
1244 }
1245 
1246 void
1247 sctp_ftsn_sets_fini(void)
1248 {
1249 	kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
1250 }
1251 
1252 
1253 /* Free PR-SCTP sets */
1254 void
1255 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
1256 {
1257 	sctp_ftsn_set_t *p;
1258 
1259 	while (s != NULL) {
1260 		p = s->next;
1261 		s->next = NULL;
1262 		kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
1263 		s = p;
1264 	}
1265 }
1266 
1267 /*
1268  * Given a message meta block, meta, this routine creates or modifies
1269  * the set that will be used to generate a Forward TSN chunk. If the
1270  * entry for stream id, sid, for this message already exists, the
1271  * sequence number, ssn, is updated if it is greater than the existing
1272  * one. If an entry for this sid does not exist, one is created if
1273  * the size does not exceed fp->sfa_pmss. We return false in case
1274  * or an error.
1275  */
1276 boolean_t
1277 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
1278     uint_t *nsets, uint32_t *slen)
1279 {
1280 	sctp_ftsn_set_t		*p;
1281 	sctp_msg_hdr_t		*msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1282 	uint16_t		sid = htons(msg_hdr->smh_sid);
1283 	/* msg_hdr->smh_ssn is already in NBO */
1284 	uint16_t		ssn = msg_hdr->smh_ssn;
1285 
1286 	ASSERT(s != NULL && nsets != NULL);
1287 	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
1288 
1289 	if (*s == NULL) {
1290 		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
1291 		*s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
1292 		if (*s == NULL)
1293 			return (B_FALSE);
1294 		(*s)->ftsn_entries.ftsn_sid = sid;
1295 		(*s)->ftsn_entries.ftsn_ssn = ssn;
1296 		(*s)->next = NULL;
1297 		*nsets = 1;
1298 		*slen += sizeof (uint32_t);
1299 		return (B_TRUE);
1300 	}
1301 	for (p = *s; p->next != NULL; p = p->next) {
1302 		if (p->ftsn_entries.ftsn_sid == sid) {
1303 			if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1304 				p->ftsn_entries.ftsn_ssn = ssn;
1305 			return (B_TRUE);
1306 		}
1307 	}
1308 	/* the last one */
1309 	if (p->ftsn_entries.ftsn_sid == sid) {
1310 		if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1311 			p->ftsn_entries.ftsn_ssn = ssn;
1312 	} else {
1313 		if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss)
1314 			return (B_FALSE);
1315 		p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
1316 		    KM_NOSLEEP);
1317 		if (p->next == NULL)
1318 			return (B_FALSE);
1319 		p = p->next;
1320 		p->ftsn_entries.ftsn_sid = sid;
1321 		p->ftsn_entries.ftsn_ssn = ssn;
1322 		p->next = NULL;
1323 		(*nsets)++;
1324 		*slen += sizeof (uint32_t);
1325 	}
1326 	return (B_TRUE);
1327 }
1328 
1329 /*
1330  * Given a set of stream id - sequence number pairs, this routing creates
1331  * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
1332  * for the chunk is obtained from sctp->sctp_adv_pap. The caller
1333  * will add the IP/SCTP header.
1334  */
1335 mblk_t *
1336 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
1337     uint_t nsets, uint32_t seglen)
1338 {
1339 	mblk_t			*ftsn_mp;
1340 	sctp_chunk_hdr_t	*ch_hdr;
1341 	uint32_t		*advtsn;
1342 	uint16_t		schlen;
1343 	size_t			xtralen;
1344 	ftsn_entry_t		*ftsn_entry;
1345 
1346 	seglen += sizeof (sctp_chunk_hdr_t);
1347 	if (fp->isv4)
1348 		xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra;
1349 	else
1350 		xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra;
1351 	ftsn_mp = allocb(xtralen + seglen, BPRI_MED);
1352 	if (ftsn_mp == NULL)
1353 		return (NULL);
1354 	ftsn_mp->b_rptr += xtralen;
1355 	ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
1356 
1357 	ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
1358 	ch_hdr->sch_id = CHUNK_FORWARD_TSN;
1359 	ch_hdr->sch_flags = 0;
1360 	/*
1361 	 * The cast here should not be an issue since seglen is
1362 	 * the length of the Forward TSN chunk.
1363 	 */
1364 	schlen = (uint16_t)seglen;
1365 	U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
1366 
1367 	advtsn = (uint32_t *)(ch_hdr + 1);
1368 	U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
1369 	ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
1370 	while (nsets > 0) {
1371 		ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
1372 		ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
1373 		ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
1374 		ftsn_entry++;
1375 		sets = sets->next;
1376 		nsets--;
1377 	}
1378 	return (ftsn_mp);
1379 }
1380 
1381 /*
1382  * Given a starting message, the routine steps through all the
1383  * messages whose TSN is less than sctp->sctp_adv_pap and creates
1384  * ftsn sets. The ftsn sets is then used to create an Forward TSN
1385  * chunk. All the messages, that have chunks that are included in the
1386  * ftsn sets, are flagged abandonded. If a message is partially sent
1387  * and is deemed abandoned, all remaining unsent chunks are marked
1388  * abandoned and are deducted from sctp_unsent.
1389  */
1390 void
1391 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
1392     sctp_faddr_t *fp, uint32_t *seglen)
1393 {
1394 	mblk_t		*mp1 = mp;
1395 	mblk_t		*mp_head = mp;
1396 	mblk_t		*meta_head = meta;
1397 	mblk_t		*head;
1398 	sctp_ftsn_set_t	*sets = NULL;
1399 	uint_t		nsets = 0;
1400 	uint16_t	clen;
1401 	sctp_data_hdr_t	*sdc;
1402 	uint32_t	sacklen;
1403 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1404 	uint32_t	unsent = 0;
1405 	boolean_t	ubit;
1406 
1407 	*seglen = sizeof (uint32_t);
1408 
1409 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1410 	while (meta != NULL &&
1411 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1412 		/*
1413 		 * Skip adding FTSN sets for un-ordered messages as they do
1414 		 * not have SSNs.
1415 		 */
1416 		ubit = SCTP_DATA_GET_UBIT(sdc);
1417 		if (!ubit &&
1418 		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
1419 			meta = NULL;
1420 			sctp->sctp_adv_pap = adv_pap;
1421 			goto ftsn_done;
1422 		}
1423 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1424 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1425 			adv_pap = ntohl(sdc->sdh_tsn);
1426 			mp1 = mp1->b_next;
1427 		}
1428 		meta = meta->b_next;
1429 		if (meta != NULL) {
1430 			mp1 = meta->b_cont;
1431 			if (!SCTP_CHUNK_ISSENT(mp1))
1432 				break;
1433 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1434 		}
1435 	}
1436 ftsn_done:
1437 	/*
1438 	 * Can't compare with sets == NULL, since we don't add any
1439 	 * sets for un-ordered messages.
1440 	 */
1441 	if (meta == meta_head)
1442 		return;
1443 	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
1444 	sctp_free_ftsn_set(sets);
1445 	if (*nmp == NULL)
1446 		return;
1447 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1448 		sacklen = 0;
1449 	} else {
1450 		sacklen = sizeof (sctp_chunk_hdr_t) +
1451 		    sizeof (sctp_sack_chunk_t) +
1452 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1453 		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
1454 			/* piggybacked SACK doesn't fit */
1455 			sacklen = 0;
1456 		} else {
1457 			fp = sctp->sctp_lastdata;
1458 		}
1459 	}
1460 	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
1461 	if (head == NULL) {
1462 		freemsg(*nmp);
1463 		*nmp = NULL;
1464 		return;
1465 	}
1466 	*seglen += sacklen;
1467 	*nmp = head;
1468 
1469 	/*
1470 	 * XXXNeed to optimise this, the reason it is done here is so
1471 	 * that we don't have to undo in case of failure.
1472 	 */
1473 	mp1 = mp_head;
1474 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1475 	while (meta_head != NULL &&
1476 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1477 		if (!SCTP_IS_MSG_ABANDONED(meta_head))
1478 			SCTP_MSG_SET_ABANDONED(meta_head);
1479 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1480 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1481 			if (!SCTP_CHUNK_ISACKED(mp1)) {
1482 				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
1483 				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
1484 				    meta_head);
1485 			}
1486 			mp1 = mp1->b_next;
1487 		}
1488 		while (mp1 != NULL) {
1489 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1490 			if (!SCTP_CHUNK_ABANDONED(mp1)) {
1491 				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
1492 				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
1493 				SCTP_ABANDON_CHUNK(mp1);
1494 			}
1495 			mp1 = mp1->b_next;
1496 		}
1497 		meta_head = meta_head->b_next;
1498 		if (meta_head != NULL) {
1499 			mp1 = meta_head->b_cont;
1500 			if (!SCTP_CHUNK_ISSENT(mp1))
1501 				break;
1502 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1503 		}
1504 	}
1505 	if (unsent > 0) {
1506 		ASSERT(sctp->sctp_unsent >= unsent);
1507 		sctp->sctp_unsent -= unsent;
1508 		/*
1509 		 * Update ULP the amount of queued data, which is
1510 		 * sent-unack'ed + unsent.
1511 		 */
1512 		if (!SCTP_IS_DETACHED(sctp)) {
1513 			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
1514 			    sctp->sctp_unacked + sctp->sctp_unsent);
1515 		}
1516 	}
1517 }
1518 
1519 /*
1520  * This function steps through messages starting at meta and checks if
1521  * the message is abandoned. It stops when it hits an unsent chunk or
1522  * a message that has all its chunk acked. This is the only place
1523  * where the sctp_adv_pap is moved forward to indicated abandoned
1524  * messages.
1525  */
1526 void
1527 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
1528 {
1529 	uint32_t	tsn = sctp->sctp_adv_pap;
1530 	sctp_data_hdr_t	*sdc;
1531 	sctp_msg_hdr_t	*msg_hdr;
1532 
1533 	ASSERT(mp != NULL);
1534 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1535 	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
1536 	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1537 	if (!SCTP_IS_MSG_ABANDONED(meta) &&
1538 	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1539 		return;
1540 	}
1541 	while (meta != NULL) {
1542 		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
1543 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1544 			tsn = ntohl(sdc->sdh_tsn);
1545 			mp = mp->b_next;
1546 		}
1547 		if (mp != NULL)
1548 			break;
1549 		/*
1550 		 * We continue checking for successive messages only if there
1551 		 * is a chunk marked for retransmission. Else, we might
1552 		 * end up sending FTSN prematurely for chunks that have been
1553 		 * sent, but not yet acked.
1554 		 */
1555 		if ((meta = meta->b_next) != NULL) {
1556 			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1557 			if (!SCTP_IS_MSG_ABANDONED(meta) &&
1558 			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1559 				break;
1560 			}
1561 			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1562 				if (!SCTP_CHUNK_ISSENT(mp)) {
1563 					sctp->sctp_adv_pap = tsn;
1564 					return;
1565 				}
1566 				if (SCTP_CHUNK_WANT_REXMIT(mp))
1567 					break;
1568 			}
1569 			if (mp == NULL)
1570 				break;
1571 		}
1572 	}
1573 	sctp->sctp_adv_pap = tsn;
1574 }
1575 
1576 /*
1577  * Retransmit first segment which hasn't been acked with cumtsn or send
1578  * a Forward TSN chunk, if appropriate.
1579  */
1580 void
1581 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
1582 {
1583 	mblk_t		*mp;
1584 	mblk_t		*nmp = NULL;
1585 	mblk_t		*head;
1586 	mblk_t		*meta = sctp->sctp_xmit_head;
1587 	mblk_t		*fill;
1588 	uint32_t	seglen = 0;
1589 	uint32_t	sacklen;
1590 	uint16_t	chunklen;
1591 	int		extra;
1592 	sctp_data_hdr_t	*sdc;
1593 	sctp_faddr_t	*fp;
1594 	int		error;
1595 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1596 	boolean_t	do_ftsn = B_FALSE;
1597 	boolean_t	ftsn_check = B_TRUE;
1598 
1599 	while (meta != NULL) {
1600 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1601 			uint32_t	tsn;
1602 
1603 			if (!SCTP_CHUNK_ISSENT(mp))
1604 				goto window_probe;
1605 			/*
1606 			 * We break in the following cases -
1607 			 *
1608 			 *	if the advanced peer ack point includes the next
1609 			 *	chunk to be retransmited - possibly the Forward
1610 			 * 	TSN was lost.
1611 			 *
1612 			 *	if we are PRSCTP aware and the next chunk to be
1613 			 *	retransmitted is now abandoned
1614 			 *
1615 			 *	if the next chunk to be retransmitted is for
1616 			 *	the dest on which the timer went off. (this
1617 			 *	message is not abandoned).
1618 			 *
1619 			 * We check for Forward TSN only for the first
1620 			 * eligible chunk to be retransmitted. The reason
1621 			 * being if the first eligible chunk is skipped (say
1622 			 * it was sent to a destination other than oldfp)
1623 			 * then we cannot advance the cum TSN via Forward
1624 			 * TSN chunk.
1625 			 *
1626 			 * Also, ftsn_check is B_TRUE only for the first
1627 			 * eligible chunk, it  will be B_FALSE for all
1628 			 * subsequent candidate messages for retransmission.
1629 			 */
1630 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1631 			tsn = ntohl(sdc->sdh_tsn);
1632 			if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
1633 				if (sctp->sctp_prsctp_aware && ftsn_check) {
1634 					if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
1635 						ASSERT(sctp->sctp_prsctp_aware);
1636 						do_ftsn = B_TRUE;
1637 						goto out;
1638 					} else {
1639 						sctp_check_adv_ack_pt(sctp,
1640 						    meta, mp);
1641 						if (SEQ_GT(sctp->sctp_adv_pap,
1642 						    adv_pap)) {
1643 							do_ftsn = B_TRUE;
1644 							goto out;
1645 						}
1646 					}
1647 					ftsn_check = B_FALSE;
1648 				}
1649 				if (SCTP_CHUNK_DEST(mp) == oldfp)
1650 					goto out;
1651 			}
1652 		}
1653 		meta = meta->b_next;
1654 		if (meta != NULL && sctp->sctp_prsctp_aware) {
1655 			sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1656 
1657 			while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
1658 			    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
1659 				meta = meta->b_next;
1660 			}
1661 		}
1662 	}
1663 window_probe:
1664 	/*
1665 	 * Retransmit fired for a destination which didn't have
1666 	 * any unacked data pending.
1667 	 */
1668 	if (!sctp->sctp_unacked && sctp->sctp_unsent) {
1669 		/*
1670 		 * Send a window probe. Inflate frwnd to allow
1671 		 * sending one segment.
1672 		 */
1673 		if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) {
1674 			sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc);
1675 		}
1676 		BUMP_MIB(&sctp_mib, sctpOutWinProbe);
1677 		sctp_output(sctp);
1678 	}
1679 	return;
1680 out:
1681 	/*
1682 	 * Enter slowstart for this destination
1683 	 */
1684 	oldfp->ssthresh = oldfp->cwnd / 2;
1685 	if (oldfp->ssthresh < 2 * oldfp->sfa_pmss)
1686 		oldfp->ssthresh = 2 * oldfp->sfa_pmss;
1687 	oldfp->cwnd = oldfp->sfa_pmss;
1688 	oldfp->pba = 0;
1689 	fp = sctp_rotate_faddr(sctp, oldfp);
1690 	ASSERT(fp != NULL);
1691 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1692 
1693 	if (do_ftsn) {
1694 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
1695 		if (nmp == NULL) {
1696 			sctp->sctp_adv_pap = adv_pap;
1697 			goto restart_timer;
1698 		}
1699 		head = nmp;
1700 		mp = NULL;
1701 		meta = sctp->sctp_xmit_tail;
1702 		if (meta != NULL)
1703 			mp = meta->b_cont;
1704 		goto try_bundle;
1705 	}
1706 	seglen = ntohs(sdc->sdh_len);
1707 	chunklen = seglen - sizeof (*sdc);
1708 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1709 		extra = SCTP_ALIGN - extra;
1710 
1711 	/*
1712 	 * Cancel RTT measurement if the retransmitted TSN is before the
1713 	 * TSN used for timimg.
1714 	 */
1715 	if (sctp->sctp_out_time != 0 &&
1716 	    SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
1717 		sctp->sctp_out_time = 0;
1718 	}
1719 	/* Clear the counter as the RTT calculation may be off. */
1720 	fp->rtt_updates = 0;
1721 
1722 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1723 		sacklen = 0;
1724 	} else {
1725 		sacklen = sizeof (sctp_chunk_hdr_t) +
1726 		    sizeof (sctp_sack_chunk_t) +
1727 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1728 		if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
1729 			/* piggybacked SACK doesn't fit */
1730 			sacklen = 0;
1731 		} else {
1732 			fp = sctp->sctp_lastdata;
1733 		}
1734 	}
1735 
1736 	nmp = dupmsg(mp);
1737 	if (nmp == NULL)
1738 		goto restart_timer;
1739 	if (extra > 0) {
1740 		fill = sctp_get_padding(extra);
1741 		if (fill != NULL) {
1742 			linkb(nmp, fill);
1743 			seglen += extra;
1744 		} else {
1745 			freemsg(nmp);
1746 			goto restart_timer;
1747 		}
1748 	}
1749 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
1750 	head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
1751 	if (head == NULL) {
1752 		freemsg(nmp);
1753 		goto restart_timer;
1754 	}
1755 	seglen += sacklen;
1756 
1757 	SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1758 
1759 	mp = mp->b_next;
1760 try_bundle:
1761 	while (seglen < fp->sfa_pmss) {
1762 		int32_t new_len;
1763 
1764 		while (mp != NULL) {
1765 			if (SCTP_CHUNK_CANSEND(mp))
1766 				break;
1767 			mp = mp->b_next;
1768 		}
1769 		if (mp == NULL) {
1770 			meta = sctp_get_msg_to_send(sctp, &mp, meta->b_next,
1771 			    &error, 0, 0, oldfp);
1772 			if (error != 0 || meta == NULL)
1773 				break;
1774 			ASSERT(mp != NULL);
1775 			sctp->sctp_xmit_tail = meta;
1776 		}
1777 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1778 		chunklen = ntohs(sdc->sdh_len) - sizeof (*sdc);
1779 		new_len = seglen + ntohs(sdc->sdh_len);
1780 
1781 		if (seglen & (SCTP_ALIGN - 1)) {
1782 			extra = SCTP_ALIGN - (seglen & (SCTP_ALIGN - 1));
1783 
1784 			if (new_len + extra > fp->sfa_pmss) {
1785 				break;
1786 			}
1787 			fill = sctp_get_padding(extra);
1788 			if (fill != NULL) {
1789 				new_len += extra;
1790 				linkb(head, fill);
1791 			} else {
1792 				break;
1793 			}
1794 		} else {
1795 			if (new_len > fp->sfa_pmss) {
1796 				break;
1797 			}
1798 		}
1799 		if ((nmp = dupmsg(mp)) == NULL) {
1800 			break;
1801 		}
1802 		seglen = new_len;
1803 
1804 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
1805 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1806 		linkb(head, nmp);
1807 		mp = mp->b_next;
1808 	}
1809 	if ((seglen > fp->sfa_pmss) && fp->isv4) {
1810 		ipha_t *iph = (ipha_t *)head->b_rptr;
1811 
1812 		/*
1813 		 * Path MTU is different from path we thought it would
1814 		 * be when we created chunks, or IP headers have grown.
1815 		 * Need to clear the DF bit.
1816 		 */
1817 		iph->ipha_fragment_offset_and_flags = 0;
1818 	}
1819 	dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
1820 	    "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
1821 	    seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), fp,
1822 	    sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
1823 
1824 	sctp_set_iplen(sctp, head);
1825 	sctp_add_sendq(sctp, head);
1826 
1827 	/*
1828 	 * Restart timer with exponential backoff
1829 	 */
1830 restart_timer:
1831 	oldfp->strikes++;
1832 	sctp->sctp_strikes++;
1833 	SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
1834 	SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1835 	if (oldfp->suna != 0)
1836 		SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto);
1837 	sctp->sctp_active = lbolt64;
1838 }
1839 
1840 /*
1841  * The SCTP write put procedure called from IP.
1842  */
1843 void
1844 sctp_wput(queue_t *q, mblk_t *mp)
1845 {
1846 	uchar_t		*rptr;
1847 	t_scalar_t	type;
1848 
1849 	switch (mp->b_datap->db_type) {
1850 	case M_IOCTL:
1851 		sctp_wput_ioctl(q, mp);
1852 		break;
1853 	case M_DATA:
1854 		/* Should be handled in sctp_output() */
1855 		ASSERT(0);
1856 		freemsg(mp);
1857 		break;
1858 	case M_PROTO:
1859 	case M_PCPROTO:
1860 		rptr = mp->b_rptr;
1861 		if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) {
1862 			type = ((union T_primitives *)rptr)->type;
1863 			/*
1864 			 * There is no "standard" way on how to respond
1865 			 * to T_CAPABILITY_REQ if a module does not
1866 			 * understand it.  And the current TI mod
1867 			 * has problems handling an error ack.  So we
1868 			 * catch the request here and reply with a response
1869 			 * which the TI mod knows how to respond to.
1870 			 */
1871 			switch (type) {
1872 			case T_CAPABILITY_REQ:
1873 				(void) putnextctl1(RD(q), M_ERROR, EPROTO);
1874 				break;
1875 			default:
1876 				if ((mp = mi_tpi_err_ack_alloc(mp,
1877 				    TNOTSUPPORT, 0)) != NULL) {
1878 					qreply(q, mp);
1879 					return;
1880 				}
1881 			}
1882 		}
1883 		/* FALLTHRU */
1884 	default:
1885 		freemsg(mp);
1886 		return;
1887 	}
1888 }
1889