xref: /titanic_41/usr/src/uts/common/inet/sctp/sctp_output.c (revision 0b6016e6ff70af39f99c9cc28e0c2207c8f5413c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/cmn_err.h>
33 #define	_SUN_TPI_VERSION 2
34 #include <sys/tihdr.h>
35 #include <sys/socket.h>
36 #include <sys/stropts.h>
37 #include <sys/strsun.h>
38 #include <sys/strsubr.h>
39 #include <sys/socketvar.h>
40 /* swilly code in sys/socketvar.h turns off DEBUG */
41 #ifdef __lint
42 #define	DEBUG
43 #endif
44 
45 #include <inet/common.h>
46 #include <inet/mi.h>
47 #include <inet/ip.h>
48 #include <inet/ip6.h>
49 #include <inet/sctp_ip.h>
50 #include <inet/ipclassifier.h>
51 
52 /*
53  * PR-SCTP comments.
54  *
55  * A message can expire before it gets to the transmit list (i.e. it is still
56  * in the unsent list - unchunked), after it gets to the transmit list, but
57  * before transmission has actually started, or after transmission has begun.
58  * Accordingly, we check for the status of a message in sctp_chunkify() when
59  * the message is being transferred from the unsent list to the transmit list;
60  * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
61  * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
62  * When we nuke a message in sctp_chunkify(), all we need to do is take it
63  * out of the unsent list and update sctp_unsent; when a message is deemed
64  * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
65  * list, update sctp_unsent IFF transmission for the message has not yet begun
66  * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
67  * message has started, then we cannot just take it out of the list, we need
68  * to send Forward TSN chunk to the peer so that the peer can clear its
69  * fragment list for this message. However, we cannot just send the Forward
70  * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
 * messages preceding this abandoned message. So, we send a Forward TSN
 * IFF all messages prior to this abandoned message have been SACKed; if not,
73  * we defer sending the Forward TSN to sctp_cumack(), which will check for
74  * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
75  * sctp_rexmit() when we check for retransmissions, we need to determine if
76  * the advanced peer ack point can be moved ahead, and if so, send a Forward
77  * TSN to the peer instead of retransmitting the chunk. Note that when
78  * we send a Forward TSN for a message, there may be yet unsent chunks for
79  * this message; we need to mark all such chunks as abandoned, so that
80  * sctp_cumack() can take the message out of the transmit list, additionally
 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
82  * decremented when a message/chunk is deemed abandoned), sockfs needs to
83  * be notified so that it can adjust its idea of the queued message.
84  */
85 
86 #include "sctp_impl.h"
87 
/*
 * kmem cache handle.  NOTE(review): judging by the name this backs the
 * Forward TSN set allocations; its users are not in this part of the
 * file, so confirm before relying on that.
 */
static struct kmem_cache	*sctp_kmem_ftsn_set_cache;

/*
 * Padding mblk for SCTP chunks.  sctp_get_padding() dupb()s this mblk to
 * hand out pad bytes and only allocates a fresh mblk when the dupb() fails.
 */
mblk_t *sctp_pad_mp;

/*
 * Transmit list consistency checker.  Both this prototype and the
 * definition are compiled only when DEBUG is defined.
 */
#ifdef	DEBUG
static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
#endif
96 
97 /*
98  * Called to allocate a header mblk when sending data to SCTP.
99  * Data will follow in b_cont of this mblk.
100  */
101 mblk_t *
102 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
103     int flags)
104 {
105 	mblk_t *mp;
106 	struct T_unitdata_req *tudr;
107 	size_t size;
108 	int error;
109 
110 	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
111 	size = MAX(size, sizeof (sctp_msg_hdr_t));
112 	if (flags & SCTP_CAN_BLOCK) {
113 		mp = allocb_wait(size, BPRI_MED, 0, &error);
114 	} else {
115 		mp = allocb(size, BPRI_MED);
116 	}
117 	if (mp) {
118 		tudr = (struct T_unitdata_req *)mp->b_rptr;
119 		tudr->PRIM_type = T_UNITDATA_REQ;
120 		tudr->DEST_length = nlen;
121 		tudr->DEST_offset = sizeof (*tudr);
122 		tudr->OPT_length = clen;
123 		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
124 		    _TPI_ALIGN_TOPT(nlen));
125 		if (nlen > 0)
126 			bcopy(name, tudr + 1, nlen);
127 		if (clen > 0)
128 			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
129 		mp->b_wptr += (tudr ->OPT_offset + clen);
130 		mp->b_datap->db_type = M_PROTO;
131 	}
132 	return (mp);
133 }
134 
/*
 * Queue a user message for transmission on 'sctp'.
 *
 * 'mp' is an M_PROTO mblk carrying a T_unitdata_req (both are asserted
 * below) in the layout produced by sctp_alloc_hdr(); the user data, if
 * any, is the M_DATA chain hanging off its b_cont.  An optional
 * destination address and an optional SCTP_SNDRCV ancillary object
 * override the association defaults for stream id, flags, ppid, context
 * and time-to-live.  'flags' is not used.
 *
 * Return value and mblk disposition:
 *   0	- normally: the M_PROTO mblk was rewritten in place as a
 *	  sctp_msg_hdr_t and appended to the unsent list;
 *	- MSG_ABORT: the association was aborted and the whole chain was
 *	  freed here;
 *	- no data mblk at all: nothing was done and nothing was freed.
 *   EINVAL	  destination address too short or not a known peer,
 *		  SCTP_SNDRCV object too short, stream id out of range,
 *		  or zero length message.
 *   EAFNOSUPPORT destination address family is neither AF_INET nor
 *		  AF_INET6.
 *   ENOMEM	  the MSG_ABORT data could not be pulled up.
 *   EPIPE	  the association is past SCTPS_ESTABLISHED.
 * None of the error returns free the mblk chain in this function.
 */
/*ARGSUSED2*/
int
sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
{
	sctp_faddr_t	*fp = NULL;
	struct T_unitdata_req	*tudr;
	int		error = 0;
	mblk_t		*mproto = mp;
	in6_addr_t	*addr;
	in6_addr_t	tmpaddr;
	/* Per-message attributes start out as the association defaults. */
	uint16_t	sid = sctp->sctp_def_stream;
	uint32_t	ppid = sctp->sctp_def_ppid;
	uint32_t	context = sctp->sctp_def_context;
	uint16_t	msg_flags = sctp->sctp_def_flags;
	sctp_msg_hdr_t	*sctp_msg_hdr;
	uint32_t	msg_len = 0;
	uint32_t	timetolive = sctp->sctp_def_timetolive;

	ASSERT(DB_TYPE(mproto) == M_PROTO);

	/* From here on 'mp' is the user data and 'mproto' the header. */
	mp = mp->b_cont;
	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);

	tudr = (struct T_unitdata_req *)mproto->b_rptr;
	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);

	/* Get destination address, if specified */
	if (tudr->DEST_length > 0) {
		sin_t *sin;
		sin6_t *sin6;

		/*
		 * The family field is read first and decides how long the
		 * address has to be.  IPv4 addresses are converted to
		 * v4-mapped form for the peer address lookup.
		 */
		sin = (struct sockaddr_in *)
		    (mproto->b_rptr + tudr->DEST_offset);
		switch (sin->sin_family) {
		case AF_INET:
			if (tudr->DEST_length < sizeof (*sin)) {
				return (EINVAL);
			}
			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
			addr = &tmpaddr;
			break;
		case AF_INET6:
			if (tudr->DEST_length < sizeof (*sin6)) {
				return (EINVAL);
			}
			sin6 = (struct sockaddr_in6 *)
			    (mproto->b_rptr + tudr->DEST_offset);
			addr = &sin6->sin6_addr;
			break;
		default:
			return (EAFNOSUPPORT);
		}
		/* The destination must be one of this association's peers. */
		fp = sctp_lookup_faddr(sctp, addr);
		if (fp == NULL) {
			return (EINVAL);
		}
	}
	/* Ancillary Data? */
	if (tudr->OPT_length > 0) {
		struct cmsghdr		*cmsg;
		char			*cend;
		struct sctp_sndrcvinfo	*sndrcv;

		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
		cend = ((char *)cmsg + tudr->OPT_length);
		ASSERT(cend <= (char *)mproto->b_wptr);

		/*
		 * Walk the control messages until the first SCTP_SNDRCV
		 * object is found.  The walk ends quietly at the first
		 * header (or object) that does not fit in the option area,
		 * or at a zero cmsg_len, which would never advance.
		 */
		for (;;) {
			if ((char *)(cmsg + 1) > cend ||
			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
				break;
			}
			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
				if (cmsg->cmsg_len <
				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
					return (EINVAL);
				}
				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
				sid = sndrcv->sinfo_stream;
				msg_flags = sndrcv->sinfo_flags;
				ppid = sndrcv->sinfo_ppid;
				context = sndrcv->sinfo_context;
				timetolive = sndrcv->sinfo_timetolive;
				break;
			}
			if (cmsg->cmsg_len > 0)
				cmsg = CMSG_NEXT(cmsg);
			else
				break;
		}
	}
	if (msg_flags & MSG_ABORT) {
		/*
		 * Collapse data that spans several mblks into a single
		 * mblk before handing it to sctp_user_abort().  The
		 * original data is only freed once the pullup succeeded.
		 */
		if (mp && mp->b_cont) {
			mblk_t *pump = msgpullup(mp, -1);
			if (!pump) {
				return (ENOMEM);
			}
			freemsg(mp);
			mp = pump;
			mproto->b_cont = mp;
		}
		RUN_SCTP(sctp);
		sctp_user_abort(sctp, mp, B_TRUE);
		sctp_clean_death(sctp, ECONNRESET);
		/* The data is linked at mproto->b_cont, so this frees both. */
		freemsg(mproto);
		goto process_sendq;
	}
	/* No data mblk: return success without touching the association. */
	if (mp == NULL)
		goto done;

	RUN_SCTP(sctp);

	/* Reject any new data requests if we are shutting down */
	if (sctp->sctp_state > SCTPS_ESTABLISHED) {
		error = EPIPE;
		goto unlock_done;
	}

	/* Re-use the mproto to store relevant info. */
	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));

	/*
	 * The T_unitdata_req (including address and options) is
	 * overwritten here, so everything needed from it must already
	 * have been copied into locals above.
	 */
	mproto->b_rptr = mproto->b_datap->db_base;
	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);

	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
	sctp_msg_hdr->smh_context = context;
	sctp_msg_hdr->smh_sid = sid;
	sctp_msg_hdr->smh_ppid = ppid;
	sctp_msg_hdr->smh_flags = msg_flags;
	/* The time-to-live is given in milliseconds and kept in ticks. */
	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
	/* Stamp the message with the current lbolt64. */
	sctp_msg_hdr->smh_tob = lbolt64;
	/* Total payload length; 'mp' is NULL once this loop is done. */
	for (; mp != NULL; mp = mp->b_cont)
		msg_len += MBLKL(mp);
	sctp_msg_hdr->smh_msglen = msg_len;

	/* User requested specific destination */
	SCTP_SET_CHUNK_DEST(mproto, fp);

	/*
	 * The stream id is only checked from SCTPS_COOKIE_ECHOED onwards.
	 * NOTE(review): presumably because the number of outbound streams
	 * is not settled in earlier states - confirm.
	 */
	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
	    sid >= sctp->sctp_num_ostr) {
		/* Send sendfail event */
		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
		    B_FALSE);
		error = EINVAL;
		goto unlock_done;
	}

	/* no data */
	if (msg_len == 0) {
		sctp_sendfail_event(sctp, dupmsg(mproto),
		    SCTP_ERR_NO_USR_DATA, B_FALSE);
		error = EINVAL;
		goto unlock_done;
	}

	/* Add it to the unsent list */
	if (sctp->sctp_xmit_unsent == NULL) {
		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
	} else {
		sctp->sctp_xmit_unsent_tail->b_next = mproto;
		sctp->sctp_xmit_unsent_tail = mproto;
	}
	sctp->sctp_unsent += msg_len;
	BUMP_LOCAL(sctp->sctp_msgcount);
	/* Only an established association tries to transmit right away. */
	if (sctp->sctp_state == SCTPS_ESTABLISHED)
		sctp_output(sctp);
process_sendq:
	/* Success exit: undo RUN_SCTP(), then process the send queue. */
	WAKE_SCTP(sctp);
	sctp_process_sendq(sctp);
	return (0);
unlock_done:
	/* Error exit taken after RUN_SCTP(). */
	WAKE_SCTP(sctp);
done:
	return (error);
}
312 
/*
 * Move messages from the head of the unsent list to the transmit list,
 * cutting each one into DATA chunks on the way.
 *
 * sctp:	  the association; sctp_xmit_unsent must not be NULL, it
 *		  is dereferenced without a check.
 * first_len:	  room for the first chunk, data chunk header included.
 *		  Later chunks are sized from the destination's sfa_pmss.
 * bytes_to_send: payload budget.  It is charged for every chunk built and
 *		  is only consulted between messages, so a message that has
 *		  been started is always chunked to its end.
 *
 * Layout: the message header mblk (mdblk) points through b_cont at its
 * first chunk, the chunks of a message are linked through b_next, and the
 * mblks making up one chunk are linked through b_cont.
 *
 * A message that SCTP_MSG_TO_BE_ABANDONED() reports as expired is taken
 * off the unsent list and reported through sctp_sendfail_event() instead
 * of being chunked.
 *
 * If dupb() or allocb() fails in the middle of a message, the data not
 * yet chunked is put at the front of mdblk's chunk list, the message is
 * flagged with SCTP_MSG_SET_CHUNKED() (when at least one chunk already
 * exists) and the function returns with the message still on the unsent
 * list; the next call resumes from that state.
 */
void
sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send)
{
	mblk_t			*mp;
	/* First mblk of the data that has not been put into a chunk yet. */
	mblk_t			*chunk_mp;
	/* First and last data mblk of the chunk being built. */
	mblk_t			*chunk_head;
	mblk_t			*chunk_hdr;
	mblk_t			*chunk_tail = NULL;
	/*
	 * 'chunksize' is the payload room the current chunk started with and
	 * 'count' what is left of it, so chunksize - count is the payload
	 * actually placed in the chunk.
	 */
	int			count;
	int			chunksize;
	sctp_data_hdr_t		*sdc;
	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
	sctp_faddr_t		*fp;
	sctp_faddr_t		*fp1;
	/* Size of a separately allocated chunk header mblk. */
	size_t			xtralen;
	sctp_msg_hdr_t		*msg_hdr;

	/* Use the destination stored with the message, else the current one. */
	fp = SCTP_CHUNK_DEST(mdblk);
	if (fp == NULL)
		fp = sctp->sctp_current;
	if (fp->isv4)
		xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra + sizeof (*sdc);
	else
		xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra + sizeof (*sdc);
	count = chunksize = first_len - sizeof (*sdc);
nextmsg:
	chunk_mp = mdblk->b_cont;

	/*
	 * If this message is partially chunked (an earlier call ran out of
	 * memory), first_len is ignored.  The first entry of the chunk list
	 * is then the data still to be chunked: unhook it, and size the next
	 * chunk from what the last chunk built so far leaves free in
	 * sfa_pmss, or from the full sfa_pmss if that is not more than a
	 * chunk header.
	 */
	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
		int	chunk_len;

		ASSERT(chunk_mp->b_next != NULL);
		mdblk->b_cont = chunk_mp->b_next;
		chunk_mp->b_next = NULL;
		SCTP_MSG_CLEAR_CHUNKED(mdblk);
		/* Find the last chunk built so far. */
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
			count = chunksize = fp->sfa_pmss - chunk_len;
		else
			count = chunksize = fp->sfa_pmss;
		count = chunksize = count - sizeof (*sdc);
	} else {
		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
			/* Expired before being chunked: drop it (PR-SCTP). */
			sctp->sctp_xmit_unsent = mdblk->b_next;
			if (sctp->sctp_xmit_unsent == NULL)
				sctp->sctp_xmit_unsent_tail = NULL;
			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
			sctp->sctp_unsent -= msg_hdr->smh_msglen;
			mdblk->b_next = NULL;
			BUMP_LOCAL(sctp->sctp_prsctpdrop);
			/*
			 * Update ULP the amount of queued data, which is
			 * sent-unack'ed + unsent.
			 */
			if (!SCTP_IS_DETACHED(sctp)) {
				sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
				    sctp->sctp_unacked + sctp->sctp_unsent);
			}
			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
			goto try_next;
		}
		/*
		 * Fresh message: the data is now tracked by chunk_mp and
		 * b_cont is rebuilt below as the list of chunks.
		 */
		mdblk->b_cont = NULL;
	}
	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
nextchunk:
	chunk_head = chunk_mp;
	chunk_tail = NULL;

	/* Skip as many mblk's as we need */
	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
		count -= MBLKL(chunk_mp);
		chunk_tail = chunk_mp;
		chunk_mp = chunk_mp->b_cont;
	}
	/* Split the chain, if needed */
	if (chunk_mp != NULL) {
		if (count > 0) {
			/*
			 * There is room left but the next mblk is too big:
			 * dupb() it so that its first 'count' bytes end this
			 * chunk and the rest starts the next one.
			 */
			mblk_t	*split_mp = dupb(chunk_mp);

			if (split_mp == NULL) {
				/*
				 * Out of memory.  Nothing has been cut yet,
				 * so chunk_head still heads all the remaining
				 * data; put it back for a later call.
				 */
				if (mdblk->b_cont == NULL) {
					mdblk->b_cont = chunk_head;
				} else  {
					SCTP_MSG_SET_CHUNKED(mdblk);
					ASSERT(chunk_head->b_next == NULL);
					chunk_head->b_next = mdblk->b_cont;
					mdblk->b_cont = chunk_head;
				}
				return;
			}
			if (chunk_tail != NULL) {
				chunk_tail->b_cont = split_mp;
				chunk_tail = chunk_tail->b_cont;
			} else {
				chunk_head = chunk_tail = split_mp;
			}
			/* The dup ends here, the original starts here. */
			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
			chunk_mp->b_rptr = chunk_tail->b_wptr;
			count = 0;
		} else if (chunk_tail == NULL) {
			/*
			 * No room at all and nothing collected: start over
			 * with a full sized chunk.
			 */
			goto next;
		} else {
			/* Exact fit: cut the chain after the last mblk. */
			chunk_tail->b_cont = NULL;
		}
	}
	/*
	 * Alloc chunk hdr, if needed: that is when the data block is shared,
	 * its read pointer is not SCTP_ALIGN aligned, or there is too little
	 * room in front of the data.  Otherwise the header is written
	 * directly in front of the data.
	 */
	if (DB_REF(chunk_head) > 1 ||
	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/*
			 * Out of memory: glue the data collected for this
			 * chunk back onto the remaining data and put it all
			 * back for a later call.
			 */
			if (mdblk->b_cont == NULL) {
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				mdblk->b_cont = chunk_head;
			} else {
				SCTP_MSG_SET_CHUNKED(mdblk);
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				ASSERT(chunk_head->b_next == NULL);
				chunk_head->b_next = mdblk->b_cont;
				mdblk->b_cont = chunk_head;
			}
			return;
		}
		/* Put the header at the very end of the new mblk. */
		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
		chunk_hdr->b_cont = chunk_head;
	} else {
		chunk_hdr = chunk_head;
		chunk_hdr->b_rptr -= sizeof (*sdc);
	}
	ASSERT(chunk_hdr->b_datap->db_ref == 1);
	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
	sdc->sdh_id = CHUNK_DATA;
	sdc->sdh_flags = 0;
	/* Chunk length is the header plus the payload placed in the chunk. */
	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
	ASSERT(sdc->sdh_len);
	sdc->sdh_sid = htons(msg_hdr->smh_sid);
	/*
	 * We defer assigning the SSN just before sending the chunk, else
	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
	 * to send a Forward TSN to let the peer know. Some more comments
	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
	 */
	sdc->sdh_payload_id = msg_hdr->smh_ppid;

	/* The first chunk of a message gets the B bit, others are appended. */
	if (mdblk->b_cont == NULL) {
		mdblk->b_cont = chunk_hdr;
		SCTP_DATA_SET_BBIT(sdc);
	} else {
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = chunk_hdr;
	}

	bytes_to_send -= (chunksize - count);
	if (chunk_mp != NULL) {
		/* More data in this message: continue with a full chunk. */
next:
		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
		goto nextchunk;
	}
	/* That was the last chunk: mark it and move the message over. */
	SCTP_DATA_SET_EBIT(sdc);
	sctp->sctp_xmit_unsent = mdblk->b_next;
	if (mdblk->b_next == NULL) {
		sctp->sctp_xmit_unsent_tail = NULL;
	}
	mdblk->b_next = NULL;

	/*
	 * Append to the transmit list.  sctp_xmit_tail is not necessarily
	 * the last message, hence the walk to the end; it is only set here
	 * when the list was empty.
	 */
	if (sctp->sctp_xmit_tail == NULL) {
		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
	} else {
		mp = sctp->sctp_xmit_tail;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = mdblk;
		mdblk->b_prev = mp;
	}
try_next:
	/* Carry on with the next message while there is budget left. */
	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
		mdblk = sctp->sctp_xmit_unsent;
		fp1 = SCTP_CHUNK_DEST(mdblk);
		if (fp1 == NULL)
			fp1 = sctp->sctp_current;
		if (fp == fp1) {
			/*
			 * Same destination.  If the previous chunk left room
			 * and the next message's first mblk either fits in
			 * it or is too big for a full chunk anyway, size the
			 * first chunk to that room, less a chunk header and
			 * rounded down to a multiple of 4.  Otherwise start
			 * with a full sized chunk.
			 */
			size_t len = MBLKL(mdblk->b_cont);
			if ((count > 0) &&
			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
				(len <= count))) {
				count -= sizeof (*sdc);
				count = chunksize = count - (count & 0x3);
			} else {
				count = chunksize = fp->sfa_pmss -
				    sizeof (*sdc);
			}
		} else {
			/* Different destination: recompute the sizes. */
			if (fp1->isv4)
				xtralen = sctp->sctp_hdr_len;
			else
				xtralen = sctp->sctp_hdr6_len;
			xtralen += sctp_wroff_xtra + sizeof (*sdc);
			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
			fp = fp1;
		}
		goto nextmsg;
	}
}
529 
530 void
531 sctp_free_msg(mblk_t *ump)
532 {
533 	mblk_t *mp, *nmp;
534 
535 	for (mp = ump->b_cont; mp; mp = nmp) {
536 		nmp = mp->b_next;
537 		mp->b_next = mp->b_prev = NULL;
538 		freemsg(mp);
539 	}
540 	ASSERT(!ump->b_prev);
541 	ump->b_next = NULL;
542 	freeb(ump);
543 }
544 
545 mblk_t *
546 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
547     int *error)
548 {
549 	int hdrlen;
550 	char *hdr;
551 	int isv4 = fp->isv4;
552 
553 	if (error != NULL)
554 		*error = 0;
555 
556 	if (isv4) {
557 		hdrlen = sctp->sctp_hdr_len;
558 		hdr = sctp->sctp_iphc;
559 	} else {
560 		hdrlen = sctp->sctp_hdr6_len;
561 		hdr = sctp->sctp_iphc6;
562 	}
563 	/*
564 	 * A null fp->ire could mean that the address is 'down'. Similarly,
565 	 * it is possible that the address went down, we tried to send an
566 	 * heartbeat and ended up setting fp->saddr as unspec because we
567 	 * didn't have any usable source address.  In either case
568 	 * sctp_get_ire() will try find an IRE, if available, and set
569 	 * the source address, if needed.  If we still don't have any
570 	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
571 	 * we return EHOSTUNREACH.
572 	 */
573 	if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
574 		sctp_get_ire(sctp, fp);
575 		if (fp->state == SCTP_FADDRS_UNREACH) {
576 			if (error != NULL)
577 				*error = EHOSTUNREACH;
578 			return (NULL);
579 		}
580 	}
581 	/* Copy in IP header. */
582 	if ((mp->b_rptr - mp->b_datap->db_base) <
583 	    (sctp_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 ||
584 	    !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) {
585 		mblk_t *nmp;
586 
587 		/*
588 		 * This can happen if IP headers are adjusted after
589 		 * data was moved into chunks, or during retransmission,
590 		 * or things like snoop is running.
591 		 */
592 		nmp = allocb_cred(sctp_wroff_xtra + hdrlen + sacklen,
593 		    CONN_CRED(sctp->sctp_connp));
594 		if (nmp == NULL) {
595 			if (error !=  NULL)
596 				*error = ENOMEM;
597 			return (NULL);
598 		}
599 		nmp->b_rptr += sctp_wroff_xtra;
600 		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
601 		nmp->b_cont = mp;
602 		mp = nmp;
603 	} else {
604 		mp->b_rptr -= (hdrlen + sacklen);
605 		mblk_setcred(mp, CONN_CRED(sctp->sctp_connp));
606 	}
607 	bcopy(hdr, mp->b_rptr, hdrlen);
608 	if (sacklen) {
609 		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
610 	}
611 	if (fp != sctp->sctp_current) {
612 		/* change addresses in header */
613 		if (isv4) {
614 			ipha_t *iph = (ipha_t *)mp->b_rptr;
615 
616 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
617 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
618 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
619 				    iph->ipha_src);
620 			} else if (sctp->sctp_bound_to_all) {
621 				iph->ipha_src = INADDR_ANY;
622 			}
623 		} else {
624 			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
625 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
626 				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
627 			} else if (sctp->sctp_bound_to_all) {
628 				V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src);
629 			}
630 		}
631 	}
632 	/*
633 	 * IP will not free this IRE if it is condemned.  SCTP needs to
634 	 * free it.
635 	 */
636 	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
637 		IRE_REFRELE_NOTR(fp->ire);
638 		fp->ire = NULL;
639 	}
640 
641 	/* Stash the conn and ire ptr info for IP */
642 	SCTP_STASH_IPINFO(mp, fp->ire);
643 
644 	return (mp);
645 }
646 
647 /*
648  * SCTP requires every chunk to be padded so that the total length
649  * is a multiple of SCTP_ALIGN.  This function returns a mblk with
650  * the specified pad length.
651  */
652 static mblk_t *
653 sctp_get_padding(int pad)
654 {
655 	mblk_t *fill;
656 
657 	ASSERT(pad < SCTP_ALIGN);
658 	if ((fill = dupb(sctp_pad_mp)) != NULL) {
659 		fill->b_wptr += pad;
660 		return (fill);
661 	}
662 
663 	/*
664 	 * The memory saving path of reusing the sctp_pad_mp
665 	 * fails may be because it has been dupb() too
666 	 * many times (DBLK_REFMAX).  Use the memory consuming
667 	 * path of allocating the pad mblk.
668 	 */
669 	if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
670 		/* Zero it out.  SCTP_ALIGN is sizeof (int32_t) */
671 		*(int32_t *)fill->b_rptr = 0;
672 		fill->b_wptr += pad;
673 	}
674 	return (fill);
675 }
676 
677 static mblk_t *
678 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
679 {
680 	mblk_t		*meta;
681 	mblk_t		*start_mp = NULL;
682 	mblk_t		*end_mp = NULL;
683 	mblk_t		*mp, *nmp;
684 	mblk_t		*fill;
685 	sctp_data_hdr_t	*sdh;
686 	int		msglen;
687 	int		extra;
688 	sctp_msg_hdr_t	*msg_hdr;
689 	sctp_faddr_t	*old_fp = NULL;
690 	sctp_faddr_t	*chunk_fp;
691 
692 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
693 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
694 		if (SCTP_IS_MSG_ABANDONED(meta) ||
695 		    SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
696 			continue;
697 		}
698 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
699 			if (SCTP_CHUNK_WANT_REXMIT(mp)) {
700 				/*
701 				 * Use the same peer address to do fast
702 				 * retransmission.  If the original peer
703 				 * address is dead, switch to the current
704 				 * one.  Record the old one so that we
705 				 * will pick the chunks sent to the old
706 				 * one for fast retransmission.
707 				 */
708 				chunk_fp = SCTP_CHUNK_DEST(mp);
709 				if (*fp == NULL) {
710 					*fp = chunk_fp;
711 					if ((*fp)->state != SCTP_FADDRS_ALIVE) {
712 						old_fp = *fp;
713 						*fp = sctp->sctp_current;
714 					}
715 				} else if (old_fp == NULL && *fp != chunk_fp) {
716 					continue;
717 				} else if (old_fp != NULL &&
718 				    old_fp != chunk_fp) {
719 					continue;
720 				}
721 
722 				sdh = (sctp_data_hdr_t *)mp->b_rptr;
723 				msglen = ntohs(sdh->sdh_len);
724 				if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
725 					extra = SCTP_ALIGN - extra;
726 				}
727 
728 				/*
729 				 * We still return at least the first message
730 				 * even if that message cannot fit in as
731 				 * PMTU may have changed.
732 				 */
733 				if (*total + msglen + extra >
734 				    (*fp)->sfa_pmss && start_mp != NULL) {
735 					return (start_mp);
736 				}
737 				if ((nmp = dupmsg(mp)) == NULL)
738 					return (start_mp);
739 				if (extra > 0) {
740 					fill = sctp_get_padding(extra);
741 					if (fill != NULL) {
742 						linkb(nmp, fill);
743 					} else {
744 						return (start_mp);
745 					}
746 				}
747 				BUMP_MIB(&sctp_mib, sctpOutFastRetrans);
748 				BUMP_LOCAL(sctp->sctp_rxtchunks);
749 				SCTP_CHUNK_CLEAR_REXMIT(mp);
750 				if (start_mp == NULL) {
751 					start_mp = nmp;
752 				} else {
753 					linkb(end_mp, nmp);
754 				}
755 				end_mp = nmp;
756 				*total += msglen + extra;
757 				dprint(2, ("sctp_find_fast_rexmit_mblks: "
758 				    "tsn %x\n", sdh->sdh_tsn));
759 			}
760 		}
761 	}
762 	/* Clear the flag as there is no more message to be fast rexmitted. */
763 	sctp->sctp_chk_fast_rexmit = B_FALSE;
764 	return (start_mp);
765 }
766 
/*
 * Debug aid to make sure that an mblk chain is not broken: check that
 * 'tail' can be reached from 'head' by following b_next.  A NULL 'head' or
 * 'tail' counts as a sound chain.
 */
#ifdef	DEBUG
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
	mblk_t	*cur;

	if (head == NULL || tail == NULL)
		return (B_TRUE);

	for (cur = head; cur != NULL; cur = cur->b_next) {
		if (cur == tail)
			return (B_TRUE);
	}
	/* Ran off the end of the list without meeting 'tail'. */
	return (B_FALSE);
}
#endif
784 
/*
 * Gets the next unsent chunk to transmit. Messages that are abandoned are
 * skipped. A message can be abandoned if it has a non-zero timetolive and
 * transmission has not yet started or if it is a partially reliable
 * message and its time is up (assuming we are PR-SCTP aware).
 *
 * sctp:	the association.
 * mp:		out; set to the chunk to send when a message is returned.
 * meta:	message on the transmit list to start looking from; may be
 *		NULL.
 * error:	out; 0, or the error from sctp_check_abandoned_msg().
 * firstseg:	bytes already spoken for in the first packet; the first
 *		chunk is sized from sfa_pmss - firstseg when chunkifying.
 * cansend:	if non-zero and nothing sendable was found, messages from
 *		the unsent list, if any, are chunkified, with 'cansend' as
 *		the byte budget given to sctp_chunkify(), and the search is
 *		repeated.  When called from sctp_rexmit(), we don't want to
 *		chunkify, so 'cansend' will be set to 0.
 * fp:		destination the caller wants to send to, or NULL.
 *
 * Returns the message (header mblk) owning the chunk stored in *mp, or
 * NULL if there is nothing to send or an error occurred (see *error).
 */
mblk_t *
sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
    int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp)
{
	mblk_t		*mp1;
	sctp_msg_hdr_t	*msg_hdr;
	mblk_t		*tmp_meta;
	sctp_faddr_t	*fp1;

	ASSERT(error != NULL && mp != NULL);
	*error = 0;

	ASSERT(sctp->sctp_current != NULL);

chunkified:
	while (meta != NULL) {
		/* Remember the successor; 'meta' may be unlinked below. */
		tmp_meta = meta->b_next;
		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
		mp1 = meta->b_cont;
		if (SCTP_IS_MSG_ABANDONED(meta))
			goto next_msg;
		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			/* Live message: return its first sendable chunk. */
			while (mp1 != NULL) {
				if (SCTP_CHUNK_CANSEND(mp1)) {
					*mp = mp1;
#ifdef	DEBUG
					ASSERT(sctp_verify_chain(
					    sctp->sctp_xmit_head, meta));
#endif
					return (meta);
				}
				mp1 = mp1->b_next;
			}
			goto next_msg;
		}
		/*
		 * If we come here and the first chunk is sent, then we
		 * are PR-SCTP aware, in which case if the cumulative
		 * TSN has moved up to or beyond the first chunk (which
		 * means all the previous messages have been cumulative
		 * SACK'd), then we send a Forward TSN with the last
		 * chunk that was sent in this message. If we can't send
		 * a Forward TSN because previous non-abandoned messages
		 * have not been acked then we will defer the Forward TSN
		 * to sctp_rexmit() or sctp_cumack().
		 */
		if (SCTP_CHUNK_ISSENT(mp1)) {
			*error = sctp_check_abandoned_msg(sctp, meta);
			if (*error != 0) {
#ifdef	DEBUG
				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
				    sctp->sctp_xmit_tail));
#endif
				return (NULL);
			}
			goto next_msg;
		}
		/*
		 * The message is to be abandoned and none of it has been
		 * sent: take it off the transmit list.  The three cases are
		 * head of the list, end of the list and in between;
		 * sctp_xmit_tail is moved off the message if it points at it.
		 */
		BUMP_LOCAL(sctp->sctp_prsctpdrop);
		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
		if (meta->b_prev == NULL) {
			ASSERT(sctp->sctp_xmit_head == meta);
			sctp->sctp_xmit_head = tmp_meta;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_next = NULL;
			if (tmp_meta != NULL)
				tmp_meta->b_prev = NULL;
		} else if (meta->b_next == NULL) {
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = meta->b_prev;
			meta->b_prev->b_next = NULL;
			meta->b_prev = NULL;
		} else {
			meta->b_prev->b_next = tmp_meta;
			tmp_meta->b_prev = meta->b_prev;
			if (sctp->sctp_xmit_tail == meta)
				sctp->sctp_xmit_tail = tmp_meta;
			meta->b_prev = NULL;
			meta->b_next = NULL;
		}
		sctp->sctp_unsent -= msg_hdr->smh_msglen;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp)) {
			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
			    sctp->sctp_unacked + sctp->sctp_unsent);
		}
		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
next_msg:
		meta = tmp_meta;
	}
	/* chunkify, if needed */
	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
		ASSERT(sctp->sctp_unsent > 0);
		if (fp == NULL) {
			/*
			 * No destination given: use the one stored with the
			 * first unsent message if it is alive, else the
			 * current one.
			 */
			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
				fp = sctp->sctp_current;
		} else {
			/*
			 * If user specified destination, try to honor that:
			 * do not chunkify a message bound for a different,
			 * live destination than the one we were called for.
			 */
			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
			    fp1 != fp) {
				goto chunk_done;
			}
		}
		sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend);
		if ((meta = sctp->sctp_xmit_tail) == NULL)
			goto chunk_done;
		/*
		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
		 * new chunk(s) to the tail, so we need to skip the
		 * sctp_xmit_tail, which would have already been processed.
		 * This could happen when there is unacked chunks, but
		 * nothing new to send.
		 * When sctp_chunkify() is called when the transmit queue
		 * is empty then we need to start from sctp_xmit_tail.
		 */
		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
#ifdef	DEBUG
			mp1 = sctp->sctp_xmit_tail->b_cont;
			while (mp1 != NULL) {
				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
				mp1 = mp1->b_next;
			}
#endif
			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
				goto chunk_done;
		}
		/* Search again, starting at the newly chunked messages. */
		goto chunkified;
	}
chunk_done:
#ifdef	DEBUG
	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
#endif
	return (NULL);
}
936 
937 void
938 sctp_fast_rexmit(sctp_t *sctp)
939 {
940 	mblk_t		*mp, *head;
941 	int		pktlen = 0;
942 	sctp_faddr_t	*fp = NULL;
943 
944 	ASSERT(sctp->sctp_xmit_head != NULL);
945 	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
946 	if (mp == NULL) {
947 		SCTP_KSTAT(sctp_fr_not_found);
948 		return;
949 	}
950 	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
951 		freemsg(mp);
952 		SCTP_KSTAT(sctp_fr_add_hdr);
953 		return;
954 	}
955 	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
956 		ipha_t *iph = (ipha_t *)head->b_rptr;
957 
958 		iph->ipha_fragment_offset_and_flags = 0;
959 	}
960 
961 	sctp_set_iplen(sctp, head);
962 	sctp_add_sendq(sctp, head);
963 	sctp->sctp_active = fp->lastactive = lbolt64;
964 }
965 
/*
 * Transmit queued data as far as the current limits allow.
 *
 * Starting at sctp_xmit_tail, the routine repeatedly picks the next chunk
 * for which SCTP_CHUNK_CANSEND() is true (asking sctp_get_msg_to_send()
 * for another message when the current one has none), wraps a dupmsg()
 * copy of it in protocol headers, bundles as many following chunks as fit
 * into the same packet, and hands the packet to sctp_add_sendq().
 *
 * A pending SACK is piggybacked on at most one packet: sacklen is reset
 * to 0 as soon as it has been included.  It is only included when the
 * data chunk plus the SACK fit into the cwnd and path MSS of lfp
 * (sctp_lastdata, or sctp_current if sctp_lastdata is not alive).
 *
 * The total payload sent is bounded by cansend, the smaller of
 * sctp_frwnd and sctp_unsent; each packet is further bounded by
 * cwnd - suna of its destination, capped at sfa_pmss for bundling.
 *
 * Whenever sending has to stop early control goes to unsent_data, which
 * starts the timer of the destination chosen so far (if it is not
 * running) and frees any packet that was built but not queued.
 */
966 void
967 sctp_output(sctp_t *sctp)
968 {
969 	mblk_t			*mp = NULL;
970 	mblk_t			*nmp;
971 	mblk_t			*head;
972 	mblk_t			*meta = sctp->sctp_xmit_tail;
973 	mblk_t			*fill = NULL;
974 	uint16_t 		chunklen;
975 	uint32_t 		cansend;
976 	int32_t			seglen;
977 	int32_t			xtralen;
978 	int32_t			sacklen;
979 	int32_t			pad = 0;
980 	int32_t			pathmax;
981 	int			extra;
982 	int64_t			now = lbolt64;
983 	sctp_faddr_t		*fp;
984 	sctp_faddr_t		*lfp;
985 	sctp_data_hdr_t		*sdc;
986 	int			error;
987 	boolean_t		notsent = B_TRUE;
988
	/*
	 * Decide whether a SACK is to be piggybacked.  lfp is assigned only
	 * in the SACK case and is only read below when sacklen > 0.
	 */
989 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
990 		sacklen = 0;
991 	} else {
992 		/* send a SACK chunk */
993 		sacklen = sizeof (sctp_chunk_hdr_t) +
994 		    sizeof (sctp_sack_chunk_t) +
995 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
996 		lfp = sctp->sctp_lastdata;
997 		ASSERT(lfp != NULL);
998 		if (lfp->state != SCTP_FADDRS_ALIVE)
999 			lfp = sctp->sctp_current;
1000 	}
1001
	/* Payload budget: the smaller of sctp_frwnd and sctp_unsent. */
1002 	cansend = sctp->sctp_frwnd;
1003 	if (sctp->sctp_unsent < cansend)
1004 		cansend = sctp->sctp_unsent;
	/*
	 * NOTE(review): this looks like Nagle-style small-packet avoidance -
	 * less than half a path MSS may be sent, some (but less than one
	 * path MSS of) data is still unacked, and sctp_ndelay is clear.
	 * Nothing is sent; only the timer is armed.  Confirm the intent.
	 */
1005 	if ((cansend < sctp->sctp_current->sfa_pmss / 2) &&
1006 	    sctp->sctp_unacked &&
1007 	    (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) &&
1008 	    !sctp->sctp_ndelay) {
1009 		head = NULL;
1010 		fp = sctp->sctp_current;
1011 		goto unsent_data;
1012 	}
1013 	if (meta != NULL)
1014 		mp = meta->b_cont;
	/* Each iteration of this loop builds and queues one packet. */
1015 	while (cansend > 0) {
1016 		pad = 0;
1017
1018 		/*
1019 		 * Find first segment eligible for transmit.
1020 		 */
1021 		while (mp != NULL) {
1022 			if (SCTP_CHUNK_CANSEND(mp))
1023 				break;
1024 			mp = mp->b_next;
1025 		}
1026 		if (mp == NULL) {
			/* Current message has nothing to send; get the next one. */
1027 			meta = sctp_get_msg_to_send(sctp, &mp,
1028 			    meta == NULL ? NULL : meta->b_next, &error, sacklen,
1029 			    cansend, NULL);
1030 			if (error != 0 || meta == NULL) {
1031 				head = NULL;
1032 				fp = sctp->sctp_current;
1033 				goto unsent_data;
1034 			}
1035 			sctp->sctp_xmit_tail =  meta;
1036 		}
1037
		/*
		 * seglen is the chunk length from the data header; chunklen
		 * is the payload only (chunk length minus the data header).
		 */
1038 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1039 		seglen = ntohs(sdc->sdh_len);
1040 		xtralen = sizeof (*sdc);
1041 		chunklen = seglen - xtralen;
1042
1043 		/*
1044 		 * Check rwnd.
1045 		 */
1046 		if (chunklen > cansend) {
1047 			head = NULL;
1048 			fp = SCTP_CHUNK_DEST(meta);
1049 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
1050 				fp = sctp->sctp_current;
1051 			goto unsent_data;
1052 		}
		/* extra = padding needed to round the chunk up to SCTP_ALIGN. */
1053 		if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1054 			extra = SCTP_ALIGN - extra;
1055
1056 		/*
1057 		 * Pick destination address, and check cwnd.
1058 		 */
1059 		if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) &&
1060 		    (seglen + sacklen + extra <= lfp->sfa_pmss)) {
1061 			/*
1062 			 * Only include SACK chunk if it can be bundled
1063 			 * with a data chunk, and sent to sctp_lastdata.
1064 			 */
1065 			pathmax = lfp->cwnd - lfp->suna;
1066
1067 			fp = lfp;
1068 			if ((nmp = dupmsg(mp)) == NULL) {
1069 				head = NULL;
1070 				goto unsent_data;
1071 			}
1072 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1073 			head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
1074 			    &error);
1075 			if (head == NULL) {
1076 				/*
1077 				 * If none of the source addresses are
1078 				 * available (i.e error == EHOSTUNREACH),
1079 				 * pretend we have sent the data. We will
1080 				 * eventually time out trying to retransmit
1081 				 * the data if the interface never comes up.
1082 				 * If we have already sent some stuff (i.e.,
1083 				 * notsent is B_FALSE) then we are fine, else
1084 				 * just mark this packet as sent.
1085 				 */
1086 				if (notsent && error == EHOSTUNREACH) {
1087 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1088 					    fp, chunklen, meta);
1089 				}
1090 				freemsg(nmp);
1091 				SCTP_KSTAT(sctp_output_failed);
1092 				goto unsent_data;
1093 			}
			/* The SACK now rides in this packet; do not add it again. */
1094 			seglen += sacklen;
1095 			xtralen += sacklen;
1096 			sacklen = 0;
1097 		} else {
1098 			fp = SCTP_CHUNK_DEST(meta);
1099 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
1100 				fp = sctp->sctp_current;
1101 			/*
1102 			 * If we haven't sent data to this destination for
1103 			 * a while, do slow start again.
1104 			 */
1105 			if (now - fp->lastactive > fp->rto) {
1106 				fp->cwnd = sctp_slow_start_after_idle *
1107 				    fp->sfa_pmss;
1108 			}
1109
1110 			pathmax = fp->cwnd - fp->suna;
1111 			if (seglen + extra > pathmax) {
1112 				head = NULL;
1113 				goto unsent_data;
1114 			}
1115 			if ((nmp = dupmsg(mp)) == NULL) {
1116 				head = NULL;
1117 				goto unsent_data;
1118 			}
1119 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1120 			head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
1121 			if (head == NULL) {
1122 				/*
1123 				 * If none of the source addresses are
1124 				 * available (i.e error == EHOSTUNREACH),
1125 				 * pretend we have sent the data. We will
1126 				 * eventually time out trying to retransmit
1127 				 * the data if the interface never comes up.
1128 				 * If we have already sent some stuff (i.e.,
1129 				 * notsent is B_FALSE) then we are fine, else
1130 				 * just mark this packet as sent.
1131 				 */
1132 				if (notsent && error == EHOSTUNREACH) {
1133 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1134 					    fp, chunklen, meta);
1135 				}
1136 				freemsg(nmp);
1137 				SCTP_KSTAT(sctp_output_failed);
1138 				goto unsent_data;
1139 			}
1140 		}
1141 		fp->lastactive = now;
		/* Bundling below never grows the packet beyond one path MSS. */
1142 		if (pathmax > fp->sfa_pmss)
1143 			pathmax = fp->sfa_pmss;
1144 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1145 		mp = mp->b_next;
1146
1147 		/* Use this chunk to measure RTT? */
1148 		if (sctp->sctp_out_time == 0) {
1149 			sctp->sctp_out_time = now;
1150 			sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1;
1151 			ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn));
1152 		}
1153 		if (extra > 0) {
1154 			fill = sctp_get_padding(extra);
1155 			if (fill != NULL) {
1156 				linkb(head, fill);
1157 				pad = extra;
1158 				seglen += extra;
1159 			} else {
				/*
				 * No padding available: the packet built so
				 * far is freed at unsent_data.  The chunk has
				 * already been marked through
				 * SCTP_CHUNK_SENT() above.
				 */
1160 				goto unsent_data;
1161 			}
1162 		}
1163 		/* See if we can bundle more. */
1164 		while (seglen < pathmax) {
1165 			int32_t		new_len;
1166 			int32_t		new_xtralen;
1167
1168 			while (mp != NULL) {
1169 				if (SCTP_CHUNK_CANSEND(mp))
1170 					break;
1171 				mp = mp->b_next;
1172 			}
1173 			if (mp == NULL) {
1174 				meta = sctp_get_msg_to_send(sctp, &mp,
1175 				    meta->b_next, &error, seglen,
1176 				    (seglen - xtralen) >= cansend ? 0 :
1177 				    cansend - seglen, fp);
1178 				if (error != 0 || meta == NULL)
1179 					break;
1180 				sctp->sctp_xmit_tail =  meta;
1181 			}
1182 			ASSERT(mp != NULL);
			/*
			 * Do not bundle an unsent chunk whose message is
			 * bound to a different destination than this packet.
			 */
1183 			if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
1184 			    fp != SCTP_CHUNK_DEST(meta)) {
1185 				break;
1186 			}
1187 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1188 			chunklen = ntohs(sdc->sdh_len);
1189 			if ((extra = chunklen  & (SCTP_ALIGN - 1)) != 0)
1190 				extra = SCTP_ALIGN - extra;
1191
			/*
			 * new_len/new_xtralen are the packet totals if this
			 * chunk is added; they are committed only after the
			 * copy and its padding have been obtained.
			 */
1192 			new_len = seglen + chunklen;
1193 			new_xtralen = xtralen + sizeof (*sdc);
1194 			chunklen -= sizeof (*sdc);
1195
1196 			if (new_len - new_xtralen > cansend ||
1197 			    new_len + extra > pathmax) {
1198 				break;
1199 			}
1200 			if ((nmp = dupmsg(mp)) == NULL)
1201 				break;
1202 			if (extra > 0) {
1203 				fill = sctp_get_padding(extra);
1204 				if (fill != NULL) {
1205 					pad += extra;
1206 					new_len += extra;
1207 					linkb(nmp, fill);
1208 				} else {
1209 					freemsg(nmp);
1210 					break;
1211 				}
1212 			}
1213 			seglen = new_len;
1214 			xtralen = new_xtralen;
1215 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1216 			SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1217 			linkb(head, nmp);
1218 			mp = mp->b_next;
1219 		}
1220 		if ((seglen > fp->sfa_pmss) && fp->isv4) {
1221 			ipha_t *iph = (ipha_t *)head->b_rptr;
1222
1223 			/*
1224 			 * Path MTU is different from what we thought it would
1225 			 * be when we created chunks, or IP headers have grown.
1226 			 * Need to clear the DF bit.
1227 			 */
1228 			iph->ipha_fragment_offset_and_flags = 0;
1229 		}
1230 		/* xmit segment */
		/*
		 * Only payload counts against cansend: padding (pad) and
		 * data headers plus any SACK (xtralen) are subtracted.
		 */
1231 		ASSERT(cansend >= seglen - pad - xtralen);
1232 		cansend -= (seglen - pad - xtralen);
1233 		dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
1234 		    "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
1235 		    seglen - xtralen, ntohl(sdc->sdh_tsn),
1236 		    ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
1237 		    cansend, sctp->sctp_lastack_rxd));
1238 		sctp_set_iplen(sctp, head);
1239 		sctp_add_sendq(sctp, head);
1240 		/* arm rto timer (if not set) */
1241 		if (!fp->timer_running)
1242 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1243 		notsent = B_FALSE;
1244 	}
1245 	sctp->sctp_active = now;
1246 	return;
1247 unsent_data:
	/* Every jump here has set both fp and head beforehand. */
1248 	/* arm persist timer (if rto timer not set) */
1249 	if (!fp->timer_running)
1250 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1251 	if (head != NULL)
1252 		freemsg(head);
1253 }
1254 
1255 /*
1256  * The following two functions initialize and destroy the cache
1257  * associated with the sets used for PR-SCTP.
1258  */
1259 void
1260 sctp_ftsn_sets_init(void)
1261 {
1262 	sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
1263 	    sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
1264 	    NULL, 0);
1265 }
1266 
1267 void
1268 sctp_ftsn_sets_fini(void)
1269 {
1270 	kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
1271 }
1272 
1273 
1274 /* Free PR-SCTP sets */
1275 void
1276 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
1277 {
1278 	sctp_ftsn_set_t *p;
1279 
1280 	while (s != NULL) {
1281 		p = s->next;
1282 		s->next = NULL;
1283 		kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
1284 		s = p;
1285 	}
1286 }
1287 
1288 /*
1289  * Given a message meta block, meta, this routine creates or modifies
1290  * the set that will be used to generate a Forward TSN chunk. If the
1291  * entry for stream id, sid, for this message already exists, the
1292  * sequence number, ssn, is updated if it is greater than the existing
1293  * one. If an entry for this sid does not exist, one is created if
1294  * the size does not exceed fp->sfa_pmss. We return false in case
1295  * or an error.
1296  */
1297 boolean_t
1298 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
1299     uint_t *nsets, uint32_t *slen)
1300 {
1301 	sctp_ftsn_set_t		*p;
1302 	sctp_msg_hdr_t		*msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1303 	uint16_t		sid = htons(msg_hdr->smh_sid);
1304 	/* msg_hdr->smh_ssn is already in NBO */
1305 	uint16_t		ssn = msg_hdr->smh_ssn;
1306 
1307 	ASSERT(s != NULL && nsets != NULL);
1308 	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
1309 
1310 	if (*s == NULL) {
1311 		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
1312 		*s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
1313 		if (*s == NULL)
1314 			return (B_FALSE);
1315 		(*s)->ftsn_entries.ftsn_sid = sid;
1316 		(*s)->ftsn_entries.ftsn_ssn = ssn;
1317 		(*s)->next = NULL;
1318 		*nsets = 1;
1319 		*slen += sizeof (uint32_t);
1320 		return (B_TRUE);
1321 	}
1322 	for (p = *s; p->next != NULL; p = p->next) {
1323 		if (p->ftsn_entries.ftsn_sid == sid) {
1324 			if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1325 				p->ftsn_entries.ftsn_ssn = ssn;
1326 			return (B_TRUE);
1327 		}
1328 	}
1329 	/* the last one */
1330 	if (p->ftsn_entries.ftsn_sid == sid) {
1331 		if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1332 			p->ftsn_entries.ftsn_ssn = ssn;
1333 	} else {
1334 		if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss)
1335 			return (B_FALSE);
1336 		p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
1337 		    KM_NOSLEEP);
1338 		if (p->next == NULL)
1339 			return (B_FALSE);
1340 		p = p->next;
1341 		p->ftsn_entries.ftsn_sid = sid;
1342 		p->ftsn_entries.ftsn_ssn = ssn;
1343 		p->next = NULL;
1344 		(*nsets)++;
1345 		*slen += sizeof (uint32_t);
1346 	}
1347 	return (B_TRUE);
1348 }
1349 
1350 /*
1351  * Given a set of stream id - sequence number pairs, this routing creates
1352  * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
1353  * for the chunk is obtained from sctp->sctp_adv_pap. The caller
1354  * will add the IP/SCTP header.
1355  */
1356 mblk_t *
1357 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
1358     uint_t nsets, uint32_t seglen)
1359 {
1360 	mblk_t			*ftsn_mp;
1361 	sctp_chunk_hdr_t	*ch_hdr;
1362 	uint32_t		*advtsn;
1363 	uint16_t		schlen;
1364 	size_t			xtralen;
1365 	ftsn_entry_t		*ftsn_entry;
1366 
1367 	seglen += sizeof (sctp_chunk_hdr_t);
1368 	if (fp->isv4)
1369 		xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra;
1370 	else
1371 		xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra;
1372 	ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp));
1373 	if (ftsn_mp == NULL)
1374 		return (NULL);
1375 	ftsn_mp->b_rptr += xtralen;
1376 	ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
1377 
1378 	ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
1379 	ch_hdr->sch_id = CHUNK_FORWARD_TSN;
1380 	ch_hdr->sch_flags = 0;
1381 	/*
1382 	 * The cast here should not be an issue since seglen is
1383 	 * the length of the Forward TSN chunk.
1384 	 */
1385 	schlen = (uint16_t)seglen;
1386 	U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
1387 
1388 	advtsn = (uint32_t *)(ch_hdr + 1);
1389 	U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
1390 	ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
1391 	while (nsets > 0) {
1392 		ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
1393 		ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
1394 		ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
1395 		ftsn_entry++;
1396 		sets = sets->next;
1397 		nsets--;
1398 	}
1399 	return (ftsn_mp);
1400 }
1401 
1402 /*
1403  * Given a starting message, the routine steps through all the
1404  * messages whose TSN is less than sctp->sctp_adv_pap and creates
1405  * ftsn sets. The ftsn sets is then used to create a Forward TSN
1406  * chunk. All the messages, that have chunks that are included in the
1407  * ftsn sets, are flagged abandoned. If a message is partially sent
1408  * and is deemed abandoned, all remaining unsent chunks are marked
1409  * abandoned and are deducted from sctp_unsent.
1410  */
/*
 *	meta, mp	message and chunk at which the walk starts.
 *	nmp		out: the packet (Forward TSN chunk with protocol
 *			headers and, if it fits, room for a SACK).  It is
 *			NULL if the chunk could not be allocated or the
 *			headers could not be added.  It is left untouched
 *			when the walk never got past the first message.
 *	fp		destination for the packet; replaced locally by
 *			sctp_lastdata when a SACK is piggybacked.
 *	seglen		out: sizeof (uint32_t) plus the size of the sets
 *			collected, plus the SACK length if one was added.
 *
 * The work is done in two passes so that a failure leaves the messages
 * unmodified: the first pass only collects the sets, the second pass,
 * run after the packet has been built, marks messages and chunks.
 */
1411 void
1412 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
1413     sctp_faddr_t *fp, uint32_t *seglen)
1414 {
1415 	mblk_t		*mp1 = mp;
1416 	mblk_t		*mp_head = mp;
1417 	mblk_t		*meta_head = meta;
1418 	mblk_t		*head;
1419 	sctp_ftsn_set_t	*sets = NULL;
1420 	uint_t		nsets = 0;
1421 	uint16_t	clen;
1422 	sctp_data_hdr_t	*sdc;
1423 	uint32_t	sacklen;
1424 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1425 	uint32_t	unsent = 0;
1426 	boolean_t	ubit;
1427
	/* The chunk always carries the 32-bit cumulative TSN. */
1428 	*seglen = sizeof (uint32_t);
1429
	/*
	 * First pass: collect one set per ordered message covered by
	 * sctp_adv_pap.  The local adv_pap follows the TSN of the last sent
	 * chunk walked so far.
	 */
1430 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1431 	while (meta != NULL &&
1432 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1433 		/*
1434 		 * Skip adding FTSN sets for un-ordered messages as they do
1435 		 * not have SSNs.
1436 		 */
1437 		ubit = SCTP_DATA_GET_UBIT(sdc);
1438 		if (!ubit &&
1439 		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
			/*
			 * The set could not be added: pull sctp_adv_pap back
			 * to the last TSN walked and build the chunk from
			 * what has been collected.  Clearing meta keeps the
			 * meta == meta_head test below from returning early.
			 */
1440 			meta = NULL;
1441 			sctp->sctp_adv_pap = adv_pap;
1442 			goto ftsn_done;
1443 		}
1444 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1445 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1446 			adv_pap = ntohl(sdc->sdh_tsn);
1447 			mp1 = mp1->b_next;
1448 		}
1449 		meta = meta->b_next;
1450 		if (meta != NULL) {
1451 			mp1 = meta->b_cont;
			/* Stop at the first message that has not been sent. */
1452 			if (!SCTP_CHUNK_ISSENT(mp1))
1453 				break;
1454 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1455 		}
1456 	}
1457 ftsn_done:
1458 	/*
1459 	 * Can't compare with sets == NULL, since we don't add any
1460 	 * sets for un-ordered messages.
1461 	 */
1462 	if (meta == meta_head)
1463 		return;
1464 	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
	/* The sets have been copied into the chunk, or are of no use. */
1465 	sctp_free_ftsn_set(sets);
1466 	if (*nmp == NULL)
1467 		return;
	/* Piggyback a SACK if one is pending and it fits. */
1468 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1469 		sacklen = 0;
1470 	} else {
1471 		sacklen = sizeof (sctp_chunk_hdr_t) +
1472 		    sizeof (sctp_sack_chunk_t) +
1473 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1474 		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
1475 			/* piggybacked SACK doesn't fit */
1476 			sacklen = 0;
1477 		} else {
			/* The packet, SACK included, goes to sctp_lastdata. */
1478 			fp = sctp->sctp_lastdata;
1479 		}
1480 	}
1481 	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
1482 	if (head == NULL) {
1483 		freemsg(*nmp);
1484 		*nmp = NULL;
1485 		SCTP_KSTAT(sctp_send_ftsn_failed);
1486 		return;
1487 	}
1488 	*seglen += sacklen;
1489 	*nmp = head;
1490
1491 	/*
1492 	 * XXXNeed to optimise this, the reason it is done here is so
1493 	 * that we don't have to undo in case of failure.
1494 	 */
	/*
	 * Second pass: walk the same messages again, starting from the
	 * saved heads, and flag them abandoned.
	 */
1495 	mp1 = mp_head;
1496 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1497 	while (meta_head != NULL &&
1498 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1499 		if (!SCTP_IS_MSG_ABANDONED(meta_head))
1500 			SCTP_MSG_SET_ABANDONED(meta_head);
		/* Sent but unacked chunks go through SCTP_CHUNK_SENT() for fp. */
1501 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1502 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1503 			if (!SCTP_CHUNK_ISACKED(mp1)) {
1504 				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
1505 				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
1506 				    meta_head);
1507 			}
1508 			mp1 = mp1->b_next;
1509 		}
		/*
		 * What is left of the message was never sent: abandon those
		 * chunks and add up their payload, which is deducted from
		 * sctp_unsent below.
		 */
1510 		while (mp1 != NULL) {
1511 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1512 			if (!SCTP_CHUNK_ABANDONED(mp1)) {
1513 				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
1514 				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
1515 				SCTP_ABANDON_CHUNK(mp1);
1516 			}
1517 			mp1 = mp1->b_next;
1518 		}
1519 		meta_head = meta_head->b_next;
1520 		if (meta_head != NULL) {
1521 			mp1 = meta_head->b_cont;
1522 			if (!SCTP_CHUNK_ISSENT(mp1))
1523 				break;
1524 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1525 		}
1526 	}
1527 	if (unsent > 0) {
1528 		ASSERT(sctp->sctp_unsent >= unsent);
1529 		sctp->sctp_unsent -= unsent;
1530 		/*
1531 		 * Update ULP the amount of queued data, which is
1532 		 * sent-unack'ed + unsent.
1533 		 */
1534 		if (!SCTP_IS_DETACHED(sctp)) {
1535 			sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
1536 			    sctp->sctp_unacked + sctp->sctp_unsent);
1537 		}
1538 	}
1539 }
1540 
1541 /*
1542  * This function steps through messages starting at meta and checks if
1543  * the message is abandoned. It stops when it hits an unsent chunk or
1544  * a message that has all its chunk acked. This is the only place
1545  * where the sctp_adv_pap is moved forward to indicated abandoned
1546  * messages.
1547  */
1548 void
1549 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
1550 {
1551 	uint32_t	tsn = sctp->sctp_adv_pap;
1552 	sctp_data_hdr_t	*sdc;
1553 	sctp_msg_hdr_t	*msg_hdr;
1554 
1555 	ASSERT(mp != NULL);
1556 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1557 	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
1558 	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1559 	if (!SCTP_IS_MSG_ABANDONED(meta) &&
1560 	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1561 		return;
1562 	}
1563 	while (meta != NULL) {
1564 		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
1565 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1566 			tsn = ntohl(sdc->sdh_tsn);
1567 			mp = mp->b_next;
1568 		}
1569 		if (mp != NULL)
1570 			break;
1571 		/*
1572 		 * We continue checking for successive messages only if there
1573 		 * is a chunk marked for retransmission. Else, we might
1574 		 * end up sending FTSN prematurely for chunks that have been
1575 		 * sent, but not yet acked.
1576 		 */
1577 		if ((meta = meta->b_next) != NULL) {
1578 			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1579 			if (!SCTP_IS_MSG_ABANDONED(meta) &&
1580 			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1581 				break;
1582 			}
1583 			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1584 				if (!SCTP_CHUNK_ISSENT(mp)) {
1585 					sctp->sctp_adv_pap = tsn;
1586 					return;
1587 				}
1588 				if (SCTP_CHUNK_WANT_REXMIT(mp))
1589 					break;
1590 			}
1591 			if (mp == NULL)
1592 				break;
1593 		}
1594 	}
1595 	sctp->sctp_adv_pap = tsn;
1596 }
1597 
1598 
/*
 * Determine if we should bundle a data chunk with the chunk being
 * retransmitted.  An abandoned chunk is never bundled.  Otherwise we
 * bundle if
 *
 * - the chunk has been sent to the destination fp and is not yet
 *   ack'ed,
 *
 * OR
 *
 * - of the SENT and REXMIT flags the chunk does not have exactly SENT
 *   set, i.e. it is unsent (new data) or it is flagged for
 *   retransmission.
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED((mp)) &&					\
	((SCTP_CHUNK_ISSENT((mp)) && SCTP_CHUNK_DEST((mp)) == (fp) &&	\
	!SCTP_CHUNK_ISACKED((mp))) ||					\
	((mp)->b_flag & (SCTP_CHUNK_FLAG_SENT|SCTP_CHUNK_FLAG_REXMIT)) != \
	SCTP_CHUNK_FLAG_SENT))
1615 
1616 /*
1617  * Retransmit first segment which hasn't been acked with cumtsn or send
1618  * a Forward TSN chunk, if appropriate.
1619  */
1620 void
1621 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
1622 {
1623 	mblk_t		*mp;
1624 	mblk_t		*nmp = NULL;
1625 	mblk_t		*head;
1626 	mblk_t		*meta = sctp->sctp_xmit_head;
1627 	mblk_t		*fill;
1628 	uint32_t	seglen = 0;
1629 	uint32_t	sacklen;
1630 	uint16_t	chunklen;
1631 	int		extra;
1632 	sctp_data_hdr_t	*sdc;
1633 	sctp_faddr_t	*fp;
1634 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1635 	boolean_t	do_ftsn = B_FALSE;
1636 	boolean_t	ftsn_check = B_TRUE;
1637 	uint32_t	first_ua_tsn;
1638 	sctp_msg_hdr_t	*mhdr;
1639 	uint32_t	tot_wnd;
1640 
1641 	while (meta != NULL) {
1642 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1643 			uint32_t	tsn;
1644 
1645 			if (!SCTP_CHUNK_ISSENT(mp))
1646 				goto window_probe;
1647 			/*
1648 			 * We break in the following cases -
1649 			 *
1650 			 *	if the advanced peer ack point includes the next
1651 			 *	chunk to be retransmited - possibly the Forward
1652 			 * 	TSN was lost.
1653 			 *
1654 			 *	if we are PRSCTP aware and the next chunk to be
1655 			 *	retransmitted is now abandoned
1656 			 *
1657 			 *	if the next chunk to be retransmitted is for
1658 			 *	the dest on which the timer went off. (this
1659 			 *	message is not abandoned).
1660 			 *
1661 			 * We check for Forward TSN only for the first
1662 			 * eligible chunk to be retransmitted. The reason
1663 			 * being if the first eligible chunk is skipped (say
1664 			 * it was sent to a destination other than oldfp)
1665 			 * then we cannot advance the cum TSN via Forward
1666 			 * TSN chunk.
1667 			 *
1668 			 * Also, ftsn_check is B_TRUE only for the first
1669 			 * eligible chunk, it  will be B_FALSE for all
1670 			 * subsequent candidate messages for retransmission.
1671 			 */
1672 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1673 			tsn = ntohl(sdc->sdh_tsn);
1674 			if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
1675 				if (sctp->sctp_prsctp_aware && ftsn_check) {
1676 					if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
1677 						ASSERT(sctp->sctp_prsctp_aware);
1678 						do_ftsn = B_TRUE;
1679 						goto out;
1680 					} else {
1681 						sctp_check_adv_ack_pt(sctp,
1682 						    meta, mp);
1683 						if (SEQ_GT(sctp->sctp_adv_pap,
1684 						    adv_pap)) {
1685 							do_ftsn = B_TRUE;
1686 							goto out;
1687 						}
1688 					}
1689 					ftsn_check = B_FALSE;
1690 				}
1691 				if (SCTP_CHUNK_DEST(mp) == oldfp)
1692 					goto out;
1693 			}
1694 		}
1695 		meta = meta->b_next;
1696 		if (meta != NULL && sctp->sctp_prsctp_aware) {
1697 			mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1698 
1699 			while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
1700 			    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
1701 				meta = meta->b_next;
1702 			}
1703 		}
1704 	}
1705 window_probe:
1706 	/*
1707 	 * Retransmit fired for a destination which didn't have
1708 	 * any unacked data pending.
1709 	 */
1710 	if (!sctp->sctp_unacked && sctp->sctp_unsent) {
1711 		/*
1712 		 * Send a window probe. Inflate frwnd to allow
1713 		 * sending one segment.
1714 		 */
1715 		if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) {
1716 			sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc);
1717 		}
1718 		BUMP_MIB(&sctp_mib, sctpOutWinProbe);
1719 		sctp_output(sctp);
1720 	}
1721 	return;
1722 out:
1723 	/*
1724 	 * Enter slowstart for this destination
1725 	 */
1726 	oldfp->ssthresh = oldfp->cwnd / 2;
1727 	if (oldfp->ssthresh < 2 * oldfp->sfa_pmss)
1728 		oldfp->ssthresh = 2 * oldfp->sfa_pmss;
1729 	oldfp->cwnd = oldfp->sfa_pmss;
1730 	oldfp->pba = 0;
1731 	fp = sctp_rotate_faddr(sctp, oldfp);
1732 	ASSERT(fp != NULL);
1733 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1734 
1735 	first_ua_tsn = ntohl(sdc->sdh_tsn);
1736 	if (do_ftsn) {
1737 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
1738 		if (nmp == NULL) {
1739 			sctp->sctp_adv_pap = adv_pap;
1740 			goto restart_timer;
1741 		}
1742 		head = nmp;
1743 		/*
1744 		 * Move to the next unabandoned chunk. XXXCheck if meta will
1745 		 * always be marked abandoned.
1746 		 */
1747 		while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta))
1748 			meta = meta->b_next;
1749 		if (meta != NULL)
1750 			mp = mp->b_cont;
1751 		else
1752 			mp = NULL;
1753 		goto try_bundle;
1754 	}
1755 	seglen = ntohs(sdc->sdh_len);
1756 	chunklen = seglen - sizeof (*sdc);
1757 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1758 		extra = SCTP_ALIGN - extra;
1759 
1760 	/* Find out if we need to piggyback SACK. */
1761 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1762 		sacklen = 0;
1763 	} else {
1764 		sacklen = sizeof (sctp_chunk_hdr_t) +
1765 		    sizeof (sctp_sack_chunk_t) +
1766 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1767 		if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
1768 			/* piggybacked SACK doesn't fit */
1769 			sacklen = 0;
1770 		} else {
1771 			/*
1772 			 * OK, we have room to send SACK back.  But we
1773 			 * should send it back to the last fp where we
1774 			 * receive data from, unless sctp_lastdata equals
1775 			 * oldfp, then we should probably not send it
1776 			 * back to that fp.  Also we should check that
1777 			 * the fp is alive.
1778 			 */
1779 			if (sctp->sctp_lastdata != oldfp &&
1780 			    sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) {
1781 				fp = sctp->sctp_lastdata;
1782 			}
1783 		}
1784 	}
1785 
1786 	/*
1787 	 * Cancel RTT measurement if the retransmitted TSN is before the
1788 	 * TSN used for timimg.
1789 	 */
1790 	if (sctp->sctp_out_time != 0 &&
1791 	    SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
1792 		sctp->sctp_out_time = 0;
1793 	}
1794 	/* Clear the counter as the RTT calculation may be off. */
1795 	fp->rtt_updates = 0;
1796 	oldfp->rtt_updates = 0;
1797 
1798 	/*
1799 	 * After a timeout, we should change the current faddr so that
1800 	 * new chunks will be sent to the alternate address.
1801 	 */
1802 	sctp_set_faddr_current(sctp, fp);
1803 
1804 	nmp = dupmsg(mp);
1805 	if (nmp == NULL)
1806 		goto restart_timer;
1807 	if (extra > 0) {
1808 		fill = sctp_get_padding(extra);
1809 		if (fill != NULL) {
1810 			linkb(nmp, fill);
1811 			seglen += extra;
1812 		} else {
1813 			freemsg(nmp);
1814 			goto restart_timer;
1815 		}
1816 	}
1817 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
1818 	head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
1819 	if (head == NULL) {
1820 		freemsg(nmp);
1821 		SCTP_KSTAT(sctp_rexmit_failed);
1822 		goto restart_timer;
1823 	}
1824 	seglen += sacklen;
1825 
1826 	SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1827 
1828 	mp = mp->b_next;
1829 
1830 	/* Check how much more we can send. */
1831 	tot_wnd = MIN(fp->cwnd, sctp->sctp_frwnd);
1832 	/*
1833 	 * If the number of outstanding bytes is more than what we are
1834 	 * allowed to send, stop.
1835 	 */
1836 	if (tot_wnd <= chunklen || tot_wnd < fp->suna + chunklen)
1837 		goto done_bundle;
1838 	else
1839 		tot_wnd -= chunklen;
1840 
1841 try_bundle:
1842 	while (seglen < fp->sfa_pmss) {
1843 		int32_t new_len;
1844 
1845 		/* Go through the list to find more chunks to be bundled. */
1846 		while (mp != NULL) {
1847 			/* Check if the chunk can be bundled. */
1848 			if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp))
1849 				break;
1850 			mp = mp->b_next;
1851 		}
1852 		/* Go to the next message. */
1853 		if (mp == NULL) {
1854 			for (meta = meta->b_next; meta != NULL;
1855 			    meta = meta->b_next) {
1856 				mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1857 
1858 				if (SCTP_IS_MSG_ABANDONED(meta) ||
1859 				    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
1860 				    sctp)) {
1861 					continue;
1862 				}
1863 
1864 				mp = meta->b_cont;
1865 				goto try_bundle;
1866 			}
1867 			/* No more chunk to be bundled. */
1868 			break;
1869 		}
1870 
1871 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1872 		new_len = ntohs(sdc->sdh_len);
1873 		chunklen = new_len - sizeof (*sdc);
1874 		if (chunklen > tot_wnd)
1875 			break;
1876 
1877 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
1878 			extra = SCTP_ALIGN - extra;
1879 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
1880 			break;
1881 		if ((nmp = dupmsg(mp)) == NULL)
1882 			break;
1883 
1884 		if (extra > 0) {
1885 			fill = sctp_get_padding(extra);
1886 			if (fill != NULL) {
1887 				linkb(nmp, fill);
1888 			} else {
1889 				freemsg(nmp);
1890 				break;
1891 			}
1892 		}
1893 		linkb(head, nmp);
1894 
1895 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
1896 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1897 
1898 		seglen = new_len;
1899 		tot_wnd -= chunklen;
1900 		mp = mp->b_next;
1901 	}
1902 done_bundle:
1903 	if ((seglen > fp->sfa_pmss) && fp->isv4) {
1904 		ipha_t *iph = (ipha_t *)head->b_rptr;
1905 
1906 		/*
1907 		 * Path MTU is different from path we thought it would
1908 		 * be when we created chunks, or IP headers have grown.
1909 		 * Need to clear the DF bit.
1910 		 */
1911 		iph->ipha_fragment_offset_and_flags = 0;
1912 	}
1913 	dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
1914 	    "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
1915 	    seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
1916 	    (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
1917 
1918 	sctp->sctp_rexmitting = B_TRUE;
1919 	sctp->sctp_rxt_nxttsn = first_ua_tsn;
1920 	sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1921 	sctp_set_iplen(sctp, head);
1922 	sctp_add_sendq(sctp, head);
1923 
1924 	/*
1925 	 * Restart the oldfp timer with exponential backoff and
1926 	 * the new fp timer for the retransmitted chunks.
1927 	 */
1928 restart_timer:
1929 	oldfp->strikes++;
1930 	sctp->sctp_strikes++;
1931 	SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
1932 	if (oldfp->suna != 0)
1933 		SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto);
1934 	sctp->sctp_active = lbolt64;
1935 
1936 	/*
1937 	 * Should we restart the timer of the new fp?  If there is
1938 	 * outstanding data to the new fp, the timer should be
1939 	 * running already.  So restarting it means that the timer
1940 	 * will fire later for those outstanding data.  But if
1941 	 * we don't restart it, the timer will fire too early for the
1942 	 * just retransmitted chunks to the new fp.  The reason is that we
1943 	 * don't keep a timestamp on when a chunk is retransmitted.
1944 	 * So when the timer fires, it will just search for the
1945 	 * chunk with the earliest TSN sent to new fp.  This probably
1946 	 * is the chunk we just retransmitted.  So for now, let's
1947 	 * be conservative and restart the timer of the new fp.
1948 	 */
1949 	SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
1950 }
1951 
1952 /*
1953  * The SCTP write put procedure called from IP.
1954  */
1955 void
1956 sctp_wput(queue_t *q, mblk_t *mp)
1957 {
1958 	uchar_t		*rptr;
1959 	t_scalar_t	type;
1960 
1961 	switch (mp->b_datap->db_type) {
1962 	case M_IOCTL:
1963 		sctp_wput_ioctl(q, mp);
1964 		break;
1965 	case M_DATA:
1966 		/* Should be handled in sctp_output() */
1967 		ASSERT(0);
1968 		freemsg(mp);
1969 		break;
1970 	case M_PROTO:
1971 	case M_PCPROTO:
1972 		rptr = mp->b_rptr;
1973 		if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) {
1974 			type = ((union T_primitives *)rptr)->type;
1975 			/*
1976 			 * There is no "standard" way on how to respond
1977 			 * to T_CAPABILITY_REQ if a module does not
1978 			 * understand it.  And the current TI mod
1979 			 * has problems handling an error ack.  So we
1980 			 * catch the request here and reply with a response
1981 			 * which the TI mod knows how to respond to.
1982 			 */
1983 			switch (type) {
1984 			case T_CAPABILITY_REQ:
1985 				(void) putnextctl1(RD(q), M_ERROR, EPROTO);
1986 				break;
1987 			default:
1988 				if ((mp = mi_tpi_err_ack_alloc(mp,
1989 				    TNOTSUPPORT, 0)) != NULL) {
1990 					qreply(q, mp);
1991 					return;
1992 				}
1993 			}
1994 		}
1995 		/* FALLTHRU */
1996 	default:
1997 		freemsg(mp);
1998 		return;
1999 	}
2000 }
2001 
2002 /*
2003  * This function is called by sctp_ss_rexmit() to create a packet
2004  * to be retransmitted to the given fp.  The given meta and mp
2005  * parameters are respectively the sctp_msg_hdr_t and the mblk of the
2006  * first chunk to be retransmitted.
2007  */
2008 static mblk_t *
2009 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
2010     uint_t *packet_len)
2011 {
2012 	uint32_t	seglen = 0;
2013 	uint16_t	chunklen;
2014 	int		extra;
2015 	mblk_t		*nmp;
2016 	mblk_t		*head;
2017 	mblk_t		*fill;
2018 	sctp_data_hdr_t	*sdc;
2019 	sctp_msg_hdr_t	*mhdr;
2020 
2021 	sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2022 	seglen = ntohs(sdc->sdh_len);
2023 	chunklen = seglen - sizeof (*sdc);
2024 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
2025 		extra = SCTP_ALIGN - extra;
2026 
2027 	nmp = dupmsg(*mp);
2028 	if (nmp == NULL)
2029 		return (NULL);
2030 	if (extra > 0) {
2031 		fill = sctp_get_padding(extra);
2032 		if (fill != NULL) {
2033 			linkb(nmp, fill);
2034 			seglen += extra;
2035 		} else {
2036 			freemsg(nmp);
2037 			return (NULL);
2038 		}
2039 	}
2040 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
2041 	head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
2042 	if (head == NULL) {
2043 		freemsg(nmp);
2044 		return (NULL);
2045 	}
2046 	SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2047 	sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2048 	*mp = (*mp)->b_next;
2049 
2050 try_bundle:
2051 	while (seglen < fp->sfa_pmss) {
2052 		int32_t new_len;
2053 
2054 		/*
2055 		 * Go through the list to find more chunks to be bundled.
2056 		 * We should only retransmit sent by unack'ed chunks.  Since
2057 		 * they were sent before, the peer's receive window should
2058 		 * be able to receive them.
2059 		 */
2060 		while (*mp != NULL) {
2061 			/* Check if the chunk can be bundled. */
2062 			if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
2063 				break;
2064 			*mp = (*mp)->b_next;
2065 		}
2066 		/* Go to the next message. */
2067 		if (*mp == NULL) {
2068 			for (*meta = (*meta)->b_next; *meta != NULL;
2069 			    *meta = (*meta)->b_next) {
2070 				mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;
2071 
2072 				if (SCTP_IS_MSG_ABANDONED(*meta) ||
2073 				    SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
2074 				    sctp)) {
2075 					continue;
2076 				}
2077 
2078 				*mp = (*meta)->b_cont;
2079 				goto try_bundle;
2080 			}
2081 			/* No more chunk to be bundled. */
2082 			break;
2083 		}
2084 
2085 		sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2086 		/* Don't bundle chunks beyond sctp_rxt_maxtsn. */
2087 		if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
2088 			break;
2089 		new_len = ntohs(sdc->sdh_len);
2090 		chunklen = new_len - sizeof (*sdc);
2091 
2092 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
2093 			extra = SCTP_ALIGN - extra;
2094 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
2095 			break;
2096 		if ((nmp = dupmsg(*mp)) == NULL)
2097 			break;
2098 
2099 		if (extra > 0) {
2100 			fill = sctp_get_padding(extra);
2101 			if (fill != NULL) {
2102 				linkb(nmp, fill);
2103 			} else {
2104 				freemsg(nmp);
2105 				break;
2106 			}
2107 		}
2108 		linkb(head, nmp);
2109 
2110 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
2111 		SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2112 		sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2113 
2114 		seglen = new_len;
2115 		*mp = (*mp)->b_next;
2116 	}
2117 	*packet_len = seglen;
2118 	return (head);
2119 }
2120 
2121 /*
2122  * sctp_ss_rexmit() is called when we get a SACK after a timeout which
2123  * advances the cum_tsn but the cum_tsn is still less than what we have sent
2124  * (sctp_rxt_maxtsn) at the time of the timeout.  This SACK is a "partial"
2125  * SACK.  We retransmit unacked chunks without having to wait for another
2126  * timeout.  The rationale is that the SACK should not be "partial" if all the
2127  * lost chunks have been retransmitted.  Since the SACK is "partial,"
2128  * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
2129  * be missing.  It is better for us to retransmit them now instead
2130  * of waiting for a timeout.
2131  */
2132 void
2133 sctp_ss_rexmit(sctp_t *sctp)
2134 {
2135 	mblk_t		*meta;
2136 	mblk_t		*mp;
2137 	mblk_t		*pkt;
2138 	sctp_faddr_t	*fp;
2139 	uint_t		pkt_len;
2140 	uint32_t	tot_wnd;
2141 	sctp_data_hdr_t	*sdc;
2142 	int		burst;
2143 
2144 	/*
2145 	 * If the last cum ack is smaller than what we have just
2146 	 * retransmitted, simply return.
2147 	 */
2148 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
2149 		sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
2150 	else
2151 		return;
2152 
2153 	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));
2154 
2155 	/*
2156 	 * After a timer fires, sctp_current should be set to the new
2157 	 * fp where the retransmitted chunks are sent.
2158 	 */
2159 	fp = sctp->sctp_current;
2160 
2161 	/*
2162 	 * Since we are retransmitting, we can only use cwnd to determine
2163 	 * how much we can send as we were allowed to send those chunks
2164 	 * previously.
2165 	 */
2166 	tot_wnd = fp->cwnd;
2167 	/* So we have sent more than we can, just return. */
2168 	if (tot_wnd < fp->suna || tot_wnd - fp->suna < fp->sfa_pmss)
2169 		return;
2170 	else
2171 		tot_wnd -= fp->suna;
2172 
2173 	/* Find the first unack'ed chunk */
2174 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
2175 		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
2176 
2177 		if (SCTP_IS_MSG_ABANDONED(meta) ||
2178 		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
2179 			continue;
2180 		}
2181 
2182 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2183 			/* Again, this may not be possible */
2184 			if (!SCTP_CHUNK_ISSENT(mp))
2185 				return;
2186 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2187 			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
2188 				goto found_msg;
2189 		}
2190 	}
2191 
2192 	/* Everything is abandoned... */
2193 	return;
2194 
2195 found_msg:
2196 	if (!fp->timer_running)
2197 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
2198 	pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
2199 	if (pkt == NULL) {
2200 		SCTP_KSTAT(sctp_ss_rexmit_failed);
2201 		return;
2202 	}
2203 	if ((pkt_len > fp->sfa_pmss) && fp->isv4) {
2204 		ipha_t	*iph = (ipha_t *)pkt->b_rptr;
2205 
2206 		/*
2207 		 * Path MTU is different from path we thought it would
2208 		 * be when we created chunks, or IP headers have grown.
2209 		 *  Need to clear the DF bit.
2210 		 */
2211 		iph->ipha_fragment_offset_and_flags = 0;
2212 	}
2213 	sctp_set_iplen(sctp, pkt);
2214 	sctp_add_sendq(sctp, pkt);
2215 
2216 	/* Check and see if there is more chunk to be retransmitted. */
2217 	if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss ||
2218 	    meta == NULL)
2219 		return;
2220 	if (mp == NULL)
2221 		meta = meta->b_next;
2222 	if (meta == NULL)
2223 		return;
2224 
2225 	/* Retransmit another packet if the window allows. */
2226 	for (tot_wnd -= pkt_len, burst = sctp_maxburst - 1;
2227 	    meta != NULL && burst > 0; meta = meta->b_next, burst--) {
2228 		if (mp == NULL)
2229 			mp = meta->b_cont;
2230 		for (; mp != NULL; mp = mp->b_next) {
2231 			/* Again, this may not be possible */
2232 			if (!SCTP_CHUNK_ISSENT(mp))
2233 				return;
2234 			if (!SCTP_CHUNK_ISACKED(mp))
2235 				goto found_msg;
2236 		}
2237 	}
2238 }
2239