xref: /titanic_41/usr/src/uts/common/inet/sctp/sctp_output.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #define	_SUN_TPI_VERSION 2
31 #include <sys/tihdr.h>
32 #include <sys/socket.h>
33 #include <sys/stropts.h>
34 #include <sys/strsun.h>
35 #include <sys/strsubr.h>
36 #include <sys/socketvar.h>
37 #include <inet/common.h>
38 #include <inet/mi.h>
39 #include <inet/ip.h>
40 #include <inet/ip_ire.h>
41 #include <inet/ip6.h>
42 #include <inet/sctp_ip.h>
43 #include <inet/ipclassifier.h>
44 
45 /*
46  * PR-SCTP comments.
47  *
48  * A message can expire before it gets to the transmit list (i.e. it is still
49  * in the unsent list - unchunked), after it gets to the transmit list, but
50  * before transmission has actually started, or after transmission has begun.
51  * Accordingly, we check for the status of a message in sctp_chunkify() when
52  * the message is being transferred from the unsent list to the transmit list;
53  * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
54  * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
55  * When we nuke a message in sctp_chunkify(), all we need to do is take it
56  * out of the unsent list and update sctp_unsent; when a message is deemed
57  * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
58  * list, update sctp_unsent IFF transmission for the message has not yet begun
59  * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
60  * message has started, then we cannot just take it out of the list, we need
61  * to send Forward TSN chunk to the peer so that the peer can clear its
62  * fragment list for this message. However, we cannot just send the Forward
63  * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
64  * messages preceding this abandoned message. So, we send a Forward TSN
65  * IFF all messages prior to this abandoned message have been SACKd; if not,
66  * we defer sending the Forward TSN to sctp_cumack(), which will check for
67  * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
68  * sctp_rexmit() when we check for retransmissions, we need to determine if
69  * the advanced peer ack point can be moved ahead, and if so, send a Forward
70  * TSN to the peer instead of retransmitting the chunk. Note that when
71  * we send a Forward TSN for a message, there may be yet unsent chunks for
72  * this message; we need to mark all such chunks as abandoned, so that
73  * sctp_cumack() can take the message out of the transmit list, additionally
74  * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
75  * decremented when a message/chunk is deemed abandoned), sockfs needs to
76  * be notified so that it can adjust its idea of the queued message.
77  */
78 
79 #include "sctp_impl.h"
80 
/*
 * kmem cache handle.  It is not referenced in the part of the file visible
 * here; presumably it backs Forward TSN set allocations (TODO confirm).
 */
81 static struct kmem_cache	*sctp_kmem_ftsn_set_cache;
/* Forward declaration; sctp_chunkify() is defined further down in this file. */
82 static mblk_t			*sctp_chunkify(sctp_t *, int, int, int);
83 
84 #ifdef	DEBUG
/* Debug-only list consistency check, defined further down in this file. */
85 static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
86 #endif
87 
88 /*
89  * Called to allocate a header mblk when sending data to SCTP.
90  * Data will follow in b_cont of this mblk.
91  */
92 mblk_t *
sctp_alloc_hdr(const char * name,int nlen,const char * control,int clen,int flags)93 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
94     int flags)
95 {
96 	mblk_t *mp;
97 	struct T_unitdata_req *tudr;
98 	size_t size;
99 	int error;
100 
101 	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
102 	size = MAX(size, sizeof (sctp_msg_hdr_t));
103 	if (flags & SCTP_CAN_BLOCK) {
104 		mp = allocb_wait(size, BPRI_MED, 0, &error);
105 	} else {
106 		mp = allocb(size, BPRI_MED);
107 	}
108 	if (mp) {
109 		tudr = (struct T_unitdata_req *)mp->b_rptr;
110 		tudr->PRIM_type = T_UNITDATA_REQ;
111 		tudr->DEST_length = nlen;
112 		tudr->DEST_offset = sizeof (*tudr);
113 		tudr->OPT_length = clen;
114 		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
115 		    _TPI_ALIGN_TOPT(nlen));
116 		if (nlen > 0)
117 			bcopy(name, tudr + 1, nlen);
118 		if (clen > 0)
119 			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
120 		mp->b_wptr += (tudr ->OPT_offset + clen);
121 		mp->b_datap->db_type = M_PROTO;
122 	}
123 	return (mp);
124 }
125 
126 /*ARGSUSED2*/
127 int
sctp_sendmsg(sctp_t * sctp,mblk_t * mp,int flags)128 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
129 {
130 	sctp_faddr_t	*fp = NULL;
131 	struct T_unitdata_req	*tudr;
132 	int		error = 0;
133 	mblk_t		*mproto = mp;
134 	in6_addr_t	*addr;
135 	in6_addr_t	tmpaddr;
136 	uint16_t	sid = sctp->sctp_def_stream;
137 	uint32_t	ppid = sctp->sctp_def_ppid;
138 	uint32_t	context = sctp->sctp_def_context;
139 	uint16_t	msg_flags = sctp->sctp_def_flags;
140 	sctp_msg_hdr_t	*sctp_msg_hdr;
141 	uint32_t	msg_len = 0;
142 	uint32_t	timetolive = sctp->sctp_def_timetolive;
143 	conn_t		*connp = sctp->sctp_connp;
144 
145 	ASSERT(DB_TYPE(mproto) == M_PROTO);
146 
147 	mp = mp->b_cont;
148 	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);
149 
150 	tudr = (struct T_unitdata_req *)mproto->b_rptr;
151 	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
152 
153 	/* Get destination address, if specified */
154 	if (tudr->DEST_length > 0) {
155 		sin_t *sin;
156 		sin6_t *sin6;
157 
158 		sin = (struct sockaddr_in *)
159 		    (mproto->b_rptr + tudr->DEST_offset);
160 		switch (sin->sin_family) {
161 		case AF_INET:
162 			if (tudr->DEST_length < sizeof (*sin)) {
163 				return (EINVAL);
164 			}
165 			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
166 			addr = &tmpaddr;
167 			break;
168 		case AF_INET6:
169 			if (tudr->DEST_length < sizeof (*sin6)) {
170 				return (EINVAL);
171 			}
172 			sin6 = (struct sockaddr_in6 *)
173 			    (mproto->b_rptr + tudr->DEST_offset);
174 			addr = &sin6->sin6_addr;
175 			break;
176 		default:
177 			return (EAFNOSUPPORT);
178 		}
179 		fp = sctp_lookup_faddr(sctp, addr);
180 		if (fp == NULL) {
181 			return (EINVAL);
182 		}
183 	}
184 	/* Ancillary Data? */
185 	if (tudr->OPT_length > 0) {
186 		struct cmsghdr		*cmsg;
187 		char			*cend;
188 		struct sctp_sndrcvinfo	*sndrcv;
189 
190 		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
191 		cend = ((char *)cmsg + tudr->OPT_length);
192 		ASSERT(cend <= (char *)mproto->b_wptr);
193 
194 		for (;;) {
195 			if ((char *)(cmsg + 1) > cend ||
196 			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
197 				break;
198 			}
199 			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
200 			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
201 				if (cmsg->cmsg_len <
202 				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
203 					return (EINVAL);
204 				}
205 				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
206 				sid = sndrcv->sinfo_stream;
207 				msg_flags = sndrcv->sinfo_flags;
208 				ppid = sndrcv->sinfo_ppid;
209 				context = sndrcv->sinfo_context;
210 				timetolive = sndrcv->sinfo_timetolive;
211 				break;
212 			}
213 			if (cmsg->cmsg_len > 0)
214 				cmsg = CMSG_NEXT(cmsg);
215 			else
216 				break;
217 		}
218 	}
219 	if (msg_flags & MSG_ABORT) {
220 		if (mp && mp->b_cont) {
221 			mblk_t *pump = msgpullup(mp, -1);
222 			if (!pump) {
223 				return (ENOMEM);
224 			}
225 			freemsg(mp);
226 			mp = pump;
227 			mproto->b_cont = mp;
228 		}
229 		RUN_SCTP(sctp);
230 		sctp_user_abort(sctp, mp);
231 		freemsg(mproto);
232 		goto done2;
233 	}
234 	if (mp == NULL)
235 		goto done;
236 
237 	RUN_SCTP(sctp);
238 
239 	/* Reject any new data requests if we are shutting down */
240 	if (sctp->sctp_state > SCTPS_ESTABLISHED ||
241 	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
242 		error = EPIPE;
243 		goto unlock_done;
244 	}
245 
246 	/* Re-use the mproto to store relevant info. */
247 	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));
248 
249 	mproto->b_rptr = mproto->b_datap->db_base;
250 	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);
251 
252 	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
253 	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
254 	sctp_msg_hdr->smh_context = context;
255 	sctp_msg_hdr->smh_sid = sid;
256 	sctp_msg_hdr->smh_ppid = ppid;
257 	sctp_msg_hdr->smh_flags = msg_flags;
258 	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
259 	sctp_msg_hdr->smh_tob = ddi_get_lbolt64();
260 	for (; mp != NULL; mp = mp->b_cont)
261 		msg_len += MBLKL(mp);
262 	sctp_msg_hdr->smh_msglen = msg_len;
263 
264 	/* User requested specific destination */
265 	SCTP_SET_CHUNK_DEST(mproto, fp);
266 
267 	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
268 	    sid >= sctp->sctp_num_ostr) {
269 		/* Send sendfail event */
270 		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
271 		    B_FALSE);
272 		error = EINVAL;
273 		goto unlock_done;
274 	}
275 
276 	/* no data */
277 	if (msg_len == 0) {
278 		sctp_sendfail_event(sctp, dupmsg(mproto),
279 		    SCTP_ERR_NO_USR_DATA, B_FALSE);
280 		error = EINVAL;
281 		goto unlock_done;
282 	}
283 
284 	/* Add it to the unsent list */
285 	if (sctp->sctp_xmit_unsent == NULL) {
286 		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
287 	} else {
288 		sctp->sctp_xmit_unsent_tail->b_next = mproto;
289 		sctp->sctp_xmit_unsent_tail = mproto;
290 	}
291 	sctp->sctp_unsent += msg_len;
292 	BUMP_LOCAL(sctp->sctp_msgcount);
293 	/*
294 	 * Notify sockfs if the tx queue is full.
295 	 */
296 	if (SCTP_TXQ_LEN(sctp) >= connp->conn_sndbuf) {
297 		sctp->sctp_txq_full = 1;
298 		sctp->sctp_ulp_txq_full(sctp->sctp_ulpd, B_TRUE);
299 	}
300 	if (sctp->sctp_state == SCTPS_ESTABLISHED)
301 		sctp_output(sctp, UINT_MAX);
302 done2:
303 	WAKE_SCTP(sctp);
304 	return (0);
305 unlock_done:
306 	WAKE_SCTP(sctp);
307 done:
308 	return (error);
309 }
310 
311 /*
312  * While there are messages on sctp_xmit_unsent, detach each one. For each:
313  * allocate space for the chunk header, fill in the data chunk, and fill in
314  * the chunk header. Then append it to sctp_xmit_tail.
315  * Return after appending as many bytes as required (bytes_to_send).
316  * We also return if we've appended one or more chunks, and find a subsequent
317  * unsent message is too big to fit in the segment.
 *
 * Arguments:
 *	mss		segment size used to size the first chunk
 *	firstseg_len	bytes already used in the packet being assembled; the
 *			first chunk is limited to (mss - firstseg_len)
 *	bytes_to_send	amount of payload the caller would like chunked
 *
 * Chunks after the first one are sized from the destination's sf_pmss.
 *
 * Returns NULL when firstseg_len is non-zero and the next unsent message
 * does not fit in the remaining space.  Otherwise returns sctp_xmit_tail,
 * which this function sets only when it was NULL on entry.
 *
 * If dupb() or allocb() fails part way through a message, the part that has
 * not been chunked yet is put back at the head of the message's b_cont list
 * (and the message is flagged with SCTP_MSG_SET_CHUNKED if chunks were
 * already built) so that a later call can continue where this one stopped.
318  */
319 mblk_t *
sctp_chunkify(sctp_t * sctp,int mss,int firstseg_len,int bytes_to_send)320 sctp_chunkify(sctp_t *sctp, int mss, int firstseg_len, int bytes_to_send)
321 {
322 	mblk_t			*mp;
323 	mblk_t			*chunk_mp;
324 	mblk_t			*chunk_head;
325 	mblk_t			*chunk_hdr;
326 	mblk_t			*chunk_tail = NULL;
327 	int			count;
328 	int			chunksize;
329 	sctp_data_hdr_t		*sdc;
330 	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
331 	sctp_faddr_t		*fp;
332 	sctp_faddr_t		*fp1;
333 	size_t			xtralen;
334 	sctp_msg_hdr_t		*msg_hdr;
335 	sctp_stack_t		*sctps = sctp->sctp_sctps;
336 	sctp_msg_hdr_t		*next_msg_hdr;
337 	size_t			nextlen;
338 	int			remaining_len = mss - firstseg_len;
339 
340 	ASSERT(remaining_len >= 0);
341 
	/*
	 * Use the destination recorded on the message (sctp_sendmsg() stores
	 * the user-requested one there), falling back to the current path.
	 */
342 	fp = SCTP_CHUNK_DEST(mdblk);
343 	if (fp == NULL)
344 		fp = sctp->sctp_current;
	/*
	 * xtralen is the size of a separately allocated chunk header mblk:
	 * room for the IP/SCTP header, the stack's extra write offset and
	 * the data chunk header itself.
	 */
345 	if (fp->sf_isv4)
346 		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
347 		    sizeof (*sdc);
348 	else
349 		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
350 		    sizeof (*sdc);
	/*
	 * chunksize is the payload capacity of the chunk being built; count
	 * is how much of that capacity is still unused.
	 */
351 	count = chunksize = remaining_len - sizeof (*sdc);
352 nextmsg:
353 	next_msg_hdr = (sctp_msg_hdr_t *)sctp->sctp_xmit_unsent->b_rptr;
354 	nextlen = next_msg_hdr->smh_msglen;
355 	/*
356 	 * Will the entire next message fit in the current packet ?
357 	 * if not, leave it on the unsent list.
358 	 */
359 	if ((firstseg_len != 0) && (nextlen > remaining_len))
360 		return (NULL);
361 
362 	chunk_mp = mdblk->b_cont;
363 
364 	/*
365 	 * If this partially chunked, we ignore the next one for now and
366 	 * use the one already present. For the unchunked bits, we use the
367 	 * length of the last chunk.
368 	 */
369 	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
370 		int	chunk_len;
371 
		/*
		 * The head of b_cont is the unchunked remainder that an
		 * earlier, failed pass pushed back; unhook it so that b_cont
		 * again lists only finished chunks.
		 */
372 		ASSERT(chunk_mp->b_next != NULL);
373 		mdblk->b_cont = chunk_mp->b_next;
374 		chunk_mp->b_next = NULL;
375 		SCTP_MSG_CLEAR_CHUNKED(mdblk);
		/* Find the last finished chunk to learn its length. */
376 		mp = mdblk->b_cont;
377 		while (mp->b_next != NULL)
378 			mp = mp->b_next;
379 		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
380 		if (fp->sf_pmss - chunk_len > sizeof (*sdc))
381 			count = chunksize = fp->sf_pmss - chunk_len;
382 		else
383 			count = chunksize = fp->sf_pmss;
384 		count = chunksize = count - sizeof (*sdc);
385 	} else {
386 		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
		/*
		 * Message expired before any chunking: drop it from the
		 * unsent list, fix up the accounting and report the failure.
		 */
387 		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
388 			sctp->sctp_xmit_unsent = mdblk->b_next;
389 			if (sctp->sctp_xmit_unsent == NULL)
390 				sctp->sctp_xmit_unsent_tail = NULL;
391 			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
392 			sctp->sctp_unsent -= msg_hdr->smh_msglen;
393 			mdblk->b_next = NULL;
394 			BUMP_LOCAL(sctp->sctp_prsctpdrop);
395 			/*
396 			 * Update ULP the amount of queued data, which is
397 			 * sent-unack'ed + unsent.
398 			 */
399 			if (!SCTP_IS_DETACHED(sctp))
400 				SCTP_TXQ_UPDATE(sctp);
401 			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
402 			goto try_next;
403 		}
		/*
		 * Detach the raw data (still referenced by chunk_mp); b_cont
		 * is rebuilt below as a b_next-linked list of chunks.
		 */
404 		mdblk->b_cont = NULL;
405 	}
406 	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
407 nextchunk:
408 	chunk_head = chunk_mp;
409 	chunk_tail = NULL;
410 
411 	/* Skip as many mblk's as we need */
412 	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
413 		count -= MBLKL(chunk_mp);
414 		chunk_tail = chunk_mp;
415 		chunk_mp = chunk_mp->b_cont;
416 	}
417 	/* Split the chain, if needed */
418 	if (chunk_mp != NULL) {
419 		if (count > 0) {
			/*
			 * The chunk ends inside chunk_mp: the first 'count'
			 * bytes are taken through a dupb() copy and chunk_mp
			 * keeps the rest.
			 */
420 			mblk_t	*split_mp = dupb(chunk_mp);
421 
422 			if (split_mp == NULL) {
				/*
				 * No memory: hand the unchunked data back to
				 * the message and stop for now.
				 */
423 				if (mdblk->b_cont == NULL) {
424 					mdblk->b_cont = chunk_head;
425 				} else  {
426 					SCTP_MSG_SET_CHUNKED(mdblk);
427 					ASSERT(chunk_head->b_next == NULL);
428 					chunk_head->b_next = mdblk->b_cont;
429 					mdblk->b_cont = chunk_head;
430 				}
431 				return (sctp->sctp_xmit_tail);
432 			}
433 			if (chunk_tail != NULL) {
434 				chunk_tail->b_cont = split_mp;
435 				chunk_tail = chunk_tail->b_cont;
436 			} else {
437 				chunk_head = chunk_tail = split_mp;
438 			}
			/* Trim the copy to 'count' bytes, advance the rest. */
439 			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
440 			chunk_mp->b_rptr = chunk_tail->b_wptr;
441 			count = 0;
442 		} else if (chunk_tail == NULL) {
			/*
			 * No room at all and nothing collected: retry with a
			 * full-sized chunk.
			 */
443 			goto next;
444 		} else {
			/* The chunk ends exactly on an mblk boundary. */
445 			chunk_tail->b_cont = NULL;
446 		}
447 	}
448 	/* Alloc chunk hdr, if needed */
	/*
	 * The data chunk header is written in front of the payload in place
	 * only if the first mblk is not shared, its read pointer is
	 * SCTP_ALIGN aligned and it has enough headroom; otherwise a
	 * separate header mblk is allocated.
	 */
449 	if (DB_REF(chunk_head) > 1 ||
450 	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
451 	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
452 		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/*
			 * No memory: rejoin this chunk's data with the
			 * remainder and hand it back to the message.
			 */
453 			if (mdblk->b_cont == NULL) {
454 				if (chunk_mp != NULL)
455 					linkb(chunk_head, chunk_mp);
456 				mdblk->b_cont = chunk_head;
457 			} else {
458 				SCTP_MSG_SET_CHUNKED(mdblk);
459 				if (chunk_mp != NULL)
460 					linkb(chunk_head, chunk_mp);
461 				ASSERT(chunk_head->b_next == NULL);
462 				chunk_head->b_next = mdblk->b_cont;
463 				mdblk->b_cont = chunk_head;
464 			}
465 			return (sctp->sctp_xmit_tail);
466 		}
		/* Place the header at the end, leaving headroom in front. */
467 		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
468 		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
469 		chunk_hdr->b_cont = chunk_head;
470 	} else {
471 		chunk_hdr = chunk_head;
472 		chunk_hdr->b_rptr -= sizeof (*sdc);
473 	}
474 	ASSERT(chunk_hdr->b_datap->db_ref == 1);
475 	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
476 	sdc->sdh_id = CHUNK_DATA;
477 	sdc->sdh_flags = 0;
	/* (chunksize - count) is the payload actually placed in this chunk. */
478 	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
479 	ASSERT(sdc->sdh_len);
480 	sdc->sdh_sid = htons(msg_hdr->smh_sid);
481 	/*
482 	 * We defer assigning the SSN just before sending the chunk, else
483 	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
484 	 * to send a Forward TSN to let the peer know. Some more comments
485 	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
486 	 */
487 	sdc->sdh_payload_id = msg_hdr->smh_ppid;
488 
	/* The first chunk of a message gets the B bit; later ones are appended. */
489 	if (mdblk->b_cont == NULL) {
490 		mdblk->b_cont = chunk_hdr;
491 		SCTP_DATA_SET_BBIT(sdc);
492 	} else {
493 		mp = mdblk->b_cont;
494 		while (mp->b_next != NULL)
495 			mp = mp->b_next;
496 		mp->b_next = chunk_hdr;
497 	}
498 
499 	bytes_to_send -= (chunksize - count);
500 	if (chunk_mp != NULL) {
		/* More data in this message: build a full-sized chunk next. */
501 next:
502 		count = chunksize = fp->sf_pmss - sizeof (*sdc);
503 		goto nextchunk;
504 	}
	/*
	 * Message fully chunked: mark the last chunk with the E bit and move
	 * the message from the unsent list to the end of the transmit list.
	 */
505 	SCTP_DATA_SET_EBIT(sdc);
506 	sctp->sctp_xmit_unsent = mdblk->b_next;
507 	if (mdblk->b_next == NULL) {
508 		sctp->sctp_xmit_unsent_tail = NULL;
509 	}
510 	mdblk->b_next = NULL;
511 
512 	if (sctp->sctp_xmit_tail == NULL) {
513 		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
514 	} else {
		/* sctp_xmit_tail is left alone; walk to the real list end. */
515 		mp = sctp->sctp_xmit_tail;
516 		while (mp->b_next != NULL)
517 			mp = mp->b_next;
518 		mp->b_next = mdblk;
519 		mdblk->b_prev = mp;
520 	}
521 try_next:
	/* Keep going while the caller wants more bytes and messages remain. */
522 	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
523 		mdblk = sctp->sctp_xmit_unsent;
524 		fp1 = SCTP_CHUNK_DEST(mdblk);
525 		if (fp1 == NULL)
526 			fp1 = sctp->sctp_current;
527 		if (fp == fp1) {
			/*
			 * Same destination: reuse the space left over from
			 * the previous chunk (less a chunk header, rounded
			 * down to a multiple of 4) when the next message's
			 * first mblk either fits in it or is larger than a
			 * full chunk anyway; otherwise start a full chunk.
			 */
528 			size_t len = MBLKL(mdblk->b_cont);
529 			if ((count > 0) &&
530 			    ((len > fp->sf_pmss - sizeof (*sdc)) ||
531 			    (len <= count))) {
532 				count -= sizeof (*sdc);
533 				count = chunksize = count - (count & 0x3);
534 			} else {
535 				count = chunksize = fp->sf_pmss -
536 				    sizeof (*sdc);
537 			}
538 		} else {
			/* Different destination: recompute header room and size. */
539 			if (fp1->sf_isv4)
540 				xtralen = sctp->sctp_hdr_len;
541 			else
542 				xtralen = sctp->sctp_hdr6_len;
543 			xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
544 			count = chunksize = fp1->sf_pmss - sizeof (*sdc);
545 			fp = fp1;
546 		}
547 		goto nextmsg;
548 	}
549 	return (sctp->sctp_xmit_tail);
550 }
551 
552 void
sctp_free_msg(mblk_t * ump)553 sctp_free_msg(mblk_t *ump)
554 {
555 	mblk_t *mp, *nmp;
556 
557 	for (mp = ump->b_cont; mp; mp = nmp) {
558 		nmp = mp->b_next;
559 		mp->b_next = mp->b_prev = NULL;
560 		freemsg(mp);
561 	}
562 	ASSERT(!ump->b_prev);
563 	ump->b_next = NULL;
564 	freeb(ump);
565 }
566 
567 mblk_t *
sctp_add_proto_hdr(sctp_t * sctp,sctp_faddr_t * fp,mblk_t * mp,int sacklen,int * error)568 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
569     int *error)
570 {
571 	int hdrlen;
572 	uchar_t *hdr;
573 	int isv4 = fp->sf_isv4;
574 	sctp_stack_t	*sctps = sctp->sctp_sctps;
575 
576 	if (error != NULL)
577 		*error = 0;
578 
579 	if (isv4) {
580 		hdrlen = sctp->sctp_hdr_len;
581 		hdr = sctp->sctp_iphc;
582 	} else {
583 		hdrlen = sctp->sctp_hdr6_len;
584 		hdr = sctp->sctp_iphc6;
585 	}
586 	/*
587 	 * A reject|blackhole could mean that the address is 'down'. Similarly,
588 	 * it is possible that the address went down, we tried to send an
589 	 * heartbeat and ended up setting fp->sf_saddr as unspec because we
590 	 * didn't have any usable source address.  In either case
591 	 * sctp_get_dest() will try find an IRE, if available, and set
592 	 * the source address, if needed.  If we still don't have any
593 	 * usable source address, fp->sf_state will be SCTP_FADDRS_UNREACH and
594 	 * we return EHOSTUNREACH.
595 	 */
596 	ASSERT(fp->sf_ixa->ixa_ire != NULL);
597 	if ((fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
598 	    SCTP_IS_ADDR_UNSPEC(fp->sf_isv4, fp->sf_saddr)) {
599 		sctp_get_dest(sctp, fp);
600 		if (fp->sf_state == SCTP_FADDRS_UNREACH) {
601 			if (error != NULL)
602 				*error = EHOSTUNREACH;
603 			return (NULL);
604 		}
605 	}
606 	/* Copy in IP header. */
607 	if ((mp->b_rptr - mp->b_datap->db_base) <
608 	    (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) {
609 		mblk_t *nmp;
610 
611 		/*
612 		 * This can happen if IP headers are adjusted after
613 		 * data was moved into chunks, or during retransmission,
614 		 * or things like snoop is running.
615 		 */
616 		nmp = allocb(sctps->sctps_wroff_xtra + hdrlen + sacklen,
617 		    BPRI_MED);
618 		if (nmp == NULL) {
619 			if (error !=  NULL)
620 				*error = ENOMEM;
621 			return (NULL);
622 		}
623 		nmp->b_rptr += sctps->sctps_wroff_xtra;
624 		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
625 		nmp->b_cont = mp;
626 		mp = nmp;
627 	} else {
628 		mp->b_rptr -= (hdrlen + sacklen);
629 	}
630 	bcopy(hdr, mp->b_rptr, hdrlen);
631 	if (sacklen) {
632 		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
633 	}
634 	if (fp != sctp->sctp_current) {
635 		/* change addresses in header */
636 		if (isv4) {
637 			ipha_t *iph = (ipha_t *)mp->b_rptr;
638 
639 			IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, iph->ipha_dst);
640 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->sf_saddr)) {
641 				IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr,
642 				    iph->ipha_src);
643 			} else if (sctp->sctp_bound_to_all) {
644 				iph->ipha_src = INADDR_ANY;
645 			}
646 		} else {
647 			ip6_t *ip6h = (ip6_t *)mp->b_rptr;
648 
649 			ip6h->ip6_dst = fp->sf_faddr;
650 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->sf_saddr)) {
651 				ip6h->ip6_src = fp->sf_saddr;
652 			} else if (sctp->sctp_bound_to_all) {
653 				ip6h->ip6_src = ipv6_all_zeros;
654 			}
655 		}
656 	}
657 	return (mp);
658 }
659 
660 /*
661  * SCTP requires every chunk to be padded so that the total length
662  * is a multiple of SCTP_ALIGN.  This function returns a mblk with
663  * the specified pad length.
664  */
665 static mblk_t *
sctp_get_padding(sctp_t * sctp,int pad)666 sctp_get_padding(sctp_t *sctp, int pad)
667 {
668 	mblk_t *fill;
669 
670 	ASSERT(pad < SCTP_ALIGN);
671 	ASSERT(sctp->sctp_pad_mp != NULL);
672 	if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) {
673 		fill->b_wptr += pad;
674 		return (fill);
675 	}
676 
677 	/*
678 	 * The memory saving path of reusing the sctp_pad_mp
679 	 * fails may be because it has been dupb() too
680 	 * many times (DBLK_REFMAX).  Use the memory consuming
681 	 * path of allocating the pad mblk.
682 	 */
683 	if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
684 		/* Zero it out.  SCTP_ALIGN is sizeof (int32_t) */
685 		*(int32_t *)fill->b_rptr = 0;
686 		fill->b_wptr += pad;
687 	}
688 	return (fill);
689 }
690 
691 static mblk_t *
sctp_find_fast_rexmit_mblks(sctp_t * sctp,int * total,sctp_faddr_t ** fp)692 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
693 {
694 	mblk_t		*meta;
695 	mblk_t		*start_mp = NULL;
696 	mblk_t		*end_mp = NULL;
697 	mblk_t		*mp, *nmp;
698 	mblk_t		*fill;
699 	sctp_data_hdr_t	*sdh;
700 	int		msglen;
701 	int		extra;
702 	sctp_msg_hdr_t	*msg_hdr;
703 	sctp_faddr_t	*old_fp = NULL;
704 	sctp_faddr_t	*chunk_fp;
705 	sctp_stack_t	*sctps = sctp->sctp_sctps;
706 
707 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
708 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
709 		if (SCTP_IS_MSG_ABANDONED(meta) ||
710 		    SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
711 			continue;
712 		}
713 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
714 			if (SCTP_CHUNK_WANT_REXMIT(mp)) {
715 				/*
716 				 * Use the same peer address to do fast
717 				 * retransmission.  If the original peer
718 				 * address is dead, switch to the current
719 				 * one.  Record the old one so that we
720 				 * will pick the chunks sent to the old
721 				 * one for fast retransmission.
722 				 */
723 				chunk_fp = SCTP_CHUNK_DEST(mp);
724 				if (*fp == NULL) {
725 					*fp = chunk_fp;
726 					if ((*fp)->sf_state !=
727 					    SCTP_FADDRS_ALIVE) {
728 						old_fp = *fp;
729 						*fp = sctp->sctp_current;
730 					}
731 				} else if (old_fp == NULL && *fp != chunk_fp) {
732 					continue;
733 				} else if (old_fp != NULL &&
734 				    old_fp != chunk_fp) {
735 					continue;
736 				}
737 
738 				sdh = (sctp_data_hdr_t *)mp->b_rptr;
739 				msglen = ntohs(sdh->sdh_len);
740 				if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
741 					extra = SCTP_ALIGN - extra;
742 				}
743 
744 				/*
745 				 * We still return at least the first message
746 				 * even if that message cannot fit in as
747 				 * PMTU may have changed.
748 				 */
749 				if (*total + msglen + extra >
750 				    (*fp)->sf_pmss && start_mp != NULL) {
751 					return (start_mp);
752 				}
753 				if ((nmp = dupmsg(mp)) == NULL)
754 					return (start_mp);
755 				if (extra > 0) {
756 					fill = sctp_get_padding(sctp, extra);
757 					if (fill != NULL) {
758 						linkb(nmp, fill);
759 					} else {
760 						return (start_mp);
761 					}
762 				}
763 				SCTPS_BUMP_MIB(sctps, sctpOutFastRetrans);
764 				BUMP_LOCAL(sctp->sctp_rxtchunks);
765 				SCTP_CHUNK_CLEAR_REXMIT(mp);
766 				if (start_mp == NULL) {
767 					start_mp = nmp;
768 				} else {
769 					linkb(end_mp, nmp);
770 				}
771 				end_mp = nmp;
772 				*total += msglen + extra;
773 				dprint(2, ("sctp_find_fast_rexmit_mblks: "
774 				    "tsn %x\n", sdh->sdh_tsn));
775 			}
776 		}
777 	}
778 	/* Clear the flag as there is no more message to be fast rexmitted. */
779 	sctp->sctp_chk_fast_rexmit = B_FALSE;
780 	return (start_mp);
781 }
782 
/*
 * Debug-only sanity check that a b_next-linked mblk list is not broken:
 * returns B_TRUE if 'tail' can be reached from 'head' by following b_next,
 * B_FALSE if the end of the list is hit first.  If either pointer is NULL
 * there is nothing to check and B_TRUE is returned.
 */
#ifdef	DEBUG
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
	mblk_t	*cur;

	if (head == NULL || tail == NULL)
		return (B_TRUE);

	for (cur = head; cur != NULL; cur = cur->b_next) {
		if (cur == tail)
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif
800 
801 /*
802  * Gets the next unsent chunk to transmit. Messages that are abandoned are
803  * skipped. A message can be abandoned if it has a non-zero timetolive and
804  * transmission has not yet started or if it is a partially reliable
805  * message and its time is up (assuming we are PR-SCTP aware).
806  * We only return a chunk if it will fit entirely in the current packet.
807  * 'cansend' is used to determine if we need to try and chunkify messages
808  * from the unsent list, if any, and also as an input to sctp_chunkify() if so.
809  *
810  * firstseg_len indicates the space already used, cansend represents remaining
811  * space in the window, ((sf_pmss - firstseg_len) can therefore reasonably
812  * be used to compute the cansend arg).
 *
 * 'meta' is the message on the transmit list at which the search starts.
 * 'fp', when not NULL, is the destination the caller is sending to; an
 * unsent message that asks for a different, alive destination is then not
 * chunkified.
 *
 * On success the message containing the chunk is returned and *mp is set
 * to the chunk.  NULL is returned when there is nothing to send, or when
 * sctp_check_abandoned_msg() fails, in which case *error holds its return
 * value; *error is 0 otherwise.
813  */
814 mblk_t *
sctp_get_msg_to_send(sctp_t * sctp,mblk_t ** mp,mblk_t * meta,int * error,int32_t firstseg_len,uint32_t cansend,sctp_faddr_t * fp)815 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
816     int32_t firstseg_len, uint32_t cansend, sctp_faddr_t *fp)
817 {
818 	mblk_t		*mp1;
819 	sctp_msg_hdr_t	*msg_hdr;
820 	mblk_t		*tmp_meta;
821 	sctp_faddr_t	*fp1;
822 
823 	ASSERT(error != NULL && mp != NULL);
824 	*error = 0;
825 
826 	ASSERT(sctp->sctp_current != NULL);
827 
828 chunkified:
829 	while (meta != NULL) {
		/* Remember the successor; meta may be unlinked below. */
830 		tmp_meta = meta->b_next;
831 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
832 		mp1 = meta->b_cont;
833 		if (SCTP_IS_MSG_ABANDONED(meta))
834 			goto next_msg;
835 		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
			/* Live message: return its first sendable chunk. */
836 			while (mp1 != NULL) {
837 				if (SCTP_CHUNK_CANSEND(mp1)) {
838 					*mp = mp1;
839 #ifdef	DEBUG
840 					ASSERT(sctp_verify_chain(
841 					    sctp->sctp_xmit_head, meta));
842 #endif
843 					return (meta);
844 				}
845 				mp1 = mp1->b_next;
846 			}
847 			goto next_msg;
848 		}
849 		/*
850 		 * If we come here and the first chunk is sent, then we
851 		 * are PR-SCTP aware, in which case if the cumulative
852 		 * TSN has moved upto or beyond the first chunk (which
853 		 * means all the previous messages have been cumulative
854 		 * SACK'd), then we send a Forward TSN with the last
855 		 * chunk that was sent in this message. If we can't send
856 		 * a Forward TSN because previous non-abandoned messages
857 		 * have not been acked then we will defer the Forward TSN
858 		 * to sctp_rexmit() or sctp_cumack().
859 		 */
860 		if (SCTP_CHUNK_ISSENT(mp1)) {
861 			*error = sctp_check_abandoned_msg(sctp, meta);
862 			if (*error != 0) {
863 #ifdef	DEBUG
864 				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
865 				    sctp->sctp_xmit_tail));
866 #endif
867 				return (NULL);
868 			}
869 			goto next_msg;
870 		}
		/*
		 * Transmission of this message never started, so it can
		 * simply be unlinked from the transmit list.  The three
		 * cases below are: meta is the head, meta is the last
		 * element, meta is in the middle.
		 */
871 		BUMP_LOCAL(sctp->sctp_prsctpdrop);
872 		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
873 		if (meta->b_prev == NULL) {
874 			ASSERT(sctp->sctp_xmit_head == meta);
875 			sctp->sctp_xmit_head = tmp_meta;
876 			if (sctp->sctp_xmit_tail == meta)
877 				sctp->sctp_xmit_tail = tmp_meta;
878 			meta->b_next = NULL;
879 			if (tmp_meta != NULL)
880 				tmp_meta->b_prev = NULL;
881 		} else if (meta->b_next == NULL) {
882 			if (sctp->sctp_xmit_tail == meta)
883 				sctp->sctp_xmit_tail = meta->b_prev;
884 			meta->b_prev->b_next = NULL;
885 			meta->b_prev = NULL;
886 		} else {
887 			meta->b_prev->b_next = tmp_meta;
888 			tmp_meta->b_prev = meta->b_prev;
889 			if (sctp->sctp_xmit_tail == meta)
890 				sctp->sctp_xmit_tail = tmp_meta;
891 			meta->b_prev = NULL;
892 			meta->b_next = NULL;
893 		}
894 		sctp->sctp_unsent -= msg_hdr->smh_msglen;
895 		/*
896 		 * Update ULP the amount of queued data, which is
897 		 * sent-unack'ed + unsent.
898 		 */
899 		if (!SCTP_IS_DETACHED(sctp))
900 			SCTP_TXQ_UPDATE(sctp);
901 		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
902 next_msg:
903 		meta = tmp_meta;
904 	}
905 	/* chunkify, if needed */
906 	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
907 		ASSERT(sctp->sctp_unsent > 0);
908 		if (fp == NULL) {
			/*
			 * No destination given: use the one recorded on the
			 * first unsent message if it is alive, else the
			 * current path.
			 */
909 			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
910 			if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
911 				fp = sctp->sctp_current;
912 		} else {
913 			/*
914 			 * If user specified destination, try to honor that.
915 			 */
916 			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
917 			if (fp1 != NULL && fp1->sf_state == SCTP_FADDRS_ALIVE &&
918 			    fp1 != fp) {
919 				goto chunk_done;
920 			}
921 		}
		/* A NULL return means the next message was not chunkified. */
922 		meta = sctp_chunkify(sctp, fp->sf_pmss, firstseg_len, cansend);
923 		if (meta == NULL)
924 			goto chunk_done;
925 		/*
926 		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
927 		 * new chunk(s) to the tail, so we need to skip the
928 		 * sctp_xmit_tail, which would have already been processed.
929 		 * This could happen when there is unacked chunks, but
930 		 * nothing new to send.
931 		 * When sctp_chunkify() is called when the transmit queue
932 		 * is empty then we need to start from sctp_xmit_tail.
933 		 */
934 		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
935 #ifdef	DEBUG
936 			mp1 = sctp->sctp_xmit_tail->b_cont;
937 			while (mp1 != NULL) {
938 				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
939 				mp1 = mp1->b_next;
940 			}
941 #endif
942 			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
943 				goto chunk_done;
944 		}
		/* Rescan the transmit list starting at the new message(s). */
945 		goto chunkified;
946 	}
947 chunk_done:
948 #ifdef	DEBUG
949 	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
950 #endif
951 	return (NULL);
952 }
953 
954 void
sctp_fast_rexmit(sctp_t * sctp)955 sctp_fast_rexmit(sctp_t *sctp)
956 {
957 	mblk_t		*mp, *head;
958 	int		pktlen = 0;
959 	sctp_faddr_t	*fp = NULL;
960 	sctp_stack_t	*sctps = sctp->sctp_sctps;
961 
962 	ASSERT(sctp->sctp_xmit_head != NULL);
963 	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
964 	if (mp == NULL) {
965 		SCTP_KSTAT(sctps, sctp_fr_not_found);
966 		return;
967 	}
968 	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
969 		freemsg(mp);
970 		SCTP_KSTAT(sctps, sctp_fr_add_hdr);
971 		return;
972 	}
973 	if ((pktlen > fp->sf_pmss) && fp->sf_isv4) {
974 		ipha_t *iph = (ipha_t *)head->b_rptr;
975 
976 		iph->ipha_fragment_offset_and_flags = 0;
977 	}
978 
979 	sctp_set_iplen(sctp, head, fp->sf_ixa);
980 	(void) conn_ip_output(head, fp->sf_ixa);
981 	BUMP_LOCAL(sctp->sctp_opkts);
982 	sctp->sctp_active = fp->sf_lastactive = ddi_get_lbolt64();
983 }
984 
985 void
sctp_output(sctp_t * sctp,uint_t num_pkt)986 sctp_output(sctp_t *sctp, uint_t num_pkt)
987 {
988 	mblk_t			*mp = NULL;
989 	mblk_t			*nmp;
990 	mblk_t			*head;
991 	mblk_t			*meta = sctp->sctp_xmit_tail;
992 	mblk_t			*fill = NULL;
993 	uint16_t 		chunklen;
994 	uint32_t 		cansend;
995 	int32_t			seglen;
996 	int32_t			xtralen;
997 	int32_t			sacklen;
998 	int32_t			pad = 0;
999 	int32_t			pathmax;
1000 	int			extra;
1001 	int64_t			now = LBOLT_FASTPATH64;
1002 	sctp_faddr_t		*fp;
1003 	sctp_faddr_t		*lfp;
1004 	sctp_data_hdr_t		*sdc;
1005 	int			error;
1006 	boolean_t		notsent = B_TRUE;
1007 	sctp_stack_t		*sctps = sctp->sctp_sctps;
1008 	uint32_t		tsn;
1009 
1010 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1011 		sacklen = 0;
1012 	} else {
1013 		/* send a SACK chunk */
1014 		sacklen = sizeof (sctp_chunk_hdr_t) +
1015 		    sizeof (sctp_sack_chunk_t) +
1016 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1017 		lfp = sctp->sctp_lastdata;
1018 		ASSERT(lfp != NULL);
1019 		if (lfp->sf_state != SCTP_FADDRS_ALIVE)
1020 			lfp = sctp->sctp_current;
1021 	}
1022 
1023 	cansend = sctp->sctp_frwnd;
1024 	if (sctp->sctp_unsent < cansend)
1025 		cansend = sctp->sctp_unsent;
1026 
1027 	/*
1028 	 * Start persist timer if unable to send or when
1029 	 * trying to send into a zero window. This timer
1030 	 * ensures the blocked send attempt is retried.
1031 	 */
1032 	if ((cansend < sctp->sctp_current->sf_pmss / 2) &&
1033 	    (sctp->sctp_unacked != 0) &&
1034 	    (sctp->sctp_unacked < sctp->sctp_current->sf_pmss) &&
1035 	    !sctp->sctp_ndelay ||
1036 	    (cansend == 0 && sctp->sctp_unacked == 0 &&
1037 	    sctp->sctp_unsent != 0)) {
1038 		head = NULL;
1039 		fp = sctp->sctp_current;
1040 		goto unsent_data;
1041 	}
1042 	if (meta != NULL)
1043 		mp = meta->b_cont;
1044 	while (cansend > 0 && num_pkt-- != 0) {
1045 		pad = 0;
1046 
1047 		/*
1048 		 * Find first segment eligible for transmit.
1049 		 */
1050 		while (mp != NULL) {
1051 			if (SCTP_CHUNK_CANSEND(mp))
1052 				break;
1053 			mp = mp->b_next;
1054 		}
1055 		if (mp == NULL) {
1056 			meta = sctp_get_msg_to_send(sctp, &mp,
1057 			    meta == NULL ? NULL : meta->b_next, &error, sacklen,
1058 			    cansend, NULL);
1059 			if (error != 0 || meta == NULL) {
1060 				head = NULL;
1061 				fp = sctp->sctp_current;
1062 				goto unsent_data;
1063 			}
1064 			sctp->sctp_xmit_tail =  meta;
1065 		}
1066 
1067 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1068 		seglen = ntohs(sdc->sdh_len);
1069 		xtralen = sizeof (*sdc);
1070 		chunklen = seglen - xtralen;
1071 
1072 		/*
1073 		 * Check rwnd.
1074 		 */
1075 		if (chunklen > cansend) {
1076 			head = NULL;
1077 			fp = SCTP_CHUNK_DEST(meta);
1078 			if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
1079 				fp = sctp->sctp_current;
1080 			goto unsent_data;
1081 		}
1082 		if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1083 			extra = SCTP_ALIGN - extra;
1084 
1085 		/*
1086 		 * Pick destination address, and check cwnd.
1087 		 */
1088 		if (sacklen > 0 && (seglen + extra <= lfp->sf_cwnd -
1089 		    lfp->sf_suna) &&
1090 		    (seglen + sacklen + extra <= lfp->sf_pmss)) {
1091 			/*
1092 			 * Only include SACK chunk if it can be bundled
1093 			 * with a data chunk, and sent to sctp_lastdata.
1094 			 */
1095 			pathmax = lfp->sf_cwnd - lfp->sf_suna;
1096 
1097 			fp = lfp;
1098 			if ((nmp = dupmsg(mp)) == NULL) {
1099 				head = NULL;
1100 				goto unsent_data;
1101 			}
1102 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1103 			head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
1104 			    &error);
1105 			if (head == NULL) {
1106 				/*
1107 				 * If none of the source addresses are
1108 				 * available (i.e error == EHOSTUNREACH),
1109 				 * pretend we have sent the data. We will
1110 				 * eventually time out trying to retramsmit
1111 				 * the data if the interface never comes up.
1112 				 * If we have already sent some stuff (i.e.,
1113 				 * notsent is B_FALSE) then we are fine, else
1114 				 * just mark this packet as sent.
1115 				 */
1116 				if (notsent && error == EHOSTUNREACH) {
1117 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1118 					    fp, chunklen, meta);
1119 				}
1120 				freemsg(nmp);
1121 				SCTP_KSTAT(sctps, sctp_output_failed);
1122 				goto unsent_data;
1123 			}
1124 			seglen += sacklen;
1125 			xtralen += sacklen;
1126 			sacklen = 0;
1127 		} else {
1128 			fp = SCTP_CHUNK_DEST(meta);
1129 			if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
1130 				fp = sctp->sctp_current;
1131 			/*
1132 			 * If we haven't sent data to this destination for
1133 			 * a while, do slow start again.
1134 			 */
1135 			if (now - fp->sf_lastactive > fp->sf_rto) {
1136 				SET_CWND(fp, fp->sf_pmss,
1137 				    sctps->sctps_slow_start_after_idle);
1138 			}
1139 
1140 			pathmax = fp->sf_cwnd - fp->sf_suna;
1141 			if (seglen + extra > pathmax) {
1142 				head = NULL;
1143 				goto unsent_data;
1144 			}
1145 			if ((nmp = dupmsg(mp)) == NULL) {
1146 				head = NULL;
1147 				goto unsent_data;
1148 			}
1149 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1150 			head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
1151 			if (head == NULL) {
1152 				/*
1153 				 * If none of the source addresses are
1154 				 * available (i.e error == EHOSTUNREACH),
1155 				 * pretend we have sent the data. We will
1156 				 * eventually time out trying to retramsmit
1157 				 * the data if the interface never comes up.
1158 				 * If we have already sent some stuff (i.e.,
1159 				 * notsent is B_FALSE) then we are fine, else
1160 				 * just mark this packet as sent.
1161 				 */
1162 				if (notsent && error == EHOSTUNREACH) {
1163 					SCTP_CHUNK_SENT(sctp, mp, sdc,
1164 					    fp, chunklen, meta);
1165 				}
1166 				freemsg(nmp);
1167 				SCTP_KSTAT(sctps, sctp_output_failed);
1168 				goto unsent_data;
1169 			}
1170 		}
1171 		fp->sf_lastactive = now;
1172 		if (pathmax > fp->sf_pmss)
1173 			pathmax = fp->sf_pmss;
1174 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1175 		mp = mp->b_next;
1176 
1177 		/*
1178 		 * Use this chunk to measure RTT?
1179 		 * Must not be a retransmision of an earlier chunk,
1180 		 * ensure the tsn is current.
1181 		 */
1182 		tsn = ntohl(sdc->sdh_tsn);
1183 		if (sctp->sctp_out_time == 0 && tsn == (sctp->sctp_ltsn - 1)) {
1184 			sctp->sctp_out_time = now;
1185 			sctp->sctp_rtt_tsn = tsn;
1186 		}
1187 		if (extra > 0) {
1188 			fill = sctp_get_padding(sctp, extra);
1189 			if (fill != NULL) {
1190 				linkb(head, fill);
1191 				pad = extra;
1192 				seglen += extra;
1193 			} else {
1194 				goto unsent_data;
1195 			}
1196 		}
1197 		/*
1198 		 * Bundle chunks. We linkb() the chunks together to send
1199 		 * downstream in a single packet.
1200 		 * Partial chunks MUST NOT be bundled with full chunks, so we
1201 		 * rely on sctp_get_msg_to_send() to only return messages that
1202 		 * will fit entirely in the current packet.
1203 		 */
1204 		while (seglen < pathmax) {
1205 			int32_t		new_len;
1206 			int32_t		new_xtralen;
1207 
1208 			while (mp != NULL) {
1209 				if (SCTP_CHUNK_CANSEND(mp))
1210 					break;
1211 				mp = mp->b_next;
1212 			}
1213 			if (mp == NULL) {
1214 				meta = sctp_get_msg_to_send(sctp, &mp,
1215 				    meta->b_next, &error, seglen,
1216 				    (seglen - xtralen) >= cansend ? 0 :
1217 				    cansend - seglen, fp);
1218 				if (error != 0)
1219 					break;
1220 				/* If no more eligible chunks, cease bundling */
1221 				if (meta == NULL)
1222 					break;
1223 				sctp->sctp_xmit_tail =  meta;
1224 			}
1225 			ASSERT(mp != NULL);
1226 			if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
1227 			    fp != SCTP_CHUNK_DEST(meta)) {
1228 				break;
1229 			}
1230 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1231 			chunklen = ntohs(sdc->sdh_len);
1232 			if ((extra = chunklen  & (SCTP_ALIGN - 1)) != 0)
1233 				extra = SCTP_ALIGN - extra;
1234 
1235 			new_len = seglen + chunklen;
1236 			new_xtralen = xtralen + sizeof (*sdc);
1237 			chunklen -= sizeof (*sdc);
1238 
1239 			if (new_len - new_xtralen > cansend ||
1240 			    new_len + extra > pathmax) {
1241 				break;
1242 			}
1243 			if ((nmp = dupmsg(mp)) == NULL)
1244 				break;
1245 			if (extra > 0) {
1246 				fill = sctp_get_padding(sctp, extra);
1247 				if (fill != NULL) {
1248 					pad += extra;
1249 					new_len += extra;
1250 					linkb(nmp, fill);
1251 				} else {
1252 					freemsg(nmp);
1253 					break;
1254 				}
1255 			}
1256 			seglen = new_len;
1257 			xtralen = new_xtralen;
1258 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
1259 			SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1260 			linkb(head, nmp);
1261 			mp = mp->b_next;
1262 		}
1263 		if ((seglen > fp->sf_pmss) && fp->sf_isv4) {
1264 			ipha_t *iph = (ipha_t *)head->b_rptr;
1265 
1266 			/*
1267 			 * Path MTU is different from what we thought it would
1268 			 * be when we created chunks, or IP headers have grown.
1269 			 * Need to clear the DF bit.
1270 			 */
1271 			iph->ipha_fragment_offset_and_flags = 0;
1272 		}
1273 		/* xmit segment */
1274 		ASSERT(cansend >= seglen - pad - xtralen);
1275 		cansend -= (seglen - pad - xtralen);
1276 		dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
1277 		    "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
1278 		    seglen - xtralen, ntohl(sdc->sdh_tsn),
1279 		    ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
1280 		    cansend, sctp->sctp_lastack_rxd));
1281 		sctp_set_iplen(sctp, head, fp->sf_ixa);
1282 		(void) conn_ip_output(head, fp->sf_ixa);
1283 		BUMP_LOCAL(sctp->sctp_opkts);
1284 		/* arm rto timer (if not set) */
1285 		if (!fp->sf_timer_running)
1286 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
1287 		notsent = B_FALSE;
1288 	}
1289 	sctp->sctp_active = now;
1290 	return;
1291 unsent_data:
1292 	/* arm persist timer (if rto timer not set) */
1293 	if (!fp->sf_timer_running)
1294 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
1295 	if (head != NULL)
1296 		freemsg(head);
1297 }
1298 
1299 /*
1300  * The following two functions initialize and destroy the cache
1301  * associated with the sets used for PR-SCTP.
1302  */
1303 void
sctp_ftsn_sets_init(void)1304 sctp_ftsn_sets_init(void)
1305 {
1306 	sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
1307 	    sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
1308 	    NULL, 0);
1309 }
1310 
1311 void
sctp_ftsn_sets_fini(void)1312 sctp_ftsn_sets_fini(void)
1313 {
1314 	kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
1315 }
1316 
1317 
1318 /* Free PR-SCTP sets */
1319 void
sctp_free_ftsn_set(sctp_ftsn_set_t * s)1320 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
1321 {
1322 	sctp_ftsn_set_t *p;
1323 
1324 	while (s != NULL) {
1325 		p = s->next;
1326 		s->next = NULL;
1327 		kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
1328 		s = p;
1329 	}
1330 }
1331 
1332 /*
1333  * Given a message meta block, meta, this routine creates or modifies
1334  * the set that will be used to generate a Forward TSN chunk. If the
1335  * entry for stream id, sid, for this message already exists, the
1336  * sequence number, ssn, is updated if it is greater than the existing
1337  * one. If an entry for this sid does not exist, one is created if
1338  * the size does not exceed fp->sf_pmss. We return false in case
1339  * or an error.
1340  */
1341 boolean_t
sctp_add_ftsn_set(sctp_ftsn_set_t ** s,sctp_faddr_t * fp,mblk_t * meta,uint_t * nsets,uint32_t * slen)1342 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
1343     uint_t *nsets, uint32_t *slen)
1344 {
1345 	sctp_ftsn_set_t		*p;
1346 	sctp_msg_hdr_t		*msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1347 	uint16_t		sid = htons(msg_hdr->smh_sid);
1348 	/* msg_hdr->smh_ssn is already in NBO */
1349 	uint16_t		ssn = msg_hdr->smh_ssn;
1350 
1351 	ASSERT(s != NULL && nsets != NULL);
1352 	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
1353 
1354 	if (*s == NULL) {
1355 		ASSERT((*slen + sizeof (uint32_t)) <= fp->sf_pmss);
1356 		*s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
1357 		if (*s == NULL)
1358 			return (B_FALSE);
1359 		(*s)->ftsn_entries.ftsn_sid = sid;
1360 		(*s)->ftsn_entries.ftsn_ssn = ssn;
1361 		(*s)->next = NULL;
1362 		*nsets = 1;
1363 		*slen += sizeof (uint32_t);
1364 		return (B_TRUE);
1365 	}
1366 	for (p = *s; p->next != NULL; p = p->next) {
1367 		if (p->ftsn_entries.ftsn_sid == sid) {
1368 			if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1369 				p->ftsn_entries.ftsn_ssn = ssn;
1370 			return (B_TRUE);
1371 		}
1372 	}
1373 	/* the last one */
1374 	if (p->ftsn_entries.ftsn_sid == sid) {
1375 		if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1376 			p->ftsn_entries.ftsn_ssn = ssn;
1377 	} else {
1378 		if ((*slen + sizeof (uint32_t)) > fp->sf_pmss)
1379 			return (B_FALSE);
1380 		p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
1381 		    KM_NOSLEEP);
1382 		if (p->next == NULL)
1383 			return (B_FALSE);
1384 		p = p->next;
1385 		p->ftsn_entries.ftsn_sid = sid;
1386 		p->ftsn_entries.ftsn_ssn = ssn;
1387 		p->next = NULL;
1388 		(*nsets)++;
1389 		*slen += sizeof (uint32_t);
1390 	}
1391 	return (B_TRUE);
1392 }
1393 
1394 /*
1395  * Given a set of stream id - sequence number pairs, this routing creates
1396  * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
1397  * for the chunk is obtained from sctp->sctp_adv_pap. The caller
1398  * will add the IP/SCTP header.
1399  */
1400 mblk_t *
sctp_make_ftsn_chunk(sctp_t * sctp,sctp_faddr_t * fp,sctp_ftsn_set_t * sets,uint_t nsets,uint32_t seglen)1401 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
1402     uint_t nsets, uint32_t seglen)
1403 {
1404 	mblk_t			*ftsn_mp;
1405 	sctp_chunk_hdr_t	*ch_hdr;
1406 	uint32_t		*advtsn;
1407 	uint16_t		schlen;
1408 	size_t			xtralen;
1409 	ftsn_entry_t		*ftsn_entry;
1410 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1411 
1412 	seglen += sizeof (sctp_chunk_hdr_t);
1413 	if (fp->sf_isv4)
1414 		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra;
1415 	else
1416 		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra;
1417 	ftsn_mp = allocb(xtralen + seglen, BPRI_MED);
1418 	if (ftsn_mp == NULL)
1419 		return (NULL);
1420 	ftsn_mp->b_rptr += xtralen;
1421 	ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
1422 
1423 	ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
1424 	ch_hdr->sch_id = CHUNK_FORWARD_TSN;
1425 	ch_hdr->sch_flags = 0;
1426 	/*
1427 	 * The cast here should not be an issue since seglen is
1428 	 * the length of the Forward TSN chunk.
1429 	 */
1430 	schlen = (uint16_t)seglen;
1431 	U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
1432 
1433 	advtsn = (uint32_t *)(ch_hdr + 1);
1434 	U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
1435 	ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
1436 	while (nsets > 0) {
1437 		ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
1438 		ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
1439 		ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
1440 		ftsn_entry++;
1441 		sets = sets->next;
1442 		nsets--;
1443 	}
1444 	return (ftsn_mp);
1445 }
1446 
1447 /*
1448  * Given a starting message, the routine steps through all the
1449  * messages whose TSN is less than sctp->sctp_adv_pap and creates
1450  * ftsn sets. The ftsn sets is then used to create an Forward TSN
1451  * chunk. All the messages, that have chunks that are included in the
1452  * ftsn sets, are flagged abandonded. If a message is partially sent
1453  * and is deemed abandoned, all remaining unsent chunks are marked
1454  * abandoned and are deducted from sctp_unsent.
1455  */
1456 void
sctp_make_ftsns(sctp_t * sctp,mblk_t * meta,mblk_t * mp,mblk_t ** nmp,sctp_faddr_t * fp,uint32_t * seglen)1457 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
1458     sctp_faddr_t *fp, uint32_t *seglen)
1459 {
1460 	mblk_t		*mp1 = mp;
1461 	mblk_t		*mp_head = mp;
1462 	mblk_t		*meta_head = meta;
1463 	mblk_t		*head;
1464 	sctp_ftsn_set_t	*sets = NULL;
1465 	uint_t		nsets = 0;
1466 	uint16_t	clen;
1467 	sctp_data_hdr_t	*sdc;
1468 	uint32_t	sacklen;
1469 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1470 	uint32_t	unsent = 0;
1471 	boolean_t	ubit;
1472 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1473 
1474 	*seglen = sizeof (uint32_t);
1475 
1476 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1477 	while (meta != NULL &&
1478 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1479 		/*
1480 		 * Skip adding FTSN sets for un-ordered messages as they do
1481 		 * not have SSNs.
1482 		 */
1483 		ubit = SCTP_DATA_GET_UBIT(sdc);
1484 		if (!ubit &&
1485 		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
1486 			meta = NULL;
1487 			sctp->sctp_adv_pap = adv_pap;
1488 			goto ftsn_done;
1489 		}
1490 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1491 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1492 			adv_pap = ntohl(sdc->sdh_tsn);
1493 			mp1 = mp1->b_next;
1494 		}
1495 		meta = meta->b_next;
1496 		if (meta != NULL) {
1497 			mp1 = meta->b_cont;
1498 			if (!SCTP_CHUNK_ISSENT(mp1))
1499 				break;
1500 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1501 		}
1502 	}
1503 ftsn_done:
1504 	/*
1505 	 * Can't compare with sets == NULL, since we don't add any
1506 	 * sets for un-ordered messages.
1507 	 */
1508 	if (meta == meta_head)
1509 		return;
1510 	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
1511 	sctp_free_ftsn_set(sets);
1512 	if (*nmp == NULL)
1513 		return;
1514 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1515 		sacklen = 0;
1516 	} else {
1517 		sacklen = sizeof (sctp_chunk_hdr_t) +
1518 		    sizeof (sctp_sack_chunk_t) +
1519 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1520 		if (*seglen + sacklen > sctp->sctp_lastdata->sf_pmss) {
1521 			/* piggybacked SACK doesn't fit */
1522 			sacklen = 0;
1523 		} else {
1524 			fp = sctp->sctp_lastdata;
1525 		}
1526 	}
1527 	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
1528 	if (head == NULL) {
1529 		freemsg(*nmp);
1530 		*nmp = NULL;
1531 		SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
1532 		return;
1533 	}
1534 	*seglen += sacklen;
1535 	*nmp = head;
1536 
1537 	/*
1538 	 * XXXNeed to optimise this, the reason it is done here is so
1539 	 * that we don't have to undo in case of failure.
1540 	 */
1541 	mp1 = mp_head;
1542 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1543 	while (meta_head != NULL &&
1544 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1545 		if (!SCTP_IS_MSG_ABANDONED(meta_head))
1546 			SCTP_MSG_SET_ABANDONED(meta_head);
1547 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1548 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1549 			if (!SCTP_CHUNK_ISACKED(mp1)) {
1550 				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
1551 				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
1552 				    meta_head);
1553 			}
1554 			mp1 = mp1->b_next;
1555 		}
1556 		while (mp1 != NULL) {
1557 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1558 			if (!SCTP_CHUNK_ABANDONED(mp1)) {
1559 				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
1560 				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
1561 				SCTP_ABANDON_CHUNK(mp1);
1562 			}
1563 			mp1 = mp1->b_next;
1564 		}
1565 		meta_head = meta_head->b_next;
1566 		if (meta_head != NULL) {
1567 			mp1 = meta_head->b_cont;
1568 			if (!SCTP_CHUNK_ISSENT(mp1))
1569 				break;
1570 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1571 		}
1572 	}
1573 	if (unsent > 0) {
1574 		ASSERT(sctp->sctp_unsent >= unsent);
1575 		sctp->sctp_unsent -= unsent;
1576 		/*
1577 		 * Update ULP the amount of queued data, which is
1578 		 * sent-unack'ed + unsent.
1579 		 */
1580 		if (!SCTP_IS_DETACHED(sctp))
1581 			SCTP_TXQ_UPDATE(sctp);
1582 	}
1583 }
1584 
1585 /*
1586  * This function steps through messages starting at meta and checks if
1587  * the message is abandoned. It stops when it hits an unsent chunk or
1588  * a message that has all its chunk acked. This is the only place
1589  * where the sctp_adv_pap is moved forward to indicated abandoned
1590  * messages.
1591  */
1592 void
sctp_check_adv_ack_pt(sctp_t * sctp,mblk_t * meta,mblk_t * mp)1593 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
1594 {
1595 	uint32_t	tsn = sctp->sctp_adv_pap;
1596 	sctp_data_hdr_t	*sdc;
1597 	sctp_msg_hdr_t	*msg_hdr;
1598 
1599 	ASSERT(mp != NULL);
1600 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1601 	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
1602 	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1603 	if (!SCTP_IS_MSG_ABANDONED(meta) &&
1604 	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1605 		return;
1606 	}
1607 	while (meta != NULL) {
1608 		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
1609 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1610 			tsn = ntohl(sdc->sdh_tsn);
1611 			mp = mp->b_next;
1612 		}
1613 		if (mp != NULL)
1614 			break;
1615 		/*
1616 		 * We continue checking for successive messages only if there
1617 		 * is a chunk marked for retransmission. Else, we might
1618 		 * end up sending FTSN prematurely for chunks that have been
1619 		 * sent, but not yet acked.
1620 		 */
1621 		if ((meta = meta->b_next) != NULL) {
1622 			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1623 			if (!SCTP_IS_MSG_ABANDONED(meta) &&
1624 			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1625 				break;
1626 			}
1627 			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1628 				if (!SCTP_CHUNK_ISSENT(mp)) {
1629 					sctp->sctp_adv_pap = tsn;
1630 					return;
1631 				}
1632 				if (SCTP_CHUNK_WANT_REXMIT(mp))
1633 					break;
1634 			}
1635 			if (mp == NULL)
1636 				break;
1637 		}
1638 	}
1639 	sctp->sctp_adv_pap = tsn;
1640 }
1641 
1642 
/*
 * Determine if we should bundle a data chunk with the chunk being
 * retransmitted.  An abandoned chunk is never bundled.  Otherwise we
 * bundle if
 *
 * - the chunk has been sent to the destination fp and is not yet acked.
 *
 * OR
 *
 * - the chunk's SENT/REXMIT flag bits are anything but "SENT only",
 *   i.e. the chunk is unsent (new data) or has the REXMIT flag set.
 *
 * Note that mp is evaluated more than once.
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED((mp)) &&					\
	((SCTP_CHUNK_ISSENT((mp)) &&					\
	SCTP_CHUNK_DEST((mp)) == (fp) &&				\
	!SCTP_CHUNK_ISACKED((mp))) ||					\
	(((mp)->b_flag &						\
	(SCTP_CHUNK_FLAG_REXMIT | SCTP_CHUNK_FLAG_SENT)) !=		\
	SCTP_CHUNK_FLAG_SENT)))
1659 
1660 /*
1661  * Retransmit first segment which hasn't been acked with cumtsn or send
1662  * a Forward TSN chunk, if appropriate.
1663  */
1664 void
sctp_rexmit(sctp_t * sctp,sctp_faddr_t * oldfp)1665 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
1666 {
1667 	mblk_t		*mp;
1668 	mblk_t		*nmp = NULL;
1669 	mblk_t		*head;
1670 	mblk_t		*meta = sctp->sctp_xmit_head;
1671 	mblk_t		*fill;
1672 	uint32_t	seglen = 0;
1673 	uint32_t	sacklen;
1674 	uint16_t	chunklen;
1675 	int		extra;
1676 	sctp_data_hdr_t	*sdc;
1677 	sctp_faddr_t	*fp;
1678 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1679 	boolean_t	do_ftsn = B_FALSE;
1680 	boolean_t	ftsn_check = B_TRUE;
1681 	uint32_t	first_ua_tsn;
1682 	sctp_msg_hdr_t	*mhdr;
1683 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1684 	int		error;
1685 
1686 	while (meta != NULL) {
1687 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1688 			uint32_t	tsn;
1689 
1690 			if (!SCTP_CHUNK_ISSENT(mp))
1691 				goto window_probe;
1692 			/*
1693 			 * We break in the following cases -
1694 			 *
1695 			 *	if the advanced peer ack point includes the next
1696 			 *	chunk to be retransmited - possibly the Forward
1697 			 * 	TSN was lost.
1698 			 *
1699 			 *	if we are PRSCTP aware and the next chunk to be
1700 			 *	retransmitted is now abandoned
1701 			 *
1702 			 *	if the next chunk to be retransmitted is for
1703 			 *	the dest on which the timer went off. (this
1704 			 *	message is not abandoned).
1705 			 *
1706 			 * We check for Forward TSN only for the first
1707 			 * eligible chunk to be retransmitted. The reason
1708 			 * being if the first eligible chunk is skipped (say
1709 			 * it was sent to a destination other than oldfp)
1710 			 * then we cannot advance the cum TSN via Forward
1711 			 * TSN chunk.
1712 			 *
1713 			 * Also, ftsn_check is B_TRUE only for the first
1714 			 * eligible chunk, it  will be B_FALSE for all
1715 			 * subsequent candidate messages for retransmission.
1716 			 */
1717 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
1718 			tsn = ntohl(sdc->sdh_tsn);
1719 			if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
1720 				if (sctp->sctp_prsctp_aware && ftsn_check) {
1721 					if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
1722 						ASSERT(sctp->sctp_prsctp_aware);
1723 						do_ftsn = B_TRUE;
1724 						goto out;
1725 					} else {
1726 						sctp_check_adv_ack_pt(sctp,
1727 						    meta, mp);
1728 						if (SEQ_GT(sctp->sctp_adv_pap,
1729 						    adv_pap)) {
1730 							do_ftsn = B_TRUE;
1731 							goto out;
1732 						}
1733 					}
1734 					ftsn_check = B_FALSE;
1735 				}
1736 				if (SCTP_CHUNK_DEST(mp) == oldfp)
1737 					goto out;
1738 			}
1739 		}
1740 		meta = meta->b_next;
1741 		if (meta != NULL && sctp->sctp_prsctp_aware) {
1742 			mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1743 
1744 			while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
1745 			    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
1746 				meta = meta->b_next;
1747 			}
1748 		}
1749 	}
1750 window_probe:
1751 	/*
1752 	 * Retransmit fired for a destination which didn't have
1753 	 * any unacked data pending.
1754 	 */
1755 	if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) {
1756 		/*
1757 		 * Send a window probe. Inflate frwnd to allow
1758 		 * sending one segment.
1759 		 */
1760 		if (sctp->sctp_frwnd < (oldfp->sf_pmss - sizeof (*sdc)))
1761 			sctp->sctp_frwnd = oldfp->sf_pmss - sizeof (*sdc);
1762 
1763 		/* next TSN to send */
1764 		sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
1765 
1766 		/*
1767 		 * The above sctp_frwnd adjustment is coarse.  The "changed"
1768 		 * sctp_frwnd may allow us to send more than 1 packet.  So
1769 		 * tell sctp_output() to send only 1 packet.
1770 		 */
1771 		sctp_output(sctp, 1);
1772 
1773 		/* Last sent TSN */
1774 		sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1775 		ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
1776 		sctp->sctp_zero_win_probe = B_TRUE;
1777 		SCTPS_BUMP_MIB(sctps, sctpOutWinProbe);
1778 	}
1779 	return;
1780 out:
1781 	/*
1782 	 * After a time out, assume that everything has left the network.  So
1783 	 * we can clear rxt_unacked for the original peer address.
1784 	 */
1785 	oldfp->sf_rxt_unacked = 0;
1786 
1787 	/*
1788 	 * If we were probing for zero window, don't adjust retransmission
1789 	 * variables, but the timer is still backed off.
1790 	 */
1791 	if (sctp->sctp_zero_win_probe) {
1792 		mblk_t	*pkt;
1793 		uint_t	pkt_len;
1794 
1795 		/*
1796 		 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn
1797 		 * and sctp_rxt_maxtsn will specify the ZWP packet.
1798 		 */
1799 		fp = oldfp;
1800 		if (oldfp->sf_state != SCTP_FADDRS_ALIVE)
1801 			fp = sctp_rotate_faddr(sctp, oldfp);
1802 		pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
1803 		if (pkt != NULL) {
1804 			ASSERT(pkt_len <= fp->sf_pmss);
1805 			sctp_set_iplen(sctp, pkt, fp->sf_ixa);
1806 			(void) conn_ip_output(pkt, fp->sf_ixa);
1807 			BUMP_LOCAL(sctp->sctp_opkts);
1808 		} else {
1809 			SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
1810 		}
1811 
1812 		/*
1813 		 * The strikes will be clear by sctp_faddr_alive() when the
1814 		 * other side sends us an ack.
1815 		 */
1816 		oldfp->sf_strikes++;
1817 		sctp->sctp_strikes++;
1818 
1819 		SCTP_CALC_RXT(sctp, oldfp, sctp->sctp_rto_max);
1820 		if (oldfp != fp && oldfp->sf_suna != 0)
1821 			SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->sf_rto);
1822 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
1823 		SCTPS_BUMP_MIB(sctps, sctpOutWinProbe);
1824 		return;
1825 	}
1826 
1827 	/*
1828 	 * Enter slowstart for this destination
1829 	 */
1830 	oldfp->sf_ssthresh = oldfp->sf_cwnd / 2;
1831 	if (oldfp->sf_ssthresh < 2 * oldfp->sf_pmss)
1832 		oldfp->sf_ssthresh = 2 * oldfp->sf_pmss;
1833 	oldfp->sf_cwnd = oldfp->sf_pmss;
1834 	oldfp->sf_pba = 0;
1835 	fp = sctp_rotate_faddr(sctp, oldfp);
1836 	ASSERT(fp != NULL);
1837 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
1838 
1839 	first_ua_tsn = ntohl(sdc->sdh_tsn);
1840 	if (do_ftsn) {
1841 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
1842 		if (nmp == NULL) {
1843 			sctp->sctp_adv_pap = adv_pap;
1844 			goto restart_timer;
1845 		}
1846 		head = nmp;
1847 		/*
1848 		 * Move to the next unabandoned chunk. XXXCheck if meta will
1849 		 * always be marked abandoned.
1850 		 */
1851 		while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta))
1852 			meta = meta->b_next;
1853 		if (meta != NULL)
1854 			mp = mp->b_cont;
1855 		else
1856 			mp = NULL;
1857 		goto try_bundle;
1858 	}
1859 	seglen = ntohs(sdc->sdh_len);
1860 	chunklen = seglen - sizeof (*sdc);
1861 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1862 		extra = SCTP_ALIGN - extra;
1863 
1864 	/* Find out if we need to piggyback SACK. */
1865 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1866 		sacklen = 0;
1867 	} else {
1868 		sacklen = sizeof (sctp_chunk_hdr_t) +
1869 		    sizeof (sctp_sack_chunk_t) +
1870 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1871 		if (seglen + sacklen > sctp->sctp_lastdata->sf_pmss) {
1872 			/* piggybacked SACK doesn't fit */
1873 			sacklen = 0;
1874 		} else {
1875 			/*
1876 			 * OK, we have room to send SACK back.  But we
1877 			 * should send it back to the last fp where we
1878 			 * receive data from, unless sctp_lastdata equals
1879 			 * oldfp, then we should probably not send it
1880 			 * back to that fp.  Also we should check that
1881 			 * the fp is alive.
1882 			 */
1883 			if (sctp->sctp_lastdata != oldfp &&
1884 			    sctp->sctp_lastdata->sf_state ==
1885 			    SCTP_FADDRS_ALIVE) {
1886 				fp = sctp->sctp_lastdata;
1887 			}
1888 		}
1889 	}
1890 
1891 	/*
1892 	 * Cancel RTT measurement if the retransmitted TSN is before the
1893 	 * TSN used for timimg.
1894 	 */
1895 	if (sctp->sctp_out_time != 0 &&
1896 	    SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
1897 		sctp->sctp_out_time = 0;
1898 	}
1899 	/* Clear the counter as the RTT calculation may be off. */
1900 	fp->sf_rtt_updates = 0;
1901 	oldfp->sf_rtt_updates = 0;
1902 
1903 	/*
1904 	 * After a timeout, we should change the current faddr so that
1905 	 * new chunks will be sent to the alternate address.
1906 	 */
1907 	sctp_set_faddr_current(sctp, fp);
1908 
1909 	nmp = dupmsg(mp);
1910 	if (nmp == NULL)
1911 		goto restart_timer;
1912 	if (extra > 0) {
1913 		fill = sctp_get_padding(sctp, extra);
1914 		if (fill != NULL) {
1915 			linkb(nmp, fill);
1916 			seglen += extra;
1917 		} else {
1918 			freemsg(nmp);
1919 			goto restart_timer;
1920 		}
1921 	}
1922 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
1923 	head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
1924 	if (head == NULL) {
1925 		freemsg(nmp);
1926 		SCTP_KSTAT(sctps, sctp_rexmit_failed);
1927 		goto restart_timer;
1928 	}
1929 	seglen += sacklen;
1930 
1931 	SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1932 
1933 	mp = mp->b_next;
1934 
1935 try_bundle:
1936 	/* We can at least and at most send 1 packet at timeout. */
1937 	while (seglen < fp->sf_pmss) {
1938 		int32_t new_len;
1939 
1940 		/* Go through the list to find more chunks to be bundled. */
1941 		while (mp != NULL) {
1942 			/* Check if the chunk can be bundled. */
1943 			if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp))
1944 				break;
1945 			mp = mp->b_next;
1946 		}
1947 		/* Go to the next message. */
1948 		if (mp == NULL) {
1949 			for (meta = meta->b_next; meta != NULL;
1950 			    meta = meta->b_next) {
1951 				mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1952 
1953 				if (SCTP_IS_MSG_ABANDONED(meta) ||
1954 				    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
1955 				    sctp)) {
1956 					continue;
1957 				}
1958 
1959 				mp = meta->b_cont;
1960 				goto try_bundle;
1961 			}
1962 			/*
1963 			 * Check if there is a new message which potentially
1964 			 * could be bundled with this retransmission.
1965 			 */
1966 			meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error,
1967 			    seglen, fp->sf_pmss - seglen, NULL);
1968 			if (error != 0 || meta == NULL) {
1969 				/* No more chunk to be bundled. */
1970 				break;
1971 			} else {
1972 				goto try_bundle;
1973 			}
1974 		}
1975 
1976 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
1977 		new_len = ntohs(sdc->sdh_len);
1978 		chunklen = new_len - sizeof (*sdc);
1979 
1980 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
1981 			extra = SCTP_ALIGN - extra;
1982 		if ((new_len = seglen + new_len + extra) > fp->sf_pmss)
1983 			break;
1984 		if ((nmp = dupmsg(mp)) == NULL)
1985 			break;
1986 
1987 		if (extra > 0) {
1988 			fill = sctp_get_padding(sctp, extra);
1989 			if (fill != NULL) {
1990 				linkb(nmp, fill);
1991 			} else {
1992 				freemsg(nmp);
1993 				break;
1994 			}
1995 		}
1996 		linkb(head, nmp);
1997 
1998 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
1999 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
2000 
2001 		seglen = new_len;
2002 		mp = mp->b_next;
2003 	}
2004 done_bundle:
2005 	if ((seglen > fp->sf_pmss) && fp->sf_isv4) {
2006 		ipha_t *iph = (ipha_t *)head->b_rptr;
2007 
2008 		/*
2009 		 * Path MTU is different from path we thought it would
2010 		 * be when we created chunks, or IP headers have grown.
2011 		 * Need to clear the DF bit.
2012 		 */
2013 		iph->ipha_fragment_offset_and_flags = 0;
2014 	}
2015 	fp->sf_rxt_unacked += seglen;
2016 
2017 	dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
2018 	    "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
2019 	    seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
2020 	    (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
2021 
2022 	sctp->sctp_rexmitting = B_TRUE;
2023 	sctp->sctp_rxt_nxttsn = first_ua_tsn;
2024 	sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
2025 	sctp_set_iplen(sctp, head, fp->sf_ixa);
2026 	(void) conn_ip_output(head, fp->sf_ixa);
2027 	BUMP_LOCAL(sctp->sctp_opkts);
2028 
2029 	/*
2030 	 * Restart the oldfp timer with exponential backoff and
2031 	 * the new fp timer for the retransmitted chunks.
2032 	 */
2033 restart_timer:
2034 	oldfp->sf_strikes++;
2035 	sctp->sctp_strikes++;
2036 	SCTP_CALC_RXT(sctp, oldfp, sctp->sctp_rto_max);
2037 	/*
2038 	 * If there is still some data in the oldfp, restart the
2039 	 * retransmission timer.  If there is no data, the heartbeat will
2040 	 * continue to run so it will do its job in checking the reachability
2041 	 * of the oldfp.
2042 	 */
2043 	if (oldfp != fp && oldfp->sf_suna != 0)
2044 		SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->sf_rto);
2045 
2046 	/*
2047 	 * Should we restart the timer of the new fp?  If there is
2048 	 * outstanding data to the new fp, the timer should be
2049 	 * running already.  So restarting it means that the timer
2050 	 * will fire later for those outstanding data.  But if
2051 	 * we don't restart it, the timer will fire too early for the
2052 	 * just retransmitted chunks to the new fp.  The reason is that we
2053 	 * don't keep a timestamp on when a chunk is retransmitted.
2054 	 * So when the timer fires, it will just search for the
2055 	 * chunk with the earliest TSN sent to new fp.  This probably
2056 	 * is the chunk we just retransmitted.  So for now, let's
2057 	 * be conservative and restart the timer of the new fp.
2058 	 */
2059 	SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2060 
2061 	sctp->sctp_active = ddi_get_lbolt64();
2062 }
2063 
2064 /*
2065  * This function is called by sctp_ss_rexmit() to create a packet
2066  * to be retransmitted to the given fp.  The given meta and mp
2067  * parameters are respectively the sctp_msg_hdr_t and the mblk of the
2068  * first chunk to be retransmitted.  This is also called when we want
2069  * to retransmit a zero window probe from sctp_rexmit() or when we
2070  * want to retransmit the zero window probe after the window has
2071  * opened from sctp_got_sack().
2072  */
2073 mblk_t *
sctp_rexmit_packet(sctp_t * sctp,mblk_t ** meta,mblk_t ** mp,sctp_faddr_t * fp,uint_t * packet_len)2074 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
2075     uint_t *packet_len)
2076 {
2077 	uint32_t	seglen = 0;
2078 	uint16_t	chunklen;
2079 	int		extra;
2080 	mblk_t		*nmp;
2081 	mblk_t		*head;
2082 	mblk_t		*fill;
2083 	sctp_data_hdr_t	*sdc;
2084 	sctp_msg_hdr_t	*mhdr;
2085 
2086 	sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2087 	seglen = ntohs(sdc->sdh_len);
2088 	chunklen = seglen - sizeof (*sdc);
2089 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
2090 		extra = SCTP_ALIGN - extra;
2091 
2092 	nmp = dupmsg(*mp);
2093 	if (nmp == NULL)
2094 		return (NULL);
2095 	if (extra > 0) {
2096 		fill = sctp_get_padding(sctp, extra);
2097 		if (fill != NULL) {
2098 			linkb(nmp, fill);
2099 			seglen += extra;
2100 		} else {
2101 			freemsg(nmp);
2102 			return (NULL);
2103 		}
2104 	}
2105 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
2106 	head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
2107 	if (head == NULL) {
2108 		freemsg(nmp);
2109 		return (NULL);
2110 	}
2111 	SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2112 	/*
2113 	 * Don't update the TSN if we are doing a Zero Win Probe.
2114 	 */
2115 	if (!sctp->sctp_zero_win_probe)
2116 		sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2117 	*mp = (*mp)->b_next;
2118 
2119 try_bundle:
2120 	while (seglen < fp->sf_pmss) {
2121 		int32_t new_len;
2122 
2123 		/*
2124 		 * Go through the list to find more chunks to be bundled.
2125 		 * We should only retransmit sent by unack'ed chunks.  Since
2126 		 * they were sent before, the peer's receive window should
2127 		 * be able to receive them.
2128 		 */
2129 		while (*mp != NULL) {
2130 			/* Check if the chunk can be bundled. */
2131 			if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
2132 				break;
2133 			*mp = (*mp)->b_next;
2134 		}
2135 		/* Go to the next message. */
2136 		if (*mp == NULL) {
2137 			for (*meta = (*meta)->b_next; *meta != NULL;
2138 			    *meta = (*meta)->b_next) {
2139 				mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;
2140 
2141 				if (SCTP_IS_MSG_ABANDONED(*meta) ||
2142 				    SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
2143 				    sctp)) {
2144 					continue;
2145 				}
2146 
2147 				*mp = (*meta)->b_cont;
2148 				goto try_bundle;
2149 			}
2150 			/* No more chunk to be bundled. */
2151 			break;
2152 		}
2153 
2154 		sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2155 		/* Don't bundle chunks beyond sctp_rxt_maxtsn. */
2156 		if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
2157 			break;
2158 		new_len = ntohs(sdc->sdh_len);
2159 		chunklen = new_len - sizeof (*sdc);
2160 
2161 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
2162 			extra = SCTP_ALIGN - extra;
2163 		if ((new_len = seglen + new_len + extra) > fp->sf_pmss)
2164 			break;
2165 		if ((nmp = dupmsg(*mp)) == NULL)
2166 			break;
2167 
2168 		if (extra > 0) {
2169 			fill = sctp_get_padding(sctp, extra);
2170 			if (fill != NULL) {
2171 				linkb(nmp, fill);
2172 			} else {
2173 				freemsg(nmp);
2174 				break;
2175 			}
2176 		}
2177 		linkb(head, nmp);
2178 
2179 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
2180 		SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2181 		/*
2182 		 * Don't update the TSN if we are doing a Zero Win Probe.
2183 		 */
2184 		if (!sctp->sctp_zero_win_probe)
2185 			sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2186 
2187 		seglen = new_len;
2188 		*mp = (*mp)->b_next;
2189 	}
2190 	*packet_len = seglen;
2191 	fp->sf_rxt_unacked += seglen;
2192 	return (head);
2193 }
2194 
2195 /*
2196  * sctp_ss_rexmit() is called when we get a SACK after a timeout which
2197  * advances the cum_tsn but the cum_tsn is still less than what we have sent
2198  * (sctp_rxt_maxtsn) at the time of the timeout.  This SACK is a "partial"
2199  * SACK.  We retransmit unacked chunks without having to wait for another
2200  * timeout.  The rationale is that the SACK should not be "partial" if all the
2201  * lost chunks have been retransmitted.  Since the SACK is "partial,"
2202  * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
2203  * be missing.  It is better for us to retransmit them now instead
2204  * of waiting for a timeout.
2205  */
2206 void
sctp_ss_rexmit(sctp_t * sctp)2207 sctp_ss_rexmit(sctp_t *sctp)
2208 {
2209 	mblk_t		*meta;
2210 	mblk_t		*mp;
2211 	mblk_t		*pkt;
2212 	sctp_faddr_t	*fp;
2213 	uint_t		pkt_len;
2214 	uint32_t	tot_wnd;
2215 	sctp_data_hdr_t	*sdc;
2216 	int		burst;
2217 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2218 
2219 	ASSERT(!sctp->sctp_zero_win_probe);
2220 
2221 	/*
2222 	 * If the last cum ack is smaller than what we have just
2223 	 * retransmitted, simply return.
2224 	 */
2225 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
2226 		sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
2227 	else
2228 		return;
2229 	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));
2230 
2231 	/*
2232 	 * After a timer fires, sctp_current should be set to the new
2233 	 * fp where the retransmitted chunks are sent.
2234 	 */
2235 	fp = sctp->sctp_current;
2236 
2237 	/*
2238 	 * Since we are retransmitting, we only need to use cwnd to determine
2239 	 * how much we can send as we were allowed (by peer's receive window)
2240 	 * to send those retransmitted chunks previously when they are first
2241 	 * sent.  If we record how much we have retransmitted but
2242 	 * unacknowledged using rxt_unacked, then the amount we can now send
2243 	 * is equal to cwnd minus rxt_unacked.
2244 	 *
2245 	 * The field rxt_unacked is incremented when we retransmit a packet
2246 	 * and decremented when we got a SACK acknowledging something.  And
2247 	 * it is reset when the retransmission timer fires as we assume that
2248 	 * all packets have left the network after a timeout.  If this
2249 	 * assumption is not true, it means that after a timeout, we can
2250 	 * get a SACK acknowledging more than rxt_unacked (its value only
2251 	 * contains what is retransmitted when the timer fires).  So
2252 	 * rxt_unacked will become very big (it is an unsiged int so going
2253 	 * negative means that the value is huge).  This is the reason we
2254 	 * always send at least 1 MSS bytes.
2255 	 *
2256 	 * The reason why we do not have an accurate count is that we
2257 	 * only know how many packets are outstanding (using the TSN numbers).
2258 	 * But we do not know how many bytes those packets contain.  To
2259 	 * have an accurate count, we need to walk through the send list.
2260 	 * As it is not really important to have an accurate count during
2261 	 * retransmission, we skip this walk to save some time.  This should
2262 	 * not make the retransmission too aggressive to cause congestion.
2263 	 */
2264 	if (fp->sf_cwnd <= fp->sf_rxt_unacked)
2265 		tot_wnd = fp->sf_pmss;
2266 	else
2267 		tot_wnd = fp->sf_cwnd - fp->sf_rxt_unacked;
2268 
2269 	/* Find the first unack'ed chunk */
2270 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
2271 		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
2272 
2273 		if (SCTP_IS_MSG_ABANDONED(meta) ||
2274 		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
2275 			continue;
2276 		}
2277 
2278 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2279 			/* Again, this may not be possible */
2280 			if (!SCTP_CHUNK_ISSENT(mp))
2281 				return;
2282 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2283 			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
2284 				goto found_msg;
2285 		}
2286 	}
2287 
2288 	/* Everything is abandoned... */
2289 	return;
2290 
2291 found_msg:
2292 	if (!fp->sf_timer_running)
2293 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2294 	pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
2295 	if (pkt == NULL) {
2296 		SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2297 		return;
2298 	}
2299 	if ((pkt_len > fp->sf_pmss) && fp->sf_isv4) {
2300 		ipha_t	*iph = (ipha_t *)pkt->b_rptr;
2301 
2302 		/*
2303 		 * Path MTU is different from path we thought it would
2304 		 * be when we created chunks, or IP headers have grown.
2305 		 *  Need to clear the DF bit.
2306 		 */
2307 		iph->ipha_fragment_offset_and_flags = 0;
2308 	}
2309 	sctp_set_iplen(sctp, pkt, fp->sf_ixa);
2310 	(void) conn_ip_output(pkt, fp->sf_ixa);
2311 	BUMP_LOCAL(sctp->sctp_opkts);
2312 
2313 	/* Check and see if there is more chunk to be retransmitted. */
2314 	if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sf_pmss ||
2315 	    meta == NULL)
2316 		return;
2317 	if (mp == NULL)
2318 		meta = meta->b_next;
2319 	if (meta == NULL)
2320 		return;
2321 
2322 	/* Retransmit another packet if the window allows. */
2323 	for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1;
2324 	    meta != NULL && burst > 0; meta = meta->b_next, burst--) {
2325 		if (mp == NULL)
2326 			mp = meta->b_cont;
2327 		for (; mp != NULL; mp = mp->b_next) {
2328 			/* Again, this may not be possible */
2329 			if (!SCTP_CHUNK_ISSENT(mp))
2330 				return;
2331 			if (!SCTP_CHUNK_ISACKED(mp))
2332 				goto found_msg;
2333 		}
2334 	}
2335 }
2336