xref: /illumos-gate/usr/src/uts/common/inet/sctp/sctp_input.c (revision 2833423dc59f4c35fe4713dbb942950c82df0437)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2024 Oxide Computer Company
25  */
26 
27 #include <sys/types.h>
28 #include <sys/systm.h>
29 #include <sys/stream.h>
30 #include <sys/cmn_err.h>
31 #include <sys/kmem.h>
32 #define	_SUN_TPI_VERSION 2
33 #include <sys/tihdr.h>
34 #include <sys/socket.h>
35 #include <sys/strsun.h>
36 #include <sys/strsubr.h>
37 
38 #include <netinet/in.h>
39 #include <netinet/ip6.h>
40 #include <netinet/tcp_seq.h>
41 #include <netinet/sctp.h>
42 
43 #include <inet/common.h>
44 #include <inet/ip.h>
45 #include <inet/ip_if.h>
46 #include <inet/ip6.h>
47 #include <inet/mib2.h>
48 #include <inet/ipclassifier.h>
49 #include <inet/ipp_common.h>
50 #include <inet/ipsec_impl.h>
51 #include <inet/sctp_ip.h>
52 
53 #include "sctp_impl.h"
54 #include "sctp_asconf.h"
55 #include "sctp_addr.h"
56 
57 static struct kmem_cache *sctp_kmem_set_cache;
58 
/*
 * PR-SCTP comments.
 *
 * When we get a valid Forward TSN chunk, we check the fragment list for this
 * SSN and the preceding SSNs and free all of them. Further, if this Forward
 * TSN causes the next expected SSN to be present in the stream queue, we
 * deliver any such stranded messages upstream. We also update the SACK info
 * appropriately. When checking for advancing the cumulative ack (in
 * sctp_cumack()) we must check for abandoned chunks and messages. While
 * traversing the transmit list, if we come across an abandoned chunk, we can
 * skip the message (i.e. take it out of the (re)transmit list) since this
 * message, and hence this chunk, has been marked abandoned by sctp_rexmit().
 * If we come across an unsent chunk for a message that is now abandoned, we
 * need to check if a Forward TSN needs to be sent; this could be a case where
 * we deferred sending a Forward TSN in sctp_get_msg_to_send(). Further, after
 * processing a SACK we check if the Advanced peer ack point can be moved
 * ahead, i.e. if we can send a Forward TSN via sctp_check_abandoned_data().
 */
77 void
78 sctp_free_set(sctp_set_t *s)
79 {
80 	sctp_set_t *p;
81 
82 	while (s) {
83 		p = s->next;
84 		kmem_cache_free(sctp_kmem_set_cache, s);
85 		s = p;
86 	}
87 }
88 
89 static void
90 sctp_ack_add(sctp_set_t **head, uint32_t tsn, int *num)
91 {
92 	sctp_set_t *p, *t;
93 
94 	if (head == NULL || num == NULL)
95 		return;
96 
97 	ASSERT(*num >= 0);
98 	ASSERT((*num == 0 && *head == NULL) || (*num > 0 && *head != NULL));
99 
100 	if (*head == NULL) {
101 		*head = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
102 		if (*head == NULL)
103 			return;
104 		(*head)->prev = (*head)->next = NULL;
105 		(*head)->begin = tsn;
106 		(*head)->end = tsn;
107 		*num = 1;
108 		return;
109 	}
110 
111 	ASSERT((*head)->prev == NULL);
112 
113 	/*
114 	 * Handle this special case here so we don't have to check
115 	 * for it each time in the loop.
116 	 */
117 	if (SEQ_LT(tsn + 1, (*head)->begin)) {
118 		/* add a new set, and move the head pointer */
119 		t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
120 		if (t == NULL)
121 			return;
122 		t->next = *head;
123 		t->prev = NULL;
124 		(*head)->prev = t;
125 		t->begin = tsn;
126 		t->end = tsn;
127 		(*num)++;
128 		*head = t;
129 		return;
130 	}
131 
132 	/*
133 	 * We need to handle the following cases, where p points to
134 	 * the current set (as we walk through the loop):
135 	 *
136 	 * 1. tsn is entirely less than p; create a new set before p.
137 	 * 2. tsn borders p from less; coalesce p with tsn.
138 	 * 3. tsn is withing p; do nothing.
139 	 * 4. tsn borders p from greater; coalesce p with tsn.
140 	 * 4a. p may now border p->next from less; if so, coalesce those
141 	 *    two sets.
142 	 * 5. tsn is entirely greater then all sets; add a new set at
143 	 *    the end.
144 	 */
145 	for (p = *head; ; p = p->next) {
146 		if (SEQ_LT(tsn + 1, p->begin)) {
147 			/* 1: add a new set before p. */
148 			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
149 			if (t == NULL)
150 				return;
151 			t->next = p;
152 			t->prev = NULL;
153 			t->begin = tsn;
154 			t->end = tsn;
155 			if (p->prev) {
156 				t->prev = p->prev;
157 				p->prev->next = t;
158 			}
159 			p->prev = t;
160 			(*num)++;
161 			return;
162 		}
163 
164 		if ((tsn + 1) == p->begin) {
165 			/* 2: adjust p->begin */
166 			p->begin = tsn;
167 			return;
168 		}
169 
170 		if (SEQ_GEQ(tsn, p->begin) && SEQ_LEQ(tsn, p->end)) {
171 			/* 3; do nothing */
172 			return;
173 		}
174 
175 		if ((p->end + 1) == tsn) {
176 			/* 4; adjust p->end */
177 			p->end = tsn;
178 
179 			if (p->next != NULL && (tsn + 1) == p->next->begin) {
180 				/* 4a: coalesce p and p->next */
181 				t = p->next;
182 				p->end = t->end;
183 				p->next = t->next;
184 				if (t->next != NULL)
185 					t->next->prev = p;
186 				kmem_cache_free(sctp_kmem_set_cache, t);
187 				(*num)--;
188 			}
189 			return;
190 		}
191 
192 		if (p->next == NULL) {
193 			/* 5: add new set at the end */
194 			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
195 			if (t == NULL)
196 				return;
197 			t->next = NULL;
198 			t->prev = p;
199 			t->begin = tsn;
200 			t->end = tsn;
201 			p->next = t;
202 			(*num)++;
203 			return;
204 		}
205 
206 		if (SEQ_GT(tsn, p->end + 1))
207 			continue;
208 	}
209 }
210 
211 static void
212 sctp_ack_rem(sctp_set_t **head, uint32_t end, int *num)
213 {
214 	sctp_set_t *p, *t;
215 
216 	if (head == NULL || *head == NULL || num == NULL)
217 		return;
218 
219 	/* Nothing to remove */
220 	if (SEQ_LT(end, (*head)->begin))
221 		return;
222 
223 	/* Find out where to start removing sets */
224 	for (p = *head; p->next; p = p->next) {
225 		if (SEQ_LEQ(end, p->end))
226 			break;
227 	}
228 
229 	if (SEQ_LT(end, p->end) && SEQ_GEQ(end, p->begin)) {
230 		/* adjust p */
231 		p->begin = end + 1;
232 		/* all done */
233 		if (p == *head)
234 			return;
235 	} else if (SEQ_GEQ(end, p->end)) {
236 		/* remove this set too */
237 		p = p->next;
238 	}
239 
240 	/* unlink everything before this set */
241 	t = *head;
242 	*head = p;
243 	if (p != NULL && p->prev != NULL) {
244 		p->prev->next = NULL;
245 		p->prev = NULL;
246 	}
247 
248 	sctp_free_set(t);
249 
250 	/* recount the number of sets */
251 	*num = 0;
252 
253 	for (p = *head; p != NULL; p = p->next)
254 		(*num)++;
255 }
256 
257 void
258 sctp_sets_init()
259 {
260 	sctp_kmem_set_cache = kmem_cache_create("sctp_set_cache",
261 	    sizeof (sctp_set_t), 0, NULL, NULL, NULL, NULL,
262 	    NULL, 0);
263 }
264 
265 void
266 sctp_sets_fini()
267 {
268 	kmem_cache_destroy(sctp_kmem_set_cache);
269 }
270 
271 sctp_chunk_hdr_t *
272 sctp_first_chunk(uchar_t *rptr, ssize_t remaining)
273 {
274 	sctp_chunk_hdr_t *ch;
275 	uint16_t ch_len;
276 
277 	if (remaining < sizeof (*ch)) {
278 		return (NULL);
279 	}
280 
281 	ch = (sctp_chunk_hdr_t *)rptr;
282 	ch_len = ntohs(ch->sch_len);
283 
284 	if (ch_len < sizeof (*ch) || remaining < ch_len) {
285 		return (NULL);
286 	}
287 
288 	return (ch);
289 }
290 
291 sctp_chunk_hdr_t *
292 sctp_next_chunk(sctp_chunk_hdr_t *ch, ssize_t *remaining)
293 {
294 	int pad;
295 	uint16_t ch_len;
296 
297 	if (!ch) {
298 		return (NULL);
299 	}
300 
301 	ch_len = ntohs(ch->sch_len);
302 
303 	if ((pad = ch_len & (SCTP_ALIGN - 1)) != 0) {
304 		pad = SCTP_ALIGN - pad;
305 	}
306 
307 	*remaining -= (ch_len + pad);
308 	ch = (sctp_chunk_hdr_t *)((char *)ch + ch_len + pad);
309 
310 	return (sctp_first_chunk((uchar_t *)ch, *remaining));
311 }
312 
313 /*
314  * Attach ancillary data to a received SCTP segments.
315  * If the source address (fp) is not the primary, send up a
316  * unitdata_ind so recvfrom() can populate the msg_name field.
317  * If ancillary data is also requested, we append it to the
318  * unitdata_req. Otherwise, we just send up an optdata_ind.
319  */
320 static int
321 sctp_input_add_ancillary(sctp_t *sctp, mblk_t **mp, sctp_data_hdr_t *dcp,
322     sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
323 {
324 	struct T_unitdata_ind	*tudi;
325 	int			optlen;
326 	int			hdrlen;
327 	uchar_t			*optptr;
328 	struct cmsghdr		*cmsg;
329 	mblk_t			*mp1;
330 	struct sockaddr_in6	sin_buf[1];
331 	struct sockaddr_in6	*sin6;
332 	struct sockaddr_in	*sin4;
333 	crb_t			 addflag;	/* Which pieces to add */
334 	conn_t			*connp = sctp->sctp_connp;
335 
336 	sin4 = NULL;
337 	sin6 = NULL;
338 
339 	optlen = hdrlen = 0;
340 	addflag.crb_all = 0;
341 
342 	/* Figure out address size */
343 	if (connp->conn_family == AF_INET) {
344 		sin4 = (struct sockaddr_in *)sin_buf;
345 		sin4->sin_family = AF_INET;
346 		sin4->sin_port = connp->conn_fport;
347 		IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, sin4->sin_addr.s_addr);
348 		hdrlen = sizeof (*tudi) + sizeof (*sin4);
349 	} else {
350 		sin6 = sin_buf;
351 		sin6->sin6_family = AF_INET6;
352 		sin6->sin6_port = connp->conn_fport;
353 		sin6->sin6_addr = fp->sf_faddr;
354 		hdrlen = sizeof (*tudi) + sizeof (*sin6);
355 	}
356 	/* If app asked to receive send / recv info */
357 	if (sctp->sctp_recvsndrcvinfo)
358 		optlen += sizeof (*cmsg) + sizeof (struct sctp_sndrcvinfo);
359 
360 	if (connp->conn_recv_ancillary.crb_all == 0)
361 		goto noancillary;
362 
363 	if (connp->conn_recv_ancillary.crb_ip_recvpktinfo &&
364 	    ira->ira_ruifindex != sctp->sctp_recvifindex) {
365 		optlen += sizeof (*cmsg) + sizeof (struct in6_pktinfo);
366 		if (hdrlen == 0)
367 			hdrlen = sizeof (struct T_unitdata_ind);
368 		addflag.crb_ip_recvpktinfo = 1;
369 	}
370 	/* If app asked for hoplimit and it has changed ... */
371 	if (connp->conn_recv_ancillary.crb_ipv6_recvhoplimit &&
372 	    ipp->ipp_hoplimit != sctp->sctp_recvhops) {
373 		optlen += sizeof (*cmsg) + sizeof (uint_t);
374 		if (hdrlen == 0)
375 			hdrlen = sizeof (struct T_unitdata_ind);
376 		addflag.crb_ipv6_recvhoplimit = 1;
377 	}
378 	/* If app asked for tclass and it has changed ... */
379 	if (connp->conn_recv_ancillary.crb_ipv6_recvtclass &&
380 	    ipp->ipp_tclass != sctp->sctp_recvtclass) {
381 		optlen += sizeof (struct T_opthdr) + sizeof (uint_t);
382 		if (hdrlen == 0)
383 			hdrlen = sizeof (struct T_unitdata_ind);
384 		addflag.crb_ipv6_recvtclass = 1;
385 	}
386 	/* If app asked for hopbyhop headers and it has changed ... */
387 	if (connp->conn_recv_ancillary.crb_ipv6_recvhopopts &&
388 	    ip_cmpbuf(sctp->sctp_hopopts, sctp->sctp_hopoptslen,
389 	    (ipp->ipp_fields & IPPF_HOPOPTS),
390 	    ipp->ipp_hopopts, ipp->ipp_hopoptslen)) {
391 		optlen += sizeof (*cmsg) + ipp->ipp_hopoptslen -
392 		    sctp->sctp_v6label_len;
393 		if (hdrlen == 0)
394 			hdrlen = sizeof (struct T_unitdata_ind);
395 		addflag.crb_ipv6_recvhopopts = 1;
396 		if (!ip_allocbuf((void **)&sctp->sctp_hopopts,
397 		    &sctp->sctp_hopoptslen,
398 		    (ipp->ipp_fields & IPPF_HOPOPTS),
399 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen))
400 			return (-1);
401 	}
402 	/* If app asked for dst headers before routing headers ... */
403 	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts &&
404 	    ip_cmpbuf(sctp->sctp_rthdrdstopts, sctp->sctp_rthdrdstoptslen,
405 	    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
406 	    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen)) {
407 		optlen += sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
408 		if (hdrlen == 0)
409 			hdrlen = sizeof (struct T_unitdata_ind);
410 		addflag.crb_ipv6_recvrthdrdstopts = 1;
411 		if (!ip_allocbuf((void **)&sctp->sctp_rthdrdstopts,
412 		    &sctp->sctp_rthdrdstoptslen,
413 		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
414 		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen))
415 			return (-1);
416 	}
417 	/* If app asked for routing headers and it has changed ... */
418 	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdr &&
419 	    ip_cmpbuf(sctp->sctp_rthdr, sctp->sctp_rthdrlen,
420 	    (ipp->ipp_fields & IPPF_RTHDR),
421 	    ipp->ipp_rthdr, ipp->ipp_rthdrlen)) {
422 		optlen += sizeof (*cmsg) + ipp->ipp_rthdrlen;
423 		if (hdrlen == 0)
424 			hdrlen = sizeof (struct T_unitdata_ind);
425 		addflag.crb_ipv6_recvrthdr = 1;
426 		if (!ip_allocbuf((void **)&sctp->sctp_rthdr,
427 		    &sctp->sctp_rthdrlen,
428 		    (ipp->ipp_fields & IPPF_RTHDR),
429 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen))
430 			return (-1);
431 	}
432 	/* If app asked for dest headers and it has changed ... */
433 	if (connp->conn_recv_ancillary.crb_ipv6_recvdstopts &&
434 	    ip_cmpbuf(sctp->sctp_dstopts, sctp->sctp_dstoptslen,
435 	    (ipp->ipp_fields & IPPF_DSTOPTS),
436 	    ipp->ipp_dstopts, ipp->ipp_dstoptslen)) {
437 		optlen += sizeof (*cmsg) + ipp->ipp_dstoptslen;
438 		if (hdrlen == 0)
439 			hdrlen = sizeof (struct T_unitdata_ind);
440 		addflag.crb_ipv6_recvdstopts = 1;
441 		if (!ip_allocbuf((void **)&sctp->sctp_dstopts,
442 		    &sctp->sctp_dstoptslen,
443 		    (ipp->ipp_fields & IPPF_DSTOPTS),
444 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen))
445 			return (-1);
446 	}
447 noancillary:
448 	/* Nothing to add */
449 	if (hdrlen == 0)
450 		return (-1);
451 
452 	mp1 = allocb(hdrlen + optlen + sizeof (void *), BPRI_MED);
453 	if (mp1 == NULL)
454 		return (-1);
455 	mp1->b_cont = *mp;
456 	*mp = mp1;
457 	mp1->b_rptr += sizeof (void *);  /* pointer worth of padding */
458 	mp1->b_wptr = mp1->b_rptr + hdrlen + optlen;
459 	DB_TYPE(mp1) = M_PROTO;
460 	tudi = (struct T_unitdata_ind *)mp1->b_rptr;
461 	tudi->PRIM_type = T_UNITDATA_IND;
462 	tudi->SRC_length = sin4 ? sizeof (*sin4) : sizeof (*sin6);
463 	tudi->SRC_offset = sizeof (*tudi);
464 	tudi->OPT_offset = sizeof (*tudi) + tudi->SRC_length;
465 	tudi->OPT_length = optlen;
466 	if (sin4) {
467 		bcopy(sin4, tudi + 1, sizeof (*sin4));
468 	} else {
469 		bcopy(sin6, tudi + 1, sizeof (*sin6));
470 	}
471 	optptr = (uchar_t *)tudi + tudi->OPT_offset;
472 
473 	if (sctp->sctp_recvsndrcvinfo) {
474 		/* XXX need backout method if memory allocation fails. */
475 		struct sctp_sndrcvinfo *sri;
476 
477 		cmsg = (struct cmsghdr *)optptr;
478 		cmsg->cmsg_level = IPPROTO_SCTP;
479 		cmsg->cmsg_type = SCTP_SNDRCV;
480 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*sri);
481 		optptr += sizeof (*cmsg);
482 
483 		sri = (struct sctp_sndrcvinfo *)(cmsg + 1);
484 		ASSERT(OK_32PTR(sri));
485 		sri->sinfo_stream = ntohs(dcp->sdh_sid);
486 		sri->sinfo_ssn = ntohs(dcp->sdh_ssn);
487 		if (SCTP_DATA_GET_UBIT(dcp)) {
488 			sri->sinfo_flags = MSG_UNORDERED;
489 		} else {
490 			sri->sinfo_flags = 0;
491 		}
492 		sri->sinfo_ppid = dcp->sdh_payload_id;
493 		sri->sinfo_context = 0;
494 		sri->sinfo_timetolive = 0;
495 		sri->sinfo_tsn = ntohl(dcp->sdh_tsn);
496 		sri->sinfo_cumtsn = sctp->sctp_ftsn;
497 		sri->sinfo_assoc_id = 0;
498 
499 		optptr += sizeof (*sri);
500 	}
501 
502 	/*
503 	 * If app asked for pktinfo and the index has changed ...
504 	 * Note that the local address never changes for the connection.
505 	 */
506 	if (addflag.crb_ip_recvpktinfo) {
507 		struct in6_pktinfo *pkti;
508 		uint_t ifindex;
509 
510 		ifindex = ira->ira_ruifindex;
511 		cmsg = (struct cmsghdr *)optptr;
512 		cmsg->cmsg_level = IPPROTO_IPV6;
513 		cmsg->cmsg_type = IPV6_PKTINFO;
514 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*pkti);
515 		optptr += sizeof (*cmsg);
516 
517 		pkti = (struct in6_pktinfo *)optptr;
518 		if (connp->conn_family == AF_INET6)
519 			pkti->ipi6_addr = sctp->sctp_ip6h->ip6_src;
520 		else
521 			IN6_IPADDR_TO_V4MAPPED(sctp->sctp_ipha->ipha_src,
522 			    &pkti->ipi6_addr);
523 
524 		pkti->ipi6_ifindex = ifindex;
525 		optptr += sizeof (*pkti);
526 		ASSERT(OK_32PTR(optptr));
527 		/* Save as "last" value */
528 		sctp->sctp_recvifindex = ifindex;
529 	}
530 	/* If app asked for hoplimit and it has changed ... */
531 	if (addflag.crb_ipv6_recvhoplimit) {
532 		cmsg = (struct cmsghdr *)optptr;
533 		cmsg->cmsg_level = IPPROTO_IPV6;
534 		cmsg->cmsg_type = IPV6_HOPLIMIT;
535 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
536 		optptr += sizeof (*cmsg);
537 
538 		*(uint_t *)optptr = ipp->ipp_hoplimit;
539 		optptr += sizeof (uint_t);
540 		ASSERT(OK_32PTR(optptr));
541 		/* Save as "last" value */
542 		sctp->sctp_recvhops = ipp->ipp_hoplimit;
543 	}
544 	/* If app asked for tclass and it has changed ... */
545 	if (addflag.crb_ipv6_recvtclass) {
546 		cmsg = (struct cmsghdr *)optptr;
547 		cmsg->cmsg_level = IPPROTO_IPV6;
548 		cmsg->cmsg_type = IPV6_TCLASS;
549 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
550 		optptr += sizeof (*cmsg);
551 
552 		*(uint_t *)optptr = ipp->ipp_tclass;
553 		optptr += sizeof (uint_t);
554 		ASSERT(OK_32PTR(optptr));
555 		/* Save as "last" value */
556 		sctp->sctp_recvtclass = ipp->ipp_tclass;
557 	}
558 	if (addflag.crb_ipv6_recvhopopts) {
559 		cmsg = (struct cmsghdr *)optptr;
560 		cmsg->cmsg_level = IPPROTO_IPV6;
561 		cmsg->cmsg_type = IPV6_HOPOPTS;
562 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_hopoptslen;
563 		optptr += sizeof (*cmsg);
564 
565 		bcopy(ipp->ipp_hopopts, optptr, ipp->ipp_hopoptslen);
566 		optptr += ipp->ipp_hopoptslen;
567 		ASSERT(OK_32PTR(optptr));
568 		/* Save as last value */
569 		ip_savebuf((void **)&sctp->sctp_hopopts,
570 		    &sctp->sctp_hopoptslen,
571 		    (ipp->ipp_fields & IPPF_HOPOPTS),
572 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen);
573 	}
574 	if (addflag.crb_ipv6_recvrthdrdstopts) {
575 		cmsg = (struct cmsghdr *)optptr;
576 		cmsg->cmsg_level = IPPROTO_IPV6;
577 		cmsg->cmsg_type = IPV6_RTHDRDSTOPTS;
578 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
579 		optptr += sizeof (*cmsg);
580 
581 		bcopy(ipp->ipp_rthdrdstopts, optptr, ipp->ipp_rthdrdstoptslen);
582 		optptr += ipp->ipp_rthdrdstoptslen;
583 		ASSERT(OK_32PTR(optptr));
584 		/* Save as last value */
585 		ip_savebuf((void **)&sctp->sctp_rthdrdstopts,
586 		    &sctp->sctp_rthdrdstoptslen,
587 		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
588 		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen);
589 	}
590 	if (addflag.crb_ipv6_recvrthdr) {
591 		cmsg = (struct cmsghdr *)optptr;
592 		cmsg->cmsg_level = IPPROTO_IPV6;
593 		cmsg->cmsg_type = IPV6_RTHDR;
594 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrlen;
595 		optptr += sizeof (*cmsg);
596 
597 		bcopy(ipp->ipp_rthdr, optptr, ipp->ipp_rthdrlen);
598 		optptr += ipp->ipp_rthdrlen;
599 		ASSERT(OK_32PTR(optptr));
600 		/* Save as last value */
601 		ip_savebuf((void **)&sctp->sctp_rthdr,
602 		    &sctp->sctp_rthdrlen,
603 		    (ipp->ipp_fields & IPPF_RTHDR),
604 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen);
605 	}
606 	if (addflag.crb_ipv6_recvdstopts) {
607 		cmsg = (struct cmsghdr *)optptr;
608 		cmsg->cmsg_level = IPPROTO_IPV6;
609 		cmsg->cmsg_type = IPV6_DSTOPTS;
610 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_dstoptslen;
611 		optptr += sizeof (*cmsg);
612 
613 		bcopy(ipp->ipp_dstopts, optptr, ipp->ipp_dstoptslen);
614 		optptr += ipp->ipp_dstoptslen;
615 		ASSERT(OK_32PTR(optptr));
616 		/* Save as last value */
617 		ip_savebuf((void **)&sctp->sctp_dstopts,
618 		    &sctp->sctp_dstoptslen,
619 		    (ipp->ipp_fields & IPPF_DSTOPTS),
620 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen);
621 	}
622 
623 	ASSERT(optptr == mp1->b_wptr);
624 
625 	return (0);
626 }
627 
628 void
629 sctp_free_reass(sctp_instr_t *sip)
630 {
631 	mblk_t *mp, *mpnext, *mctl;
632 #ifdef	DEBUG
633 	sctp_reass_t	*srp;
634 #endif
635 
636 	for (mp = sip->istr_reass; mp != NULL; mp = mpnext) {
637 		mpnext = mp->b_next;
638 		mp->b_next = NULL;
639 		mp->b_prev = NULL;
640 		if (DB_TYPE(mp) == M_CTL) {
641 			mctl = mp;
642 #ifdef	DEBUG
643 			srp = (sctp_reass_t *)DB_BASE(mctl);
644 			/* Partial delivery can leave empty srp */
645 			ASSERT(mp->b_cont != NULL || srp->sr_got == 0);
646 #endif
647 			mp = mp->b_cont;
648 			mctl->b_cont = NULL;
649 			freeb(mctl);
650 		}
651 		freemsg(mp);
652 	}
653 	sip->istr_reass = NULL;
654 }
655 
656 /*
657  * If the series of data fragments of which dmp is a part is successfully
658  * reassembled, the first mblk in the series is returned. dc is adjusted
659  * to point at the data chunk in the lead mblk, and b_rptr also points to
660  * the data chunk; the following mblk's b_rptr's point at the actual payload.
661  *
662  * If the series is not yet reassembled, NULL is returned. dc is not changed.
663  * XXX should probably move this up into the state machine.
664  */
665 
666 /* Fragment list for un-ordered messages. Partial delivery is not supported */
667 static mblk_t *
668 sctp_uodata_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc)
669 {
670 	mblk_t		*hmp;
671 	mblk_t		*begin = NULL;
672 	mblk_t		*end = NULL;
673 	sctp_data_hdr_t	*qdc;
674 	uint32_t	ntsn;
675 	uint32_t	tsn = ntohl((*dc)->sdh_tsn);
676 #ifdef	DEBUG
677 	mblk_t		*mp1;
678 #endif
679 
680 	/* First frag. */
681 	if (sctp->sctp_uo_frags == NULL) {
682 		sctp->sctp_uo_frags = dmp;
683 		return (NULL);
684 	}
685 	hmp = sctp->sctp_uo_frags;
686 	/*
687 	 * Insert the segment according to the TSN, fragmented unordered
688 	 * chunks are sequenced by TSN.
689 	 */
690 	while (hmp != NULL) {
691 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
692 		ntsn = ntohl(qdc->sdh_tsn);
693 		if (SEQ_GT(ntsn, tsn)) {
694 			if (hmp->b_prev == NULL) {
695 				dmp->b_next = hmp;
696 				hmp->b_prev = dmp;
697 				sctp->sctp_uo_frags = dmp;
698 			} else {
699 				dmp->b_next = hmp;
700 				dmp->b_prev = hmp->b_prev;
701 				hmp->b_prev->b_next = dmp;
702 				hmp->b_prev = dmp;
703 			}
704 			break;
705 		}
706 		if (hmp->b_next == NULL) {
707 			hmp->b_next = dmp;
708 			dmp->b_prev = hmp;
709 			break;
710 		}
711 		hmp = hmp->b_next;
712 	}
713 	/* check if we completed a msg */
714 	if (SCTP_DATA_GET_BBIT(*dc)) {
715 		begin = dmp;
716 	} else if (SCTP_DATA_GET_EBIT(*dc)) {
717 		end = dmp;
718 	}
719 	/*
720 	 * We walk consecutive TSNs backwards till we get a seg. with
721 	 * the B bit
722 	 */
723 	if (begin == NULL) {
724 		for (hmp = dmp->b_prev; hmp != NULL; hmp = hmp->b_prev) {
725 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
726 			ntsn = ntohl(qdc->sdh_tsn);
727 			if ((int32_t)(tsn - ntsn) > 1) {
728 				return (NULL);
729 			}
730 			if (SCTP_DATA_GET_BBIT(qdc)) {
731 				begin = hmp;
732 				break;
733 			}
734 			tsn = ntsn;
735 		}
736 	}
737 	tsn = ntohl((*dc)->sdh_tsn);
738 	/*
739 	 * We walk consecutive TSNs till we get a seg. with the E bit
740 	 */
741 	if (end == NULL) {
742 		for (hmp = dmp->b_next; hmp != NULL; hmp = hmp->b_next) {
743 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
744 			ntsn = ntohl(qdc->sdh_tsn);
745 			if ((int32_t)(ntsn - tsn) > 1) {
746 				return (NULL);
747 			}
748 			if (SCTP_DATA_GET_EBIT(qdc)) {
749 				end = hmp;
750 				break;
751 			}
752 			tsn = ntsn;
753 		}
754 	}
755 	if (begin == NULL || end == NULL) {
756 		return (NULL);
757 	}
758 	/* Got one!, Remove the msg from the list */
759 	if (sctp->sctp_uo_frags == begin) {
760 		ASSERT(begin->b_prev == NULL);
761 		sctp->sctp_uo_frags = end->b_next;
762 		if (end->b_next != NULL)
763 			end->b_next->b_prev = NULL;
764 	} else {
765 		begin->b_prev->b_next = end->b_next;
766 		if (end->b_next != NULL)
767 			end->b_next->b_prev = begin->b_prev;
768 	}
769 	begin->b_prev = NULL;
770 	end->b_next = NULL;
771 
772 	/*
773 	 * Null out b_next and b_prev and chain using b_cont.
774 	 */
775 	dmp = end = begin;
776 	hmp = begin->b_next;
777 	*dc = (sctp_data_hdr_t *)begin->b_rptr;
778 	begin->b_next = NULL;
779 	while (hmp != NULL) {
780 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
781 		hmp->b_rptr = (uchar_t *)(qdc + 1);
782 		end = hmp->b_next;
783 		dmp->b_cont = hmp;
784 		dmp = hmp;
785 
786 		if (end != NULL)
787 			hmp->b_next = NULL;
788 		hmp->b_prev = NULL;
789 		hmp = end;
790 	}
791 	BUMP_LOCAL(sctp->sctp_reassmsgs);
792 #ifdef	DEBUG
793 	mp1 = begin;
794 	while (mp1 != NULL) {
795 		ASSERT(mp1->b_next == NULL);
796 		ASSERT(mp1->b_prev == NULL);
797 		mp1 = mp1->b_cont;
798 	}
799 #endif
800 	return (begin);
801 }
802 
803 /*
804  * Try partial delivery.
805  */
806 static mblk_t *
807 sctp_try_partial_delivery(sctp_t *sctp, mblk_t *hmp, sctp_reass_t *srp,
808     sctp_data_hdr_t **dc)
809 {
810 	mblk_t		*mp;
811 	mblk_t		*dmp;
812 	mblk_t		*qmp;
813 	mblk_t		*prev;
814 	sctp_data_hdr_t	*qdc;
815 	uint32_t	tsn;
816 
817 	ASSERT(DB_TYPE(hmp) == M_CTL);
818 
819 	dprint(4, ("trypartial: got=%d, needed=%d\n",
820 	    (int)(srp->sr_got), (int)(srp->sr_needed)));
821 
822 	mp = hmp->b_cont;
823 	qdc = (sctp_data_hdr_t *)mp->b_rptr;
824 
825 	ASSERT(SCTP_DATA_GET_BBIT(qdc) && srp->sr_hasBchunk);
826 
827 	tsn = ntohl(qdc->sdh_tsn) + 1;
828 
829 	/*
830 	 * This loop has two exit conditions: the
831 	 * end of received chunks has been reached, or
832 	 * there is a break in the sequence. We want
833 	 * to chop the reassembly list as follows (the
834 	 * numbers are TSNs):
835 	 *   10 -> 11 ->	(end of chunks)
836 	 *   10 -> 11 -> | 13   (break in sequence)
837 	 */
838 	prev = mp;
839 	mp = mp->b_cont;
840 	while (mp != NULL) {
841 		qdc = (sctp_data_hdr_t *)mp->b_rptr;
842 		if (ntohl(qdc->sdh_tsn) != tsn)
843 			break;
844 		prev = mp;
845 		mp = mp->b_cont;
846 		tsn++;
847 	}
848 	/*
849 	 * We are sending all the fragments upstream, we have to retain
850 	 * the srp info for further fragments.
851 	 */
852 	if (mp == NULL) {
853 		dmp = hmp->b_cont;
854 		hmp->b_cont = NULL;
855 		srp->sr_nexttsn = tsn;
856 		srp->sr_msglen = 0;
857 		srp->sr_needed = 0;
858 		srp->sr_got = 0;
859 		srp->sr_tail = NULL;
860 	} else {
861 		/*
862 		 * There is a gap then some ordered frags which are not
863 		 * the next deliverable tsn. When the next deliverable
864 		 * frag arrives it will be set as the new list head in
865 		 * sctp_data_frag() by setting the B bit.
866 		 */
867 		dmp = hmp->b_cont;
868 		hmp->b_cont = mp;
869 	}
870 	srp->sr_hasBchunk = B_FALSE;
871 	/*
872 	 * mp now points at the last chunk in the sequence,
873 	 * and prev points to mp's previous in the list.
874 	 * We chop the list at prev. Subsequent fragment
875 	 * deliveries will follow the normal reassembly
876 	 * path unless they too exceed the sctp_pd_point.
877 	 */
878 	prev->b_cont = NULL;
879 	srp->sr_partial_delivered = B_TRUE;
880 
881 	dprint(4, ("trypartial: got some, got=%d, needed=%d\n",
882 	    (int)(srp->sr_got), (int)(srp->sr_needed)));
883 
884 	/*
885 	 * Adjust all mblk's except the lead so their rptr's point to the
886 	 * payload. sctp_data_chunk() will need to process the lead's
887 	 * data chunk section, so leave it's rptr pointing at the data chunk.
888 	 */
889 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
890 	if (srp->sr_tail != NULL) {
891 		srp->sr_got--;
892 		ASSERT(srp->sr_got != 0);
893 		if (srp->sr_needed != 0) {
894 			srp->sr_needed--;
895 			ASSERT(srp->sr_needed != 0);
896 		}
897 		srp->sr_msglen -= ntohs((*dc)->sdh_len);
898 	}
899 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
900 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
901 		qmp->b_rptr = (uchar_t *)(qdc + 1);
902 
903 		/*
904 		 * Deduct the balance from got and needed here, now that
905 		 * we know we are actually delivering these data.
906 		 */
907 		if (srp->sr_tail != NULL) {
908 			srp->sr_got--;
909 			ASSERT(srp->sr_got != 0);
910 			if (srp->sr_needed != 0) {
911 				srp->sr_needed--;
912 				ASSERT(srp->sr_needed != 0);
913 			}
914 			srp->sr_msglen -= ntohs(qdc->sdh_len);
915 		}
916 	}
917 	ASSERT(srp->sr_msglen == 0);
918 	BUMP_LOCAL(sctp->sctp_reassmsgs);
919 
920 	return (dmp);
921 }
922 
923 /*
924  * Handle received fragments for ordered delivery to upper layer protocol.
925  * Manage the per message reassembly queue and if this fragment completes
926  * reassembly of the message, or qualifies the already reassembled data
927  * for partial delivery, prepare the message for delivery upstream.
928  *
929  * tpfinished in the caller remains set only when the incoming fragment
930  * has completed the reassembly of the message associated with its ssn.
931  */
932 static mblk_t *
933 sctp_data_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc, int *error,
934     sctp_instr_t *sip, boolean_t *tpfinished)
935 {
936 	mblk_t		*reassq_curr, *reassq_next, *reassq_prev;
937 	mblk_t		*new_reassq;
938 	mblk_t		*qmp;
939 	mblk_t		*first_mp;
940 	sctp_reass_t	*srp;
941 	sctp_data_hdr_t	*qdc;
942 	sctp_data_hdr_t	*bdc;
943 	sctp_data_hdr_t	*edc;
944 	uint32_t	tsn;
945 	uint16_t	fraglen = 0;
946 
947 	reassq_curr = NULL;
948 	*error = 0;
949 
950 	/*
951 	 * Find the reassembly queue for this data chunk, if none
952 	 * yet exists, a new per message queue will be created and
953 	 * appended to the end of the list of per message queues.
954 	 *
955 	 * sip points on sctp_instr_t representing instream messages
956 	 * as yet undelivered for this stream (sid) of the association.
957 	 */
958 	reassq_next = reassq_prev = sip->istr_reass;
959 	for (; reassq_next != NULL; reassq_next = reassq_next->b_next) {
960 		srp = (sctp_reass_t *)DB_BASE(reassq_next);
961 		if (ntohs((*dc)->sdh_ssn) == srp->sr_ssn) {
962 			reassq_curr = reassq_next;
963 			goto foundit;
964 		} else if (SSN_GT(srp->sr_ssn, ntohs((*dc)->sdh_ssn)))
965 			break;
966 		reassq_prev = reassq_next;
967 	}
968 
969 	/*
970 	 * First fragment of this message received, allocate a M_CTL that
971 	 * will head the reassembly queue for this message. The message
972 	 * and all its fragments are identified by having the same ssn.
973 	 *
974 	 * Arriving fragments will be inserted in tsn order on the
975 	 * reassembly queue for this message (ssn), linked by b_cont.
976 	 */
977 	if ((new_reassq = allocb(sizeof (*srp), BPRI_MED)) == NULL) {
978 		*error = ENOMEM;
979 		return (NULL);
980 	}
981 	DB_TYPE(new_reassq) = M_CTL;
982 	srp = (sctp_reass_t *)DB_BASE(new_reassq);
983 	new_reassq->b_cont = dmp;
984 
985 	/*
986 	 * All per ssn reassembly queues, (one for each message) on
987 	 * this stream are doubly linked by b_next/b_prev back to the
988 	 * instr_reass of the instream structure associated with this
989 	 * stream id, (sip is initialized as sctp->sctp_instr[sid]).
990 	 * Insert the new reassembly queue in the correct (ssn) order.
991 	 */
992 	if (reassq_next != NULL) {
993 		if (sip->istr_reass == reassq_next) {
994 			/* head insertion */
995 			sip->istr_reass = new_reassq;
996 			new_reassq->b_next = reassq_next;
997 			new_reassq->b_prev = NULL;
998 			reassq_next->b_prev = new_reassq;
999 		} else {
1000 			/* mid queue insertion */
1001 			reassq_prev->b_next = new_reassq;
1002 			new_reassq->b_prev = reassq_prev;
1003 			new_reassq->b_next = reassq_next;
1004 			reassq_next->b_prev = new_reassq;
1005 		}
1006 	} else {
1007 		/* place new reassembly queue at the end */
1008 		if (sip->istr_reass == NULL) {
1009 			sip->istr_reass = new_reassq;
1010 			new_reassq->b_prev = NULL;
1011 		} else {
1012 			reassq_prev->b_next = new_reassq;
1013 			new_reassq->b_prev = reassq_prev;
1014 		}
1015 		new_reassq->b_next = NULL;
1016 	}
1017 	srp->sr_partial_delivered = B_FALSE;
1018 	srp->sr_ssn = ntohs((*dc)->sdh_ssn);
1019 	srp->sr_hasBchunk = B_FALSE;
1020 empty_srp:
1021 	srp->sr_needed = 0;
1022 	srp->sr_got = 1;
1023 	/* tail always the highest tsn on the reassembly queue for this ssn */
1024 	srp->sr_tail = dmp;
1025 	if (SCTP_DATA_GET_BBIT(*dc)) {
1026 		/* Incoming frag is flagged as the beginning of message */
1027 		srp->sr_msglen = ntohs((*dc)->sdh_len);
1028 		srp->sr_nexttsn = ntohl((*dc)->sdh_tsn) + 1;
1029 		srp->sr_hasBchunk = B_TRUE;
1030 	} else if (srp->sr_partial_delivered &&
1031 	    srp->sr_nexttsn == ntohl((*dc)->sdh_tsn)) {
1032 		/*
1033 		 * The real beginning fragment of the message was already
1034 		 * delivered upward, so this is the earliest frag expected.
1035 		 * Fake the B-bit then see if this frag also completes the
1036 		 * message.
1037 		 */
1038 		SCTP_DATA_SET_BBIT(*dc);
1039 		srp->sr_hasBchunk = B_TRUE;
1040 		srp->sr_msglen = ntohs((*dc)->sdh_len);
1041 		if (SCTP_DATA_GET_EBIT(*dc)) {
1042 			/* This frag is marked as the end of message */
1043 			srp->sr_needed = 1;
1044 			/* Got all fragments of this message now */
1045 			goto frag_done;
1046 		}
1047 		srp->sr_nexttsn++;
1048 	}
1049 
1050 	/* The only fragment of this message currently queued */
1051 	*tpfinished = B_FALSE;
1052 	return (NULL);
1053 foundit:
1054 	/*
1055 	 * This message already has a reassembly queue. Insert the new frag
1056 	 * in the reassembly queue. Try the tail first, on the assumption
1057 	 * that the fragments are arriving in order.
1058 	 */
1059 	qmp = srp->sr_tail;
1060 
1061 	/*
1062 	 * A NULL tail means all existing fragments of the message have
1063 	 * been entirely consumed during a partially delivery.
1064 	 */
1065 	if (qmp == NULL) {
1066 		ASSERT(srp->sr_got == 0 && srp->sr_needed == 0 &&
1067 		    srp->sr_partial_delivered);
1068 		ASSERT(reassq_curr->b_cont == NULL);
1069 		reassq_curr->b_cont = dmp;
1070 		goto empty_srp;
1071 	} else {
1072 		/*
1073 		 * If partial delivery did take place but the next arriving
1074 		 * fragment was not the next to be delivered, or partial
1075 		 * delivery broke off due to a gap, fragments remain on the
1076 		 * tail. The next fragment due to be delivered still has to
1077 		 * be set as the new head of list upon arrival. Fake B-bit
1078 		 * on that frag then see if it also completes the message.
1079 		 */
1080 		if (srp->sr_partial_delivered &&
1081 		    srp->sr_nexttsn == ntohl((*dc)->sdh_tsn)) {
1082 			SCTP_DATA_SET_BBIT(*dc);
1083 			srp->sr_hasBchunk = B_TRUE;
1084 			if (SCTP_DATA_GET_EBIT(*dc)) {
1085 				/* Got all fragments of this message now */
1086 				goto frag_done;
1087 			}
1088 		}
1089 	}
1090 
1091 	/* grab the frag header of already queued tail frag for comparison */
1092 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1093 	ASSERT(qmp->b_cont == NULL);
1094 
1095 	/* check if the frag goes on the tail in order */
1096 	if (SEQ_GT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1097 		qmp->b_cont = dmp;
1098 		srp->sr_tail = dmp;
1099 		dmp->b_cont = NULL;
1100 		if (srp->sr_hasBchunk && srp->sr_nexttsn ==
1101 		    ntohl((*dc)->sdh_tsn)) {
1102 			srp->sr_msglen += ntohs((*dc)->sdh_len);
1103 			srp->sr_nexttsn++;
1104 		}
1105 		goto inserted;
1106 	}
1107 
1108 	/* Next check if we should insert this frag at the beginning */
1109 	qmp = reassq_curr->b_cont;
1110 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1111 	if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1112 		dmp->b_cont = qmp;
1113 		reassq_curr->b_cont = dmp;
1114 		if (SCTP_DATA_GET_BBIT(*dc)) {
1115 			srp->sr_hasBchunk = B_TRUE;
1116 			srp->sr_nexttsn = ntohl((*dc)->sdh_tsn);
1117 		}
1118 		goto preinserted;
1119 	}
1120 
1121 	/* Insert this frag in it's correct order in the middle */
1122 	for (;;) {
1123 		/* Tail check above should have caught this */
1124 		ASSERT(qmp->b_cont != NULL);
1125 
1126 		qdc = (sctp_data_hdr_t *)qmp->b_cont->b_rptr;
1127 		if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1128 			/* insert here */
1129 			dmp->b_cont = qmp->b_cont;
1130 			qmp->b_cont = dmp;
1131 			break;
1132 		}
1133 		qmp = qmp->b_cont;
1134 	}
1135 preinserted:
1136 	/*
1137 	 * Need head of message and to be due to deliver, otherwise skip
1138 	 * the recalculation of the message length below.
1139 	 */
1140 	if (!srp->sr_hasBchunk || ntohl((*dc)->sdh_tsn) != srp->sr_nexttsn)
1141 		goto inserted;
1142 	/*
1143 	 * fraglen contains the length of consecutive chunks of fragments.
1144 	 * starting from the chunk we just inserted.
1145 	 */
1146 	tsn = srp->sr_nexttsn;
1147 	for (qmp = dmp; qmp != NULL; qmp = qmp->b_cont) {
1148 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1149 		if (tsn != ntohl(qdc->sdh_tsn))
1150 			break;
1151 		fraglen += ntohs(qdc->sdh_len);
1152 		tsn++;
1153 	}
1154 	srp->sr_nexttsn = tsn;
1155 	srp->sr_msglen += fraglen;
1156 inserted:
1157 	srp->sr_got++;
1158 	first_mp = reassq_curr->b_cont;
1159 	/* Prior to this frag either the beginning or end frag was missing */
1160 	if (srp->sr_needed == 0) {
1161 		/* used to check if we have the first and last fragments */
1162 		bdc = (sctp_data_hdr_t *)first_mp->b_rptr;
1163 		edc = (sctp_data_hdr_t *)srp->sr_tail->b_rptr;
1164 
1165 		/*
1166 		 * If we now have both the beginning and the end of the message,
1167 		 * calculate how many fragments in the complete message.
1168 		 */
1169 		if (SCTP_DATA_GET_BBIT(bdc) && SCTP_DATA_GET_EBIT(edc)) {
1170 			srp->sr_needed = ntohl(edc->sdh_tsn) -
1171 			    ntohl(bdc->sdh_tsn) + 1;
1172 		}
1173 	}
1174 
1175 	/*
1176 	 * Try partial delivery if the message length has exceeded the
1177 	 * partial delivery point. Only do this if we can immediately
1178 	 * deliver the partially assembled message, and only partially
1179 	 * deliver one message at a time (i.e. messages cannot be
1180 	 * intermixed arriving at the upper layer).
1181 	 * sctp_try_partial_delivery() will return a message consisting
1182 	 * of only consecutive fragments.
1183 	 */
1184 	if (srp->sr_needed != srp->sr_got) {
1185 		/* we don't have the full message yet */
1186 		dmp = NULL;
1187 		if (ntohl((*dc)->sdh_tsn) <= sctp->sctp_ftsn &&
1188 		    srp->sr_msglen >= sctp->sctp_pd_point &&
1189 		    srp->sr_ssn == sip->nextseq) {
1190 			dmp = sctp_try_partial_delivery(sctp, reassq_curr,
1191 			    srp, dc);
1192 		}
1193 		*tpfinished = B_FALSE;
1194 		/*
1195 		 * NULL unless a segment of the message now qualified for
1196 		 * partial_delivery and has been prepared for delivery by
1197 		 * sctp_try_partial_delivery().
1198 		 */
1199 		return (dmp);
1200 	}
1201 frag_done:
1202 	/*
1203 	 * Reassembly complete for this message, prepare the data for delivery.
1204 	 * First unlink the reassembly queue for this ssn from the list of
1205 	 * messages in reassembly.
1206 	 */
1207 	if (sip->istr_reass == reassq_curr) {
1208 		sip->istr_reass = reassq_curr->b_next;
1209 		if (reassq_curr->b_next)
1210 			reassq_curr->b_next->b_prev = NULL;
1211 	} else {
1212 		ASSERT(reassq_curr->b_prev != NULL);
1213 		reassq_curr->b_prev->b_next = reassq_curr->b_next;
1214 		if (reassq_curr->b_next)
1215 			reassq_curr->b_next->b_prev = reassq_curr->b_prev;
1216 	}
1217 
1218 	/*
1219 	 * Need to clean up b_prev and b_next as freeb() will
1220 	 * ASSERT that they are unused.
1221 	 */
1222 	reassq_curr->b_next = NULL;
1223 	reassq_curr->b_prev = NULL;
1224 
1225 	dmp = reassq_curr;
1226 	/* point to the head of the reassembled data message */
1227 	dmp = dmp->b_cont;
1228 	reassq_curr->b_cont = NULL;
1229 	freeb(reassq_curr);
1230 	/* Tell our caller that we are returning a complete message. */
1231 	*tpfinished = B_TRUE;
1232 
1233 	/*
1234 	 * Adjust all mblk's except the lead so their rptr's point to the
1235 	 * payload. sctp_data_chunk() will need to process the lead's data
1236 	 * data chunk section, so leave its rptr pointing at the data chunk
1237 	 * header.
1238 	 */
1239 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
1240 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
1241 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1242 		qmp->b_rptr = (uchar_t *)(qdc + 1);
1243 	}
1244 	BUMP_LOCAL(sctp->sctp_reassmsgs);
1245 
1246 	return (dmp);
1247 }
1248 
1249 static void
1250 sctp_add_dup(uint32_t tsn, mblk_t **dups)
1251 {
1252 	mblk_t *mp;
1253 	size_t bsize = SCTP_DUP_MBLK_SZ * sizeof (tsn);
1254 
1255 	if (dups == NULL) {
1256 		return;
1257 	}
1258 
1259 	/* first time? */
1260 	if (*dups == NULL) {
1261 		*dups = allocb(bsize, BPRI_MED);
1262 		if (*dups == NULL) {
1263 			return;
1264 		}
1265 	}
1266 
1267 	mp = *dups;
1268 	if ((mp->b_wptr - mp->b_rptr) >= bsize) {
1269 		/* maximum reached */
1270 		return;
1271 	}
1272 
1273 	/* add the duplicate tsn */
1274 	bcopy(&tsn, mp->b_wptr, sizeof (tsn));
1275 	mp->b_wptr += sizeof (tsn);
1276 	ASSERT((mp->b_wptr - mp->b_rptr) <= bsize);
1277 }
1278 
1279 /*
1280  * All incoming sctp data, complete messages and fragments are handled by
1281  * this function. Unless the U-bit is set in the data chunk it will be
1282  * delivered in order or queued until an in-order delivery can be made.
1283  */
1284 static void
1285 sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
1286     sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
1287 {
1288 	sctp_data_hdr_t *dc;
1289 	mblk_t *dmp, *pmp;
1290 	sctp_instr_t *instr;
1291 	int ubit;
1292 	int sid;
1293 	int isfrag;
1294 	uint16_t ssn;
1295 	uint32_t oftsn;
1296 	boolean_t can_deliver = B_TRUE;
1297 	uint32_t tsn;
1298 	int dlen;
1299 	boolean_t tpfinished = B_TRUE;
1300 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1301 	int	error;
1302 
1303 	/* The following are used multiple times, so we inline them */
1304 #define	SCTP_ACK_IT(sctp, tsn)						\
1305 	if (tsn == sctp->sctp_ftsn) {					\
1306 		dprint(2, ("data_chunk: acking next %x\n", tsn));	\
1307 		(sctp)->sctp_ftsn++;					\
1308 		if ((sctp)->sctp_sack_gaps > 0)				\
1309 			(sctp)->sctp_force_sack = 1;			\
1310 	} else if (SEQ_GT(tsn, sctp->sctp_ftsn)) {			\
1311 		/* Got a gap; record it */				\
1312 		BUMP_LOCAL(sctp->sctp_outseqtsns);			\
1313 		dprint(2, ("data_chunk: acking gap %x\n", tsn));	\
1314 		sctp_ack_add(&sctp->sctp_sack_info, tsn,		\
1315 		    &sctp->sctp_sack_gaps);				\
1316 		sctp->sctp_force_sack = 1;				\
1317 	}
1318 
1319 	dmp = NULL;
1320 
1321 	dc = (sctp_data_hdr_t *)ch;
1322 	tsn = ntohl(dc->sdh_tsn);
1323 
1324 	dprint(3, ("sctp_data_chunk: mp=%p tsn=%x\n", (void *)mp, tsn));
1325 
1326 	/* Check for duplicates */
1327 	if (SEQ_LT(tsn, sctp->sctp_ftsn)) {
1328 		dprint(4, ("sctp_data_chunk: dropping duplicate\n"));
1329 		BUMP_LOCAL(sctp->sctp_idupchunks);
1330 		sctp->sctp_force_sack = 1;
1331 		sctp_add_dup(dc->sdh_tsn, dups);
1332 		return;
1333 	}
1334 
1335 	/* Check for dups of sack'ed data */
1336 	if (sctp->sctp_sack_info != NULL) {
1337 		sctp_set_t *sp;
1338 
1339 		for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1340 			if (SEQ_GEQ(tsn, sp->begin) && SEQ_LEQ(tsn, sp->end)) {
1341 				dprint(4,
1342 				    ("sctp_data_chunk: dropping dup > "
1343 				    "cumtsn\n"));
1344 				BUMP_LOCAL(sctp->sctp_idupchunks);
1345 				sctp->sctp_force_sack = 1;
1346 				sctp_add_dup(dc->sdh_tsn, dups);
1347 				return;
1348 			}
1349 		}
1350 	}
1351 
1352 	/* We can no longer deliver anything up, but still need to handle it. */
1353 	if (SCTP_IS_DETACHED(sctp)) {
1354 		SCTPS_BUMP_MIB(sctps, sctpInClosed);
1355 		can_deliver = B_FALSE;
1356 	}
1357 
1358 	dlen = ntohs(dc->sdh_len) - sizeof (*dc);
1359 
1360 	/*
1361 	 * Check for buffer space. Note if this is the next expected TSN
1362 	 * we have to take it to avoid deadlock because we cannot deliver
1363 	 * later queued TSNs and thus clear buffer space without it.
1364 	 * We drop anything that is purely zero window probe data here.
1365 	 */
1366 	if ((sctp->sctp_rwnd - sctp->sctp_rxqueued < dlen) &&
1367 	    (tsn != sctp->sctp_ftsn || sctp->sctp_rwnd == 0)) {
1368 		/* Drop and SACK, but don't advance the cumulative TSN. */
1369 		sctp->sctp_force_sack = 1;
1370 		dprint(0, ("sctp_data_chunk: exceed rwnd %d rxqueued %d "
1371 		    "dlen %d ssn %d tsn %x\n", sctp->sctp_rwnd,
1372 		    sctp->sctp_rxqueued, dlen, ntohs(dc->sdh_ssn),
1373 		    ntohl(dc->sdh_tsn)));
1374 		return;
1375 	}
1376 
1377 	sid = ntohs(dc->sdh_sid);
1378 
1379 	/* Data received for a stream not negotiated for this association */
1380 	if (sid >= sctp->sctp_num_istr) {
1381 		sctp_bsc_t	inval_parm;
1382 
1383 		/* Will populate the CAUSE block in the ERROR chunk. */
1384 		inval_parm.bsc_sid = dc->sdh_sid;
1385 		/* RESERVED, ignored at the receiving end */
1386 		inval_parm.bsc_pad = 0;
1387 
1388 		/* ack and drop it */
1389 		sctp_add_err(sctp, SCTP_ERR_BAD_SID, (void *)&inval_parm,
1390 		    sizeof (sctp_bsc_t), fp);
1391 		SCTP_ACK_IT(sctp, tsn);
1392 		return;
1393 	}
1394 
1395 	/* unordered delivery OK for this data if ubit set */
1396 	ubit = SCTP_DATA_GET_UBIT(dc);
1397 	ASSERT(sctp->sctp_instr != NULL);
1398 
1399 	/* select per stream structure for this stream from the array */
1400 	instr = &sctp->sctp_instr[sid];
1401 	/* Initialize the stream, if not yet used */
1402 	if (instr->sctp == NULL)
1403 		instr->sctp = sctp;
1404 
1405 	/* Begin and End bit set would mean a complete message */
1406 	isfrag = !(SCTP_DATA_GET_BBIT(dc) && SCTP_DATA_GET_EBIT(dc));
1407 
1408 	/* The ssn of this sctp message and of any fragments in it */
1409 	ssn = ntohs(dc->sdh_ssn);
1410 
1411 	dmp = dupb(mp);
1412 	if (dmp == NULL) {
1413 		/* drop it and don't ack, let the peer retransmit */
1414 		return;
1415 	}
1416 	/*
1417 	 * Past header and payload, note: the underlying buffer may
1418 	 * contain further chunks from the same incoming IP packet,
1419 	 * if so db_ref will be greater than one.
1420 	 */
1421 	dmp->b_wptr = (uchar_t *)ch + ntohs(ch->sch_len);
1422 
1423 	sctp->sctp_rxqueued += dlen;
1424 
1425 	oftsn = sctp->sctp_ftsn;
1426 
1427 	if (isfrag) {
1428 
1429 		error = 0;
1430 		/* fragmented data chunk */
1431 		dmp->b_rptr = (uchar_t *)dc;
1432 		if (ubit) {
1433 			/* prepare data for unordered delivery */
1434 			dmp = sctp_uodata_frag(sctp, dmp, &dc);
1435 #if	DEBUG
1436 			if (dmp != NULL) {
1437 				ASSERT(instr ==
1438 				    &sctp->sctp_instr[sid]);
1439 			}
1440 #endif
1441 		} else {
1442 			/*
1443 			 * Assemble fragments and queue for ordered delivery,
1444 			 * dmp returned is NULL or the head of a complete or
1445 			 * "partial delivery" message. Any returned message
1446 			 * and all its fragments will have the same ssn as the
1447 			 * input fragment currently being handled.
1448 			 */
1449 			dmp = sctp_data_frag(sctp, dmp, &dc, &error, instr,
1450 			    &tpfinished);
1451 		}
1452 		if (error == ENOMEM) {
1453 			/* back out the adjustment made earlier */
1454 			sctp->sctp_rxqueued -= dlen;
1455 			/*
1456 			 * Don't ack the segment,
1457 			 * the peer will retransmit.
1458 			 */
1459 			return;
1460 		}
1461 
1462 		if (dmp == NULL) {
1463 			/*
1464 			 * The frag has been queued for later in-order delivery,
1465 			 * but the cumulative TSN may need to advance, so also
1466 			 * need to perform the gap ack checks at the done label.
1467 			 */
1468 			SCTP_ACK_IT(sctp, tsn);
1469 			DTRACE_PROBE4(sctp_data_frag_queued, sctp_t *, sctp,
1470 			    int, sid, int, tsn, uint16_t, ssn);
1471 			goto done;
1472 		}
1473 	}
1474 
1475 	/*
1476 	 * Unless message is the next for delivery to the ulp, queue complete
1477 	 * message in the correct order for ordered delivery.
1478 	 * Note: tpfinished is true when the incoming chunk contains a complete
1479 	 * message or is the final missing fragment which completed a message.
1480 	 */
1481 	if (!ubit && tpfinished && ssn != instr->nextseq) {
1482 		/* Adjust rptr to point at the data chunk for compares */
1483 		dmp->b_rptr = (uchar_t *)dc;
1484 
1485 		dprint(2,
1486 		    ("data_chunk: inserted %x in pq (ssn %d expected %d)\n",
1487 		    ntohl(dc->sdh_tsn), (int)(ssn), (int)(instr->nextseq)));
1488 
1489 		if (instr->istr_msgs == NULL) {
1490 			instr->istr_msgs = dmp;
1491 			ASSERT(dmp->b_prev == NULL && dmp->b_next == NULL);
1492 		} else {
1493 			mblk_t			*imblk = instr->istr_msgs;
1494 			sctp_data_hdr_t		*idc;
1495 
1496 			/*
1497 			 * XXXNeed to take sequence wraps into account,
1498 			 * ... and a more efficient insertion algo.
1499 			 */
1500 			for (;;) {
1501 				idc = (sctp_data_hdr_t *)imblk->b_rptr;
1502 				if (SSN_GT(ntohs(idc->sdh_ssn),
1503 				    ntohs(dc->sdh_ssn))) {
1504 					if (instr->istr_msgs == imblk) {
1505 						instr->istr_msgs = dmp;
1506 						dmp->b_next = imblk;
1507 						imblk->b_prev = dmp;
1508 					} else {
1509 						ASSERT(imblk->b_prev != NULL);
1510 						imblk->b_prev->b_next = dmp;
1511 						dmp->b_prev = imblk->b_prev;
1512 						imblk->b_prev = dmp;
1513 						dmp->b_next = imblk;
1514 					}
1515 					break;
1516 				}
1517 				if (imblk->b_next == NULL) {
1518 					imblk->b_next = dmp;
1519 					dmp->b_prev = imblk;
1520 					break;
1521 				}
1522 				imblk = imblk->b_next;
1523 			}
1524 		}
1525 		(instr->istr_nmsgs)++;
1526 		(sctp->sctp_istr_nmsgs)++;
1527 		SCTP_ACK_IT(sctp, tsn);
1528 		DTRACE_PROBE4(sctp_pqueue_completemsg, sctp_t *, sctp,
1529 		    int, sid, int, tsn, uint16_t, ssn);
1530 		return;
1531 	}
1532 
1533 	/*
1534 	 * Deliver the data directly. Recalculate dlen now since
1535 	 * we may have just reassembled this data.
1536 	 */
1537 	dlen = dmp->b_wptr - (uchar_t *)dc - sizeof (*dc);
1538 	for (pmp = dmp->b_cont; pmp != NULL; pmp = pmp->b_cont)
1539 		dlen += MBLKL(pmp);
1540 	ASSERT(sctp->sctp_rxqueued >= dlen);
1541 
1542 	/* Deliver the message. */
1543 	sctp->sctp_rxqueued -= dlen;
1544 
1545 	if (can_deliver) {
1546 		/* step past header to the payload */
1547 		dmp->b_rptr = (uchar_t *)(dc + 1);
1548 		if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
1549 		    ipp, ira) == 0) {
1550 			dprint(1, ("sctp_data_chunk: delivering %lu bytes\n",
1551 			    msgdsize(dmp)));
1552 			/*
1553 			 * We overload the meaning of b_flag for SCTP sockfs
1554 			 * internal use, to advise sockfs of partial delivery
1555 			 * semantics.
1556 			 */
1557 			dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA;
1558 			if (sctp->sctp_flowctrld) {
1559 				sctp->sctp_rwnd -= dlen;
1560 				if (sctp->sctp_rwnd < 0)
1561 					sctp->sctp_rwnd = 0;
1562 			}
1563 			if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
1564 			    msgdsize(dmp), 0, &error, NULL) <= 0) {
1565 				sctp->sctp_flowctrld = B_TRUE;
1566 			}
1567 			SCTP_ACK_IT(sctp, tsn);
1568 		} else {
1569 			/* No memory don't ack, the peer will retransmit. */
1570 			freemsg(dmp);
1571 			return;
1572 		}
1573 	} else {
1574 		/* Closed above, ack to peer and free the data */
1575 		freemsg(dmp);
1576 		SCTP_ACK_IT(sctp, tsn);
1577 	}
1578 
1579 	/*
1580 	 * Data now enqueued, may already have been processed and free'd
1581 	 * by the ULP (or we may have just freed it above, if we could not
1582 	 * deliver), so we must not reference it (this is why we saved the
1583 	 * ssn and ubit earlier).
1584 	 */
1585 	if (ubit != 0) {
1586 		BUMP_LOCAL(sctp->sctp_iudchunks);
1587 		goto done;
1588 	}
1589 	BUMP_LOCAL(sctp->sctp_idchunks);
1590 
1591 	/*
1592 	 * There was a partial delivery and it has not finished,
1593 	 * don't pull anything from the pqueues or increment the
1594 	 * nextseq. This msg must complete before starting on
1595 	 * the next ssn and the partial message must have the
1596 	 * same ssn as the next expected message..
1597 	 */
1598 	if (!tpfinished) {
1599 		DTRACE_PROBE4(sctp_partial_delivery, sctp_t *, sctp,
1600 		    int, sid, int, tsn, uint16_t, ssn);
1601 		/*
1602 		 * Verify the partial delivery is part of the
1603 		 * message expected for ordered delivery.
1604 		 */
1605 		if (ssn != instr->nextseq) {
1606 			DTRACE_PROBE4(sctp_partial_delivery_error,
1607 			    sctp_t *, sctp, int, sid, int, tsn,
1608 			    uint16_t, ssn);
1609 			cmn_err(CE_WARN, "sctp partial"
1610 			    " delivery error, sctp 0x%p"
1611 			    " sid = 0x%x ssn != nextseq"
1612 			    " tsn 0x%x ftsn 0x%x"
1613 			    " ssn 0x%x nextseq 0x%x",
1614 			    (void *)sctp, sid,
1615 			    tsn, sctp->sctp_ftsn, ssn,
1616 			    instr->nextseq);
1617 		}
1618 
1619 		ASSERT(ssn == instr->nextseq);
1620 		goto done;
1621 	}
1622 
1623 	if (ssn != instr->nextseq) {
1624 		DTRACE_PROBE4(sctp_inorder_delivery_error,
1625 		    sctp_t *, sctp, int, sid, int, tsn,
1626 		    uint16_t, ssn);
1627 		cmn_err(CE_WARN, "sctp in-order delivery error, sctp 0x%p "
1628 		    "sid = 0x%x ssn != nextseq ssn 0x%x nextseq 0x%x",
1629 		    (void *)sctp, sid, ssn, instr->nextseq);
1630 	}
1631 
1632 	ASSERT(ssn == instr->nextseq);
1633 
1634 	DTRACE_PROBE4(sctp_deliver_completemsg, sctp_t *, sctp, int, sid,
1635 	    int, tsn, uint16_t, ssn);
1636 
1637 	instr->nextseq = ssn + 1;
1638 
1639 	/*
1640 	 * Deliver any successive data chunks waiting in the instr pqueue
1641 	 * for the data just sent up.
1642 	 */
1643 	while (instr->istr_nmsgs > 0) {
1644 		dmp = (mblk_t *)instr->istr_msgs;
1645 		dc = (sctp_data_hdr_t *)dmp->b_rptr;
1646 		ssn = ntohs(dc->sdh_ssn);
1647 		tsn = ntohl(dc->sdh_tsn);
1648 		/* Stop at the first gap in the sequence */
1649 		if (ssn != instr->nextseq)
1650 			break;
1651 
1652 		DTRACE_PROBE4(sctp_deliver_pqueuedmsg, sctp_t *, sctp,
1653 		    int, sid, int, tsn, uint16_t, ssn);
1654 		/*
1655 		 * Ready to deliver all data before the gap
1656 		 * to the upper layer.
1657 		 */
1658 		(instr->istr_nmsgs)--;
1659 		(instr->nextseq)++;
1660 		(sctp->sctp_istr_nmsgs)--;
1661 
1662 		instr->istr_msgs = instr->istr_msgs->b_next;
1663 		if (instr->istr_msgs != NULL)
1664 			instr->istr_msgs->b_prev = NULL;
1665 		dmp->b_next = dmp->b_prev = NULL;
1666 
1667 		dprint(2, ("data_chunk: pulling %x from pq (ssn %d)\n",
1668 		    ntohl(dc->sdh_tsn), (int)ssn));
1669 
1670 		/*
1671 		 * Composite messages indicate this chunk was reassembled,
1672 		 * each b_cont represents another TSN; Follow the chain to
1673 		 * reach the frag with the last tsn in order to advance ftsn
1674 		 * shortly by calling SCTP_ACK_IT().
1675 		 */
1676 		dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
1677 		for (pmp = dmp->b_cont; pmp; pmp = pmp->b_cont)
1678 			dlen += MBLKL(pmp);
1679 
1680 		ASSERT(sctp->sctp_rxqueued >= dlen);
1681 
1682 		sctp->sctp_rxqueued -= dlen;
1683 		if (can_deliver) {
1684 			dmp->b_rptr = (uchar_t *)(dc + 1);
1685 			if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
1686 			    ipp, ira) == 0) {
1687 				dprint(1, ("sctp_data_chunk: delivering %lu "
1688 				    "bytes\n", msgdsize(dmp)));
1689 				/*
1690 				 * Meaning of b_flag overloaded for SCTP sockfs
1691 				 * internal use, advise sockfs of partial
1692 				 * delivery semantics.
1693 				 */
1694 				dmp->b_flag = tpfinished ?
1695 				    0 : SCTP_PARTIAL_DATA;
1696 				if (sctp->sctp_flowctrld) {
1697 					sctp->sctp_rwnd -= dlen;
1698 					if (sctp->sctp_rwnd < 0)
1699 						sctp->sctp_rwnd = 0;
1700 				}
1701 				if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
1702 				    msgdsize(dmp), 0, &error, NULL) <= 0) {
1703 					sctp->sctp_flowctrld = B_TRUE;
1704 				}
1705 				SCTP_ACK_IT(sctp, tsn);
1706 			} else {
1707 				/* don't ack, the peer will retransmit */
1708 				freemsg(dmp);
1709 				return;
1710 			}
1711 		} else {
1712 			/* Closed above, ack and free the data */
1713 			freemsg(dmp);
1714 			SCTP_ACK_IT(sctp, tsn);
1715 		}
1716 	}
1717 
1718 done:
1719 
1720 	/*
1721 	 * If there are gap reports pending, check if advancing
1722 	 * the ftsn here closes a gap. If so, we can advance
1723 	 * ftsn to the end of the set.
1724 	 */
1725 	if (sctp->sctp_sack_info != NULL &&
1726 	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
1727 		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
1728 	}
1729 	/*
1730 	 * If ftsn has moved forward, maybe we can remove gap reports.
1731 	 * NB: dmp may now be NULL, so don't dereference it here.
1732 	 */
1733 	if (oftsn != sctp->sctp_ftsn && sctp->sctp_sack_info != NULL) {
1734 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
1735 		    &sctp->sctp_sack_gaps);
1736 		dprint(2, ("data_chunk: removed acks before %x (num=%d)\n",
1737 		    sctp->sctp_ftsn - 1, sctp->sctp_sack_gaps));
1738 	}
1739 
1740 #ifdef	DEBUG
1741 	if (sctp->sctp_sack_info != NULL) {
1742 		ASSERT(sctp->sctp_ftsn != sctp->sctp_sack_info->begin);
1743 	}
1744 #endif
1745 
1746 #undef	SCTP_ACK_IT
1747 }
1748 
1749 void
1750 sctp_fill_sack(sctp_t *sctp, unsigned char *dst, int sacklen)
1751 {
1752 	sctp_chunk_hdr_t *sch;
1753 	sctp_sack_chunk_t *sc;
1754 	sctp_sack_frag_t *sf;
1755 	uint16_t num_gaps = sctp->sctp_sack_gaps;
1756 	sctp_set_t *sp;
1757 
1758 	/* Chunk hdr */
1759 	sch = (sctp_chunk_hdr_t *)dst;
1760 	sch->sch_id = CHUNK_SACK;
1761 	sch->sch_flags = 0;
1762 	sch->sch_len = htons(sacklen);
1763 
1764 	/* SACK chunk */
1765 	sctp->sctp_lastacked = sctp->sctp_ftsn - 1;
1766 
1767 	sc = (sctp_sack_chunk_t *)(sch + 1);
1768 	sc->ssc_cumtsn = htonl(sctp->sctp_lastacked);
1769 	if (sctp->sctp_rxqueued < sctp->sctp_rwnd) {
1770 		sc->ssc_a_rwnd = htonl(sctp->sctp_rwnd - sctp->sctp_rxqueued);
1771 	} else {
1772 		sc->ssc_a_rwnd = 0;
1773 	}
1774 	/* Remember the last window sent to peer. */
1775 	sctp->sctp_arwnd = sc->ssc_a_rwnd;
1776 	sc->ssc_numfrags = htons(num_gaps);
1777 	sc->ssc_numdups = 0;
1778 
1779 	/* lay in gap reports */
1780 	sf = (sctp_sack_frag_t *)(sc + 1);
1781 	for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1782 		uint16_t offset;
1783 
1784 		/* start */
1785 		if (sp->begin > sctp->sctp_lastacked) {
1786 			offset = (uint16_t)(sp->begin - sctp->sctp_lastacked);
1787 		} else {
1788 			/* sequence number wrap */
1789 			offset = (uint16_t)(UINT32_MAX - sctp->sctp_lastacked +
1790 			    sp->begin);
1791 		}
1792 		sf->ssf_start = htons(offset);
1793 
1794 		/* end */
1795 		if (sp->end >= sp->begin) {
1796 			offset += (uint16_t)(sp->end - sp->begin);
1797 		} else {
1798 			/* sequence number wrap */
1799 			offset += (uint16_t)(UINT32_MAX - sp->begin + sp->end);
1800 		}
1801 		sf->ssf_end = htons(offset);
1802 
1803 		sf++;
1804 		/* This is just for debugging (a la the following assertion) */
1805 		num_gaps--;
1806 	}
1807 
1808 	ASSERT(num_gaps == 0);
1809 
1810 	/* If the SACK timer is running, stop it */
1811 	if (sctp->sctp_ack_timer_running) {
1812 		sctp_timer_stop(sctp->sctp_ack_mp);
1813 		sctp->sctp_ack_timer_running = B_FALSE;
1814 	}
1815 
1816 	BUMP_LOCAL(sctp->sctp_obchunks);
1817 	BUMP_LOCAL(sctp->sctp_osacks);
1818 }
1819 
1820 mblk_t *
1821 sctp_make_sack(sctp_t *sctp, sctp_faddr_t *sendto, mblk_t *dups)
1822 {
1823 	mblk_t *smp;
1824 	size_t slen;
1825 	sctp_chunk_hdr_t *sch;
1826 	sctp_sack_chunk_t *sc;
1827 	int32_t acks_max;
1828 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1829 	uint32_t	dups_len;
1830 	sctp_faddr_t	*fp;
1831 
1832 	ASSERT(sendto != NULL);
1833 
1834 	if (sctp->sctp_force_sack) {
1835 		sctp->sctp_force_sack = 0;
1836 		goto checks_done;
1837 	}
1838 
1839 	acks_max = sctps->sctps_deferred_acks_max;
1840 	if (sctp->sctp_state == SCTPS_ESTABLISHED) {
1841 		if (sctp->sctp_sack_toggle < acks_max) {
1842 			/* no need to SACK right now */
1843 			dprint(2, ("sctp_make_sack: %p no sack (toggle)\n",
1844 			    (void *)sctp));
1845 			return (NULL);
1846 		} else if (sctp->sctp_sack_toggle >= acks_max) {
1847 			sctp->sctp_sack_toggle = 0;
1848 		}
1849 	}
1850 
1851 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1852 		dprint(2, ("sctp_make_sack: %p no sack (already)\n",
1853 		    (void *)sctp));
1854 		return (NULL);
1855 	}
1856 
1857 checks_done:
1858 	dprint(2, ("sctp_make_sack: acking %x\n", sctp->sctp_ftsn - 1));
1859 
1860 	if (dups != NULL)
1861 		dups_len = MBLKL(dups);
1862 	else
1863 		dups_len = 0;
1864 	slen = sizeof (*sch) + sizeof (*sc) +
1865 	    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1866 
1867 	/*
1868 	 * If there are error chunks, check and see if we can send the
1869 	 * SACK chunk and error chunks together in one packet.  If not,
1870 	 * send the error chunks out now.
1871 	 */
1872 	if (sctp->sctp_err_chunks != NULL) {
1873 		fp = SCTP_CHUNK_DEST(sctp->sctp_err_chunks);
1874 		if (sctp->sctp_err_len + slen + dups_len > fp->sf_pmss) {
1875 			if ((smp = sctp_make_mp(sctp, fp, 0)) == NULL) {
1876 				SCTP_KSTAT(sctps, sctp_send_err_failed);
1877 				SCTP_KSTAT(sctps, sctp_send_sack_failed);
1878 				freemsg(sctp->sctp_err_chunks);
1879 				sctp->sctp_err_chunks = NULL;
1880 				sctp->sctp_err_len = 0;
1881 				return (NULL);
1882 			}
1883 			smp->b_cont = sctp->sctp_err_chunks;
1884 			sctp_set_iplen(sctp, smp, fp->sf_ixa);
1885 			(void) conn_ip_output(smp, fp->sf_ixa);
1886 			BUMP_LOCAL(sctp->sctp_opkts);
1887 			sctp->sctp_err_chunks = NULL;
1888 			sctp->sctp_err_len = 0;
1889 		}
1890 	}
1891 	smp = sctp_make_mp(sctp, sendto, slen);
1892 	if (smp == NULL) {
1893 		SCTP_KSTAT(sctps, sctp_send_sack_failed);
1894 		return (NULL);
1895 	}
1896 	sch = (sctp_chunk_hdr_t *)smp->b_wptr;
1897 
1898 	sctp_fill_sack(sctp, smp->b_wptr, slen);
1899 	smp->b_wptr += slen;
1900 	if (dups != NULL) {
1901 		sc = (sctp_sack_chunk_t *)(sch + 1);
1902 		sc->ssc_numdups = htons(MBLKL(dups) / sizeof (uint32_t));
1903 		sch->sch_len = htons(slen + dups_len);
1904 		smp->b_cont = dups;
1905 	}
1906 
1907 	if (sctp->sctp_err_chunks != NULL) {
1908 		linkb(smp, sctp->sctp_err_chunks);
1909 		sctp->sctp_err_chunks = NULL;
1910 		sctp->sctp_err_len = 0;
1911 	}
1912 	return (smp);
1913 }
1914 
1915 /*
1916  * Check and see if we need to send a SACK chunk.  If it is needed,
1917  * send it out.  Return true if a SACK chunk is sent, false otherwise.
1918  */
1919 boolean_t
1920 sctp_sack(sctp_t *sctp, mblk_t *dups)
1921 {
1922 	mblk_t *smp;
1923 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1924 
1925 	/* If we are shutting down, let send_shutdown() bundle the SACK */
1926 	if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
1927 		sctp_send_shutdown(sctp, 0);
1928 	}
1929 
1930 	ASSERT(sctp->sctp_lastdata != NULL);
1931 
1932 	if ((smp = sctp_make_sack(sctp, sctp->sctp_lastdata, dups)) == NULL) {
1933 		/* The caller of sctp_sack() will not free the dups mblk. */
1934 		if (dups != NULL)
1935 			freeb(dups);
1936 		return (B_FALSE);
1937 	}
1938 	dprint(2, ("sctp_sack: sending to %p %x:%x:%x:%x\n",
1939 	    (void *)sctp->sctp_lastdata,
1940 	    SCTP_PRINTADDR(sctp->sctp_lastdata->sf_faddr)));
1941 
1942 	sctp->sctp_active = LBOLT_FASTPATH64;
1943 
1944 	SCTPS_BUMP_MIB(sctps, sctpOutAck);
1945 
1946 	sctp_set_iplen(sctp, smp, sctp->sctp_lastdata->sf_ixa);
1947 	(void) conn_ip_output(smp, sctp->sctp_lastdata->sf_ixa);
1948 	BUMP_LOCAL(sctp->sctp_opkts);
1949 	return (B_TRUE);
1950 }
1951 
1952 /*
1953  * This is called if we have a message that was partially sent and is
1954  * abandoned. The cum TSN will be the last chunk sent for this message,
1955  * subsequent chunks will be marked ABANDONED. We send a Forward TSN
1956  * chunk in this case with the TSN of the last sent chunk so that the
1957  * peer can clean up its fragment list for this message. This message
1958  * will be removed from the transmit list when the peer sends a SACK
1959  * back.
1960  */
1961 int
1962 sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta)
1963 {
1964 	sctp_data_hdr_t	*dh;
1965 	mblk_t		*nmp;
1966 	mblk_t		*head;
1967 	int32_t		unsent = 0;
1968 	mblk_t		*mp1 = meta->b_cont;
1969 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1970 	sctp_faddr_t	*fp = sctp->sctp_current;
1971 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1972 
1973 	dh = (sctp_data_hdr_t *)mp1->b_rptr;
1974 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, ntohl(dh->sdh_tsn))) {
1975 		sctp_ftsn_set_t	*sets = NULL;
1976 		uint_t		nsets = 0;
1977 		uint32_t	seglen = sizeof (uint32_t);
1978 		boolean_t	ubit = SCTP_DATA_GET_UBIT(dh);
1979 
1980 		while (mp1->b_next != NULL && SCTP_CHUNK_ISSENT(mp1->b_next))
1981 			mp1 = mp1->b_next;
1982 		dh = (sctp_data_hdr_t *)mp1->b_rptr;
1983 		sctp->sctp_adv_pap = ntohl(dh->sdh_tsn);
1984 		if (!ubit &&
1985 		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, &seglen)) {
1986 			sctp->sctp_adv_pap = adv_pap;
1987 			return (ENOMEM);
1988 		}
1989 		nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, seglen);
1990 		sctp_free_ftsn_set(sets);
1991 		if (nmp == NULL) {
1992 			sctp->sctp_adv_pap = adv_pap;
1993 			return (ENOMEM);
1994 		}
1995 		head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
1996 		if (head == NULL) {
1997 			sctp->sctp_adv_pap = adv_pap;
1998 			freemsg(nmp);
1999 			SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
2000 			return (ENOMEM);
2001 		}
2002 		SCTP_MSG_SET_ABANDONED(meta);
2003 		sctp_set_iplen(sctp, head, fp->sf_ixa);
2004 		(void) conn_ip_output(head, fp->sf_ixa);
2005 		BUMP_LOCAL(sctp->sctp_opkts);
2006 		if (!fp->sf_timer_running)
2007 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2008 		mp1 = mp1->b_next;
2009 		while (mp1 != NULL) {
2010 			ASSERT(!SCTP_CHUNK_ISSENT(mp1));
2011 			ASSERT(!SCTP_CHUNK_ABANDONED(mp1));
2012 			SCTP_ABANDON_CHUNK(mp1);
2013 			dh = (sctp_data_hdr_t *)mp1->b_rptr;
2014 			unsent += ntohs(dh->sdh_len) - sizeof (*dh);
2015 			mp1 = mp1->b_next;
2016 		}
2017 		ASSERT(sctp->sctp_unsent >= unsent);
2018 		sctp->sctp_unsent -= unsent;
2019 		/*
2020 		 * Update ULP the amount of queued data, which is
2021 		 * sent-unack'ed + unsent.
2022 		 */
2023 		if (!SCTP_IS_DETACHED(sctp))
2024 			SCTP_TXQ_UPDATE(sctp);
2025 		return (0);
2026 	}
2027 	return (-1);
2028 }
2029 
/*
 * Apply a cumulative ack of 'tsn' to the transmit list.
 *
 * The messages hanging off sctp_xmit_head are walked in order and, within
 * each message, the chunks linked through b_next.  Chunks whose TSN is at
 * or below the previously recorded cumulative ack (sctp_lastack_rxd) are
 * skipped.  Sent chunks whose TSN is at or below 'tsn' are marked acked and
 * the per-peer (sf_suna, sf_acked) and per-association (sctp_unacked)
 * counters are adjusted.  A message is unlinked from the list once the walk
 * has gone past all of its chunks or has hit an abandoned chunk in it.  The
 * walk stops at the first unsent chunk or the first chunk with a TSN beyond
 * 'tsn'.
 *
 * On return *first_unacked is the chunk at which the walk stopped, or NULL
 * if every message on the list was consumed.  sctp_lastack_rxd is set to
 * 'tsn' and sctp_adv_pap is pulled up to it if it was behind.
 *
 * Returns the number of bytes (data chunk headers included) newly acked by
 * this call.  A return of 0 is accounted as a duplicate ack.
 */
uint32_t
sctp_cumack(sctp_t *sctp, uint32_t tsn, mblk_t **first_unacked)
{
	mblk_t *ump, *nump, *mp = NULL;
	uint16_t chunklen;
	uint32_t xtsn;
	sctp_faddr_t *fp;
	sctp_data_hdr_t *sdc;
	uint32_t cumack_forward = 0;
	sctp_msg_hdr_t	*mhdr;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	ump = sctp->sctp_xmit_head;

	/*
	 * Free messages only when they're completely acked.
	 */
	while (ump != NULL) {
		mhdr = (sctp_msg_hdr_t *)ump->b_rptr;
		for (mp = ump->b_cont; mp != NULL; mp = mp->b_next) {
			/*
			 * An abandoned chunk ends the walk of this message.
			 * Leaving the loop with mp == NULL falls through to
			 * the unlink code below, so the message is taken off
			 * the list.
			 */
			if (SCTP_CHUNK_ABANDONED(mp)) {
				ASSERT(SCTP_IS_MSG_ABANDONED(ump));
				mp = NULL;
				break;
			}
			/*
			 * We check for abandoned message if we are PR-SCTP
			 * aware, if this is not the first chunk in the
			 * message (b_cont) and if the message is marked
			 * abandoned.
			 */
			if (!SCTP_CHUNK_ISSENT(mp)) {
				if (sctp->sctp_prsctp_aware &&
				    mp != ump->b_cont &&
				    (SCTP_IS_MSG_ABANDONED(ump) ||
				    SCTP_MSG_TO_BE_ABANDONED(ump, mhdr,
				    sctp))) {
					(void) sctp_check_abandoned_msg(sctp,
					    ump);
				}
				/* Unsent data cannot be acked; stop here. */
				goto cum_ack_done;
			}
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
			/* Already covered by the previous cumulative ack. */
			if (SEQ_GEQ(sctp->sctp_lastack_rxd, xtsn))
				continue;
			if (SEQ_GEQ(tsn, xtsn)) {
				fp = SCTP_CHUNK_DEST(mp);
				chunklen = ntohs(sdc->sdh_len);

				if (sctp->sctp_out_time != 0 &&
				    xtsn == sctp->sctp_rtt_tsn) {
					/* Got a new RTT measurement */
					sctp_update_rtt(sctp, fp,
					    ddi_get_lbolt64() -
					    sctp->sctp_out_time);
					sctp->sctp_out_time = 0;
				}
				/*
				 * The chunk was marked acked earlier, so
				 * the counters below have already been
				 * adjusted for it.
				 */
				if (SCTP_CHUNK_ISACKED(mp))
					continue;
				SCTP_CHUNK_SET_SACKCNT(mp, 0);
				SCTP_CHUNK_ACKED(mp);
				ASSERT(fp->sf_suna >= chunklen);
				fp->sf_suna -= chunklen;
				fp->sf_acked += chunklen;
				cumack_forward += chunklen;
				/*
				 * sctp_unacked counts payload only, hence the
				 * data chunk header is taken off chunklen.
				 */
				ASSERT(sctp->sctp_unacked >=
				    (chunklen - sizeof (*sdc)));
				sctp->sctp_unacked -=
				    (chunklen - sizeof (*sdc));
				if (fp->sf_suna == 0) {
					/* all outstanding data acked */
					fp->sf_pba = 0;
					SCTP_FADDR_TIMER_STOP(fp);
				} else {
					SCTP_FADDR_TIMER_RESTART(sctp, fp,
					    fp->sf_rto);
				}
			} else {
				/* First chunk beyond the cumulative ack. */
				goto cum_ack_done;
			}
		}
		/*
		 * We are done with this message (mp is NULL here): unlink
		 * it from the head of the transmit list.
		 */
		nump = ump->b_next;
		if (nump != NULL)
			nump->b_prev = NULL;
		if (ump == sctp->sctp_xmit_tail)
			sctp->sctp_xmit_tail = nump;
		if (SCTP_IS_MSG_ABANDONED(ump)) {
			/*
			 * Abandoned messages are handed to
			 * sctp_sendfail_event() rather than sctp_free_msg().
			 */
			BUMP_LOCAL(sctp->sctp_prsctpdrop);
			ump->b_next = NULL;
			sctp_sendfail_event(sctp, ump, 0, B_TRUE);
		} else {
			sctp_free_msg(ump);
		}
		sctp->sctp_xmit_head = ump = nump;
	}
cum_ack_done:
	*first_unacked = mp;
	if (cumack_forward > 0) {
		SCTPS_BUMP_MIB(sctps, sctpInAck);
		/*
		 * sctp_lastack_rxd still holds the previous cumulative ack
		 * at this point; it is only updated to 'tsn' further down.
		 */
		if (SEQ_GT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn)) {
			sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd;
		}

		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);

		/* Time to send a shutdown? */
		if (sctp->sctp_state == SCTPS_SHUTDOWN_PENDING) {
			sctp_send_shutdown(sctp, 0);
		}
		sctp->sctp_xmit_unacked = mp;
	} else {
		/* dup ack */
		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
	}
	/*
	 * Record the new cumulative ack and make sure the advanced peer ack
	 * point never lags behind it.
	 */
	sctp->sctp_lastack_rxd = tsn;
	if (SEQ_LT(sctp->sctp_adv_pap, sctp->sctp_lastack_rxd))
		sctp->sctp_adv_pap = sctp->sctp_lastack_rxd;
	ASSERT(sctp->sctp_xmit_head || sctp->sctp_unacked == 0);

	return (cumack_forward);
}
2157 
2158 static int
2159 sctp_set_frwnd(sctp_t *sctp, uint32_t frwnd)
2160 {
2161 	uint32_t orwnd;
2162 
2163 	if (sctp->sctp_unacked > frwnd) {
2164 		sctp->sctp_frwnd = 0;
2165 		return (0);
2166 	}
2167 	orwnd = sctp->sctp_frwnd;
2168 	sctp->sctp_frwnd = frwnd - sctp->sctp_unacked;
2169 	if (orwnd < sctp->sctp_frwnd) {
2170 		return (1);
2171 	} else {
2172 		return (0);
2173 	}
2174 }
2175 
2176 /*
2177  * For un-ordered messages.
2178  * Walk the sctp->sctp_uo_frag list and remove any fragments with TSN
2179  * less than/equal to ftsn. Fragments for un-ordered messages are
2180  * strictly in sequence (w.r.t TSN).
2181  */
2182 static int
2183 sctp_ftsn_check_uo_frag(sctp_t *sctp, uint32_t ftsn)
2184 {
2185 	mblk_t		*hmp;
2186 	mblk_t		*hmp_next;
2187 	sctp_data_hdr_t	*dc;
2188 	int		dlen = 0;
2189 
2190 	hmp = sctp->sctp_uo_frags;
2191 	while (hmp != NULL) {
2192 		hmp_next = hmp->b_next;
2193 		dc = (sctp_data_hdr_t *)hmp->b_rptr;
2194 		if (SEQ_GT(ntohl(dc->sdh_tsn), ftsn))
2195 			return (dlen);
2196 		sctp->sctp_uo_frags = hmp_next;
2197 		if (hmp_next != NULL)
2198 			hmp_next->b_prev = NULL;
2199 		hmp->b_next = NULL;
2200 		dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2201 		freeb(hmp);
2202 		hmp = hmp_next;
2203 	}
2204 	return (dlen);
2205 }
2206 
2207 /*
2208  * For ordered messages.
2209  * Check for existing fragments for an sid-ssn pair reported as abandoned,
2210  * hence will not receive, in the Forward TSN. If there are fragments, then
2211  * we just nuke them. If and when Partial Delivery API is supported, we
2212  * would need to send a notification to the upper layer about this.
2213  */
2214 static int
2215 sctp_ftsn_check_frag(sctp_t *sctp, uint16_t ssn, sctp_instr_t *sip)
2216 {
2217 	sctp_reass_t	*srp;
2218 	mblk_t		*hmp;
2219 	mblk_t		*dmp;
2220 	mblk_t		*hmp_next;
2221 	sctp_data_hdr_t	*dc;
2222 	int		dlen = 0;
2223 
2224 	hmp = sip->istr_reass;
2225 	while (hmp != NULL) {
2226 		hmp_next = hmp->b_next;
2227 		srp = (sctp_reass_t *)DB_BASE(hmp);
2228 		if (SSN_GT(srp->sr_ssn, ssn))
2229 			return (dlen);
2230 		/*
2231 		 * If we had sent part of this message up, send a partial
2232 		 * delivery event. Since this is ordered delivery, we should
2233 		 * have sent partial message only for the next in sequence,
2234 		 * hence the ASSERT. See comments in sctp_data_chunk() for
2235 		 * trypartial.
2236 		 */
2237 		if (srp->sr_partial_delivered) {
2238 			if (srp->sr_ssn != sip->nextseq)
2239 				cmn_err(CE_WARN, "sctp partial"
2240 				    " delivery notify, sctp 0x%p"
2241 				    " sip = 0x%p ssn != nextseq"
2242 				    " ssn 0x%x nextseq 0x%x",
2243 				    (void *)sctp, (void *)sip,
2244 				    srp->sr_ssn, sip->nextseq);
2245 			ASSERT(sip->nextseq == srp->sr_ssn);
2246 			sctp_partial_delivery_event(sctp);
2247 		}
2248 		/* Take it out of the reass queue */
2249 		sip->istr_reass = hmp_next;
2250 		if (hmp_next != NULL)
2251 			hmp_next->b_prev = NULL;
2252 		hmp->b_next = NULL;
2253 		ASSERT(hmp->b_prev == NULL);
2254 		dmp = hmp;
2255 		ASSERT(DB_TYPE(hmp) == M_CTL);
2256 		dmp = hmp->b_cont;
2257 		hmp->b_cont = NULL;
2258 		freeb(hmp);
2259 		hmp = dmp;
2260 		while (dmp != NULL) {
2261 			dc = (sctp_data_hdr_t *)dmp->b_rptr;
2262 			dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2263 			dmp = dmp->b_cont;
2264 		}
2265 		freemsg(hmp);
2266 		hmp = hmp_next;
2267 	}
2268 	return (dlen);
2269 }
2270 
2271 /*
2272  * Update sctp_ftsn to the cumulative TSN from the Forward TSN chunk. Remove
2273  * any SACK gaps less than the newly updated sctp_ftsn. Walk through the
2274  * sid-ssn pair in the Forward TSN and for each, clean the fragment list
2275  * for this pair, if needed, and check if we can deliver subsequent
2276  * messages, if any, from the instream queue (that were waiting for this
2277  * sid-ssn message to show up). Once we are done try to update the SACK
2278  * info. We could get a duplicate Forward TSN, in which case just send
2279  * a SACK. If any of the sid values in the Forward TSN is invalid,
2280  * send back an "Invalid Stream Identifier" error and continue processing
2281  * the rest.
2282  */
2283 static void
2284 sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp,
2285     ip_pkt_t *ipp, ip_recv_attr_t *ira)
2286 {
2287 	uint32_t	*ftsn = (uint32_t *)(ch + 1);
2288 	ftsn_entry_t	*ftsn_entry;
2289 	sctp_instr_t	*instr;
2290 	boolean_t	can_deliver = B_TRUE;
2291 	size_t		dlen;
2292 	int		flen;
2293 	mblk_t		*dmp;
2294 	mblk_t		*pmp;
2295 	sctp_data_hdr_t	*dc;
2296 	ssize_t		remaining;
2297 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2298 
2299 	*ftsn = ntohl(*ftsn);
2300 	remaining =  ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*ftsn);
2301 
2302 	if (SCTP_IS_DETACHED(sctp)) {
2303 		SCTPS_BUMP_MIB(sctps, sctpInClosed);
2304 		can_deliver = B_FALSE;
2305 	}
2306 	/*
2307 	 * un-ordered messages don't have SID-SSN pair entries, we check
2308 	 * for any fragments (for un-ordered message) to be discarded using
2309 	 * the cumulative FTSN.
2310 	 */
2311 	flen = sctp_ftsn_check_uo_frag(sctp, *ftsn);
2312 	if (flen > 0) {
2313 		ASSERT(sctp->sctp_rxqueued >= flen);
2314 		sctp->sctp_rxqueued -= flen;
2315 	}
2316 	ftsn_entry = (ftsn_entry_t *)(ftsn + 1);
2317 	while (remaining >= sizeof (*ftsn_entry)) {
2318 		ftsn_entry->ftsn_sid = ntohs(ftsn_entry->ftsn_sid);
2319 		ftsn_entry->ftsn_ssn = ntohs(ftsn_entry->ftsn_ssn);
2320 		if (ftsn_entry->ftsn_sid >= sctp->sctp_num_istr) {
2321 			sctp_bsc_t	inval_parm;
2322 
2323 			/* Will populate the CAUSE block in the ERROR chunk. */
2324 			inval_parm.bsc_sid = htons(ftsn_entry->ftsn_sid);
2325 			/* RESERVED, ignored at the receiving end */
2326 			inval_parm.bsc_pad = 0;
2327 
2328 			sctp_add_err(sctp, SCTP_ERR_BAD_SID,
2329 			    (void *)&inval_parm, sizeof (sctp_bsc_t), fp);
2330 			ftsn_entry++;
2331 			remaining -= sizeof (*ftsn_entry);
2332 			continue;
2333 		}
2334 		instr = &sctp->sctp_instr[ftsn_entry->ftsn_sid];
2335 		flen = sctp_ftsn_check_frag(sctp, ftsn_entry->ftsn_ssn, instr);
2336 		/* Indicates frags were nuked, update rxqueued */
2337 		if (flen > 0) {
2338 			ASSERT(sctp->sctp_rxqueued >= flen);
2339 			sctp->sctp_rxqueued -= flen;
2340 		}
2341 		/*
2342 		 * It is possible to receive an FTSN chunk with SSN smaller
2343 		 * than then nextseq if this chunk is a retransmission because
2344 		 * of incomplete processing when it was first processed.
2345 		 */
2346 		if (SSN_GE(ftsn_entry->ftsn_ssn, instr->nextseq))
2347 			instr->nextseq = ftsn_entry->ftsn_ssn + 1;
2348 		while (instr->istr_nmsgs > 0) {
2349 			mblk_t	*next;
2350 
2351 			dmp = (mblk_t *)instr->istr_msgs;
2352 			dc = (sctp_data_hdr_t *)dmp->b_rptr;
2353 			if (ntohs(dc->sdh_ssn) != instr->nextseq)
2354 				break;
2355 
2356 			next = dmp->b_next;
2357 			dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
2358 			for (pmp = dmp->b_cont; pmp != NULL;
2359 			    pmp = pmp->b_cont) {
2360 				dlen += MBLKL(pmp);
2361 			}
2362 			if (can_deliver) {
2363 				int error;
2364 
2365 				dmp->b_rptr = (uchar_t *)(dc + 1);
2366 				dmp->b_next = NULL;
2367 				ASSERT(dmp->b_prev == NULL);
2368 				if (sctp_input_add_ancillary(sctp,
2369 				    &dmp, dc, fp, ipp, ira) == 0) {
2370 					sctp->sctp_rxqueued -= dlen;
2371 					/*
2372 					 * Override b_flag for SCTP sockfs
2373 					 * internal use
2374 					 */
2375 
2376 					dmp->b_flag = 0;
2377 					if (sctp->sctp_flowctrld) {
2378 						sctp->sctp_rwnd -= dlen;
2379 						if (sctp->sctp_rwnd < 0)
2380 							sctp->sctp_rwnd = 0;
2381 					}
2382 					if (sctp->sctp_ulp_recv(
2383 					    sctp->sctp_ulpd, dmp, msgdsize(dmp),
2384 					    0, &error, NULL) <= 0) {
2385 						sctp->sctp_flowctrld = B_TRUE;
2386 					}
2387 				} else {
2388 					/*
2389 					 * We will resume processing when
2390 					 * the FTSN chunk is re-xmitted.
2391 					 */
2392 					dmp->b_rptr = (uchar_t *)dc;
2393 					dmp->b_next = next;
2394 					dprint(0,
2395 					    ("FTSN dequeuing %u failed\n",
2396 					    ntohs(dc->sdh_ssn)));
2397 					return;
2398 				}
2399 			} else {
2400 				sctp->sctp_rxqueued -= dlen;
2401 				ASSERT(dmp->b_prev == NULL);
2402 				dmp->b_next = NULL;
2403 				freemsg(dmp);
2404 			}
2405 			instr->istr_nmsgs--;
2406 			instr->nextseq++;
2407 			sctp->sctp_istr_nmsgs--;
2408 			if (next != NULL)
2409 				next->b_prev = NULL;
2410 			instr->istr_msgs = next;
2411 		}
2412 		ftsn_entry++;
2413 		remaining -= sizeof (*ftsn_entry);
2414 	}
2415 	/* Duplicate FTSN */
2416 	if (*ftsn <= (sctp->sctp_ftsn - 1)) {
2417 		sctp->sctp_force_sack = 1;
2418 		return;
2419 	}
2420 	/* Advance cum TSN to that reported in the Forward TSN chunk */
2421 	sctp->sctp_ftsn = *ftsn + 1;
2422 
2423 	/* Remove all the SACK gaps before the new cum TSN */
2424 	if (sctp->sctp_sack_info != NULL) {
2425 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
2426 		    &sctp->sctp_sack_gaps);
2427 	}
2428 	/*
2429 	 * If there are gap reports pending, check if advancing
2430 	 * the ftsn here closes a gap. If so, we can advance
2431 	 * ftsn to the end of the set.
2432 	 * If ftsn has moved forward, maybe we can remove gap reports.
2433 	 */
2434 	if (sctp->sctp_sack_info != NULL &&
2435 	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
2436 		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
2437 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
2438 		    &sctp->sctp_sack_gaps);
2439 	}
2440 }
2441 
2442 /*
2443  * When we have processed a SACK we check to see if we can advance the
2444  * cumulative TSN if there are abandoned chunks immediately following
2445  * the updated cumulative TSN. If there are, we attempt to send a
2446  * Forward TSN chunk.
2447  */
2448 static void
2449 sctp_check_abandoned_data(sctp_t *sctp, sctp_faddr_t *fp)
2450 {
2451 	mblk_t		*meta = sctp->sctp_xmit_head;
2452 	mblk_t		*mp;
2453 	mblk_t		*nmp;
2454 	uint32_t	seglen;
2455 	uint32_t	adv_pap = sctp->sctp_adv_pap;
2456 
2457 	/*
2458 	 * We only check in the first meta since otherwise we can't
2459 	 * advance the cumulative ack point. We just look for chunks
2460 	 * marked for retransmission, else we might prematurely
2461 	 * send an FTSN for a sent, but unacked, chunk.
2462 	 */
2463 	for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2464 		if (!SCTP_CHUNK_ISSENT(mp))
2465 			return;
2466 		if (SCTP_CHUNK_WANT_REXMIT(mp))
2467 			break;
2468 	}
2469 	if (mp == NULL)
2470 		return;
2471 	sctp_check_adv_ack_pt(sctp, meta, mp);
2472 	if (SEQ_GT(sctp->sctp_adv_pap, adv_pap)) {
2473 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
2474 		if (nmp == NULL) {
2475 			sctp->sctp_adv_pap = adv_pap;
2476 			if (!fp->sf_timer_running)
2477 				SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2478 			return;
2479 		}
2480 		sctp_set_iplen(sctp, nmp, fp->sf_ixa);
2481 		(void) conn_ip_output(nmp, fp->sf_ixa);
2482 		BUMP_LOCAL(sctp->sctp_opkts);
2483 		if (!fp->sf_timer_running)
2484 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2485 	}
2486 }
2487 
/*
 * Process SACK gap blocks that are not ordered by start TSN.
 *
 * The processing here follows the same logic in sctp_got_sack(), the reason
 * we do this separately is because, usually, gap blocks are ordered and
 * we can process it in sctp_got_sack(). However if they aren't we would
 * need to do some additional non-optimal stuff when we start processing the
 * unordered gaps. To that effect sctp_got_sack() does the processing in the
 * simple case and this does the same in the more involved case.
 *
 * Arguments:
 *	ctsn		cumulative TSN; gap offsets are relative to it
 *	ssf, num_gaps	the gap blocks still to be processed
 *	umphead, mphead	message and chunk at which every walk of the
 *			transmit list (re)starts
 *	trysend		set to -1 if a gap block covers a TSN beyond
 *			sctp_ltsn - 1, and to 1 if a chunk reaches the fast
 *			retransmit threshold
 *	fast_recovery	in/out; set to B_TRUE if fast recovery is entered
 *	fr_xtsn		SACK counts up to this TSN are taken to have been
 *			incremented already by sctp_got_sack()
 *
 * Returns the number of bytes (data chunk headers included) newly marked
 * acked by the gap blocks handled here.
 */
static uint32_t
sctp_process_uo_gaps(sctp_t *sctp, uint32_t ctsn, sctp_sack_frag_t *ssf,
    int num_gaps, mblk_t *umphead, mblk_t *mphead, int *trysend,
    boolean_t *fast_recovery, uint32_t fr_xtsn)
{
	uint32_t		xtsn;
	uint32_t		gapstart = 0;
	uint32_t		gapend = 0;
	int			gapcnt;
	uint16_t		chunklen;
	sctp_data_hdr_t		*sdc;
	int			gstart;
	mblk_t			*ump = umphead;
	mblk_t			*mp = mphead;
	sctp_faddr_t		*fp;
	uint32_t		acked = 0;
	sctp_stack_t		*sctps = sctp->sctp_sctps;

	/*
	 * gstart tracks the last (in the order of TSN) gapstart that
	 * we process in this SACK gaps walk.
	 */
	gstart = ctsn;

	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	xtsn = ntohl(sdc->sdh_tsn);
	for (gapcnt = 0; gapcnt < num_gaps; gapcnt++, ssf++) {
		/* gapstart is still 0 only for the first block we look at. */
		if (gapstart != 0) {
			/*
			 * If we have reached the end of the transmit list or
			 * hit an unsent chunk or encountered an unordered gap
			 * block start from the ctsn again.
			 */
			if (ump == NULL || !SCTP_CHUNK_ISSENT(mp) ||
			    SEQ_LT(ctsn + ntohs(ssf->ssf_start), xtsn)) {
				ump = umphead;
				mp = mphead;
				sdc = (sctp_data_hdr_t *)mp->b_rptr;
				xtsn = ntohl(sdc->sdh_tsn);
			}
		}

		gapstart = ctsn + ntohs(ssf->ssf_start);
		gapend = ctsn + ntohs(ssf->ssf_end);

		/*
		 * Sanity checks:
		 *
		 * 1. SACK for TSN we have not sent - ABORT
		 * 2. Invalid or spurious gaps, ignore all gaps
		 */
		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
			*trysend = -1;
			return (acked);
		} else if (SEQ_LT(gapend, gapstart) ||
		    SEQ_LEQ(gapstart, ctsn)) {
			break;
		}
		/*
		 * The xtsn can be the TSN processed for the last gap
		 * (gapend) or it could be the cumulative TSN. We continue
		 * with the last xtsn as long as the gaps are ordered, when
		 * we hit an unordered gap, we re-start from the cumulative
		 * TSN. For the first gap it is always the cumulative TSN.
		 */
		while (xtsn != gapstart) {
			/*
			 * We can't reliably check for reneged chunks
			 * when walking the unordered list, so we don't.
			 * In case the peer reneges then we will end up
			 * sending the reneged chunk via timeout.
			 */
			mp = mp->b_next;
			if (mp == NULL) {
				ump = ump->b_next;
				/*
				 * ump can't be NULL because of the sanity
				 * check above.
				 */
				ASSERT(ump != NULL);
				mp = ump->b_cont;
			}
			/*
			 * mp can't be unsent because of the sanity check
			 * above.
			 */
			ASSERT(SCTP_CHUNK_ISSENT(mp));
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
		}
		/*
		 * Now that we have found the chunk with TSN == 'gapstart',
		 * let's walk till we hit the chunk with TSN == 'gapend'.
		 * All intermediate chunks will be marked ACKED, if they
		 * haven't already been.
		 */
		while (SEQ_LEQ(xtsn, gapend)) {
			/*
			 * SACKed
			 */
			SCTP_CHUNK_SET_SACKCNT(mp, 0);
			if (!SCTP_CHUNK_ISACKED(mp)) {
				SCTP_CHUNK_ACKED(mp);

				fp = SCTP_CHUNK_DEST(mp);
				chunklen = ntohs(sdc->sdh_len);
				ASSERT(fp->sf_suna >= chunklen);
				fp->sf_suna -= chunklen;
				if (fp->sf_suna == 0) {
					/* All outstanding data acked. */
					fp->sf_pba = 0;
					SCTP_FADDR_TIMER_STOP(fp);
				}
				fp->sf_acked += chunklen;
				acked += chunklen;
				/*
				 * sctp_unacked counts payload only, so the
				 * data chunk header is not deducted from it.
				 */
				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
				ASSERT(sctp->sctp_unacked >= 0);
			}
			/*
			 * Move to the next message in the transmit list
			 * if we are done with all the chunks from the current
			 * message. Note, it is possible to hit the end of the
			 * transmit list here, i.e. if we have already completed
			 * processing the gap block.
			 */
			mp = mp->b_next;
			if (mp == NULL) {
				ump = ump->b_next;
				if (ump == NULL) {
					ASSERT(xtsn == gapend);
					break;
				}
				mp = ump->b_cont;
			}
			/*
			 * Likewise, we can hit an unsent chunk once we have
			 * completed processing the gap block.
			 */
			if (!SCTP_CHUNK_ISSENT(mp)) {
				ASSERT(xtsn == gapend);
				break;
			}
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
		}
		/*
		 * We keep track of the last gap we successfully processed
		 * so that we can terminate the walk below for incrementing
		 * the SACK count.
		 */
		if (SEQ_LT(gstart, gapstart))
			gstart = gapstart;
	}
	/*
	 * Check if have incremented the SACK count for all unacked TSNs in
	 * sctp_got_sack(), if so we are done.
	 */
	if (SEQ_LEQ(gstart, fr_xtsn))
		return (acked);

	/*
	 * Second pass: starting over from the head, bump the SACK count of
	 * every chunk below the highest gap start seen that is still not
	 * acked, and trigger fast retransmit where the threshold is reached.
	 */
	ump = umphead;
	mp = mphead;
	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	xtsn = ntohl(sdc->sdh_tsn);
	while (SEQ_LT(xtsn, gstart)) {
		/*
		 * We have incremented SACK count for TSNs less than fr_tsn
		 * in sctp_got_sack(), so don't increment them again here.
		 */
		if (SEQ_GT(xtsn, fr_xtsn) && !SCTP_CHUNK_ISACKED(mp)) {
			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
			if (SCTP_CHUNK_SACKCNT(mp) ==
			    sctps->sctps_fast_rxt_thresh) {
				SCTP_CHUNK_REXMIT(sctp, mp);
				sctp->sctp_chk_fast_rexmit = B_TRUE;
				*trysend = 1;
				if (!*fast_recovery) {
					/*
					 * Entering fast recovery: halve the
					 * congestion window of the chunk's
					 * destination, but keep ssthresh at
					 * no less than twice sf_pmss.
					 */
					fp = SCTP_CHUNK_DEST(mp);
					fp->sf_ssthresh = fp->sf_cwnd / 2;
					if (fp->sf_ssthresh < 2 * fp->sf_pmss) {
						fp->sf_ssthresh =
						    2 * fp->sf_pmss;
					}
					fp->sf_cwnd = fp->sf_ssthresh;
					fp->sf_pba = 0;
					sctp->sctp_recovery_tsn =
					    sctp->sctp_ltsn - 1;
					*fast_recovery = B_TRUE;
				}
			}
		}
		mp = mp->b_next;
		if (mp == NULL) {
			ump = ump->b_next;
			/* We can't get to the end of the transmit list here */
			ASSERT(ump != NULL);
			mp = ump->b_cont;
		}
		/* We can't hit an unsent chunk here */
		ASSERT(SCTP_CHUNK_ISSENT(mp));
		sdc = (sctp_data_hdr_t *)mp->b_rptr;
		xtsn = ntohl(sdc->sdh_tsn);
	}
	return (acked);
}
2706 
2707 static int
2708 sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch)
2709 {
2710 	sctp_sack_chunk_t	*sc;
2711 	sctp_data_hdr_t		*sdc;
2712 	sctp_sack_frag_t	*ssf;
2713 	mblk_t			*ump;
2714 	mblk_t			*mp;
2715 	mblk_t			*mp1;
2716 	uint32_t		cumtsn;
2717 	uint32_t		xtsn;
2718 	uint32_t		gapstart = 0;
2719 	uint32_t		gapend = 0;
2720 	uint32_t		acked = 0;
2721 	uint16_t		chunklen;
2722 	sctp_faddr_t		*fp;
2723 	int			num_gaps;
2724 	int			trysend = 0;
2725 	int			i;
2726 	boolean_t		fast_recovery = B_FALSE;
2727 	boolean_t		cumack_forward = B_FALSE;
2728 	boolean_t		fwd_tsn = B_FALSE;
2729 	sctp_stack_t		*sctps = sctp->sctp_sctps;
2730 
2731 	BUMP_LOCAL(sctp->sctp_ibchunks);
2732 	BUMP_LOCAL(sctp->sctp_isacks);
2733 	chunklen = ntohs(sch->sch_len);
2734 	if (chunklen < (sizeof (*sch) + sizeof (*sc)))
2735 		return (0);
2736 
2737 	sc = (sctp_sack_chunk_t *)(sch + 1);
2738 	cumtsn = ntohl(sc->ssc_cumtsn);
2739 
2740 	dprint(2, ("got sack cumtsn %x -> %x\n", sctp->sctp_lastack_rxd,
2741 	    cumtsn));
2742 
2743 	/* out of order */
2744 	if (SEQ_LT(cumtsn, sctp->sctp_lastack_rxd))
2745 		return (0);
2746 
2747 	if (SEQ_GT(cumtsn, sctp->sctp_ltsn - 1)) {
2748 		SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2749 		/* Send an ABORT */
2750 		return (-1);
2751 	}
2752 
2753 	/*
2754 	 * Cwnd only done when not in fast recovery mode.
2755 	 */
2756 	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn))
2757 		fast_recovery = B_TRUE;
2758 
2759 	/*
2760 	 * .. and if the cum TSN is not moving ahead on account Forward TSN
2761 	 */
2762 	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_adv_pap))
2763 		fwd_tsn = B_TRUE;
2764 
2765 	if (cumtsn == sctp->sctp_lastack_rxd &&
2766 	    (sctp->sctp_xmit_unacked == NULL ||
2767 	    !SCTP_CHUNK_ABANDONED(sctp->sctp_xmit_unacked))) {
2768 		if (sctp->sctp_xmit_unacked != NULL)
2769 			mp = sctp->sctp_xmit_unacked;
2770 		else if (sctp->sctp_xmit_head != NULL)
2771 			mp = sctp->sctp_xmit_head->b_cont;
2772 		else
2773 			mp = NULL;
2774 		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
2775 		/*
2776 		 * If we were doing a zero win probe and the win
2777 		 * has now opened to at least MSS, re-transmit the
2778 		 * zero win probe via sctp_rexmit_packet().
2779 		 */
2780 		if (mp != NULL && sctp->sctp_zero_win_probe &&
2781 		    ntohl(sc->ssc_a_rwnd) >= sctp->sctp_current->sf_pmss) {
2782 			mblk_t	*pkt;
2783 			uint_t	pkt_len;
2784 			mblk_t	*mp1 = mp;
2785 			mblk_t	*meta = sctp->sctp_xmit_head;
2786 
2787 			/*
2788 			 * Reset the RTO since we have been backing-off
2789 			 * to send the ZWP.
2790 			 */
2791 			fp = sctp->sctp_current;
2792 			fp->sf_rto = fp->sf_srtt + 4 * fp->sf_rttvar;
2793 			SCTP_MAX_RTO(sctp, fp);
2794 			/* Resend the ZWP */
2795 			pkt = sctp_rexmit_packet(sctp, &meta, &mp1, fp,
2796 			    &pkt_len);
2797 			if (pkt == NULL) {
2798 				SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2799 				return (0);
2800 			}
2801 			ASSERT(pkt_len <= fp->sf_pmss);
2802 			sctp->sctp_zero_win_probe = B_FALSE;
2803 			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2804 			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2805 			sctp_set_iplen(sctp, pkt, fp->sf_ixa);
2806 			(void) conn_ip_output(pkt, fp->sf_ixa);
2807 			BUMP_LOCAL(sctp->sctp_opkts);
2808 		}
2809 	} else {
2810 		if (sctp->sctp_zero_win_probe) {
2811 			/*
2812 			 * Reset the RTO since we have been backing-off
2813 			 * to send the ZWP.
2814 			 */
2815 			fp = sctp->sctp_current;
2816 			fp->sf_rto = fp->sf_srtt + 4 * fp->sf_rttvar;
2817 			SCTP_MAX_RTO(sctp, fp);
2818 			sctp->sctp_zero_win_probe = B_FALSE;
2819 			/* This is probably not required */
2820 			if (!sctp->sctp_rexmitting) {
2821 				sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2822 				sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2823 			}
2824 		}
2825 		acked = sctp_cumack(sctp, cumtsn, &mp);
2826 		sctp->sctp_xmit_unacked = mp;
2827 		if (acked > 0) {
2828 			trysend = 1;
2829 			cumack_forward = B_TRUE;
2830 			if (fwd_tsn && SEQ_GEQ(sctp->sctp_lastack_rxd,
2831 			    sctp->sctp_adv_pap)) {
2832 				cumack_forward = B_FALSE;
2833 			}
2834 		}
2835 	}
2836 	num_gaps = ntohs(sc->ssc_numfrags);
2837 	UPDATE_LOCAL(sctp->sctp_gapcnt, num_gaps);
2838 	if (num_gaps == 0 || mp == NULL || !SCTP_CHUNK_ISSENT(mp) ||
2839 	    chunklen < (sizeof (*sch) + sizeof (*sc) +
2840 	    num_gaps * sizeof (*ssf))) {
2841 		goto ret;
2842 	}
2843 #ifdef	DEBUG
2844 	/*
2845 	 * Since we delete any message that has been acked completely,
2846 	 * the unacked chunk must belong to sctp_xmit_head (as
2847 	 * we don't have a back pointer from the mp to the meta data
2848 	 * we do this).
2849 	 */
2850 	{
2851 		mblk_t	*mp2 = sctp->sctp_xmit_head->b_cont;
2852 
2853 		while (mp2 != NULL) {
2854 			if (mp2 == mp)
2855 				break;
2856 			mp2 = mp2->b_next;
2857 		}
2858 		ASSERT(mp2 != NULL);
2859 	}
2860 #endif
2861 	ump = sctp->sctp_xmit_head;
2862 
2863 	/*
2864 	 * Just remember where we started from, in case we need to call
2865 	 * sctp_process_uo_gaps() if the gap blocks are unordered.
2866 	 */
2867 	mp1 = mp;
2868 
2869 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2870 	xtsn = ntohl(sdc->sdh_tsn);
2871 	ASSERT(xtsn == cumtsn + 1);
2872 
2873 	/*
2874 	 * Go through SACK gaps. They are ordered based on start TSN.
2875 	 */
2876 	ssf = (sctp_sack_frag_t *)(sc + 1);
2877 	for (i = 0; i < num_gaps; i++, ssf++) {
2878 		if (gapstart != 0) {
2879 			/* check for unordered gap */
2880 			if (SEQ_LEQ(cumtsn + ntohs(ssf->ssf_start), gapstart)) {
2881 				acked += sctp_process_uo_gaps(sctp,
2882 				    cumtsn, ssf, num_gaps - i,
2883 				    sctp->sctp_xmit_head, mp1,
2884 				    &trysend, &fast_recovery, gapstart);
2885 				if (trysend < 0) {
2886 					SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2887 					return (-1);
2888 				}
2889 				break;
2890 			}
2891 		}
2892 		gapstart = cumtsn + ntohs(ssf->ssf_start);
2893 		gapend = cumtsn + ntohs(ssf->ssf_end);
2894 
2895 		/*
2896 		 * Sanity checks:
2897 		 *
2898 		 * 1. SACK for TSN we have not sent - ABORT
2899 		 * 2. Invalid or spurious gaps, ignore all gaps
2900 		 */
2901 		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
2902 		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
2903 			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2904 			return (-1);
2905 		} else if (SEQ_LT(gapend, gapstart) ||
2906 		    SEQ_LEQ(gapstart, cumtsn)) {
2907 			break;
2908 		}
2909 		/*
2910 		 * Let's start at the current TSN (for the 1st gap we start
2911 		 * from the cumulative TSN, for subsequent ones we start from
2912 		 * where the previous gapend was found - second while loop
2913 		 * below) and walk the transmit list till we find the TSN
2914 		 * corresponding to gapstart. All the unacked chunks till we
2915 		 * get to the chunk with TSN == gapstart will have their
2916 		 * SACKCNT incremented by 1. Note since the gap blocks are
2917 		 * ordered, we won't be incrementing the SACKCNT for an
2918 		 * unacked chunk by more than one while processing the gap
2919 		 * blocks. If the SACKCNT for any unacked chunk exceeds
2920 		 * the fast retransmit threshold, we will fast retransmit
2921 		 * after processing all the gap blocks.
2922 		 */
2923 		ASSERT(SEQ_LEQ(xtsn, gapstart));
2924 		while (xtsn != gapstart) {
2925 			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
2926 			if (SCTP_CHUNK_SACKCNT(mp) ==
2927 			    sctps->sctps_fast_rxt_thresh) {
2928 				SCTP_CHUNK_REXMIT(sctp, mp);
2929 				sctp->sctp_chk_fast_rexmit = B_TRUE;
2930 				trysend = 1;
2931 				if (!fast_recovery) {
2932 					/*
2933 					 * Entering fast recovery.
2934 					 */
2935 					fp = SCTP_CHUNK_DEST(mp);
2936 					fp->sf_ssthresh = fp->sf_cwnd / 2;
2937 					if (fp->sf_ssthresh < 2 * fp->sf_pmss) {
2938 						fp->sf_ssthresh =
2939 						    2 * fp->sf_pmss;
2940 					}
2941 					fp->sf_cwnd = fp->sf_ssthresh;
2942 					fp->sf_pba = 0;
2943 					sctp->sctp_recovery_tsn =
2944 					    sctp->sctp_ltsn - 1;
2945 					fast_recovery = B_TRUE;
2946 				}
2947 			}
2948 
2949 			/*
2950 			 * Peer may have reneged on this chunk, so un-sack
2951 			 * it now. If the peer did renege, we need to
2952 			 * readjust unacked.
2953 			 */
2954 			if (SCTP_CHUNK_ISACKED(mp)) {
2955 				chunklen = ntohs(sdc->sdh_len);
2956 				fp = SCTP_CHUNK_DEST(mp);
2957 				fp->sf_suna += chunklen;
2958 				sctp->sctp_unacked += chunklen - sizeof (*sdc);
2959 				SCTP_CHUNK_CLEAR_ACKED(sctp, mp);
2960 				if (!fp->sf_timer_running) {
2961 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
2962 					    fp->sf_rto);
2963 				}
2964 			}
2965 
2966 			mp = mp->b_next;
2967 			if (mp == NULL) {
2968 				ump = ump->b_next;
2969 				/*
2970 				 * ump can't be NULL given the sanity check
2971 				 * above.  But if it is NULL, it means that
2972 				 * there is a data corruption.  We'd better
2973 				 * panic.
2974 				 */
2975 				if (ump == NULL) {
2976 					panic("Memory corruption detected: gap "
2977 					    "start TSN 0x%x missing from the "
2978 					    "xmit list: %p", gapstart,
2979 					    (void *)sctp);
2980 				}
2981 				mp = ump->b_cont;
2982 			}
2983 			/*
2984 			 * mp can't be unsent given the sanity check above.
2985 			 */
2986 			ASSERT(SCTP_CHUNK_ISSENT(mp));
2987 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2988 			xtsn = ntohl(sdc->sdh_tsn);
2989 		}
2990 		/*
2991 		 * Now that we have found the chunk with TSN == 'gapstart',
2992 		 * let's walk till we hit the chunk with TSN == 'gapend'.
2993 		 * All intermediate chunks will be marked ACKED, if they
2994 		 * haven't already been.
2995 		 */
2996 		while (SEQ_LEQ(xtsn, gapend)) {
2997 			/*
2998 			 * SACKed
2999 			 */
3000 			SCTP_CHUNK_SET_SACKCNT(mp, 0);
3001 			if (!SCTP_CHUNK_ISACKED(mp)) {
3002 				SCTP_CHUNK_ACKED(mp);
3003 
3004 				fp = SCTP_CHUNK_DEST(mp);
3005 				chunklen = ntohs(sdc->sdh_len);
3006 				ASSERT(fp->sf_suna >= chunklen);
3007 				fp->sf_suna -= chunklen;
3008 				if (fp->sf_suna == 0) {
3009 					/* All outstanding data acked. */
3010 					fp->sf_pba = 0;
3011 					SCTP_FADDR_TIMER_STOP(fp);
3012 				}
3013 				fp->sf_acked += chunklen;
3014 				acked += chunklen;
3015 				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
3016 				ASSERT(sctp->sctp_unacked >= 0);
3017 			}
3018 			/* Go to the next chunk of the current message */
3019 			mp = mp->b_next;
3020 			/*
3021 			 * Move to the next message in the transmit list
3022 			 * if we are done with all the chunks from the current
3023 			 * message. Note, it is possible to hit the end of the
3024 			 * transmit list here, i.e. if we have already completed
3025 			 * processing the gap block.  But the TSN must be equal
3026 			 * to the gapend because of the above sanity check.
3027 			 * If it is not equal, it means that some data is
3028 			 * missing.
3029 			 * Also, note that we break here, which means we
3030 			 * continue processing gap blocks, if any. In case of
3031 			 * ordered gap blocks there can't be any following
3032 			 * this (if there is it will fail the sanity check
3033 			 * above). In case of un-ordered gap blocks we will
3034 			 * switch to sctp_process_uo_gaps().  In either case
3035 			 * it should be fine to continue with NULL ump/mp,
3036 			 * but we just reset it to xmit_head.
3037 			 */
3038 			if (mp == NULL) {
3039 				ump = ump->b_next;
3040 				if (ump == NULL) {
3041 					if (xtsn != gapend) {
3042 						panic("Memory corruption "
3043 						    "detected: gap end TSN "
3044 						    "0x%x missing from the "
3045 						    "xmit list: %p", gapend,
3046 						    (void *)sctp);
3047 					}
3048 					ump = sctp->sctp_xmit_head;
3049 					mp = mp1;
3050 					sdc = (sctp_data_hdr_t *)mp->b_rptr;
3051 					xtsn = ntohl(sdc->sdh_tsn);
3052 					break;
3053 				}
3054 				mp = ump->b_cont;
3055 			}
3056 			/*
3057 			 * Likewise, we could hit an unsent chunk once we have
3058 			 * completed processing the gap block. Again, it is
3059 			 * fine to continue processing gap blocks with mp
3060 			 * pointing to the unsent chunk, because if there
3061 			 * are more ordered gap blocks, they will fail the
3062 			 * sanity check, and if there are un-ordered gap blocks,
3063 			 * we will continue processing in sctp_process_uo_gaps()
3064 			 * We just reset the mp to the one we started with.
3065 			 */
3066 			if (!SCTP_CHUNK_ISSENT(mp)) {
3067 				ASSERT(xtsn == gapend);
3068 				ump = sctp->sctp_xmit_head;
3069 				mp = mp1;
3070 				sdc = (sctp_data_hdr_t *)mp->b_rptr;
3071 				xtsn = ntohl(sdc->sdh_tsn);
3072 				break;
3073 			}
3074 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
3075 			xtsn = ntohl(sdc->sdh_tsn);
3076 		}
3077 	}
3078 	if (sctp->sctp_prsctp_aware)
3079 		sctp_check_abandoned_data(sctp, sctp->sctp_current);
3080 	if (sctp->sctp_chk_fast_rexmit)
3081 		sctp_fast_rexmit(sctp);
3082 ret:
3083 	trysend += sctp_set_frwnd(sctp, ntohl(sc->ssc_a_rwnd));
3084 
3085 	/*
3086 	 * If receive window is closed while there is unsent data,
3087 	 * set a timer for doing zero window probes.
3088 	 */
3089 	if (sctp->sctp_frwnd == 0 && sctp->sctp_unacked == 0 &&
3090 	    sctp->sctp_unsent != 0) {
3091 		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
3092 		    sctp->sctp_current->sf_rto);
3093 	}
3094 
3095 	/*
3096 	 * Set cwnd for all destinations.
3097 	 * Congestion window gets increased only when cumulative
3098 	 * TSN moves forward, we're not in fast recovery, and
3099 	 * cwnd has been fully utilized (almost fully, need to allow
3100 	 * some leeway due to non-MSS sized messages).
3101 	 */
3102 	if (sctp->sctp_current->sf_acked == acked) {
3103 		/*
3104 		 * Fast-path, only data sent to sctp_current got acked.
3105 		 */
3106 		fp = sctp->sctp_current;
3107 		if (cumack_forward && !fast_recovery &&
3108 		    (fp->sf_acked + fp->sf_suna > fp->sf_cwnd - fp->sf_pmss)) {
3109 			if (fp->sf_cwnd < fp->sf_ssthresh) {
3110 				/*
3111 				 * Slow start
3112 				 */
3113 				if (fp->sf_acked > fp->sf_pmss) {
3114 					fp->sf_cwnd += fp->sf_pmss;
3115 				} else {
3116 					fp->sf_cwnd += fp->sf_acked;
3117 				}
3118 				fp->sf_cwnd = MIN(fp->sf_cwnd,
3119 				    sctp->sctp_cwnd_max);
3120 			} else {
3121 				/*
3122 				 * Congestion avoidance
3123 				 */
3124 				fp->sf_pba += fp->sf_acked;
3125 				if (fp->sf_pba >= fp->sf_cwnd) {
3126 					fp->sf_pba -= fp->sf_cwnd;
3127 					fp->sf_cwnd += fp->sf_pmss;
3128 					fp->sf_cwnd = MIN(fp->sf_cwnd,
3129 					    sctp->sctp_cwnd_max);
3130 				}
3131 			}
3132 		}
3133 		/*
3134 		 * Limit the burst of transmitted data segments.
3135 		 */
3136 		if (fp->sf_suna + sctps->sctps_maxburst * fp->sf_pmss <
3137 		    fp->sf_cwnd) {
3138 			fp->sf_cwnd = fp->sf_suna + sctps->sctps_maxburst *
3139 			    fp->sf_pmss;
3140 		}
3141 		fp->sf_acked = 0;
3142 		goto check_ss_rxmit;
3143 	}
3144 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
3145 		if (cumack_forward && fp->sf_acked && !fast_recovery &&
3146 		    (fp->sf_acked + fp->sf_suna > fp->sf_cwnd - fp->sf_pmss)) {
3147 			if (fp->sf_cwnd < fp->sf_ssthresh) {
3148 				if (fp->sf_acked > fp->sf_pmss) {
3149 					fp->sf_cwnd += fp->sf_pmss;
3150 				} else {
3151 					fp->sf_cwnd += fp->sf_acked;
3152 				}
3153 				fp->sf_cwnd = MIN(fp->sf_cwnd,
3154 				    sctp->sctp_cwnd_max);
3155 			} else {
3156 				fp->sf_pba += fp->sf_acked;
3157 				if (fp->sf_pba >= fp->sf_cwnd) {
3158 					fp->sf_pba -= fp->sf_cwnd;
3159 					fp->sf_cwnd += fp->sf_pmss;
3160 					fp->sf_cwnd = MIN(fp->sf_cwnd,
3161 					    sctp->sctp_cwnd_max);
3162 				}
3163 			}
3164 		}
3165 		if (fp->sf_suna + sctps->sctps_maxburst * fp->sf_pmss <
3166 		    fp->sf_cwnd) {
3167 			fp->sf_cwnd = fp->sf_suna + sctps->sctps_maxburst *
3168 			    fp->sf_pmss;
3169 		}
3170 		fp->sf_acked = 0;
3171 	}
3172 	fp = sctp->sctp_current;
3173 check_ss_rxmit:
3174 	/*
3175 	 * If this is a SACK following a timeout, check if there are
3176 	 * still unacked chunks (sent before the timeout) that we can
3177 	 * send.
3178 	 */
3179 	if (sctp->sctp_rexmitting) {
3180 		if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_rxt_maxtsn)) {
3181 			/*
3182 			 * As we are in retransmission phase, we may get a
3183 			 * SACK which indicates some new chunks are received
3184 			 * but cum_tsn does not advance.  During this
3185 			 * phase, the other side advances cum_tsn only because
3186 			 * it receives our retransmitted chunks.  Only
3187 			 * this signals that some chunks are still
3188 			 * missing.
3189 			 */
3190 			if (cumack_forward) {
3191 				fp->sf_rxt_unacked -= acked;
3192 				sctp_ss_rexmit(sctp);
3193 			}
3194 		} else {
3195 			sctp->sctp_rexmitting = B_FALSE;
3196 			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
3197 			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
3198 			fp->sf_rxt_unacked = 0;
3199 		}
3200 	}
3201 	return (trysend);
3202 }
3203 
3204 /*
3205  * Returns 0 if the caller should stop processing any more chunks,
3206  * 1 if the caller should skip this chunk and continue processing.
3207  */
3208 static int
3209 sctp_strange_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp)
3210 {
3211 	size_t len;
3212 
3213 	BUMP_LOCAL(sctp->sctp_ibchunks);
3214 	/* check top two bits for action required */
3215 	if (ch->sch_id & 0x40) {	/* also matches 0xc0 */
3216 		len = ntohs(ch->sch_len);
3217 		sctp_add_err(sctp, SCTP_ERR_UNREC_CHUNK, ch, len, fp);
3218 
3219 		if ((ch->sch_id & 0xc0) == 0xc0) {
3220 			/* skip and continue */
3221 			return (1);
3222 		} else {
3223 			/* stop processing */
3224 			return (0);
3225 		}
3226 	}
3227 	if (ch->sch_id & 0x80) {
3228 		/* skip and continue, no error */
3229 		return (1);
3230 	}
3231 	/* top two bits are clear; stop processing and no error */
3232 	return (0);
3233 }
3234 
3235 /*
3236  * Basic sanity checks on all input chunks and parameters: they must
3237  * be of legitimate size for their purported type, and must follow
3238  * ordering conventions as defined in rfc2960.
3239  *
3240  * Returns 1 if the chunk and all encloded params are legitimate,
3241  * 0 otherwise.
3242  */
3243 /*ARGSUSED*/
3244 static int
3245 sctp_check_input(sctp_t *sctp, sctp_chunk_hdr_t *ch, ssize_t len, int first)
3246 {
3247 	sctp_parm_hdr_t	*ph;
3248 	void		*p = NULL;
3249 	ssize_t		clen;
3250 	uint16_t	ch_len;
3251 
3252 	ch_len = ntohs(ch->sch_len);
3253 	if (ch_len > len) {
3254 		return (0);
3255 	}
3256 
3257 	switch (ch->sch_id) {
3258 	case CHUNK_DATA:
3259 		if (ch_len < sizeof (sctp_data_hdr_t)) {
3260 			return (0);
3261 		}
3262 		return (1);
3263 	case CHUNK_INIT:
3264 	case CHUNK_INIT_ACK:
3265 		{
3266 			ssize_t	remlen = len;
3267 
3268 			/*
3269 			 * INIT and INIT-ACK chunks must not be bundled with
3270 			 * any other.
3271 			 */
3272 			if (!first || sctp_next_chunk(ch, &remlen) != NULL ||
3273 			    (ch_len < (sizeof (*ch) +
3274 			    sizeof (sctp_init_chunk_t)))) {
3275 				return (0);
3276 			}
3277 			/* may have params that need checking */
3278 			p = (char *)(ch + 1) + sizeof (sctp_init_chunk_t);
3279 			clen = ch_len - (sizeof (*ch) +
3280 			    sizeof (sctp_init_chunk_t));
3281 		}
3282 		break;
3283 	case CHUNK_SACK:
3284 		if (ch_len < (sizeof (*ch) + sizeof (sctp_sack_chunk_t))) {
3285 			return (0);
3286 		}
3287 		/* dup and gap reports checked by got_sack() */
3288 		return (1);
3289 	case CHUNK_SHUTDOWN:
3290 		if (ch_len < (sizeof (*ch) + sizeof (uint32_t))) {
3291 			return (0);
3292 		}
3293 		return (1);
3294 	case CHUNK_ABORT:
3295 	case CHUNK_ERROR:
3296 		if (ch_len < sizeof (*ch)) {
3297 			return (0);
3298 		}
3299 		/* may have params that need checking */
3300 		p = ch + 1;
3301 		clen = ch_len - sizeof (*ch);
3302 		break;
3303 	case CHUNK_ECNE:
3304 	case CHUNK_CWR:
3305 	case CHUNK_HEARTBEAT:
3306 	case CHUNK_HEARTBEAT_ACK:
3307 	/* Full ASCONF chunk and parameter checks are in asconf.c */
3308 	case CHUNK_ASCONF:
3309 	case CHUNK_ASCONF_ACK:
3310 		if (ch_len < sizeof (*ch)) {
3311 			return (0);
3312 		}
3313 		/* heartbeat data checked by process_heartbeat() */
3314 		return (1);
3315 	case CHUNK_SHUTDOWN_COMPLETE:
3316 		{
3317 			ssize_t remlen = len;
3318 
3319 			/*
3320 			 * SHUTDOWN-COMPLETE chunk must not be bundled with any
3321 			 * other
3322 			 */
3323 			if (!first || sctp_next_chunk(ch, &remlen) != NULL ||
3324 			    ch_len < sizeof (*ch)) {
3325 				return (0);
3326 			}
3327 		}
3328 		return (1);
3329 	case CHUNK_COOKIE:
3330 	case CHUNK_COOKIE_ACK:
3331 	case CHUNK_SHUTDOWN_ACK:
3332 		if (ch_len < sizeof (*ch) || !first) {
3333 			return (0);
3334 		}
3335 		return (1);
3336 	case CHUNK_FORWARD_TSN:
3337 		if (ch_len < (sizeof (*ch) + sizeof (uint32_t)))
3338 			return (0);
3339 		return (1);
3340 	default:
3341 		return (1);	/* handled by strange_chunk() */
3342 	}
3343 
3344 	/* check and byteorder parameters */
3345 	if (clen <= 0) {
3346 		return (1);
3347 	}
3348 	ASSERT(p != NULL);
3349 
3350 	ph = p;
3351 	while (ph != NULL && clen > 0) {
3352 		ch_len = ntohs(ph->sph_len);
3353 		if (ch_len > len || ch_len < sizeof (*ph)) {
3354 			return (0);
3355 		}
3356 		ph = sctp_next_parm(ph, &clen);
3357 	}
3358 
3359 	/* All OK */
3360 	return (1);
3361 }
3362 
3363 static mblk_t *
3364 sctp_check_in_policy(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
3365 {
3366 	boolean_t policy_present;
3367 	ipha_t *ipha;
3368 	ip6_t *ip6h;
3369 	netstack_t	*ns = ipst->ips_netstack;
3370 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3371 
3372 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
3373 		policy_present = ipss->ipsec_inbound_v4_policy_present;
3374 		ipha = (ipha_t *)mp->b_rptr;
3375 		ip6h = NULL;
3376 	} else {
3377 		policy_present = ipss->ipsec_inbound_v6_policy_present;
3378 		ipha = NULL;
3379 		ip6h = (ip6_t *)mp->b_rptr;
3380 	}
3381 
3382 	if (policy_present) {
3383 		/*
3384 		 * The conn_t parameter is NULL because we already know
3385 		 * nobody's home.
3386 		 */
3387 		mp = ipsec_check_global_policy(mp, (conn_t *)NULL,
3388 		    ipha, ip6h, ira, ns);
3389 		if (mp == NULL)
3390 			return (NULL);
3391 	}
3392 	return (mp);
3393 }
3394 
3395 /* Handle out-of-the-blue packets */
3396 void
3397 sctp_ootb_input(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
3398 {
3399 	sctp_t			*sctp;
3400 	sctp_chunk_hdr_t	*ch;
3401 	sctp_hdr_t		*sctph;
3402 	in6_addr_t		src, dst;
3403 	uint_t			ip_hdr_len = ira->ira_ip_hdr_length;
3404 	ssize_t			mlen;
3405 	sctp_stack_t		*sctps;
3406 	boolean_t		secure;
3407 	zoneid_t		zoneid = ira->ira_zoneid;
3408 	uchar_t			*rptr;
3409 
3410 	ASSERT(ira->ira_ill == NULL);
3411 
3412 	secure = ira->ira_flags & IRAF_IPSEC_SECURE;
3413 
3414 	sctps = ipst->ips_netstack->netstack_sctp;
3415 
3416 	SCTPS_BUMP_MIB(sctps, sctpOutOfBlue);
3417 	SCTPS_BUMP_MIB(sctps, sctpInSCTPPkts);
3418 
3419 	if (mp->b_cont != NULL) {
3420 		/*
3421 		 * All subsequent code is vastly simplified if it can
3422 		 * assume a single contiguous chunk of data.
3423 		 */
3424 		if (pullupmsg(mp, -1) == 0) {
3425 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3426 			ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3427 			freemsg(mp);
3428 			return;
3429 		}
3430 	}
3431 
3432 	rptr = mp->b_rptr;
3433 	sctph = ((sctp_hdr_t *)&rptr[ip_hdr_len]);
3434 	if (ira->ira_flags & IRAF_IS_IPV4) {
3435 		ipha_t *ipha;
3436 
3437 		ipha = (ipha_t *)rptr;
3438 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &src);
3439 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst);
3440 	} else {
3441 		ip6_t *ip6h;
3442 
3443 		ip6h = (ip6_t *)rptr;
3444 		src = ip6h->ip6_src;
3445 		dst = ip6h->ip6_dst;
3446 	}
3447 
3448 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
3449 	if ((ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
3450 		dprint(3, ("sctp_ootb_input: invalid packet\n"));
3451 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3452 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3453 		freemsg(mp);
3454 		return;
3455 	}
3456 
3457 	switch (ch->sch_id) {
3458 	case CHUNK_INIT:
3459 		/* no listener; send abort  */
3460 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3461 			return;
3462 		sctp_ootb_send_abort(sctp_init2vtag(ch), 0,
3463 		    NULL, 0, mp, 0, B_TRUE, ira, ipst);
3464 		break;
3465 	case CHUNK_INIT_ACK:
3466 		/* check for changed src addr */
3467 		sctp = sctp_addrlist2sctp(mp, sctph, ch, zoneid, sctps);
3468 		if (sctp != NULL) {
3469 			/* success; proceed to normal path */
3470 			mutex_enter(&sctp->sctp_lock);
3471 			if (sctp->sctp_running) {
3472 				sctp_add_recvq(sctp, mp, B_FALSE, ira);
3473 				mutex_exit(&sctp->sctp_lock);
3474 			} else {
3475 				/*
3476 				 * If the source address is changed, we
3477 				 * don't need to worry too much about
3478 				 * out of order processing.  So we don't
3479 				 * check if the recvq is empty or not here.
3480 				 */
3481 				sctp->sctp_running = B_TRUE;
3482 				mutex_exit(&sctp->sctp_lock);
3483 				sctp_input_data(sctp, mp, ira);
3484 				WAKE_SCTP(sctp);
3485 			}
3486 			SCTP_REFRELE(sctp);
3487 			return;
3488 		}
3489 		/* else bogus init ack; drop it */
3490 		break;
3491 	case CHUNK_SHUTDOWN_ACK:
3492 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3493 			return;
3494 		sctp_ootb_shutdown_ack(mp, ip_hdr_len, ira, ipst);
3495 		return;
3496 	case CHUNK_ERROR:
3497 	case CHUNK_ABORT:
3498 	case CHUNK_COOKIE_ACK:
3499 	case CHUNK_SHUTDOWN_COMPLETE:
3500 		break;
3501 	default:
3502 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3503 			return;
3504 		sctp_ootb_send_abort(sctph->sh_verf, 0,
3505 		    NULL, 0, mp, 0, B_TRUE, ira, ipst);
3506 		break;
3507 	}
3508 	freemsg(mp);
3509 }
3510 
3511 /*
3512  * Handle sctp packets.
3513  * Note that we rele the sctp_t (the caller got a reference on it).
3514  */
3515 void
3516 sctp_input(conn_t *connp, ipha_t *ipha, ip6_t *ip6h, mblk_t *mp,
3517     ip_recv_attr_t *ira)
3518 {
3519 	sctp_t		*sctp = CONN2SCTP(connp);
3520 	boolean_t	secure;
3521 	ill_t		*ill = ira->ira_ill;
3522 	ip_stack_t	*ipst = ill->ill_ipst;
3523 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
3524 	iaflags_t	iraflags = ira->ira_flags;
3525 	ill_t		*rill = ira->ira_rill;
3526 
3527 	secure = iraflags & IRAF_IPSEC_SECURE;
3528 
3529 	if (connp->conn_min_ttl != 0 && connp->conn_min_ttl > ira->ira_ttl) {
3530 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3531 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
3532 		SCTP_REFRELE(sctp);
3533 		freemsg(mp);
3534 		return;
3535 	}
3536 
3537 	/*
3538 	 * We check some fields in conn_t without holding a lock.
3539 	 * This should be fine.
3540 	 */
3541 	if (((iraflags & IRAF_IS_IPV4) ?
3542 	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
3543 	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
3544 	    secure) {
3545 		mp = ipsec_check_inbound_policy(mp, connp, ipha,
3546 		    ip6h, ira);
3547 		if (mp == NULL) {
3548 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3549 			/* Note that mp is NULL */
3550 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3551 			SCTP_REFRELE(sctp);
3552 			return;
3553 		}
3554 	}
3555 
3556 	ira->ira_ill = ira->ira_rill = NULL;
3557 
3558 	mutex_enter(&sctp->sctp_lock);
3559 	if (sctp->sctp_running) {
3560 		sctp_add_recvq(sctp, mp, B_FALSE, ira);
3561 		mutex_exit(&sctp->sctp_lock);
3562 		goto done;
3563 	} else {
3564 		sctp->sctp_running = B_TRUE;
3565 		mutex_exit(&sctp->sctp_lock);
3566 
3567 		mutex_enter(&sctp->sctp_recvq_lock);
3568 		if (sctp->sctp_recvq != NULL) {
3569 			sctp_add_recvq(sctp, mp, B_TRUE, ira);
3570 			mutex_exit(&sctp->sctp_recvq_lock);
3571 			WAKE_SCTP(sctp);
3572 			goto done;
3573 		}
3574 	}
3575 	mutex_exit(&sctp->sctp_recvq_lock);
3576 	if (ira->ira_flags & IRAF_ICMP_ERROR)
3577 		sctp_icmp_error(sctp, mp);
3578 	else
3579 		sctp_input_data(sctp, mp, ira);
3580 	WAKE_SCTP(sctp);
3581 
3582 done:
3583 	SCTP_REFRELE(sctp);
3584 	ira->ira_ill = ill;
3585 	ira->ira_rill = rill;
3586 }
3587 
3588 static void
3589 sctp_process_abort(sctp_t *sctp, sctp_chunk_hdr_t *ch, int err)
3590 {
3591 	sctp_stack_t	*sctps = sctp->sctp_sctps;
3592 
3593 	SCTPS_BUMP_MIB(sctps, sctpAborted);
3594 	BUMP_LOCAL(sctp->sctp_ibchunks);
3595 
3596 	/*
3597 	 * SCTP_COMM_LOST is only sent up if the association is
3598 	 * established (sctp_state >= SCTPS_ESTABLISHED).
3599 	 */
3600 	if (sctp->sctp_state >= SCTPS_ESTABLISHED) {
3601 		sctp_assoc_event(sctp, SCTP_COMM_LOST,
3602 		    ntohs(((sctp_parm_hdr_t *)(ch + 1))->sph_type), ch);
3603 	}
3604 
3605 	sctp_clean_death(sctp, err);
3606 }
3607 
3608 void
3609 sctp_input_data(sctp_t *sctp, mblk_t *mp, ip_recv_attr_t *ira)
3610 {
3611 	sctp_chunk_hdr_t	*ch;
3612 	ssize_t			mlen;
3613 	int			gotdata;
3614 	int			trysend;
3615 	sctp_faddr_t		*fp;
3616 	sctp_init_chunk_t	*iack;
3617 	uint32_t		tsn;
3618 	sctp_data_hdr_t		*sdc;
3619 	ip_pkt_t		ipp;
3620 	in6_addr_t		src;
3621 	in6_addr_t		dst;
3622 	uint_t			ifindex;
3623 	sctp_hdr_t		*sctph;
3624 	uint_t			ip_hdr_len = ira->ira_ip_hdr_length;
3625 	mblk_t			*dups = NULL;
3626 	int			recv_adaptation;
3627 	boolean_t		wake_eager = B_FALSE;
3628 	in6_addr_t		peer_src;
3629 	int64_t			now;
3630 	sctp_stack_t		*sctps = sctp->sctp_sctps;
3631 	ip_stack_t		*ipst = sctps->sctps_netstack->netstack_ip;
3632 	boolean_t		hb_already = B_FALSE;
3633 	cred_t			*cr;
3634 	pid_t			cpid;
3635 	uchar_t			*rptr;
3636 	conn_t			*connp = sctp->sctp_connp;
3637 	boolean_t		shutdown_ack_needed = B_FALSE;
3638 
3639 	ASSERT(DB_TYPE(mp) == M_DATA);
3640 	ASSERT(ira->ira_ill == NULL);
3641 
3642 	if (mp->b_cont != NULL) {
3643 		/*
3644 		 * All subsequent code is vastly simplified if it can
3645 		 * assume a single contiguous chunk of data.
3646 		 */
3647 		if (pullupmsg(mp, -1) == 0) {
3648 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3649 			ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3650 			freemsg(mp);
3651 			return;
3652 		}
3653 	}
3654 
3655 	BUMP_LOCAL(sctp->sctp_ipkts);
3656 	ifindex = ira->ira_ruifindex;
3657 
3658 	rptr = mp->b_rptr;
3659 
3660 	ipp.ipp_fields = 0;
3661 	if (connp->conn_recv_ancillary.crb_all != 0) {
3662 		/*
3663 		 * Record packet information in the ip_pkt_t
3664 		 */
3665 		if (ira->ira_flags & IRAF_IS_IPV4) {
3666 			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipp,
3667 			    B_FALSE);
3668 		} else {
3669 			uint8_t nexthdrp;
3670 
3671 			/*
3672 			 * IPv6 packets can only be received by applications
3673 			 * that are prepared to receive IPv6 addresses.
3674 			 * The IP fanout must ensure this.
3675 			 */
3676 			ASSERT(connp->conn_family == AF_INET6);
3677 
3678 			(void) ip_find_hdr_v6(mp, (ip6_t *)rptr, B_TRUE, &ipp,
3679 			    &nexthdrp);
3680 			ASSERT(nexthdrp == IPPROTO_SCTP);
3681 
3682 			/* Could have caused a pullup? */
3683 			rptr = mp->b_rptr;
3684 		}
3685 	}
3686 
3687 	sctph = ((sctp_hdr_t *)&rptr[ip_hdr_len]);
3688 
3689 	if (ira->ira_flags & IRAF_IS_IPV4) {
3690 		ipha_t *ipha;
3691 
3692 		ipha = (ipha_t *)rptr;
3693 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &src);
3694 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst);
3695 	} else {
3696 		ip6_t *ip6h;
3697 
3698 		ip6h = (ip6_t *)rptr;
3699 		src = ip6h->ip6_src;
3700 		dst = ip6h->ip6_dst;
3701 	}
3702 
3703 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
3704 	ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen);
3705 	if (ch == NULL) {
3706 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3707 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3708 		freemsg(mp);
3709 		return;
3710 	}
3711 
3712 	if (!sctp_check_input(sctp, ch, mlen, 1)) {
3713 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3714 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3715 		goto done;
3716 	}
3717 	/*
3718 	 * Check verfication tag (special handling for INIT,
3719 	 * COOKIE, SHUTDOWN_COMPLETE and SHUTDOWN_ACK chunks).
3720 	 * ABORTs are handled in the chunk processing loop, since
3721 	 * may not appear first. All other checked chunks must
3722 	 * appear first, or will have been dropped by check_input().
3723 	 */
3724 	switch (ch->sch_id) {
3725 	case CHUNK_INIT:
3726 		if (sctph->sh_verf != 0) {
3727 			/* drop it */
3728 			goto done;
3729 		}
3730 		break;
3731 	case CHUNK_SHUTDOWN_COMPLETE:
3732 		if (sctph->sh_verf == sctp->sctp_lvtag)
3733 			break;
3734 		if (sctph->sh_verf == sctp->sctp_fvtag &&
3735 		    SCTP_GET_TBIT(ch)) {
3736 			break;
3737 		}
3738 		/* else drop it */
3739 		goto done;
3740 	case CHUNK_ABORT:
3741 	case CHUNK_COOKIE:
3742 		/* handled below */
3743 		break;
3744 	case CHUNK_SHUTDOWN_ACK:
3745 		if (sctp->sctp_state > SCTPS_BOUND &&
3746 		    sctp->sctp_state < SCTPS_ESTABLISHED) {
3747 			/* treat as OOTB */
3748 			sctp_ootb_shutdown_ack(mp, ip_hdr_len, ira, ipst);
3749 			return;
3750 		}
3751 		/* else fallthru */
3752 	default:
3753 		/*
3754 		 * All other packets must have a valid
3755 		 * verification tag, however if this is a
3756 		 * listener, we use a refined version of
3757 		 * out-of-the-blue logic.
3758 		 */
3759 		if (sctph->sh_verf != sctp->sctp_lvtag &&
3760 		    sctp->sctp_state != SCTPS_LISTEN) {
3761 			/* drop it */
3762 			goto done;
3763 		}
3764 		break;
3765 	}
3766 
3767 	/* Have a valid sctp for this packet */
3768 	fp = sctp_lookup_faddr(sctp, &src);
3769 	dprint(2, ("sctp_dispatch_rput: mp=%p fp=%p sctp=%p\n", (void *)mp,
3770 	    (void *)fp, (void *)sctp));
3771 
3772 	gotdata = 0;
3773 	trysend = 0;
3774 
3775 	now = LBOLT_FASTPATH64;
3776 	/* Process the chunks */
3777 	do {
3778 		dprint(3, ("sctp_dispatch_rput: state=%d, chunk id=%d\n",
3779 		    sctp->sctp_state, (int)(ch->sch_id)));
3780 
3781 		if (ch->sch_id == CHUNK_ABORT) {
3782 			if (sctph->sh_verf != sctp->sctp_lvtag &&
3783 			    sctph->sh_verf != sctp->sctp_fvtag) {
3784 				/* drop it */
3785 				goto done;
3786 			}
3787 		}
3788 
3789 		switch (sctp->sctp_state) {
3790 
3791 		case SCTPS_ESTABLISHED:
3792 		case SCTPS_SHUTDOWN_PENDING:
3793 		case SCTPS_SHUTDOWN_SENT:
3794 			switch (ch->sch_id) {
3795 			case CHUNK_DATA:
3796 				/* 0-length data chunks are not allowed */
3797 				if (ntohs(ch->sch_len) == sizeof (*sdc)) {
3798 					sdc = (sctp_data_hdr_t *)ch;
3799 					tsn = sdc->sdh_tsn;
3800 					sctp_send_abort(sctp, sctp->sctp_fvtag,
3801 					    SCTP_ERR_NO_USR_DATA, (char *)&tsn,
3802 					    sizeof (tsn), mp, 0, B_FALSE, ira);
3803 					sctp_assoc_event(sctp, SCTP_COMM_LOST,
3804 					    0, NULL);
3805 					sctp_clean_death(sctp, ECONNABORTED);
3806 					goto done;
3807 				}
3808 
3809 				ASSERT(fp != NULL);
3810 				sctp->sctp_lastdata = fp;
3811 				sctp_data_chunk(sctp, ch, mp, &dups, fp,
3812 				    &ipp, ira);
3813 				gotdata = 1;
3814 				/* Restart shutdown timer if shutting down */
3815 				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
3816 					/*
3817 					 * If we have exceeded our max
3818 					 * wait bound for waiting for a
3819 					 * shutdown ack from the peer,
3820 					 * abort the association.
3821 					 */
3822 					if (sctps->sctps_shutack_wait_bound !=
3823 					    0 &&
3824 					    TICK_TO_MSEC(now -
3825 					    sctp->sctp_out_time) >
3826 					    sctps->sctps_shutack_wait_bound) {
3827 						sctp_send_abort(sctp,
3828 						    sctp->sctp_fvtag, 0, NULL,
3829 						    0, mp, 0, B_FALSE, ira);
3830 						sctp_assoc_event(sctp,
3831 						    SCTP_COMM_LOST, 0, NULL);
3832 						sctp_clean_death(sctp,
3833 						    ECONNABORTED);
3834 						goto done;
3835 					}
3836 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
3837 					    fp->sf_rto);
3838 				}
3839 				break;
3840 			case CHUNK_SACK:
3841 				ASSERT(fp != NULL);
3842 				/*
3843 				 * Peer is real and alive if it can ack our
3844 				 * data.
3845 				 */
3846 				sctp_faddr_alive(sctp, fp);
3847 				trysend = sctp_got_sack(sctp, ch);
3848 				if (trysend < 0) {
3849 					sctp_send_abort(sctp, sctph->sh_verf,
3850 					    0, NULL, 0, mp, 0, B_FALSE, ira);
3851 					sctp_assoc_event(sctp,
3852 					    SCTP_COMM_LOST, 0, NULL);
3853 					sctp_clean_death(sctp,
3854 					    ECONNABORTED);
3855 					goto done;
3856 				}
3857 				break;
3858 			case CHUNK_HEARTBEAT:
3859 				if (!hb_already) {
3860 					/*
3861 					 * In any one packet, there should
3862 					 * only be one heartbeat chunk.  So
3863 					 * we should not process more than
3864 					 * once.
3865 					 */
3866 					sctp_return_heartbeat(sctp, ch, mp);
3867 					hb_already = B_TRUE;
3868 				}
3869 				break;
3870 			case CHUNK_HEARTBEAT_ACK:
3871 				sctp_process_heartbeat(sctp, ch);
3872 				break;
3873 			case CHUNK_SHUTDOWN:
3874 				sctp_shutdown_event(sctp);
3875 				trysend = sctp_shutdown_received(sctp, ch,
3876 				    B_FALSE, B_FALSE, fp);
3877 				BUMP_LOCAL(sctp->sctp_ibchunks);
3878 				break;
3879 			case CHUNK_SHUTDOWN_ACK:
3880 				BUMP_LOCAL(sctp->sctp_ibchunks);
3881 				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
3882 					sctp_shutdown_complete(sctp);
3883 					SCTPS_BUMP_MIB(sctps, sctpShutdowns);
3884 					sctp_assoc_event(sctp,
3885 					    SCTP_SHUTDOWN_COMP, 0, NULL);
3886 					sctp_clean_death(sctp, 0);
3887 					goto done;
3888 				}
3889 				break;
3890 			case CHUNK_ABORT: {
3891 				sctp_saddr_ipif_t *sp;
3892 
3893 				/* Ignore if delete pending */
3894 				sp = sctp_saddr_lookup(sctp, &dst, 0);
3895 				ASSERT(sp != NULL);
3896 				if (sp->saddr_ipif_delete_pending) {
3897 					BUMP_LOCAL(sctp->sctp_ibchunks);
3898 					break;
3899 				}
3900 
3901 				sctp_process_abort(sctp, ch, ECONNRESET);
3902 				goto done;
3903 			}
3904 			case CHUNK_INIT:
3905 				sctp_send_initack(sctp, sctph, ch, mp, ira);
3906 				break;
3907 			case CHUNK_COOKIE:
3908 				if (sctp_process_cookie(sctp, ch, mp, &iack,
3909 				    sctph, &recv_adaptation, NULL, ira) != -1) {
3910 					sctp_send_cookie_ack(sctp);
3911 					sctp_assoc_event(sctp, SCTP_RESTART,
3912 					    0, NULL);
3913 					if (recv_adaptation) {
3914 						sctp->sctp_recv_adaptation = 1;
3915 						sctp_adaptation_event(sctp);
3916 					}
3917 				} else {
3918 					SCTPS_BUMP_MIB(sctps,
3919 					    sctpInInvalidCookie);
3920 				}
3921 				break;
3922 			case CHUNK_ERROR: {
3923 				int error;
3924 
3925 				BUMP_LOCAL(sctp->sctp_ibchunks);
3926 				error = sctp_handle_error(sctp, sctph, ch, mp,
3927 				    ira);
3928 				if (error != 0) {
3929 					sctp_assoc_event(sctp, SCTP_COMM_LOST,
3930 					    0, NULL);
3931 					sctp_clean_death(sctp, error);
3932 					goto done;
3933 				}
3934 				break;
3935 			}
3936 			case CHUNK_ASCONF:
3937 				ASSERT(fp != NULL);
3938 				sctp_input_asconf(sctp, ch, fp);
3939 				BUMP_LOCAL(sctp->sctp_ibchunks);
3940 				break;
3941 			case CHUNK_ASCONF_ACK:
3942 				ASSERT(fp != NULL);
3943 				sctp_faddr_alive(sctp, fp);
3944 				sctp_input_asconf_ack(sctp, ch, fp);
3945 				BUMP_LOCAL(sctp->sctp_ibchunks);
3946 				break;
3947 			case CHUNK_FORWARD_TSN:
3948 				ASSERT(fp != NULL);
3949 				sctp->sctp_lastdata = fp;
3950 				sctp_process_forward_tsn(sctp, ch, fp,
3951 				    &ipp, ira);
3952 				gotdata = 1;
3953 				BUMP_LOCAL(sctp->sctp_ibchunks);
3954 				break;
3955 			default:
3956 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
3957 					goto nomorechunks;
3958 				} /* else skip and continue processing */
3959 				break;
3960 			}
3961 			break;
3962 
3963 		case SCTPS_LISTEN:
3964 			switch (ch->sch_id) {
3965 			case CHUNK_INIT:
3966 				sctp_send_initack(sctp, sctph, ch, mp, ira);
3967 				break;
3968 			case CHUNK_COOKIE: {
3969 				sctp_t *eager;
3970 
3971 				if (sctp_process_cookie(sctp, ch, mp, &iack,
3972 				    sctph, &recv_adaptation, &peer_src,
3973 				    ira) == -1) {
3974 					SCTPS_BUMP_MIB(sctps,
3975 					    sctpInInvalidCookie);
3976 					goto done;
3977 				}
3978 
3979 				/*
3980 				 * The cookie is good; ensure that
3981 				 * the peer used the verification
3982 				 * tag from the init ack in the header.
3983 				 */
3984 				if (iack->sic_inittag != sctph->sh_verf)
3985 					goto done;
3986 
3987 				eager = sctp_conn_request(sctp, mp, ifindex,
3988 				    ip_hdr_len, iack, ira);
3989 				if (eager == NULL) {
3990 					sctp_send_abort(sctp, sctph->sh_verf,
3991 					    SCTP_ERR_NO_RESOURCES, NULL, 0, mp,
3992 					    0, B_FALSE, ira);
3993 					goto done;
3994 				}
3995 
3996 				/*
3997 				 * If there were extra chunks
3998 				 * bundled with the cookie,
3999 				 * they must be processed
4000 				 * on the eager's queue. We
4001 				 * accomplish this by refeeding
4002 				 * the whole packet into the
4003 				 * state machine on the right
4004 				 * q. The packet (mp) gets
4005 				 * there via the eager's
4006 				 * cookie_mp field (overloaded
4007 				 * with the active open role).
4008 				 * This is picked up when
4009 				 * processing the null bind
4010 				 * request put on the eager's
4011 				 * q by sctp_accept(). We must
4012 				 * first revert the cookie
4013 				 * chunk's length field to network
4014 				 * byteorder so it can be
4015 				 * properly reprocessed on the
4016 				 * eager's queue.
4017 				 */
4018 				SCTPS_BUMP_MIB(sctps, sctpPassiveEstab);
4019 				if (mlen > ntohs(ch->sch_len)) {
4020 					eager->sctp_cookie_mp = dupb(mp);
4021 					/*
4022 					 * If no mem, just let
4023 					 * the peer retransmit.
4024 					 */
4025 				}
4026 				sctp_assoc_event(eager, SCTP_COMM_UP, 0, NULL);
4027 				if (recv_adaptation) {
4028 					eager->sctp_recv_adaptation = 1;
4029 					eager->sctp_rx_adaptation_code =
4030 					    sctp->sctp_rx_adaptation_code;
4031 					sctp_adaptation_event(eager);
4032 				}
4033 
4034 				eager->sctp_active = now;
4035 				sctp_send_cookie_ack(eager);
4036 
4037 				wake_eager = B_TRUE;
4038 
4039 				/*
4040 				 * Process rest of the chunks with eager.
4041 				 */
4042 				sctp = eager;
4043 				fp = sctp_lookup_faddr(sctp, &peer_src);
4044 				/*
4045 				 * Confirm peer's original source.  fp can
4046 				 * only be NULL if peer does not use the
4047 				 * original source as one of its addresses...
4048 				 */
4049 				if (fp == NULL)
4050 					fp = sctp_lookup_faddr(sctp, &src);
4051 				else
4052 					sctp_faddr_alive(sctp, fp);
4053 
4054 				/*
4055 				 * Validate the peer addresses.  It also starts
4056 				 * the heartbeat timer.
4057 				 */
4058 				sctp_validate_peer(sctp);
4059 				break;
4060 			}
4061 			/* Anything else is considered out-of-the-blue */
4062 			case CHUNK_ERROR:
4063 			case CHUNK_ABORT:
4064 			case CHUNK_COOKIE_ACK:
4065 			case CHUNK_SHUTDOWN_COMPLETE:
4066 				BUMP_LOCAL(sctp->sctp_ibchunks);
4067 				goto done;
4068 			default:
4069 				BUMP_LOCAL(sctp->sctp_ibchunks);
4070 				sctp_send_abort(sctp, sctph->sh_verf, 0, NULL,
4071 				    0, mp, 0, B_TRUE, ira);
4072 				goto done;
4073 			}
4074 			break;
4075 
4076 		case SCTPS_COOKIE_WAIT:
4077 			switch (ch->sch_id) {
4078 			case CHUNK_INIT_ACK:
4079 				sctp_stop_faddr_timers(sctp);
4080 				sctp_faddr_alive(sctp, sctp->sctp_current);
4081 				sctp_send_cookie_echo(sctp, ch, mp, ira);
4082 				BUMP_LOCAL(sctp->sctp_ibchunks);
4083 				break;
4084 			case CHUNK_ABORT:
4085 				sctp_process_abort(sctp, ch, ECONNREFUSED);
4086 				goto done;
4087 			case CHUNK_INIT:
4088 				sctp_send_initack(sctp, sctph, ch, mp, ira);
4089 				break;
4090 			case CHUNK_COOKIE:
4091 				cr = ira->ira_cred;
4092 				cpid = ira->ira_cpid;
4093 
4094 				if (sctp_process_cookie(sctp, ch, mp, &iack,
4095 				    sctph, &recv_adaptation, NULL, ira) == -1) {
4096 					SCTPS_BUMP_MIB(sctps,
4097 					    sctpInInvalidCookie);
4098 					break;
4099 				}
4100 				sctp_send_cookie_ack(sctp);
4101 				sctp_stop_faddr_timers(sctp);
4102 				if (!SCTP_IS_DETACHED(sctp)) {
4103 					sctp->sctp_ulp_connected(
4104 					    sctp->sctp_ulpd, 0, cr, cpid);
4105 					sctp_set_ulp_prop(sctp);
4106 
4107 				}
4108 				SCTP_ASSOC_EST(sctps, sctp);
4109 				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4110 				if (sctp->sctp_cookie_mp) {
4111 					freemsg(sctp->sctp_cookie_mp);
4112 					sctp->sctp_cookie_mp = NULL;
4113 				}
4114 
4115 				/* Validate the peer addresses. */
4116 				sctp->sctp_active = now;
4117 				sctp_validate_peer(sctp);
4118 
4119 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4120 				if (recv_adaptation) {
4121 					sctp->sctp_recv_adaptation = 1;
4122 					sctp_adaptation_event(sctp);
4123 				}
4124 				/* Try sending queued data, or ASCONFs */
4125 				trysend = 1;
4126 				break;
4127 			default:
4128 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4129 					goto nomorechunks;
4130 				} /* else skip and continue processing */
4131 				break;
4132 			}
4133 			break;
4134 
4135 		case SCTPS_COOKIE_ECHOED:
4136 			switch (ch->sch_id) {
4137 			case CHUNK_COOKIE_ACK:
4138 				cr = ira->ira_cred;
4139 				cpid = ira->ira_cpid;
4140 
4141 				if (!SCTP_IS_DETACHED(sctp)) {
4142 					sctp->sctp_ulp_connected(
4143 					    sctp->sctp_ulpd, 0, cr, cpid);
4144 					sctp_set_ulp_prop(sctp);
4145 				}
4146 				if (sctp->sctp_unacked == 0)
4147 					sctp_stop_faddr_timers(sctp);
4148 				SCTP_ASSOC_EST(sctps, sctp);
4149 				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4150 				BUMP_LOCAL(sctp->sctp_ibchunks);
4151 				if (sctp->sctp_cookie_mp) {
4152 					freemsg(sctp->sctp_cookie_mp);
4153 					sctp->sctp_cookie_mp = NULL;
4154 				}
4155 				sctp_faddr_alive(sctp, fp);
4156 				/* Validate the peer addresses. */
4157 				sctp->sctp_active = now;
4158 				sctp_validate_peer(sctp);
4159 
4160 				/* Try sending queued data, or ASCONFs */
4161 				trysend = 1;
4162 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4163 				sctp_adaptation_event(sctp);
4164 				break;
4165 			case CHUNK_ABORT:
4166 				sctp_process_abort(sctp, ch, ECONNREFUSED);
4167 				goto done;
4168 			case CHUNK_COOKIE:
4169 				cr = ira->ira_cred;
4170 				cpid = ira->ira_cpid;
4171 
4172 				if (sctp_process_cookie(sctp, ch, mp, &iack,
4173 				    sctph, &recv_adaptation, NULL, ira) == -1) {
4174 					SCTPS_BUMP_MIB(sctps,
4175 					    sctpInInvalidCookie);
4176 					break;
4177 				}
4178 				sctp_send_cookie_ack(sctp);
4179 
4180 				if (!SCTP_IS_DETACHED(sctp)) {
4181 					sctp->sctp_ulp_connected(
4182 					    sctp->sctp_ulpd, 0, cr, cpid);
4183 					sctp_set_ulp_prop(sctp);
4184 
4185 				}
4186 				if (sctp->sctp_unacked == 0)
4187 					sctp_stop_faddr_timers(sctp);
4188 				SCTP_ASSOC_EST(sctps, sctp);
4189 				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4190 				if (sctp->sctp_cookie_mp) {
4191 					freemsg(sctp->sctp_cookie_mp);
4192 					sctp->sctp_cookie_mp = NULL;
4193 				}
4194 				/* Validate the peer addresses. */
4195 				sctp->sctp_active = now;
4196 				sctp_validate_peer(sctp);
4197 
4198 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4199 				if (recv_adaptation) {
4200 					sctp->sctp_recv_adaptation = 1;
4201 					sctp_adaptation_event(sctp);
4202 				}
4203 				/* Try sending queued data, or ASCONFs */
4204 				trysend = 1;
4205 				break;
4206 			case CHUNK_INIT:
4207 				sctp_send_initack(sctp, sctph, ch, mp, ira);
4208 				break;
4209 			case CHUNK_ERROR: {
4210 				sctp_parm_hdr_t *p;
4211 
4212 				BUMP_LOCAL(sctp->sctp_ibchunks);
4213 				/* check for a stale cookie */
4214 				if (ntohs(ch->sch_len) >=
4215 				    (sizeof (*p) + sizeof (*ch)) +
4216 				    sizeof (uint32_t)) {
4217 
4218 					p = (sctp_parm_hdr_t *)(ch + 1);
4219 					if (p->sph_type ==
4220 					    htons(SCTP_ERR_STALE_COOKIE)) {
4221 						SCTPS_BUMP_MIB(sctps,
4222 						    sctpAborted);
4223 						sctp_error_event(sctp,
4224 						    ch, B_FALSE);
4225 						sctp_assoc_event(sctp,
4226 						    SCTP_COMM_LOST, 0, NULL);
4227 						sctp_clean_death(sctp,
4228 						    ECONNREFUSED);
4229 						goto done;
4230 					}
4231 				}
4232 				break;
4233 			}
4234 			case CHUNK_HEARTBEAT:
4235 				if (!hb_already) {
4236 					sctp_return_heartbeat(sctp, ch, mp);
4237 					hb_already = B_TRUE;
4238 				}
4239 				break;
4240 			default:
4241 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4242 					goto nomorechunks;
4243 				} /* else skip and continue processing */
4244 			} /* switch (ch->sch_id) */
4245 			break;
4246 
4247 		case SCTPS_SHUTDOWN_ACK_SENT:
4248 			switch (ch->sch_id) {
4249 			case CHUNK_ABORT:
4250 				/* Pass gathered wisdom to IP for keeping */
4251 				sctp_update_dce(sctp);
4252 				sctp_process_abort(sctp, ch, 0);
4253 				goto done;
4254 			case CHUNK_SHUTDOWN_COMPLETE:
4255 				BUMP_LOCAL(sctp->sctp_ibchunks);
4256 				SCTPS_BUMP_MIB(sctps, sctpShutdowns);
4257 				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
4258 				    NULL);
4259 
4260 				/* Pass gathered wisdom to IP for keeping */
4261 				sctp_update_dce(sctp);
4262 				sctp_clean_death(sctp, 0);
4263 				goto done;
4264 			case CHUNK_SHUTDOWN_ACK:
4265 				sctp_shutdown_complete(sctp);
4266 				BUMP_LOCAL(sctp->sctp_ibchunks);
4267 				SCTPS_BUMP_MIB(sctps, sctpShutdowns);
4268 				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
4269 				    NULL);
4270 				sctp_clean_death(sctp, 0);
4271 				goto done;
4272 			case CHUNK_COOKIE:
4273 				(void) sctp_shutdown_received(sctp, NULL,
4274 				    B_TRUE, B_FALSE, fp);
4275 				BUMP_LOCAL(sctp->sctp_ibchunks);
4276 				break;
4277 			case CHUNK_HEARTBEAT:
4278 				if (!hb_already) {
4279 					sctp_return_heartbeat(sctp, ch, mp);
4280 					hb_already = B_TRUE;
4281 				}
4282 				break;
4283 			default:
4284 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4285 					goto nomorechunks;
4286 				} /* else skip and continue processing */
4287 				break;
4288 			}
4289 			break;
4290 
4291 		case SCTPS_SHUTDOWN_RECEIVED:
4292 			switch (ch->sch_id) {
4293 			case CHUNK_SHUTDOWN:
4294 				trysend = sctp_shutdown_received(sctp, ch,
4295 				    B_FALSE, B_FALSE, fp);
4296 				/*
4297 				 * shutdown_ack_needed may have been set as
4298 				 * mentioned in the case CHUNK_SACK below.
4299 				 * If sctp_shutdown_received() above found
4300 				 * the xmit queue empty the SHUTDOWN ACK chunk
4301 				 * has already been sent (or scheduled to be
4302 				 * sent on the timer) and the SCTP state
4303 				 * changed, so reset shutdown_ack_needed.
4304 				 */
4305 				if (shutdown_ack_needed && (sctp->sctp_state ==
4306 				    SCTPS_SHUTDOWN_ACK_SENT))
4307 					shutdown_ack_needed = B_FALSE;
4308 				break;
4309 			case CHUNK_SACK:
4310 				trysend = sctp_got_sack(sctp, ch);
4311 				if (trysend < 0) {
4312 					sctp_send_abort(sctp, sctph->sh_verf,
4313 					    0, NULL, 0, mp, 0, B_FALSE, ira);
4314 					sctp_assoc_event(sctp,
4315 					    SCTP_COMM_LOST, 0, NULL);
4316 					sctp_clean_death(sctp,
4317 					    ECONNABORTED);
4318 					goto done;
4319 				}
4320 
4321 				/*
4322 				 * All data acknowledgement after a shutdown
4323 				 * should be done with SHUTDOWN chunk.
4324 				 * However some peer SCTP do not conform with
4325 				 * this and can unexpectedly send a SACK chunk.
4326 				 * If all data are acknowledged, set
4327 				 * shutdown_ack_needed here indicating that
4328 				 * SHUTDOWN ACK needs to be sent later by
4329 				 * sctp_send_shutdown_ack().
4330 				 */
4331 				if ((sctp->sctp_xmit_head == NULL) &&
4332 				    (sctp->sctp_xmit_unsent == NULL))
4333 					shutdown_ack_needed = B_TRUE;
4334 				break;
4335 			case CHUNK_ABORT:
4336 				sctp_process_abort(sctp, ch, ECONNRESET);
4337 				goto done;
4338 			case CHUNK_HEARTBEAT:
4339 				if (!hb_already) {
4340 					sctp_return_heartbeat(sctp, ch, mp);
4341 					hb_already = B_TRUE;
4342 				}
4343 				break;
4344 			default:
4345 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4346 					goto nomorechunks;
4347 				} /* else skip and continue processing */
4348 				break;
4349 			}
4350 			break;
4351 
4352 		default:
4353 			/*
4354 			 * The only remaining states are SCTPS_IDLE and
4355 			 * SCTPS_BOUND, and we should not be getting here
4356 			 * for these.
4357 			 */
4358 			ASSERT(0);
4359 		} /* switch (sctp->sctp_state) */
4360 
4361 		ch = sctp_next_chunk(ch, &mlen);
4362 		if (ch != NULL && !sctp_check_input(sctp, ch, mlen, 0))
4363 			goto done;
4364 	} while (ch != NULL);
4365 
4366 	/* Finished processing all chunks in packet */
4367 
4368 nomorechunks:
4369 
4370 	if (shutdown_ack_needed)
4371 		sctp_send_shutdown_ack(sctp, fp, B_FALSE);
4372 
4373 	/* SACK if necessary */
4374 	if (gotdata) {
4375 		boolean_t sack_sent;
4376 
4377 		(sctp->sctp_sack_toggle)++;
4378 		sack_sent = sctp_sack(sctp, dups);
4379 		dups = NULL;
4380 
4381 		/* If a SACK is sent, no need to restart the timer. */
4382 		if (!sack_sent && !sctp->sctp_ack_timer_running) {
4383 			sctp->sctp_ack_timer_running = B_TRUE;
4384 			sctp_timer(sctp, sctp->sctp_ack_mp,
4385 			    MSEC_TO_TICK(sctps->sctps_deferred_ack_interval));
4386 		}
4387 	}
4388 
4389 	if (trysend) {
4390 		sctp_output(sctp, UINT_MAX);
4391 		if (sctp->sctp_cxmit_list != NULL)
4392 			sctp_wput_asconf(sctp, NULL);
4393 	}
4394 	/*
4395 	 * If there is unsent data, make sure a timer is running, check
4396 	 * timer_mp, if sctp_closei_local() ran the timers may be free.
4397 	 */
4398 	if (sctp->sctp_unsent > 0 && !sctp->sctp_current->sf_timer_running &&
4399 	    sctp->sctp_current->sf_timer_mp != NULL) {
4400 		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
4401 		    sctp->sctp_current->sf_rto);
4402 	}
4403 
4404 done:
4405 	if (dups != NULL)
4406 		freeb(dups);
4407 	freemsg(mp);
4408 
4409 	if (sctp->sctp_err_chunks != NULL)
4410 		sctp_process_err(sctp);
4411 
4412 	if (wake_eager) {
4413 		/*
4414 		 * sctp points to newly created control block, need to
4415 		 * release it before exiting.
4416 		 */
4417 		WAKE_SCTP(sctp);
4418 	}
4419 }
4420 
4421 /*
4422  * Some amount of data got removed from ULP's receive queue and we can
4423  * push messages up if we are flow controlled before.  Reset the receive
4424  * window to full capacity (conn_rcvbuf) and check if we should send a
4425  * window update.
4426  */
4427 void
4428 sctp_recvd(sctp_t *sctp, int len)
4429 {
4430 	sctp_stack_t	*sctps = sctp->sctp_sctps;
4431 	conn_t		*connp = sctp->sctp_connp;
4432 	boolean_t	send_sack = B_FALSE;
4433 
4434 	ASSERT(sctp != NULL);
4435 	RUN_SCTP(sctp);
4436 
4437 	sctp->sctp_flowctrld = B_FALSE;
4438 	/* This is the amount of data queued in ULP. */
4439 	sctp->sctp_ulp_rxqueued = connp->conn_rcvbuf - len;
4440 
4441 	if (connp->conn_rcvbuf - sctp->sctp_arwnd >= sctp->sctp_mss)
4442 		send_sack = B_TRUE;
4443 	sctp->sctp_rwnd = connp->conn_rcvbuf;
4444 
4445 	if (sctp->sctp_state >= SCTPS_ESTABLISHED && send_sack) {
4446 		sctp->sctp_force_sack = 1;
4447 		SCTPS_BUMP_MIB(sctps, sctpOutWinUpdate);
4448 		(void) sctp_sack(sctp, NULL);
4449 	}
4450 	WAKE_SCTP(sctp);
4451 }
4452