xref: /illumos-gate/usr/src/uts/common/inet/sctp/sctp_input.c (revision 1ed6b69a5ca1ca3ee5e9a4931f74e2237c7e1c9f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #include <sys/kmem.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/socket.h>
34 #include <sys/strsun.h>
35 #include <sys/strsubr.h>
36 
37 #include <netinet/in.h>
38 #include <netinet/ip6.h>
39 #include <netinet/tcp_seq.h>
40 #include <netinet/sctp.h>
41 
42 #include <inet/common.h>
43 #include <inet/ip.h>
44 #include <inet/ip_if.h>
45 #include <inet/ip6.h>
46 #include <inet/mib2.h>
47 #include <inet/ipclassifier.h>
48 #include <inet/ipp_common.h>
49 #include <inet/ipsec_impl.h>
50 #include <inet/sctp_ip.h>
51 
52 #include "sctp_impl.h"
53 #include "sctp_asconf.h"
54 #include "sctp_addr.h"
55 
56 static struct kmem_cache *sctp_kmem_set_cache;
57 
/*
 * PR-SCTP comments.
 *
 * When we get a valid Forward TSN chunk, we check the fragment list for this
 * SSN and preceding SSNs and free all of them. Further, if this Forward TSN
 * causes the next expected SSN to be present in the stream queue, we deliver
 * any such stranded messages upstream. We also update the SACK info
 * appropriately. When checking for advancing the cumulative ack (in
 * sctp_cumack()) we must check for abandoned chunks and messages. While
 * traversing the transmit list, if we come across an abandoned chunk, we can
 * skip the message (i.e. take it out of the (re)transmit list) since this
 * message, and hence this chunk, has been marked abandoned by sctp_rexmit().
 * If we come across an unsent chunk for a message that is now abandoned, we
 * need to check if a Forward TSN needs to be sent; this could be a case where
 * we deferred sending a Forward TSN in sctp_get_msg_to_send(). Further, after
 * processing a SACK we check if the Advanced peer ack point can be moved
 * ahead, i.e. if we can send a Forward TSN via sctp_check_abandoned_data().
 */
76 void
77 sctp_free_set(sctp_set_t *s)
78 {
79 	sctp_set_t *p;
80 
81 	while (s) {
82 		p = s->next;
83 		kmem_cache_free(sctp_kmem_set_cache, s);
84 		s = p;
85 	}
86 }
87 
88 static void
89 sctp_ack_add(sctp_set_t **head, uint32_t tsn, int *num)
90 {
91 	sctp_set_t *p, *t;
92 
93 	if (head == NULL || num == NULL)
94 		return;
95 
96 	ASSERT(*num >= 0);
97 	ASSERT((*num == 0 && *head == NULL) || (*num > 0 && *head != NULL));
98 
99 	if (*head == NULL) {
100 		*head = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
101 		if (*head == NULL)
102 			return;
103 		(*head)->prev = (*head)->next = NULL;
104 		(*head)->begin = tsn;
105 		(*head)->end = tsn;
106 		*num = 1;
107 		return;
108 	}
109 
110 	ASSERT((*head)->prev == NULL);
111 
112 	/*
113 	 * Handle this special case here so we don't have to check
114 	 * for it each time in the loop.
115 	 */
116 	if (SEQ_LT(tsn + 1, (*head)->begin)) {
117 		/* add a new set, and move the head pointer */
118 		t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
119 		if (t == NULL)
120 			return;
121 		t->next = *head;
122 		t->prev = NULL;
123 		(*head)->prev = t;
124 		t->begin = tsn;
125 		t->end = tsn;
126 		(*num)++;
127 		*head = t;
128 		return;
129 	}
130 
131 	/*
132 	 * We need to handle the following cases, where p points to
133 	 * the current set (as we walk through the loop):
134 	 *
135 	 * 1. tsn is entirely less than p; create a new set before p.
136 	 * 2. tsn borders p from less; coalesce p with tsn.
137 	 * 3. tsn is withing p; do nothing.
138 	 * 4. tsn borders p from greater; coalesce p with tsn.
139 	 * 4a. p may now border p->next from less; if so, coalesce those
140 	 *    two sets.
141 	 * 5. tsn is entirely greater then all sets; add a new set at
142 	 *    the end.
143 	 */
144 	for (p = *head; ; p = p->next) {
145 		if (SEQ_LT(tsn + 1, p->begin)) {
146 			/* 1: add a new set before p. */
147 			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
148 			if (t == NULL)
149 				return;
150 			t->next = p;
151 			t->prev = NULL;
152 			t->begin = tsn;
153 			t->end = tsn;
154 			if (p->prev) {
155 				t->prev = p->prev;
156 				p->prev->next = t;
157 			}
158 			p->prev = t;
159 			(*num)++;
160 			return;
161 		}
162 
163 		if ((tsn + 1) == p->begin) {
164 			/* 2: adjust p->begin */
165 			p->begin = tsn;
166 			return;
167 		}
168 
169 		if (SEQ_GEQ(tsn, p->begin) && SEQ_LEQ(tsn, p->end)) {
170 			/* 3; do nothing */
171 			return;
172 		}
173 
174 		if ((p->end + 1) == tsn) {
175 			/* 4; adjust p->end */
176 			p->end = tsn;
177 
178 			if (p->next != NULL && (tsn + 1) == p->next->begin) {
179 				/* 4a: coalesce p and p->next */
180 				t = p->next;
181 				p->end = t->end;
182 				p->next = t->next;
183 				if (t->next != NULL)
184 					t->next->prev = p;
185 				kmem_cache_free(sctp_kmem_set_cache, t);
186 				(*num)--;
187 			}
188 			return;
189 		}
190 
191 		if (p->next == NULL) {
192 			/* 5: add new set at the end */
193 			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
194 			if (t == NULL)
195 				return;
196 			t->next = NULL;
197 			t->prev = p;
198 			t->begin = tsn;
199 			t->end = tsn;
200 			p->next = t;
201 			(*num)++;
202 			return;
203 		}
204 
205 		if (SEQ_GT(tsn, p->end + 1))
206 			continue;
207 	}
208 }
209 
210 static void
211 sctp_ack_rem(sctp_set_t **head, uint32_t end, int *num)
212 {
213 	sctp_set_t *p, *t;
214 
215 	if (head == NULL || *head == NULL || num == NULL)
216 		return;
217 
218 	/* Nothing to remove */
219 	if (SEQ_LT(end, (*head)->begin))
220 		return;
221 
222 	/* Find out where to start removing sets */
223 	for (p = *head; p->next; p = p->next) {
224 		if (SEQ_LEQ(end, p->end))
225 			break;
226 	}
227 
228 	if (SEQ_LT(end, p->end) && SEQ_GEQ(end, p->begin)) {
229 		/* adjust p */
230 		p->begin = end + 1;
231 		/* all done */
232 		if (p == *head)
233 			return;
234 	} else if (SEQ_GEQ(end, p->end)) {
235 		/* remove this set too */
236 		p = p->next;
237 	}
238 
239 	/* unlink everything before this set */
240 	t = *head;
241 	*head = p;
242 	if (p != NULL && p->prev != NULL) {
243 		p->prev->next = NULL;
244 		p->prev = NULL;
245 	}
246 
247 	sctp_free_set(t);
248 
249 	/* recount the number of sets */
250 	*num = 0;
251 
252 	for (p = *head; p != NULL; p = p->next)
253 		(*num)++;
254 }
255 
256 void
257 sctp_sets_init()
258 {
259 	sctp_kmem_set_cache = kmem_cache_create("sctp_set_cache",
260 	    sizeof (sctp_set_t), 0, NULL, NULL, NULL, NULL,
261 	    NULL, 0);
262 }
263 
264 void
265 sctp_sets_fini()
266 {
267 	kmem_cache_destroy(sctp_kmem_set_cache);
268 }
269 
270 sctp_chunk_hdr_t *
271 sctp_first_chunk(uchar_t *rptr, ssize_t remaining)
272 {
273 	sctp_chunk_hdr_t *ch;
274 	uint16_t ch_len;
275 
276 	if (remaining < sizeof (*ch)) {
277 		return (NULL);
278 	}
279 
280 	ch = (sctp_chunk_hdr_t *)rptr;
281 	ch_len = ntohs(ch->sch_len);
282 
283 	if (ch_len < sizeof (*ch) || remaining < ch_len) {
284 		return (NULL);
285 	}
286 
287 	return (ch);
288 }
289 
290 sctp_chunk_hdr_t *
291 sctp_next_chunk(sctp_chunk_hdr_t *ch, ssize_t *remaining)
292 {
293 	int pad;
294 	uint16_t ch_len;
295 
296 	if (!ch) {
297 		return (NULL);
298 	}
299 
300 	ch_len = ntohs(ch->sch_len);
301 
302 	if ((pad = ch_len & (SCTP_ALIGN - 1)) != 0) {
303 		pad = SCTP_ALIGN - pad;
304 	}
305 
306 	*remaining -= (ch_len + pad);
307 	ch = (sctp_chunk_hdr_t *)((char *)ch + ch_len + pad);
308 
309 	return (sctp_first_chunk((uchar_t *)ch, *remaining));
310 }
311 
312 /*
313  * Attach ancillary data to a received SCTP segments.
314  * If the source address (fp) is not the primary, send up a
315  * unitdata_ind so recvfrom() can populate the msg_name field.
316  * If ancillary data is also requested, we append it to the
317  * unitdata_req. Otherwise, we just send up an optdata_ind.
318  */
319 static int
320 sctp_input_add_ancillary(sctp_t *sctp, mblk_t **mp, sctp_data_hdr_t *dcp,
321     sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
322 {
323 	struct T_unitdata_ind	*tudi;
324 	int			optlen;
325 	int			hdrlen;
326 	uchar_t			*optptr;
327 	struct cmsghdr		*cmsg;
328 	mblk_t			*mp1;
329 	struct sockaddr_in6	sin_buf[1];
330 	struct sockaddr_in6	*sin6;
331 	struct sockaddr_in	*sin4;
332 	crb_t			 addflag;	/* Which pieces to add */
333 	conn_t			*connp = sctp->sctp_connp;
334 
335 	sin4 = NULL;
336 	sin6 = NULL;
337 
338 	optlen = hdrlen = 0;
339 	addflag.crb_all = 0;
340 
341 	/* Figure out address size */
342 	if (connp->conn_family == AF_INET) {
343 		sin4 = (struct sockaddr_in *)sin_buf;
344 		sin4->sin_family = AF_INET;
345 		sin4->sin_port = connp->conn_fport;
346 		IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, sin4->sin_addr.s_addr);
347 		hdrlen = sizeof (*tudi) + sizeof (*sin4);
348 	} else {
349 		sin6 = sin_buf;
350 		sin6->sin6_family = AF_INET6;
351 		sin6->sin6_port = connp->conn_fport;
352 		sin6->sin6_addr = fp->sf_faddr;
353 		hdrlen = sizeof (*tudi) + sizeof (*sin6);
354 	}
355 	/* If app asked to receive send / recv info */
356 	if (sctp->sctp_recvsndrcvinfo)
357 		optlen += sizeof (*cmsg) + sizeof (struct sctp_sndrcvinfo);
358 
359 	if (connp->conn_recv_ancillary.crb_all == 0)
360 		goto noancillary;
361 
362 	if (connp->conn_recv_ancillary.crb_ip_recvpktinfo &&
363 	    ira->ira_ruifindex != sctp->sctp_recvifindex) {
364 		optlen += sizeof (*cmsg) + sizeof (struct in6_pktinfo);
365 		if (hdrlen == 0)
366 			hdrlen = sizeof (struct T_unitdata_ind);
367 		addflag.crb_ip_recvpktinfo = 1;
368 	}
369 	/* If app asked for hoplimit and it has changed ... */
370 	if (connp->conn_recv_ancillary.crb_ipv6_recvhoplimit &&
371 	    ipp->ipp_hoplimit != sctp->sctp_recvhops) {
372 		optlen += sizeof (*cmsg) + sizeof (uint_t);
373 		if (hdrlen == 0)
374 			hdrlen = sizeof (struct T_unitdata_ind);
375 		addflag.crb_ipv6_recvhoplimit = 1;
376 	}
377 	/* If app asked for tclass and it has changed ... */
378 	if (connp->conn_recv_ancillary.crb_ipv6_recvtclass &&
379 	    ipp->ipp_tclass != sctp->sctp_recvtclass) {
380 		optlen += sizeof (struct T_opthdr) + sizeof (uint_t);
381 		if (hdrlen == 0)
382 			hdrlen = sizeof (struct T_unitdata_ind);
383 		addflag.crb_ipv6_recvtclass = 1;
384 	}
385 	/* If app asked for hopbyhop headers and it has changed ... */
386 	if (connp->conn_recv_ancillary.crb_ipv6_recvhopopts &&
387 	    ip_cmpbuf(sctp->sctp_hopopts, sctp->sctp_hopoptslen,
388 	    (ipp->ipp_fields & IPPF_HOPOPTS),
389 	    ipp->ipp_hopopts, ipp->ipp_hopoptslen)) {
390 		optlen += sizeof (*cmsg) + ipp->ipp_hopoptslen -
391 		    sctp->sctp_v6label_len;
392 		if (hdrlen == 0)
393 			hdrlen = sizeof (struct T_unitdata_ind);
394 		addflag.crb_ipv6_recvhopopts = 1;
395 		if (!ip_allocbuf((void **)&sctp->sctp_hopopts,
396 		    &sctp->sctp_hopoptslen,
397 		    (ipp->ipp_fields & IPPF_HOPOPTS),
398 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen))
399 			return (-1);
400 	}
401 	/* If app asked for dst headers before routing headers ... */
402 	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts &&
403 	    ip_cmpbuf(sctp->sctp_rthdrdstopts, sctp->sctp_rthdrdstoptslen,
404 	    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
405 	    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen)) {
406 		optlen += sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
407 		if (hdrlen == 0)
408 			hdrlen = sizeof (struct T_unitdata_ind);
409 		addflag.crb_ipv6_recvrthdrdstopts = 1;
410 		if (!ip_allocbuf((void **)&sctp->sctp_rthdrdstopts,
411 		    &sctp->sctp_rthdrdstoptslen,
412 		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
413 		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen))
414 			return (-1);
415 	}
416 	/* If app asked for routing headers and it has changed ... */
417 	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdr &&
418 	    ip_cmpbuf(sctp->sctp_rthdr, sctp->sctp_rthdrlen,
419 	    (ipp->ipp_fields & IPPF_RTHDR),
420 	    ipp->ipp_rthdr, ipp->ipp_rthdrlen)) {
421 		optlen += sizeof (*cmsg) + ipp->ipp_rthdrlen;
422 		if (hdrlen == 0)
423 			hdrlen = sizeof (struct T_unitdata_ind);
424 		addflag.crb_ipv6_recvrthdr = 1;
425 		if (!ip_allocbuf((void **)&sctp->sctp_rthdr,
426 		    &sctp->sctp_rthdrlen,
427 		    (ipp->ipp_fields & IPPF_RTHDR),
428 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen))
429 			return (-1);
430 	}
431 	/* If app asked for dest headers and it has changed ... */
432 	if (connp->conn_recv_ancillary.crb_ipv6_recvdstopts &&
433 	    ip_cmpbuf(sctp->sctp_dstopts, sctp->sctp_dstoptslen,
434 	    (ipp->ipp_fields & IPPF_DSTOPTS),
435 	    ipp->ipp_dstopts, ipp->ipp_dstoptslen)) {
436 		optlen += sizeof (*cmsg) + ipp->ipp_dstoptslen;
437 		if (hdrlen == 0)
438 			hdrlen = sizeof (struct T_unitdata_ind);
439 		addflag.crb_ipv6_recvdstopts = 1;
440 		if (!ip_allocbuf((void **)&sctp->sctp_dstopts,
441 		    &sctp->sctp_dstoptslen,
442 		    (ipp->ipp_fields & IPPF_DSTOPTS),
443 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen))
444 			return (-1);
445 	}
446 noancillary:
447 	/* Nothing to add */
448 	if (hdrlen == 0)
449 		return (-1);
450 
451 	mp1 = allocb(hdrlen + optlen + sizeof (void *), BPRI_MED);
452 	if (mp1 == NULL)
453 		return (-1);
454 	mp1->b_cont = *mp;
455 	*mp = mp1;
456 	mp1->b_rptr += sizeof (void *);  /* pointer worth of padding */
457 	mp1->b_wptr = mp1->b_rptr + hdrlen + optlen;
458 	DB_TYPE(mp1) = M_PROTO;
459 	tudi = (struct T_unitdata_ind *)mp1->b_rptr;
460 	tudi->PRIM_type = T_UNITDATA_IND;
461 	tudi->SRC_length = sin4 ? sizeof (*sin4) : sizeof (*sin6);
462 	tudi->SRC_offset = sizeof (*tudi);
463 	tudi->OPT_offset = sizeof (*tudi) + tudi->SRC_length;
464 	tudi->OPT_length = optlen;
465 	if (sin4) {
466 		bcopy(sin4, tudi + 1, sizeof (*sin4));
467 	} else {
468 		bcopy(sin6, tudi + 1, sizeof (*sin6));
469 	}
470 	optptr = (uchar_t *)tudi + tudi->OPT_offset;
471 
472 	if (sctp->sctp_recvsndrcvinfo) {
473 		/* XXX need backout method if memory allocation fails. */
474 		struct sctp_sndrcvinfo *sri;
475 
476 		cmsg = (struct cmsghdr *)optptr;
477 		cmsg->cmsg_level = IPPROTO_SCTP;
478 		cmsg->cmsg_type = SCTP_SNDRCV;
479 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*sri);
480 		optptr += sizeof (*cmsg);
481 
482 		sri = (struct sctp_sndrcvinfo *)(cmsg + 1);
483 		ASSERT(OK_32PTR(sri));
484 		sri->sinfo_stream = ntohs(dcp->sdh_sid);
485 		sri->sinfo_ssn = ntohs(dcp->sdh_ssn);
486 		if (SCTP_DATA_GET_UBIT(dcp)) {
487 			sri->sinfo_flags = MSG_UNORDERED;
488 		} else {
489 			sri->sinfo_flags = 0;
490 		}
491 		sri->sinfo_ppid = dcp->sdh_payload_id;
492 		sri->sinfo_context = 0;
493 		sri->sinfo_timetolive = 0;
494 		sri->sinfo_tsn = ntohl(dcp->sdh_tsn);
495 		sri->sinfo_cumtsn = sctp->sctp_ftsn;
496 		sri->sinfo_assoc_id = 0;
497 
498 		optptr += sizeof (*sri);
499 	}
500 
501 	/*
502 	 * If app asked for pktinfo and the index has changed ...
503 	 * Note that the local address never changes for the connection.
504 	 */
505 	if (addflag.crb_ip_recvpktinfo) {
506 		struct in6_pktinfo *pkti;
507 		uint_t ifindex;
508 
509 		ifindex = ira->ira_ruifindex;
510 		cmsg = (struct cmsghdr *)optptr;
511 		cmsg->cmsg_level = IPPROTO_IPV6;
512 		cmsg->cmsg_type = IPV6_PKTINFO;
513 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*pkti);
514 		optptr += sizeof (*cmsg);
515 
516 		pkti = (struct in6_pktinfo *)optptr;
517 		if (connp->conn_family == AF_INET6)
518 			pkti->ipi6_addr = sctp->sctp_ip6h->ip6_src;
519 		else
520 			IN6_IPADDR_TO_V4MAPPED(sctp->sctp_ipha->ipha_src,
521 			    &pkti->ipi6_addr);
522 
523 		pkti->ipi6_ifindex = ifindex;
524 		optptr += sizeof (*pkti);
525 		ASSERT(OK_32PTR(optptr));
526 		/* Save as "last" value */
527 		sctp->sctp_recvifindex = ifindex;
528 	}
529 	/* If app asked for hoplimit and it has changed ... */
530 	if (addflag.crb_ipv6_recvhoplimit) {
531 		cmsg = (struct cmsghdr *)optptr;
532 		cmsg->cmsg_level = IPPROTO_IPV6;
533 		cmsg->cmsg_type = IPV6_HOPLIMIT;
534 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
535 		optptr += sizeof (*cmsg);
536 
537 		*(uint_t *)optptr = ipp->ipp_hoplimit;
538 		optptr += sizeof (uint_t);
539 		ASSERT(OK_32PTR(optptr));
540 		/* Save as "last" value */
541 		sctp->sctp_recvhops = ipp->ipp_hoplimit;
542 	}
543 	/* If app asked for tclass and it has changed ... */
544 	if (addflag.crb_ipv6_recvtclass) {
545 		cmsg = (struct cmsghdr *)optptr;
546 		cmsg->cmsg_level = IPPROTO_IPV6;
547 		cmsg->cmsg_type = IPV6_TCLASS;
548 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
549 		optptr += sizeof (*cmsg);
550 
551 		*(uint_t *)optptr = ipp->ipp_tclass;
552 		optptr += sizeof (uint_t);
553 		ASSERT(OK_32PTR(optptr));
554 		/* Save as "last" value */
555 		sctp->sctp_recvtclass = ipp->ipp_tclass;
556 	}
557 	if (addflag.crb_ipv6_recvhopopts) {
558 		cmsg = (struct cmsghdr *)optptr;
559 		cmsg->cmsg_level = IPPROTO_IPV6;
560 		cmsg->cmsg_type = IPV6_HOPOPTS;
561 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_hopoptslen;
562 		optptr += sizeof (*cmsg);
563 
564 		bcopy(ipp->ipp_hopopts, optptr, ipp->ipp_hopoptslen);
565 		optptr += ipp->ipp_hopoptslen;
566 		ASSERT(OK_32PTR(optptr));
567 		/* Save as last value */
568 		ip_savebuf((void **)&sctp->sctp_hopopts,
569 		    &sctp->sctp_hopoptslen,
570 		    (ipp->ipp_fields & IPPF_HOPOPTS),
571 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen);
572 	}
573 	if (addflag.crb_ipv6_recvrthdrdstopts) {
574 		cmsg = (struct cmsghdr *)optptr;
575 		cmsg->cmsg_level = IPPROTO_IPV6;
576 		cmsg->cmsg_type = IPV6_RTHDRDSTOPTS;
577 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
578 		optptr += sizeof (*cmsg);
579 
580 		bcopy(ipp->ipp_rthdrdstopts, optptr, ipp->ipp_rthdrdstoptslen);
581 		optptr += ipp->ipp_rthdrdstoptslen;
582 		ASSERT(OK_32PTR(optptr));
583 		/* Save as last value */
584 		ip_savebuf((void **)&sctp->sctp_rthdrdstopts,
585 		    &sctp->sctp_rthdrdstoptslen,
586 		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
587 		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen);
588 	}
589 	if (addflag.crb_ipv6_recvrthdr) {
590 		cmsg = (struct cmsghdr *)optptr;
591 		cmsg->cmsg_level = IPPROTO_IPV6;
592 		cmsg->cmsg_type = IPV6_RTHDR;
593 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrlen;
594 		optptr += sizeof (*cmsg);
595 
596 		bcopy(ipp->ipp_rthdr, optptr, ipp->ipp_rthdrlen);
597 		optptr += ipp->ipp_rthdrlen;
598 		ASSERT(OK_32PTR(optptr));
599 		/* Save as last value */
600 		ip_savebuf((void **)&sctp->sctp_rthdr,
601 		    &sctp->sctp_rthdrlen,
602 		    (ipp->ipp_fields & IPPF_RTHDR),
603 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen);
604 	}
605 	if (addflag.crb_ipv6_recvdstopts) {
606 		cmsg = (struct cmsghdr *)optptr;
607 		cmsg->cmsg_level = IPPROTO_IPV6;
608 		cmsg->cmsg_type = IPV6_DSTOPTS;
609 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_dstoptslen;
610 		optptr += sizeof (*cmsg);
611 
612 		bcopy(ipp->ipp_dstopts, optptr, ipp->ipp_dstoptslen);
613 		optptr += ipp->ipp_dstoptslen;
614 		ASSERT(OK_32PTR(optptr));
615 		/* Save as last value */
616 		ip_savebuf((void **)&sctp->sctp_dstopts,
617 		    &sctp->sctp_dstoptslen,
618 		    (ipp->ipp_fields & IPPF_DSTOPTS),
619 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen);
620 	}
621 
622 	ASSERT(optptr == mp1->b_wptr);
623 
624 	return (0);
625 }
626 
627 void
628 sctp_free_reass(sctp_instr_t *sip)
629 {
630 	mblk_t *mp, *mpnext, *mctl;
631 #ifdef	DEBUG
632 	sctp_reass_t	*srp;
633 #endif
634 
635 	for (mp = sip->istr_reass; mp != NULL; mp = mpnext) {
636 		mpnext = mp->b_next;
637 		mp->b_next = NULL;
638 		mp->b_prev = NULL;
639 		if (DB_TYPE(mp) == M_CTL) {
640 			mctl = mp;
641 #ifdef	DEBUG
642 			srp = (sctp_reass_t *)DB_BASE(mctl);
643 			/* Partial delivery can leave empty srp */
644 			ASSERT(mp->b_cont != NULL || srp->sr_got == 0);
645 #endif
646 			mp = mp->b_cont;
647 			mctl->b_cont = NULL;
648 			freeb(mctl);
649 		}
650 		freemsg(mp);
651 	}
652 	sip->istr_reass = NULL;
653 }
654 
655 /*
656  * If the series of data fragments of which dmp is a part is successfully
657  * reassembled, the first mblk in the series is returned. dc is adjusted
658  * to point at the data chunk in the lead mblk, and b_rptr also points to
659  * the data chunk; the following mblk's b_rptr's point at the actual payload.
660  *
661  * If the series is not yet reassembled, NULL is returned. dc is not changed.
662  * XXX should probably move this up into the state machine.
663  */
664 
665 /* Fragment list for un-ordered messages. Partial delivery is not supported */
666 static mblk_t *
667 sctp_uodata_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc)
668 {
669 	mblk_t		*hmp;
670 	mblk_t		*begin = NULL;
671 	mblk_t		*end = NULL;
672 	sctp_data_hdr_t	*qdc;
673 	uint32_t	ntsn;
674 	uint32_t	tsn = ntohl((*dc)->sdh_tsn);
675 #ifdef	DEBUG
676 	mblk_t		*mp1;
677 #endif
678 
679 	/* First frag. */
680 	if (sctp->sctp_uo_frags == NULL) {
681 		sctp->sctp_uo_frags = dmp;
682 		return (NULL);
683 	}
684 	hmp = sctp->sctp_uo_frags;
685 	/*
686 	 * Insert the segment according to the TSN, fragmented unordered
687 	 * chunks are sequenced by TSN.
688 	 */
689 	while (hmp != NULL) {
690 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
691 		ntsn = ntohl(qdc->sdh_tsn);
692 		if (SEQ_GT(ntsn, tsn)) {
693 			if (hmp->b_prev == NULL) {
694 				dmp->b_next = hmp;
695 				hmp->b_prev = dmp;
696 				sctp->sctp_uo_frags = dmp;
697 			} else {
698 				dmp->b_next = hmp;
699 				dmp->b_prev = hmp->b_prev;
700 				hmp->b_prev->b_next = dmp;
701 				hmp->b_prev = dmp;
702 			}
703 			break;
704 		}
705 		if (hmp->b_next == NULL) {
706 			hmp->b_next = dmp;
707 			dmp->b_prev = hmp;
708 			break;
709 		}
710 		hmp = hmp->b_next;
711 	}
712 	/* check if we completed a msg */
713 	if (SCTP_DATA_GET_BBIT(*dc)) {
714 		begin = dmp;
715 	} else if (SCTP_DATA_GET_EBIT(*dc)) {
716 		end = dmp;
717 	}
718 	/*
719 	 * We walk consecutive TSNs backwards till we get a seg. with
720 	 * the B bit
721 	 */
722 	if (begin == NULL) {
723 		for (hmp = dmp->b_prev; hmp != NULL; hmp = hmp->b_prev) {
724 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
725 			ntsn = ntohl(qdc->sdh_tsn);
726 			if ((int32_t)(tsn - ntsn) > 1) {
727 				return (NULL);
728 			}
729 			if (SCTP_DATA_GET_BBIT(qdc)) {
730 				begin = hmp;
731 				break;
732 			}
733 			tsn = ntsn;
734 		}
735 	}
736 	tsn = ntohl((*dc)->sdh_tsn);
737 	/*
738 	 * We walk consecutive TSNs till we get a seg. with the E bit
739 	 */
740 	if (end == NULL) {
741 		for (hmp = dmp->b_next; hmp != NULL; hmp = hmp->b_next) {
742 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
743 			ntsn = ntohl(qdc->sdh_tsn);
744 			if ((int32_t)(ntsn - tsn) > 1) {
745 				return (NULL);
746 			}
747 			if (SCTP_DATA_GET_EBIT(qdc)) {
748 				end = hmp;
749 				break;
750 			}
751 			tsn = ntsn;
752 		}
753 	}
754 	if (begin == NULL || end == NULL) {
755 		return (NULL);
756 	}
757 	/* Got one!, Remove the msg from the list */
758 	if (sctp->sctp_uo_frags == begin) {
759 		ASSERT(begin->b_prev == NULL);
760 		sctp->sctp_uo_frags = end->b_next;
761 		if (end->b_next != NULL)
762 			end->b_next->b_prev = NULL;
763 	} else {
764 		begin->b_prev->b_next = end->b_next;
765 		if (end->b_next != NULL)
766 			end->b_next->b_prev = begin->b_prev;
767 	}
768 	begin->b_prev = NULL;
769 	end->b_next = NULL;
770 
771 	/*
772 	 * Null out b_next and b_prev and chain using b_cont.
773 	 */
774 	dmp = end = begin;
775 	hmp = begin->b_next;
776 	*dc = (sctp_data_hdr_t *)begin->b_rptr;
777 	begin->b_next = NULL;
778 	while (hmp != NULL) {
779 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
780 		hmp->b_rptr = (uchar_t *)(qdc + 1);
781 		end = hmp->b_next;
782 		dmp->b_cont = hmp;
783 		dmp = hmp;
784 
785 		if (end != NULL)
786 			hmp->b_next = NULL;
787 		hmp->b_prev = NULL;
788 		hmp = end;
789 	}
790 	BUMP_LOCAL(sctp->sctp_reassmsgs);
791 #ifdef	DEBUG
792 	mp1 = begin;
793 	while (mp1 != NULL) {
794 		ASSERT(mp1->b_next == NULL);
795 		ASSERT(mp1->b_prev == NULL);
796 		mp1 = mp1->b_cont;
797 	}
798 #endif
799 	return (begin);
800 }
801 
802 /*
803  * Try partial delivery.
804  */
805 static mblk_t *
806 sctp_try_partial_delivery(sctp_t *sctp, mblk_t *hmp, sctp_reass_t *srp,
807     sctp_data_hdr_t **dc)
808 {
809 	mblk_t		*mp;
810 	mblk_t		*dmp;
811 	mblk_t		*qmp;
812 	mblk_t		*prev;
813 	sctp_data_hdr_t	*qdc;
814 	uint32_t	tsn;
815 
816 	ASSERT(DB_TYPE(hmp) == M_CTL);
817 
818 	dprint(4, ("trypartial: got=%d, needed=%d\n",
819 	    (int)(srp->sr_got), (int)(srp->sr_needed)));
820 
821 	mp = hmp->b_cont;
822 	qdc = (sctp_data_hdr_t *)mp->b_rptr;
823 
824 	ASSERT(SCTP_DATA_GET_BBIT(qdc) && srp->sr_hasBchunk);
825 
826 	tsn = ntohl(qdc->sdh_tsn) + 1;
827 
828 	/*
829 	 * This loop has two exit conditions: the
830 	 * end of received chunks has been reached, or
831 	 * there is a break in the sequence. We want
832 	 * to chop the reassembly list as follows (the
833 	 * numbers are TSNs):
834 	 *   10 -> 11 -> 	(end of chunks)
835 	 *   10 -> 11 -> | 13   (break in sequence)
836 	 */
837 	prev = mp;
838 	mp = mp->b_cont;
839 	while (mp != NULL) {
840 		qdc = (sctp_data_hdr_t *)mp->b_rptr;
841 		if (ntohl(qdc->sdh_tsn) != tsn)
842 			break;
843 		prev = mp;
844 		mp = mp->b_cont;
845 		tsn++;
846 	}
847 	/*
848 	 * We are sending all the fragments upstream, we have to retain
849 	 * the srp info for further fragments.
850 	 */
851 	if (mp == NULL) {
852 		dmp = hmp->b_cont;
853 		hmp->b_cont = NULL;
854 		srp->sr_nexttsn = tsn;
855 		srp->sr_msglen = 0;
856 		srp->sr_needed = 0;
857 		srp->sr_got = 0;
858 		srp->sr_tail = NULL;
859 	} else {
860 		/*
861 		 * There is a gap then some ordered frags which are not
862 		 * the next deliverable tsn. When the next deliverable
863 		 * frag arrives it will be set as the new list head in
864 		 * sctp_data_frag() by setting the B bit.
865 		 */
866 		dmp = hmp->b_cont;
867 		hmp->b_cont = mp;
868 	}
869 	srp->sr_hasBchunk = B_FALSE;
870 	/*
871 	 * mp now points at the last chunk in the sequence,
872 	 * and prev points to mp's previous in the list.
873 	 * We chop the list at prev. Subsequent fragment
874 	 * deliveries will follow the normal reassembly
875 	 * path unless they too exceed the sctp_pd_point.
876 	 */
877 	prev->b_cont = NULL;
878 	srp->sr_partial_delivered = B_TRUE;
879 
880 	dprint(4, ("trypartial: got some, got=%d, needed=%d\n",
881 	    (int)(srp->sr_got), (int)(srp->sr_needed)));
882 
883 	/*
884 	 * Adjust all mblk's except the lead so their rptr's point to the
885 	 * payload. sctp_data_chunk() will need to process the lead's
886 	 * data chunk section, so leave it's rptr pointing at the data chunk.
887 	 */
888 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
889 	if (srp->sr_tail != NULL) {
890 		srp->sr_got--;
891 		ASSERT(srp->sr_got != 0);
892 		if (srp->sr_needed != 0) {
893 			srp->sr_needed--;
894 			ASSERT(srp->sr_needed != 0);
895 		}
896 		srp->sr_msglen -= ntohs((*dc)->sdh_len);
897 	}
898 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
899 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
900 		qmp->b_rptr = (uchar_t *)(qdc + 1);
901 
902 		/*
903 		 * Deduct the balance from got and needed here, now that
904 		 * we know we are actually delivering these data.
905 		 */
906 		if (srp->sr_tail != NULL) {
907 			srp->sr_got--;
908 			ASSERT(srp->sr_got != 0);
909 			if (srp->sr_needed != 0) {
910 				srp->sr_needed--;
911 				ASSERT(srp->sr_needed != 0);
912 			}
913 			srp->sr_msglen -= ntohs(qdc->sdh_len);
914 		}
915 	}
916 	ASSERT(srp->sr_msglen == 0);
917 	BUMP_LOCAL(sctp->sctp_reassmsgs);
918 
919 	return (dmp);
920 }
921 
922 /*
923  * Handle received fragments for ordered delivery to upper layer protocol.
924  * Manage the per message reassembly queue and if this fragment completes
925  * reassembly of the message, or qualifies the already reassembled data
926  * for partial delivery, prepare the message for delivery upstream.
927  *
928  * tpfinished in the caller remains set only when the incoming fragment
929  * has completed the reassembly of the message associated with its ssn.
930  */
931 static mblk_t *
932 sctp_data_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc, int *error,
933     sctp_instr_t *sip, boolean_t *tpfinished)
934 {
935 	mblk_t		*reassq_curr, *reassq_next, *reassq_prev;
936 	mblk_t		*new_reassq;
937 	mblk_t		*qmp;
938 	mblk_t		*first_mp;
939 	sctp_reass_t	*srp;
940 	sctp_data_hdr_t	*qdc;
941 	sctp_data_hdr_t	*bdc;
942 	sctp_data_hdr_t	*edc;
943 	uint32_t	tsn;
944 	uint16_t	fraglen = 0;
945 
946 	*error = 0;
947 
948 	/*
949 	 * Find the reassembly queue for this data chunk, if none
950 	 * yet exists, a new per message queue will be created and
951 	 * appended to the end of the list of per message queues.
952 	 *
953 	 * sip points on sctp_instr_t representing instream messages
954 	 * as yet undelivered for this stream (sid) of the association.
955 	 */
956 	reassq_next = reassq_prev = sip->istr_reass;
957 	for (; reassq_next != NULL; reassq_next = reassq_next->b_next) {
958 		srp = (sctp_reass_t *)DB_BASE(reassq_next);
959 		if (ntohs((*dc)->sdh_ssn) == srp->sr_ssn) {
960 			reassq_curr = reassq_next;
961 			goto foundit;
962 		} else if (SSN_GT(srp->sr_ssn, ntohs((*dc)->sdh_ssn)))
963 			break;
964 		reassq_prev = reassq_next;
965 	}
966 
967 	/*
968 	 * First fragment of this message received, allocate a M_CTL that
969 	 * will head the reassembly queue for this message. The message
970 	 * and all its fragments are identified by having the same ssn.
971 	 *
972 	 * Arriving fragments will be inserted in tsn order on the
973 	 * reassembly queue for this message (ssn), linked by b_cont.
974 	 */
975 	if ((new_reassq = allocb(sizeof (*srp), BPRI_MED)) == NULL) {
976 		*error = ENOMEM;
977 		return (NULL);
978 	}
979 	DB_TYPE(new_reassq) = M_CTL;
980 	srp = (sctp_reass_t *)DB_BASE(new_reassq);
981 	new_reassq->b_cont = dmp;
982 
983 	/*
984 	 * All per ssn reassembly queues, (one for each message) on
985 	 * this stream are doubly linked by b_next/b_prev back to the
986 	 * instr_reass of the instream structure associated with this
987 	 * stream id, (sip is initialized as sctp->sctp_instr[sid]).
988 	 * Insert the new reassembly queue in the correct (ssn) order.
989 	 */
990 	if (reassq_next != NULL) {
991 		if (sip->istr_reass == reassq_next) {
992 			/* head insertion */
993 			sip->istr_reass = new_reassq;
994 			new_reassq->b_next = reassq_next;
995 			new_reassq->b_prev = NULL;
996 			reassq_next->b_prev = new_reassq;
997 		} else {
998 			/* mid queue insertion */
999 			reassq_prev->b_next = new_reassq;
1000 			new_reassq->b_prev = reassq_prev;
1001 			new_reassq->b_next = reassq_next;
1002 			reassq_next->b_prev = new_reassq;
1003 		}
1004 	} else {
1005 		/* place new reassembly queue at the end */
1006 		if (sip->istr_reass == NULL) {
1007 			sip->istr_reass = new_reassq;
1008 			new_reassq->b_prev = NULL;
1009 		} else {
1010 			reassq_prev->b_next = new_reassq;
1011 			new_reassq->b_prev = reassq_prev;
1012 		}
1013 		new_reassq->b_next = NULL;
1014 	}
1015 	srp->sr_partial_delivered = B_FALSE;
1016 	srp->sr_ssn = ntohs((*dc)->sdh_ssn);
1017 	srp->sr_hasBchunk = B_FALSE;
1018 empty_srp:
1019 	srp->sr_needed = 0;
1020 	srp->sr_got = 1;
1021 	/* tail always the highest tsn on the reassembly queue for this ssn */
1022 	srp->sr_tail = dmp;
1023 	if (SCTP_DATA_GET_BBIT(*dc)) {
1024 		/* Incoming frag is flagged as the beginning of message */
1025 		srp->sr_msglen = ntohs((*dc)->sdh_len);
1026 		srp->sr_nexttsn = ntohl((*dc)->sdh_tsn) + 1;
1027 		srp->sr_hasBchunk = B_TRUE;
1028 	} else if (srp->sr_partial_delivered &&
1029 	    srp->sr_nexttsn == ntohl((*dc)->sdh_tsn)) {
1030 		/*
1031 		 * The real beginning fragment of the message was already
1032 		 * delivered upward, so this is the earliest frag expected.
1033 		 * Fake the B-bit then see if this frag also completes the
1034 		 * message.
1035 		 */
1036 		SCTP_DATA_SET_BBIT(*dc);
1037 		srp->sr_hasBchunk = B_TRUE;
1038 		srp->sr_msglen = ntohs((*dc)->sdh_len);
1039 		if (SCTP_DATA_GET_EBIT(*dc)) {
1040 			/* This frag is marked as the end of message */
1041 			srp->sr_needed = 1;
1042 			/* Got all fragments of this message now */
1043 			goto frag_done;
1044 		}
1045 		srp->sr_nexttsn++;
1046 	}
1047 
1048 	/* The only fragment of this message currently queued */
1049 	*tpfinished = B_FALSE;
1050 	return (NULL);
1051 foundit:
1052 	/*
1053 	 * This message already has a reassembly queue. Insert the new frag
1054 	 * in the reassembly queue. Try the tail first, on the assumption
1055 	 * that the fragments are arriving in order.
1056 	 */
1057 	qmp = srp->sr_tail;
1058 
1059 	/*
1060 	 * A NULL tail means all existing fragments of the message have
1061 		 * been entirely consumed during a partial delivery.
1062 	 */
1063 	if (qmp == NULL) {
1064 		ASSERT(srp->sr_got == 0 && srp->sr_needed == 0 &&
1065 		    srp->sr_partial_delivered);
1066 		ASSERT(reassq_curr->b_cont == NULL);
1067 		reassq_curr->b_cont = dmp;
1068 		goto empty_srp;
1069 	} else {
1070 		/*
1071 		 * If partial delivery did take place but the next arriving
1072 		 * fragment was not the next to be delivered, or partial
1073 		 * delivery broke off due to a gap, fragments remain on the
1074 		 * tail. The next fragment due to be delivered still has to
1075 		 * be set as the new head of list upon arrival. Fake B-bit
1076 		 * on that frag then see if it also completes the message.
1077 		 */
1078 		if (srp->sr_partial_delivered &&
1079 		    srp->sr_nexttsn == ntohl((*dc)->sdh_tsn)) {
1080 			SCTP_DATA_SET_BBIT(*dc);
1081 			srp->sr_hasBchunk = B_TRUE;
1082 			if (SCTP_DATA_GET_EBIT(*dc)) {
1083 				/* Got all fragments of this message now */
1084 				goto frag_done;
1085 			}
1086 		}
1087 	}
1088 
1089 	/* grab the frag header of already queued tail frag for comparison */
1090 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1091 	ASSERT(qmp->b_cont == NULL);
1092 
1093 	/* check if the frag goes on the tail in order */
1094 	if (SEQ_GT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1095 		qmp->b_cont = dmp;
1096 		srp->sr_tail = dmp;
1097 		dmp->b_cont = NULL;
1098 		if (srp->sr_hasBchunk && srp->sr_nexttsn ==
1099 		    ntohl((*dc)->sdh_tsn)) {
1100 			srp->sr_msglen += ntohs((*dc)->sdh_len);
1101 			srp->sr_nexttsn++;
1102 		}
1103 		goto inserted;
1104 	}
1105 
1106 	/* Next check if we should insert this frag at the beginning */
1107 	qmp = reassq_curr->b_cont;
1108 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1109 	if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1110 		dmp->b_cont = qmp;
1111 		reassq_curr->b_cont = dmp;
1112 		if (SCTP_DATA_GET_BBIT(*dc)) {
1113 			srp->sr_hasBchunk = B_TRUE;
1114 			srp->sr_nexttsn = ntohl((*dc)->sdh_tsn);
1115 		}
1116 		goto preinserted;
1117 	}
1118 
1119 	/* Insert this frag in its correct order in the middle */
1120 	for (;;) {
1121 		/* Tail check above should have caught this */
1122 		ASSERT(qmp->b_cont != NULL);
1123 
1124 		qdc = (sctp_data_hdr_t *)qmp->b_cont->b_rptr;
1125 		if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1126 			/* insert here */
1127 			dmp->b_cont = qmp->b_cont;
1128 			qmp->b_cont = dmp;
1129 			break;
1130 		}
1131 		qmp = qmp->b_cont;
1132 	}
1133 preinserted:
1134 	/*
1135 	 * Need head of message and to be due to deliver, otherwise skip
1136 	 * the recalculation of the message length below.
1137 	 */
1138 	if (!srp->sr_hasBchunk || ntohl((*dc)->sdh_tsn) != srp->sr_nexttsn)
1139 		goto inserted;
1140 	/*
1141 	 * fraglen contains the length of consecutive chunks of fragments.
1142 	 * starting from the chunk we just inserted.
1143 	 */
1144 	tsn = srp->sr_nexttsn;
1145 	for (qmp = dmp; qmp != NULL; qmp = qmp->b_cont) {
1146 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1147 		if (tsn != ntohl(qdc->sdh_tsn))
1148 			break;
1149 		fraglen += ntohs(qdc->sdh_len);
1150 		tsn++;
1151 	}
1152 	srp->sr_nexttsn = tsn;
1153 	srp->sr_msglen += fraglen;
1154 inserted:
1155 	srp->sr_got++;
1156 	first_mp = reassq_curr->b_cont;
1157 	/* Prior to this frag either the beginning or end frag was missing */
1158 	if (srp->sr_needed == 0) {
1159 		/* used to check if we have the first and last fragments */
1160 		bdc = (sctp_data_hdr_t *)first_mp->b_rptr;
1161 		edc = (sctp_data_hdr_t *)srp->sr_tail->b_rptr;
1162 
1163 		/*
1164 		 * If we now have both the beginning and the end of the message,
1165 		 * calculate how many fragments in the complete message.
1166 		 */
1167 		if (SCTP_DATA_GET_BBIT(bdc) && SCTP_DATA_GET_EBIT(edc)) {
1168 			srp->sr_needed = ntohl(edc->sdh_tsn) -
1169 			    ntohl(bdc->sdh_tsn) + 1;
1170 		}
1171 	}
1172 
1173 	/*
1174 	 * Try partial delivery if the message length has exceeded the
1175 	 * partial delivery point. Only do this if we can immediately
1176 	 * deliver the partially assembled message, and only partially
1177 	 * deliver one message at a time (i.e. messages cannot be
1178 	 * intermixed arriving at the upper layer).
1179 	 * sctp_try_partial_delivery() will return a message consisting
1180 	 * of only consecutive fragments.
1181 	 */
1182 	if (srp->sr_needed != srp->sr_got) {
1183 		/* we don't have the full message yet */
1184 		dmp = NULL;
1185 		if (ntohl((*dc)->sdh_tsn) <= sctp->sctp_ftsn &&
1186 		    srp->sr_msglen >= sctp->sctp_pd_point &&
1187 		    srp->sr_ssn == sip->nextseq) {
1188 			dmp = sctp_try_partial_delivery(sctp, reassq_curr,
1189 			    srp, dc);
1190 		}
1191 		*tpfinished = B_FALSE;
1192 		/*
1193 		 * NULL unless a segment of the message now qualified for
1194 		 * partial_delivery and has been prepared for delivery by
1195 		 * sctp_try_partial_delivery().
1196 		 */
1197 		return (dmp);
1198 	}
1199 frag_done:
1200 	/*
1201 	 * Reassembly complete for this message, prepare the data for delivery.
1202 	 * First unlink the reassembly queue for this ssn from the list of
1203 	 * messages in reassembly.
1204 	 */
1205 	if (sip->istr_reass == reassq_curr) {
1206 		sip->istr_reass = reassq_curr->b_next;
1207 		if (reassq_curr->b_next)
1208 			reassq_curr->b_next->b_prev = NULL;
1209 	} else {
1210 		ASSERT(reassq_curr->b_prev != NULL);
1211 		reassq_curr->b_prev->b_next = reassq_curr->b_next;
1212 		if (reassq_curr->b_next)
1213 			reassq_curr->b_next->b_prev = reassq_curr->b_prev;
1214 	}
1215 
1216 	/*
1217 	 * Need to clean up b_prev and b_next as freeb() will
1218 	 * ASSERT that they are unused.
1219 	 */
1220 	reassq_curr->b_next = NULL;
1221 	reassq_curr->b_prev = NULL;
1222 
1223 	dmp = reassq_curr;
1224 	/* point to the head of the reassembled data message */
1225 	dmp = dmp->b_cont;
1226 	reassq_curr->b_cont = NULL;
1227 	freeb(reassq_curr);
1228 	/* Tell our caller that we are returning a complete message. */
1229 	*tpfinished = B_TRUE;
1230 
1231 	/*
1232 	 * Adjust all mblk's except the lead so their rptr's point to the
1233 	 * payload. sctp_data_chunk() will need to process the lead's data
1234 	 * chunk section, so leave its rptr pointing at the data chunk
1235 	 * header.
1236 	 */
1237 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
1238 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
1239 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1240 		qmp->b_rptr = (uchar_t *)(qdc + 1);
1241 	}
1242 	BUMP_LOCAL(sctp->sctp_reassmsgs);
1243 
1244 	return (dmp);
1245 }
1246 
1247 static void
1248 sctp_add_dup(uint32_t tsn, mblk_t **dups)
1249 {
1250 	mblk_t *mp;
1251 	size_t bsize = SCTP_DUP_MBLK_SZ * sizeof (tsn);
1252 
1253 	if (dups == NULL) {
1254 		return;
1255 	}
1256 
1257 	/* first time? */
1258 	if (*dups == NULL) {
1259 		*dups = allocb(bsize, BPRI_MED);
1260 		if (*dups == NULL) {
1261 			return;
1262 		}
1263 	}
1264 
1265 	mp = *dups;
1266 	if ((mp->b_wptr - mp->b_rptr) >= bsize) {
1267 		/* maximum reached */
1268 		return;
1269 	}
1270 
1271 	/* add the duplicate tsn */
1272 	bcopy(&tsn, mp->b_wptr, sizeof (tsn));
1273 	mp->b_wptr += sizeof (tsn);
1274 	ASSERT((mp->b_wptr - mp->b_rptr) <= bsize);
1275 }
1276 
1277 /*
1278  * All incoming sctp data, complete messages and fragments are handled by
1279  * this function. Unless the U-bit is set in the data chunk it will be
1280  * delivered in order or queued until an in-order delivery can be made.
1281  */
1282 static void
1283 sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
1284     sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
1285 {
1286 	sctp_data_hdr_t *dc;
1287 	mblk_t *dmp, *pmp;
1288 	sctp_instr_t *instr;
1289 	int ubit;
1290 	int sid;
1291 	int isfrag;
1292 	uint16_t ssn;
1293 	uint32_t oftsn;
1294 	boolean_t can_deliver = B_TRUE;
1295 	uint32_t tsn;
1296 	int dlen;
1297 	boolean_t tpfinished = B_TRUE;
1298 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1299 	int	error;
1300 
1301 	/* The following are used multiple times, so we inline them */
1302 #define	SCTP_ACK_IT(sctp, tsn)						\
1303 	if (tsn == sctp->sctp_ftsn) {					\
1304 		dprint(2, ("data_chunk: acking next %x\n", tsn));	\
1305 		(sctp)->sctp_ftsn++;					\
1306 		if ((sctp)->sctp_sack_gaps > 0)				\
1307 			(sctp)->sctp_force_sack = 1;			\
1308 	} else if (SEQ_GT(tsn, sctp->sctp_ftsn)) {			\
1309 		/* Got a gap; record it */				\
1310 		BUMP_LOCAL(sctp->sctp_outseqtsns);			\
1311 		dprint(2, ("data_chunk: acking gap %x\n", tsn));	\
1312 		sctp_ack_add(&sctp->sctp_sack_info, tsn,		\
1313 		    &sctp->sctp_sack_gaps);				\
1314 		sctp->sctp_force_sack = 1;				\
1315 	}
1316 
1317 	dmp = NULL;
1318 
1319 	dc = (sctp_data_hdr_t *)ch;
1320 	tsn = ntohl(dc->sdh_tsn);
1321 
1322 	dprint(3, ("sctp_data_chunk: mp=%p tsn=%x\n", (void *)mp, tsn));
1323 
1324 	/* Check for duplicates */
1325 	if (SEQ_LT(tsn, sctp->sctp_ftsn)) {
1326 		dprint(4, ("sctp_data_chunk: dropping duplicate\n"));
1327 		BUMP_LOCAL(sctp->sctp_idupchunks);
1328 		sctp->sctp_force_sack = 1;
1329 		sctp_add_dup(dc->sdh_tsn, dups);
1330 		return;
1331 	}
1332 
1333 	/* Check for dups of sack'ed data */
1334 	if (sctp->sctp_sack_info != NULL) {
1335 		sctp_set_t *sp;
1336 
1337 		for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1338 			if (SEQ_GEQ(tsn, sp->begin) && SEQ_LEQ(tsn, sp->end)) {
1339 				dprint(4,
1340 				    ("sctp_data_chunk: dropping dup > "
1341 				    "cumtsn\n"));
1342 				BUMP_LOCAL(sctp->sctp_idupchunks);
1343 				sctp->sctp_force_sack = 1;
1344 				sctp_add_dup(dc->sdh_tsn, dups);
1345 				return;
1346 			}
1347 		}
1348 	}
1349 
1350 	/* We can no longer deliver anything up, but still need to handle it. */
1351 	if (SCTP_IS_DETACHED(sctp)) {
1352 		SCTPS_BUMP_MIB(sctps, sctpInClosed);
1353 		can_deliver = B_FALSE;
1354 	}
1355 
1356 	dlen = ntohs(dc->sdh_len) - sizeof (*dc);
1357 
1358 	/*
1359 	 * Check for buffer space. Note if this is the next expected TSN
1360 	 * we have to take it to avoid deadlock because we cannot deliver
1361 	 * later queued TSNs and thus clear buffer space without it.
1362 	 * We drop anything that is purely zero window probe data here.
1363 	 */
1364 	if ((sctp->sctp_rwnd - sctp->sctp_rxqueued < dlen) &&
1365 	    (tsn != sctp->sctp_ftsn || sctp->sctp_rwnd == 0)) {
1366 		/* Drop and SACK, but don't advance the cumulative TSN. */
1367 		sctp->sctp_force_sack = 1;
1368 		dprint(0, ("sctp_data_chunk: exceed rwnd %d rxqueued %d "
1369 		    "dlen %d ssn %d tsn %x\n", sctp->sctp_rwnd,
1370 		    sctp->sctp_rxqueued, dlen, ntohs(dc->sdh_ssn),
1371 		    ntohl(dc->sdh_tsn)));
1372 		return;
1373 	}
1374 
1375 	sid = ntohs(dc->sdh_sid);
1376 
1377 	/* Data received for a stream not negotiated for this association */
1378 	if (sid >= sctp->sctp_num_istr) {
1379 		sctp_bsc_t	inval_parm;
1380 
1381 		/* Will populate the CAUSE block in the ERROR chunk. */
1382 		inval_parm.bsc_sid = dc->sdh_sid;
1383 		/* RESERVED, ignored at the receiving end */
1384 		inval_parm.bsc_pad = 0;
1385 
1386 		/* ack and drop it */
1387 		sctp_add_err(sctp, SCTP_ERR_BAD_SID, (void *)&inval_parm,
1388 		    sizeof (sctp_bsc_t), fp);
1389 		SCTP_ACK_IT(sctp, tsn);
1390 		return;
1391 	}
1392 
1393 	/* unordered delivery OK for this data if ubit set */
1394 	ubit = SCTP_DATA_GET_UBIT(dc);
1395 	ASSERT(sctp->sctp_instr != NULL);
1396 
1397 	/* select per stream structure for this stream from the array */
1398 	instr = &sctp->sctp_instr[sid];
1399 	/* Initialize the stream, if not yet used */
1400 	if (instr->sctp == NULL)
1401 		instr->sctp = sctp;
1402 
1403 	/* Begin and End bit set would mean a complete message */
1404 	isfrag = !(SCTP_DATA_GET_BBIT(dc) && SCTP_DATA_GET_EBIT(dc));
1405 
1406 	/* The ssn of this sctp message and of any fragments in it */
1407 	ssn = ntohs(dc->sdh_ssn);
1408 
1409 	dmp = dupb(mp);
1410 	if (dmp == NULL) {
1411 		/* drop it and don't ack, let the peer retransmit */
1412 		return;
1413 	}
1414 	/*
1415 	 * Past header and payload, note: the underlying buffer may
1416 	 * contain further chunks from the same incoming IP packet,
1417 	 * if so db_ref will be greater than one.
1418 	 */
1419 	dmp->b_wptr = (uchar_t *)ch + ntohs(ch->sch_len);
1420 
1421 	sctp->sctp_rxqueued += dlen;
1422 
1423 	oftsn = sctp->sctp_ftsn;
1424 
1425 	if (isfrag) {
1426 
1427 		error = 0;
1428 		/* fragmented data chunk */
1429 		dmp->b_rptr = (uchar_t *)dc;
1430 		if (ubit) {
1431 			/* prepare data for unordered delivery */
1432 			dmp = sctp_uodata_frag(sctp, dmp, &dc);
1433 #if	DEBUG
1434 			if (dmp != NULL) {
1435 				ASSERT(instr ==
1436 				    &sctp->sctp_instr[sid]);
1437 			}
1438 #endif
1439 		} else {
1440 			/*
1441 			 * Assemble fragments and queue for ordered delivery,
1442 			 * dmp returned is NULL or the head of a complete or
1443 			 * "partial delivery" message. Any returned message
1444 			 * and all its fragments will have the same ssn as the
1445 			 * input fragment currently being handled.
1446 			 */
1447 			dmp = sctp_data_frag(sctp, dmp, &dc, &error, instr,
1448 			    &tpfinished);
1449 		}
1450 		if (error == ENOMEM) {
1451 			/* back out the adjustment made earlier */
1452 			sctp->sctp_rxqueued -= dlen;
1453 			/*
1454 			 * Don't ack the segment,
1455 			 * the peer will retransmit.
1456 			 */
1457 			return;
1458 		}
1459 
1460 		if (dmp == NULL) {
1461 			/*
1462 			 * The frag has been queued for later in-order delivery,
1463 			 * but the cumulative TSN may need to advance, so also
1464 			 * need to perform the gap ack checks at the done label.
1465 			 */
1466 			SCTP_ACK_IT(sctp, tsn);
1467 			DTRACE_PROBE4(sctp_data_frag_queued, sctp_t *, sctp,
1468 			    int, sid, int, tsn, uint16_t, ssn);
1469 			goto done;
1470 		}
1471 	}
1472 
1473 	/*
1474 	 * Unless message is the next for delivery to the ulp, queue complete
1475 	 * message in the correct order for ordered delivery.
1476 	 * Note: tpfinished is true when the incoming chunk contains a complete
1477 	 * message or is the final missing fragment which completed a message.
1478 	 */
1479 	if (!ubit && tpfinished && ssn != instr->nextseq) {
1480 		/* Adjust rptr to point at the data chunk for compares */
1481 		dmp->b_rptr = (uchar_t *)dc;
1482 
1483 		dprint(2,
1484 		    ("data_chunk: inserted %x in pq (ssn %d expected %d)\n",
1485 		    ntohl(dc->sdh_tsn), (int)(ssn), (int)(instr->nextseq)));
1486 
1487 		if (instr->istr_msgs == NULL) {
1488 			instr->istr_msgs = dmp;
1489 			ASSERT(dmp->b_prev == NULL && dmp->b_next == NULL);
1490 		} else {
1491 			mblk_t			*imblk = instr->istr_msgs;
1492 			sctp_data_hdr_t		*idc;
1493 
1494 			/*
1495 			 * XXXNeed to take sequence wraps into account,
1496 			 * ... and a more efficient insertion algo.
1497 			 */
1498 			for (;;) {
1499 				idc = (sctp_data_hdr_t *)imblk->b_rptr;
1500 				if (SSN_GT(ntohs(idc->sdh_ssn),
1501 				    ntohs(dc->sdh_ssn))) {
1502 					if (instr->istr_msgs == imblk) {
1503 						instr->istr_msgs = dmp;
1504 						dmp->b_next = imblk;
1505 						imblk->b_prev = dmp;
1506 					} else {
1507 						ASSERT(imblk->b_prev != NULL);
1508 						imblk->b_prev->b_next = dmp;
1509 						dmp->b_prev = imblk->b_prev;
1510 						imblk->b_prev = dmp;
1511 						dmp->b_next = imblk;
1512 					}
1513 					break;
1514 				}
1515 				if (imblk->b_next == NULL) {
1516 					imblk->b_next = dmp;
1517 					dmp->b_prev = imblk;
1518 					break;
1519 				}
1520 				imblk = imblk->b_next;
1521 			}
1522 		}
1523 		(instr->istr_nmsgs)++;
1524 		(sctp->sctp_istr_nmsgs)++;
1525 		SCTP_ACK_IT(sctp, tsn);
1526 		DTRACE_PROBE4(sctp_pqueue_completemsg, sctp_t *, sctp,
1527 		    int, sid, int, tsn, uint16_t, ssn);
1528 		return;
1529 	}
1530 
1531 	/*
1532 	 * Deliver the data directly. Recalculate dlen now since
1533 	 * we may have just reassembled this data.
1534 	 */
1535 	dlen = dmp->b_wptr - (uchar_t *)dc - sizeof (*dc);
1536 	for (pmp = dmp->b_cont; pmp != NULL; pmp = pmp->b_cont)
1537 		dlen += MBLKL(pmp);
1538 	ASSERT(sctp->sctp_rxqueued >= dlen);
1539 
1540 	/* Deliver the message. */
1541 	sctp->sctp_rxqueued -= dlen;
1542 
1543 	if (can_deliver) {
1544 		/* step past header to the payload */
1545 		dmp->b_rptr = (uchar_t *)(dc + 1);
1546 		if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
1547 		    ipp, ira) == 0) {
1548 			dprint(1, ("sctp_data_chunk: delivering %lu bytes\n",
1549 			    msgdsize(dmp)));
1550 			/*
1551 			 * We overload the meaning of b_flag for SCTP sockfs
1552 			 * internal use, to advise sockfs of partial delivery
1553 			 * semantics.
1554 			 */
1555 			dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA;
1556 			if (sctp->sctp_flowctrld) {
1557 				sctp->sctp_rwnd -= dlen;
1558 				if (sctp->sctp_rwnd < 0)
1559 					sctp->sctp_rwnd = 0;
1560 			}
1561 			if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
1562 			    msgdsize(dmp), 0, &error, NULL) <= 0) {
1563 				sctp->sctp_flowctrld = B_TRUE;
1564 			}
1565 			SCTP_ACK_IT(sctp, tsn);
1566 		} else {
1567 			/* No memory don't ack, the peer will retransmit. */
1568 			freemsg(dmp);
1569 			return;
1570 		}
1571 	} else {
1572 		/* Closed above, ack to peer and free the data */
1573 		freemsg(dmp);
1574 		SCTP_ACK_IT(sctp, tsn);
1575 	}
1576 
1577 	/*
1578 	 * Data now enqueued, may already have been processed and free'd
1579 	 * by the ULP (or we may have just freed it above, if we could not
1580 	 * deliver), so we must not reference it (this is why we saved the
1581 	 * ssn and ubit earlier).
1582 	 */
1583 	if (ubit != 0) {
1584 		BUMP_LOCAL(sctp->sctp_iudchunks);
1585 		goto done;
1586 	}
1587 	BUMP_LOCAL(sctp->sctp_idchunks);
1588 
1589 	/*
1590 	 * There was a partial delivery and it has not finished,
1591 	 * don't pull anything from the pqueues or increment the
1592 	 * nextseq. This msg must complete before starting on
1593 	 * the next ssn and the partial message must have the
1594 	 * same ssn as the next expected message..
1595 	 */
1596 	if (!tpfinished) {
1597 		DTRACE_PROBE4(sctp_partial_delivery, sctp_t *, sctp,
1598 		    int, sid, int, tsn, uint16_t, ssn);
1599 		/*
1600 		 * Verify the partial delivery is part of the
1601 		 * message expected for ordered delivery.
1602 		 */
1603 		if (ssn != instr->nextseq) {
1604 			DTRACE_PROBE4(sctp_partial_delivery_error,
1605 			    sctp_t *, sctp, int, sid, int, tsn,
1606 			    uint16_t, ssn);
1607 			cmn_err(CE_WARN, "sctp partial"
1608 			    " delivery error, sctp 0x%p"
1609 			    " sid = 0x%x ssn != nextseq"
1610 			    " tsn 0x%x ftsn 0x%x"
1611 			    " ssn 0x%x nextseq 0x%x",
1612 			    (void *)sctp, sid,
1613 			    tsn, sctp->sctp_ftsn, ssn,
1614 			    instr->nextseq);
1615 		}
1616 
1617 		ASSERT(ssn == instr->nextseq);
1618 		goto done;
1619 	}
1620 
1621 	if (ssn != instr->nextseq) {
1622 		DTRACE_PROBE4(sctp_inorder_delivery_error,
1623 		    sctp_t *, sctp, int, sid, int, tsn,
1624 		    uint16_t, ssn);
1625 		cmn_err(CE_WARN, "sctp in-order delivery error, sctp 0x%p "
1626 		    "sid = 0x%x ssn != nextseq ssn 0x%x nextseq 0x%x",
1627 		    (void *)sctp, sid, ssn, instr->nextseq);
1628 	}
1629 
1630 	ASSERT(ssn == instr->nextseq);
1631 
1632 	DTRACE_PROBE4(sctp_deliver_completemsg, sctp_t *, sctp, int, sid,
1633 	    int, tsn, uint16_t, ssn);
1634 
1635 	instr->nextseq = ssn + 1;
1636 
1637 	/*
1638 	 * Deliver any successive data chunks waiting in the instr pqueue
1639 	 * for the data just sent up.
1640 	 */
1641 	while (instr->istr_nmsgs > 0) {
1642 		dmp = (mblk_t *)instr->istr_msgs;
1643 		dc = (sctp_data_hdr_t *)dmp->b_rptr;
1644 		ssn = ntohs(dc->sdh_ssn);
1645 		tsn = ntohl(dc->sdh_tsn);
1646 		/* Stop at the first gap in the sequence */
1647 		if (ssn != instr->nextseq)
1648 			break;
1649 
1650 		DTRACE_PROBE4(sctp_deliver_pqueuedmsg, sctp_t *, sctp,
1651 		    int, sid, int, tsn, uint16_t, ssn);
1652 		/*
1653 		 * Ready to deliver all data before the gap
1654 		 * to the upper layer.
1655 		 */
1656 		(instr->istr_nmsgs)--;
1657 		(instr->nextseq)++;
1658 		(sctp->sctp_istr_nmsgs)--;
1659 
1660 		instr->istr_msgs = instr->istr_msgs->b_next;
1661 		if (instr->istr_msgs != NULL)
1662 			instr->istr_msgs->b_prev = NULL;
1663 		dmp->b_next = dmp->b_prev = NULL;
1664 
1665 		dprint(2, ("data_chunk: pulling %x from pq (ssn %d)\n",
1666 		    ntohl(dc->sdh_tsn), (int)ssn));
1667 
1668 		/*
1669 		 * Composite messages indicate this chunk was reassembled,
1670 		 * each b_cont represents another TSN; Follow the chain to
1671 		 * reach the frag with the last tsn in order to advance ftsn
1672 		 * shortly by calling SCTP_ACK_IT().
1673 		 */
1674 		dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
1675 		for (pmp = dmp->b_cont; pmp; pmp = pmp->b_cont)
1676 			dlen += MBLKL(pmp);
1677 
1678 		ASSERT(sctp->sctp_rxqueued >= dlen);
1679 
1680 		sctp->sctp_rxqueued -= dlen;
1681 		if (can_deliver) {
1682 			dmp->b_rptr = (uchar_t *)(dc + 1);
1683 			if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
1684 			    ipp, ira) == 0) {
1685 				dprint(1, ("sctp_data_chunk: delivering %lu "
1686 				    "bytes\n", msgdsize(dmp)));
1687 				/*
1688 				 * Meaning of b_flag overloaded for SCTP sockfs
1689 				 * internal use, advise sockfs of partial
1690 				 * delivery semantics.
1691 				 */
1692 				dmp->b_flag = tpfinished ?
1693 				    0 : SCTP_PARTIAL_DATA;
1694 				if (sctp->sctp_flowctrld) {
1695 					sctp->sctp_rwnd -= dlen;
1696 					if (sctp->sctp_rwnd < 0)
1697 						sctp->sctp_rwnd = 0;
1698 				}
1699 				if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
1700 				    msgdsize(dmp), 0, &error, NULL) <= 0) {
1701 					sctp->sctp_flowctrld = B_TRUE;
1702 				}
1703 				SCTP_ACK_IT(sctp, tsn);
1704 			} else {
1705 				/* don't ack, the peer will retransmit */
1706 				freemsg(dmp);
1707 				return;
1708 			}
1709 		} else {
1710 			/* Closed above, ack and free the data */
1711 			freemsg(dmp);
1712 			SCTP_ACK_IT(sctp, tsn);
1713 		}
1714 	}
1715 
1716 done:
1717 
1718 	/*
1719 	 * If there are gap reports pending, check if advancing
1720 	 * the ftsn here closes a gap. If so, we can advance
1721 	 * ftsn to the end of the set.
1722 	 */
1723 	if (sctp->sctp_sack_info != NULL &&
1724 	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
1725 		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
1726 	}
1727 	/*
1728 	 * If ftsn has moved forward, maybe we can remove gap reports.
1729 	 * NB: dmp may now be NULL, so don't dereference it here.
1730 	 */
1731 	if (oftsn != sctp->sctp_ftsn && sctp->sctp_sack_info != NULL) {
1732 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
1733 		    &sctp->sctp_sack_gaps);
1734 		dprint(2, ("data_chunk: removed acks before %x (num=%d)\n",
1735 		    sctp->sctp_ftsn - 1, sctp->sctp_sack_gaps));
1736 	}
1737 
1738 #ifdef	DEBUG
1739 	if (sctp->sctp_sack_info != NULL) {
1740 		ASSERT(sctp->sctp_ftsn != sctp->sctp_sack_info->begin);
1741 	}
1742 #endif
1743 
1744 #undef	SCTP_ACK_IT
1745 }
1746 
1747 void
1748 sctp_fill_sack(sctp_t *sctp, unsigned char *dst, int sacklen)
1749 {
1750 	sctp_chunk_hdr_t *sch;
1751 	sctp_sack_chunk_t *sc;
1752 	sctp_sack_frag_t *sf;
1753 	uint16_t num_gaps = sctp->sctp_sack_gaps;
1754 	sctp_set_t *sp;
1755 
1756 	/* Chunk hdr */
1757 	sch = (sctp_chunk_hdr_t *)dst;
1758 	sch->sch_id = CHUNK_SACK;
1759 	sch->sch_flags = 0;
1760 	sch->sch_len = htons(sacklen);
1761 
1762 	/* SACK chunk */
1763 	sctp->sctp_lastacked = sctp->sctp_ftsn - 1;
1764 
1765 	sc = (sctp_sack_chunk_t *)(sch + 1);
1766 	sc->ssc_cumtsn = htonl(sctp->sctp_lastacked);
1767 	if (sctp->sctp_rxqueued < sctp->sctp_rwnd) {
1768 		sc->ssc_a_rwnd = htonl(sctp->sctp_rwnd - sctp->sctp_rxqueued);
1769 	} else {
1770 		sc->ssc_a_rwnd = 0;
1771 	}
1772 	/* Remember the last window sent to peer. */
1773 	sctp->sctp_arwnd = sc->ssc_a_rwnd;
1774 	sc->ssc_numfrags = htons(num_gaps);
1775 	sc->ssc_numdups = 0;
1776 
1777 	/* lay in gap reports */
1778 	sf = (sctp_sack_frag_t *)(sc + 1);
1779 	for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1780 		uint16_t offset;
1781 
1782 		/* start */
1783 		if (sp->begin > sctp->sctp_lastacked) {
1784 			offset = (uint16_t)(sp->begin - sctp->sctp_lastacked);
1785 		} else {
1786 			/* sequence number wrap */
1787 			offset = (uint16_t)(UINT32_MAX - sctp->sctp_lastacked +
1788 			    sp->begin);
1789 		}
1790 		sf->ssf_start = htons(offset);
1791 
1792 		/* end */
1793 		if (sp->end >= sp->begin) {
1794 			offset += (uint16_t)(sp->end - sp->begin);
1795 		} else {
1796 			/* sequence number wrap */
1797 			offset += (uint16_t)(UINT32_MAX - sp->begin + sp->end);
1798 		}
1799 		sf->ssf_end = htons(offset);
1800 
1801 		sf++;
1802 		/* This is just for debugging (a la the following assertion) */
1803 		num_gaps--;
1804 	}
1805 
1806 	ASSERT(num_gaps == 0);
1807 
1808 	/* If the SACK timer is running, stop it */
1809 	if (sctp->sctp_ack_timer_running) {
1810 		sctp_timer_stop(sctp->sctp_ack_mp);
1811 		sctp->sctp_ack_timer_running = B_FALSE;
1812 	}
1813 
1814 	BUMP_LOCAL(sctp->sctp_obchunks);
1815 	BUMP_LOCAL(sctp->sctp_osacks);
1816 }
1817 
1818 mblk_t *
1819 sctp_make_sack(sctp_t *sctp, sctp_faddr_t *sendto, mblk_t *dups)
1820 {
1821 	mblk_t *smp;
1822 	size_t slen;
1823 	sctp_chunk_hdr_t *sch;
1824 	sctp_sack_chunk_t *sc;
1825 	int32_t acks_max;
1826 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1827 	uint32_t	dups_len;
1828 	sctp_faddr_t	*fp;
1829 
1830 	ASSERT(sendto != NULL);
1831 
1832 	if (sctp->sctp_force_sack) {
1833 		sctp->sctp_force_sack = 0;
1834 		goto checks_done;
1835 	}
1836 
1837 	acks_max = sctps->sctps_deferred_acks_max;
1838 	if (sctp->sctp_state == SCTPS_ESTABLISHED) {
1839 		if (sctp->sctp_sack_toggle < acks_max) {
1840 			/* no need to SACK right now */
1841 			dprint(2, ("sctp_make_sack: %p no sack (toggle)\n",
1842 			    (void *)sctp));
1843 			return (NULL);
1844 		} else if (sctp->sctp_sack_toggle >= acks_max) {
1845 			sctp->sctp_sack_toggle = 0;
1846 		}
1847 	}
1848 
1849 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1850 		dprint(2, ("sctp_make_sack: %p no sack (already)\n",
1851 		    (void *)sctp));
1852 		return (NULL);
1853 	}
1854 
1855 checks_done:
1856 	dprint(2, ("sctp_make_sack: acking %x\n", sctp->sctp_ftsn - 1));
1857 
1858 	if (dups != NULL)
1859 		dups_len = MBLKL(dups);
1860 	else
1861 		dups_len = 0;
1862 	slen = sizeof (*sch) + sizeof (*sc) +
1863 	    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1864 
1865 	/*
1866 	 * If there are error chunks, check and see if we can send the
1867 	 * SACK chunk and error chunks together in one packet.  If not,
1868 	 * send the error chunks out now.
1869 	 */
1870 	if (sctp->sctp_err_chunks != NULL) {
1871 		fp = SCTP_CHUNK_DEST(sctp->sctp_err_chunks);
1872 		if (sctp->sctp_err_len + slen + dups_len > fp->sf_pmss) {
1873 			if ((smp = sctp_make_mp(sctp, fp, 0)) == NULL) {
1874 				SCTP_KSTAT(sctps, sctp_send_err_failed);
1875 				SCTP_KSTAT(sctps, sctp_send_sack_failed);
1876 				freemsg(sctp->sctp_err_chunks);
1877 				sctp->sctp_err_chunks = NULL;
1878 				sctp->sctp_err_len = 0;
1879 				return (NULL);
1880 			}
1881 			smp->b_cont = sctp->sctp_err_chunks;
1882 			sctp_set_iplen(sctp, smp, fp->sf_ixa);
1883 			(void) conn_ip_output(smp, fp->sf_ixa);
1884 			BUMP_LOCAL(sctp->sctp_opkts);
1885 			sctp->sctp_err_chunks = NULL;
1886 			sctp->sctp_err_len = 0;
1887 		}
1888 	}
1889 	smp = sctp_make_mp(sctp, sendto, slen);
1890 	if (smp == NULL) {
1891 		SCTP_KSTAT(sctps, sctp_send_sack_failed);
1892 		return (NULL);
1893 	}
1894 	sch = (sctp_chunk_hdr_t *)smp->b_wptr;
1895 
1896 	sctp_fill_sack(sctp, smp->b_wptr, slen);
1897 	smp->b_wptr += slen;
1898 	if (dups != NULL) {
1899 		sc = (sctp_sack_chunk_t *)(sch + 1);
1900 		sc->ssc_numdups = htons(MBLKL(dups) / sizeof (uint32_t));
1901 		sch->sch_len = htons(slen + dups_len);
1902 		smp->b_cont = dups;
1903 	}
1904 
1905 	if (sctp->sctp_err_chunks != NULL) {
1906 		linkb(smp, sctp->sctp_err_chunks);
1907 		sctp->sctp_err_chunks = NULL;
1908 		sctp->sctp_err_len = 0;
1909 	}
1910 	return (smp);
1911 }
1912 
1913 /*
1914  * Check and see if we need to send a SACK chunk.  If it is needed,
1915  * send it out.  Return true if a SACK chunk is sent, false otherwise.
1916  */
1917 boolean_t
1918 sctp_sack(sctp_t *sctp, mblk_t *dups)
1919 {
1920 	mblk_t *smp;
1921 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1922 
1923 	/* If we are shutting down, let send_shutdown() bundle the SACK */
1924 	if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
1925 		sctp_send_shutdown(sctp, 0);
1926 	}
1927 
1928 	ASSERT(sctp->sctp_lastdata != NULL);
1929 
1930 	if ((smp = sctp_make_sack(sctp, sctp->sctp_lastdata, dups)) == NULL) {
1931 		/* The caller of sctp_sack() will not free the dups mblk. */
1932 		if (dups != NULL)
1933 			freeb(dups);
1934 		return (B_FALSE);
1935 	}
1936 	dprint(2, ("sctp_sack: sending to %p %x:%x:%x:%x\n",
1937 	    (void *)sctp->sctp_lastdata,
1938 	    SCTP_PRINTADDR(sctp->sctp_lastdata->sf_faddr)));
1939 
1940 	sctp->sctp_active = LBOLT_FASTPATH64;
1941 
1942 	SCTPS_BUMP_MIB(sctps, sctpOutAck);
1943 
1944 	sctp_set_iplen(sctp, smp, sctp->sctp_lastdata->sf_ixa);
1945 	(void) conn_ip_output(smp, sctp->sctp_lastdata->sf_ixa);
1946 	BUMP_LOCAL(sctp->sctp_opkts);
1947 	return (B_TRUE);
1948 }
1949 
1950 /*
1951  * This is called if we have a message that was partially sent and is
1952  * abandoned. The cum TSN will be the last chunk sent for this message,
1953  * subsequent chunks will be marked ABANDONED. We send a Forward TSN
1954  * chunk in this case with the TSN of the last sent chunk so that the
1955  * peer can clean up its fragment list for this message. This message
1956  * will be removed from the transmit list when the peer sends a SACK
1957  * back.
1958  */
1959 int
1960 sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta)
1961 {
1962 	sctp_data_hdr_t	*dh;
1963 	mblk_t		*nmp;
1964 	mblk_t		*head;
1965 	int32_t		unsent = 0;
1966 	mblk_t		*mp1 = meta->b_cont;
1967 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1968 	sctp_faddr_t	*fp = sctp->sctp_current;
1969 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1970 
1971 	dh = (sctp_data_hdr_t *)mp1->b_rptr;
1972 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, ntohl(dh->sdh_tsn))) {
1973 		sctp_ftsn_set_t	*sets = NULL;
1974 		uint_t		nsets = 0;
1975 		uint32_t	seglen = sizeof (uint32_t);
1976 		boolean_t	ubit = SCTP_DATA_GET_UBIT(dh);
1977 
1978 		while (mp1->b_next != NULL && SCTP_CHUNK_ISSENT(mp1->b_next))
1979 			mp1 = mp1->b_next;
1980 		dh = (sctp_data_hdr_t *)mp1->b_rptr;
1981 		sctp->sctp_adv_pap = ntohl(dh->sdh_tsn);
1982 		if (!ubit &&
1983 		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, &seglen)) {
1984 			sctp->sctp_adv_pap = adv_pap;
1985 			return (ENOMEM);
1986 		}
1987 		nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, seglen);
1988 		sctp_free_ftsn_set(sets);
1989 		if (nmp == NULL) {
1990 			sctp->sctp_adv_pap = adv_pap;
1991 			return (ENOMEM);
1992 		}
1993 		head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
1994 		if (head == NULL) {
1995 			sctp->sctp_adv_pap = adv_pap;
1996 			freemsg(nmp);
1997 			SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
1998 			return (ENOMEM);
1999 		}
2000 		SCTP_MSG_SET_ABANDONED(meta);
2001 		sctp_set_iplen(sctp, head, fp->sf_ixa);
2002 		(void) conn_ip_output(head, fp->sf_ixa);
2003 		BUMP_LOCAL(sctp->sctp_opkts);
2004 		if (!fp->sf_timer_running)
2005 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2006 		mp1 = mp1->b_next;
2007 		while (mp1 != NULL) {
2008 			ASSERT(!SCTP_CHUNK_ISSENT(mp1));
2009 			ASSERT(!SCTP_CHUNK_ABANDONED(mp1));
2010 			SCTP_ABANDON_CHUNK(mp1);
2011 			dh = (sctp_data_hdr_t *)mp1->b_rptr;
2012 			unsent += ntohs(dh->sdh_len) - sizeof (*dh);
2013 			mp1 = mp1->b_next;
2014 		}
2015 		ASSERT(sctp->sctp_unsent >= unsent);
2016 		sctp->sctp_unsent -= unsent;
2017 		/*
2018 		 * Update ULP the amount of queued data, which is
2019 		 * sent-unack'ed + unsent.
2020 		 */
2021 		if (!SCTP_IS_DETACHED(sctp))
2022 			SCTP_TXQ_UPDATE(sctp);
2023 		return (0);
2024 	}
2025 	return (-1);
2026 }
2027 
2028 uint32_t
2029 sctp_cumack(sctp_t *sctp, uint32_t tsn, mblk_t **first_unacked)
2030 {
2031 	mblk_t *ump, *nump, *mp = NULL;
2032 	uint16_t chunklen;
2033 	uint32_t xtsn;
2034 	sctp_faddr_t *fp;
2035 	sctp_data_hdr_t *sdc;
2036 	uint32_t cumack_forward = 0;
2037 	sctp_msg_hdr_t	*mhdr;
2038 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2039 
2040 	ump = sctp->sctp_xmit_head;
2041 
2042 	/*
2043 	 * Free messages only when they're completely acked.
2044 	 */
2045 	while (ump != NULL) {
2046 		mhdr = (sctp_msg_hdr_t *)ump->b_rptr;
2047 		for (mp = ump->b_cont; mp != NULL; mp = mp->b_next) {
2048 			if (SCTP_CHUNK_ABANDONED(mp)) {
2049 				ASSERT(SCTP_IS_MSG_ABANDONED(ump));
2050 				mp = NULL;
2051 				break;
2052 			}
2053 			/*
2054 			 * We check for abandoned message if we are PR-SCTP
2055 			 * aware, if this is not the first chunk in the
2056 			 * message (b_cont) and if the message is marked
2057 			 * abandoned.
2058 			 */
2059 			if (!SCTP_CHUNK_ISSENT(mp)) {
2060 				if (sctp->sctp_prsctp_aware &&
2061 				    mp != ump->b_cont &&
2062 				    (SCTP_IS_MSG_ABANDONED(ump) ||
2063 				    SCTP_MSG_TO_BE_ABANDONED(ump, mhdr,
2064 				    sctp))) {
2065 					(void) sctp_check_abandoned_msg(sctp,
2066 					    ump);
2067 				}
2068 				goto cum_ack_done;
2069 			}
2070 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2071 			xtsn = ntohl(sdc->sdh_tsn);
2072 			if (SEQ_GEQ(sctp->sctp_lastack_rxd, xtsn))
2073 				continue;
2074 			if (SEQ_GEQ(tsn, xtsn)) {
2075 				fp = SCTP_CHUNK_DEST(mp);
2076 				chunklen = ntohs(sdc->sdh_len);
2077 
2078 				if (sctp->sctp_out_time != 0 &&
2079 				    xtsn == sctp->sctp_rtt_tsn) {
2080 					/* Got a new RTT measurement */
2081 					sctp_update_rtt(sctp, fp,
2082 					    ddi_get_lbolt64() -
2083 					    sctp->sctp_out_time);
2084 					sctp->sctp_out_time = 0;
2085 				}
2086 				if (SCTP_CHUNK_ISACKED(mp))
2087 					continue;
2088 				SCTP_CHUNK_SET_SACKCNT(mp, 0);
2089 				SCTP_CHUNK_ACKED(mp);
2090 				ASSERT(fp->sf_suna >= chunklen);
2091 				fp->sf_suna -= chunklen;
2092 				fp->sf_acked += chunklen;
2093 				cumack_forward += chunklen;
2094 				ASSERT(sctp->sctp_unacked >=
2095 				    (chunklen - sizeof (*sdc)));
2096 				sctp->sctp_unacked -=
2097 				    (chunklen - sizeof (*sdc));
2098 				if (fp->sf_suna == 0) {
2099 					/* all outstanding data acked */
2100 					fp->sf_pba = 0;
2101 					SCTP_FADDR_TIMER_STOP(fp);
2102 				} else {
2103 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
2104 					    fp->sf_rto);
2105 				}
2106 			} else {
2107 				goto cum_ack_done;
2108 			}
2109 		}
2110 		nump = ump->b_next;
2111 		if (nump != NULL)
2112 			nump->b_prev = NULL;
2113 		if (ump == sctp->sctp_xmit_tail)
2114 			sctp->sctp_xmit_tail = nump;
2115 		if (SCTP_IS_MSG_ABANDONED(ump)) {
2116 			BUMP_LOCAL(sctp->sctp_prsctpdrop);
2117 			ump->b_next = NULL;
2118 			sctp_sendfail_event(sctp, ump, 0, B_TRUE);
2119 		} else {
2120 			sctp_free_msg(ump);
2121 		}
2122 		sctp->sctp_xmit_head = ump = nump;
2123 	}
2124 cum_ack_done:
2125 	*first_unacked = mp;
2126 	if (cumack_forward > 0) {
2127 		SCTPS_BUMP_MIB(sctps, sctpInAck);
2128 		if (SEQ_GT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn)) {
2129 			sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd;
2130 		}
2131 
2132 		/*
2133 		 * Update ULP the amount of queued data, which is
2134 		 * sent-unack'ed + unsent.
2135 		 */
2136 		if (!SCTP_IS_DETACHED(sctp))
2137 			SCTP_TXQ_UPDATE(sctp);
2138 
2139 		/* Time to send a shutdown? */
2140 		if (sctp->sctp_state == SCTPS_SHUTDOWN_PENDING) {
2141 			sctp_send_shutdown(sctp, 0);
2142 		}
2143 		sctp->sctp_xmit_unacked = mp;
2144 	} else {
2145 		/* dup ack */
2146 		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
2147 	}
2148 	sctp->sctp_lastack_rxd = tsn;
2149 	if (SEQ_LT(sctp->sctp_adv_pap, sctp->sctp_lastack_rxd))
2150 		sctp->sctp_adv_pap = sctp->sctp_lastack_rxd;
2151 	ASSERT(sctp->sctp_xmit_head || sctp->sctp_unacked == 0);
2152 
2153 	return (cumack_forward);
2154 }
2155 
2156 static int
2157 sctp_set_frwnd(sctp_t *sctp, uint32_t frwnd)
2158 {
2159 	uint32_t orwnd;
2160 
2161 	if (sctp->sctp_unacked > frwnd) {
2162 		sctp->sctp_frwnd = 0;
2163 		return (0);
2164 	}
2165 	orwnd = sctp->sctp_frwnd;
2166 	sctp->sctp_frwnd = frwnd - sctp->sctp_unacked;
2167 	if (orwnd < sctp->sctp_frwnd) {
2168 		return (1);
2169 	} else {
2170 		return (0);
2171 	}
2172 }
2173 
2174 /*
2175  * For un-ordered messages.
2176  * Walk the sctp->sctp_uo_frag list and remove any fragments with TSN
2177  * less than/equal to ftsn. Fragments for un-ordered messages are
2178  * strictly in sequence (w.r.t TSN).
2179  */
2180 static int
2181 sctp_ftsn_check_uo_frag(sctp_t *sctp, uint32_t ftsn)
2182 {
2183 	mblk_t		*hmp;
2184 	mblk_t		*hmp_next;
2185 	sctp_data_hdr_t	*dc;
2186 	int		dlen = 0;
2187 
2188 	hmp = sctp->sctp_uo_frags;
2189 	while (hmp != NULL) {
2190 		hmp_next = hmp->b_next;
2191 		dc = (sctp_data_hdr_t *)hmp->b_rptr;
2192 		if (SEQ_GT(ntohl(dc->sdh_tsn), ftsn))
2193 			return (dlen);
2194 		sctp->sctp_uo_frags = hmp_next;
2195 		if (hmp_next != NULL)
2196 			hmp_next->b_prev = NULL;
2197 		hmp->b_next = NULL;
2198 		dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2199 		freeb(hmp);
2200 		hmp = hmp_next;
2201 	}
2202 	return (dlen);
2203 }
2204 
2205 /*
2206  * For ordered messages.
2207  * Check for existing fragments for an sid-ssn pair reported as abandoned,
2208  * hence will not receive, in the Forward TSN. If there are fragments, then
2209  * we just nuke them. If and when Partial Delivery API is supported, we
2210  * would need to send a notification to the upper layer about this.
2211  */
2212 static int
2213 sctp_ftsn_check_frag(sctp_t *sctp, uint16_t ssn, sctp_instr_t *sip)
2214 {
2215 	sctp_reass_t	*srp;
2216 	mblk_t		*hmp;
2217 	mblk_t		*dmp;
2218 	mblk_t		*hmp_next;
2219 	sctp_data_hdr_t	*dc;
2220 	int		dlen = 0;
2221 
2222 	hmp = sip->istr_reass;
2223 	while (hmp != NULL) {
2224 		hmp_next = hmp->b_next;
2225 		srp = (sctp_reass_t *)DB_BASE(hmp);
2226 		if (SSN_GT(srp->sr_ssn, ssn))
2227 			return (dlen);
2228 		/*
2229 		 * If we had sent part of this message up, send a partial
2230 		 * delivery event. Since this is ordered delivery, we should
2231 		 * have sent partial message only for the next in sequence,
2232 		 * hence the ASSERT. See comments in sctp_data_chunk() for
2233 		 * trypartial.
2234 		 */
2235 		if (srp->sr_partial_delivered) {
2236 			if (srp->sr_ssn != sip->nextseq)
2237 				cmn_err(CE_WARN, "sctp partial"
2238 				    " delivery notify, sctp 0x%p"
2239 				    " sip = 0x%p ssn != nextseq"
2240 				    " ssn 0x%x nextseq 0x%x",
2241 				    (void *)sctp, (void *)sip,
2242 				    srp->sr_ssn, sip->nextseq);
2243 			ASSERT(sip->nextseq == srp->sr_ssn);
2244 			sctp_partial_delivery_event(sctp);
2245 		}
2246 		/* Take it out of the reass queue */
2247 		sip->istr_reass = hmp_next;
2248 		if (hmp_next != NULL)
2249 			hmp_next->b_prev = NULL;
2250 		hmp->b_next = NULL;
2251 		ASSERT(hmp->b_prev == NULL);
2252 		dmp = hmp;
2253 		ASSERT(DB_TYPE(hmp) == M_CTL);
2254 		dmp = hmp->b_cont;
2255 		hmp->b_cont = NULL;
2256 		freeb(hmp);
2257 		hmp = dmp;
2258 		while (dmp != NULL) {
2259 			dc = (sctp_data_hdr_t *)dmp->b_rptr;
2260 			dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2261 			dmp = dmp->b_cont;
2262 		}
2263 		freemsg(hmp);
2264 		hmp = hmp_next;
2265 	}
2266 	return (dlen);
2267 }
2268 
2269 /*
2270  * Update sctp_ftsn to the cumulative TSN from the Forward TSN chunk. Remove
2271  * any SACK gaps less than the newly updated sctp_ftsn. Walk through the
2272  * sid-ssn pair in the Forward TSN and for each, clean the fragment list
2273  * for this pair, if needed, and check if we can deliver subsequent
2274  * messages, if any, from the instream queue (that were waiting for this
2275  * sid-ssn message to show up). Once we are done try to update the SACK
2276  * info. We could get a duplicate Forward TSN, in which case just send
2277  * a SACK. If any of the sid values in the Forward TSN is invalid,
2278  * send back an "Invalid Stream Identifier" error and continue processing
2279  * the rest.
2280  */
2281 static void
2282 sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp,
2283     ip_pkt_t *ipp, ip_recv_attr_t *ira)
2284 {
2285 	uint32_t	*ftsn = (uint32_t *)(ch + 1);
2286 	ftsn_entry_t	*ftsn_entry;
2287 	sctp_instr_t	*instr;
2288 	boolean_t	can_deliver = B_TRUE;
2289 	size_t		dlen;
2290 	int		flen;
2291 	mblk_t		*dmp;
2292 	mblk_t		*pmp;
2293 	sctp_data_hdr_t	*dc;
2294 	ssize_t		remaining;
2295 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2296 
2297 	*ftsn = ntohl(*ftsn);
2298 	remaining =  ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*ftsn);
2299 
2300 	if (SCTP_IS_DETACHED(sctp)) {
2301 		SCTPS_BUMP_MIB(sctps, sctpInClosed);
2302 		can_deliver = B_FALSE;
2303 	}
2304 	/*
2305 	 * un-ordered messages don't have SID-SSN pair entries, we check
2306 	 * for any fragments (for un-ordered message) to be discarded using
2307 	 * the cumulative FTSN.
2308 	 */
2309 	flen = sctp_ftsn_check_uo_frag(sctp, *ftsn);
2310 	if (flen > 0) {
2311 		ASSERT(sctp->sctp_rxqueued >= flen);
2312 		sctp->sctp_rxqueued -= flen;
2313 	}
2314 	ftsn_entry = (ftsn_entry_t *)(ftsn + 1);
2315 	while (remaining >= sizeof (*ftsn_entry)) {
2316 		ftsn_entry->ftsn_sid = ntohs(ftsn_entry->ftsn_sid);
2317 		ftsn_entry->ftsn_ssn = ntohs(ftsn_entry->ftsn_ssn);
2318 		if (ftsn_entry->ftsn_sid >= sctp->sctp_num_istr) {
2319 			sctp_bsc_t	inval_parm;
2320 
2321 			/* Will populate the CAUSE block in the ERROR chunk. */
2322 			inval_parm.bsc_sid = htons(ftsn_entry->ftsn_sid);
2323 			/* RESERVED, ignored at the receiving end */
2324 			inval_parm.bsc_pad = 0;
2325 
2326 			sctp_add_err(sctp, SCTP_ERR_BAD_SID,
2327 			    (void *)&inval_parm, sizeof (sctp_bsc_t), fp);
2328 			ftsn_entry++;
2329 			remaining -= sizeof (*ftsn_entry);
2330 			continue;
2331 		}
2332 		instr = &sctp->sctp_instr[ftsn_entry->ftsn_sid];
2333 		flen = sctp_ftsn_check_frag(sctp, ftsn_entry->ftsn_ssn, instr);
2334 		/* Indicates frags were nuked, update rxqueued */
2335 		if (flen > 0) {
2336 			ASSERT(sctp->sctp_rxqueued >= flen);
2337 			sctp->sctp_rxqueued -= flen;
2338 		}
2339 		/*
2340 		 * It is possible to receive an FTSN chunk with SSN smaller
2341 		 * than then nextseq if this chunk is a retransmission because
2342 		 * of incomplete processing when it was first processed.
2343 		 */
2344 		if (SSN_GE(ftsn_entry->ftsn_ssn, instr->nextseq))
2345 			instr->nextseq = ftsn_entry->ftsn_ssn + 1;
2346 		while (instr->istr_nmsgs > 0) {
2347 			mblk_t	*next;
2348 
2349 			dmp = (mblk_t *)instr->istr_msgs;
2350 			dc = (sctp_data_hdr_t *)dmp->b_rptr;
2351 			if (ntohs(dc->sdh_ssn) != instr->nextseq)
2352 				break;
2353 
2354 			next = dmp->b_next;
2355 			dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
2356 			for (pmp = dmp->b_cont; pmp != NULL;
2357 			    pmp = pmp->b_cont) {
2358 				dlen += MBLKL(pmp);
2359 			}
2360 			if (can_deliver) {
2361 				int error;
2362 
2363 				dmp->b_rptr = (uchar_t *)(dc + 1);
2364 				dmp->b_next = NULL;
2365 				ASSERT(dmp->b_prev == NULL);
2366 				if (sctp_input_add_ancillary(sctp,
2367 				    &dmp, dc, fp, ipp, ira) == 0) {
2368 					sctp->sctp_rxqueued -= dlen;
2369 					/*
2370 					 * Override b_flag for SCTP sockfs
2371 					 * internal use
2372 					 */
2373 
2374 					dmp->b_flag = 0;
2375 					if (sctp->sctp_flowctrld) {
2376 						sctp->sctp_rwnd -= dlen;
2377 						if (sctp->sctp_rwnd < 0)
2378 							sctp->sctp_rwnd = 0;
2379 					}
2380 					if (sctp->sctp_ulp_recv(
2381 					    sctp->sctp_ulpd, dmp, msgdsize(dmp),
2382 					    0, &error, NULL) <= 0) {
2383 						sctp->sctp_flowctrld = B_TRUE;
2384 					}
2385 				} else {
2386 					/*
2387 					 * We will resume processing when
2388 					 * the FTSN chunk is re-xmitted.
2389 					 */
2390 					dmp->b_rptr = (uchar_t *)dc;
2391 					dmp->b_next = next;
2392 					dprint(0,
2393 					    ("FTSN dequeuing %u failed\n",
2394 					    ntohs(dc->sdh_ssn)));
2395 					return;
2396 				}
2397 			} else {
2398 				sctp->sctp_rxqueued -= dlen;
2399 				ASSERT(dmp->b_prev == NULL);
2400 				dmp->b_next = NULL;
2401 				freemsg(dmp);
2402 			}
2403 			instr->istr_nmsgs--;
2404 			instr->nextseq++;
2405 			sctp->sctp_istr_nmsgs--;
2406 			if (next != NULL)
2407 				next->b_prev = NULL;
2408 			instr->istr_msgs = next;
2409 		}
2410 		ftsn_entry++;
2411 		remaining -= sizeof (*ftsn_entry);
2412 	}
2413 	/* Duplicate FTSN */
2414 	if (*ftsn <= (sctp->sctp_ftsn - 1)) {
2415 		sctp->sctp_force_sack = 1;
2416 		return;
2417 	}
2418 	/* Advance cum TSN to that reported in the Forward TSN chunk */
2419 	sctp->sctp_ftsn = *ftsn + 1;
2420 
2421 	/* Remove all the SACK gaps before the new cum TSN */
2422 	if (sctp->sctp_sack_info != NULL) {
2423 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
2424 		    &sctp->sctp_sack_gaps);
2425 	}
2426 	/*
2427 	 * If there are gap reports pending, check if advancing
2428 	 * the ftsn here closes a gap. If so, we can advance
2429 	 * ftsn to the end of the set.
2430 	 * If ftsn has moved forward, maybe we can remove gap reports.
2431 	 */
2432 	if (sctp->sctp_sack_info != NULL &&
2433 	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
2434 		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
2435 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
2436 		    &sctp->sctp_sack_gaps);
2437 	}
2438 }
2439 
2440 /*
2441  * When we have processed a SACK we check to see if we can advance the
2442  * cumulative TSN if there are abandoned chunks immediately following
2443  * the updated cumulative TSN. If there are, we attempt to send a
2444  * Forward TSN chunk.
2445  */
2446 static void
2447 sctp_check_abandoned_data(sctp_t *sctp, sctp_faddr_t *fp)
2448 {
2449 	mblk_t		*meta = sctp->sctp_xmit_head;
2450 	mblk_t		*mp;
2451 	mblk_t		*nmp;
2452 	uint32_t	seglen;
2453 	uint32_t	adv_pap = sctp->sctp_adv_pap;
2454 
2455 	/*
2456 	 * We only check in the first meta since otherwise we can't
2457 	 * advance the cumulative ack point. We just look for chunks
2458 	 * marked for retransmission, else we might prematurely
2459 	 * send an FTSN for a sent, but unacked, chunk.
2460 	 */
2461 	for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2462 		if (!SCTP_CHUNK_ISSENT(mp))
2463 			return;
2464 		if (SCTP_CHUNK_WANT_REXMIT(mp))
2465 			break;
2466 	}
2467 	if (mp == NULL)
2468 		return;
2469 	sctp_check_adv_ack_pt(sctp, meta, mp);
2470 	if (SEQ_GT(sctp->sctp_adv_pap, adv_pap)) {
2471 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
2472 		if (nmp == NULL) {
2473 			sctp->sctp_adv_pap = adv_pap;
2474 			if (!fp->sf_timer_running)
2475 				SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2476 			return;
2477 		}
2478 		sctp_set_iplen(sctp, nmp, fp->sf_ixa);
2479 		(void) conn_ip_output(nmp, fp->sf_ixa);
2480 		BUMP_LOCAL(sctp->sctp_opkts);
2481 		if (!fp->sf_timer_running)
2482 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2483 	}
2484 }
2485 
2486 /*
2487  * The processing here follows the same logic in sctp_got_sack(), the reason
2488  * we do this separately is because, usually, gap blocks are ordered and
2489  * we can process it in sctp_got_sack(). However if they aren't we would
2490  * need to do some additional non-optimal stuff when we start processing the
2491  * unordered gaps. To that effect sctp_got_sack() does the processing in the
2492  * simple case and this does the same in the more involved case.
2493  */
2494 static uint32_t
2495 sctp_process_uo_gaps(sctp_t *sctp, uint32_t ctsn, sctp_sack_frag_t *ssf,
2496     int num_gaps, mblk_t *umphead, mblk_t *mphead, int *trysend,
2497     boolean_t *fast_recovery, uint32_t fr_xtsn)
2498 {
2499 	uint32_t		xtsn;
2500 	uint32_t		gapstart = 0;
2501 	uint32_t		gapend = 0;
2502 	int			gapcnt;
2503 	uint16_t		chunklen;
2504 	sctp_data_hdr_t		*sdc;
2505 	int			gstart;
2506 	mblk_t			*ump = umphead;
2507 	mblk_t			*mp = mphead;
2508 	sctp_faddr_t		*fp;
2509 	uint32_t		acked = 0;
2510 	sctp_stack_t		*sctps = sctp->sctp_sctps;
2511 
2512 	/*
2513 	 * gstart tracks the last (in the order of TSN) gapstart that
2514 	 * we process in this SACK gaps walk.
2515 	 */
2516 	gstart = ctsn;
2517 
2518 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2519 	xtsn = ntohl(sdc->sdh_tsn);
2520 	for (gapcnt = 0; gapcnt < num_gaps; gapcnt++, ssf++) {
2521 		if (gapstart != 0) {
2522 			/*
2523 			 * If we have reached the end of the transmit list or
2524 			 * hit an unsent chunk or encountered an unordered gap
2525 			 * block start from the ctsn again.
2526 			 */
2527 			if (ump == NULL || !SCTP_CHUNK_ISSENT(mp) ||
2528 			    SEQ_LT(ctsn + ntohs(ssf->ssf_start), xtsn)) {
2529 				ump = umphead;
2530 				mp = mphead;
2531 				sdc = (sctp_data_hdr_t *)mp->b_rptr;
2532 				xtsn = ntohl(sdc->sdh_tsn);
2533 			}
2534 		}
2535 
2536 		gapstart = ctsn + ntohs(ssf->ssf_start);
2537 		gapend = ctsn + ntohs(ssf->ssf_end);
2538 
2539 		/*
2540 		 * Sanity checks:
2541 		 *
2542 		 * 1. SACK for TSN we have not sent - ABORT
2543 		 * 2. Invalid or spurious gaps, ignore all gaps
2544 		 */
2545 		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
2546 		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
2547 			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2548 			*trysend = -1;
2549 			return (acked);
2550 		} else if (SEQ_LT(gapend, gapstart) ||
2551 		    SEQ_LEQ(gapstart, ctsn)) {
2552 			break;
2553 		}
2554 		/*
2555 		 * The xtsn can be the TSN processed for the last gap
2556 		 * (gapend) or it could be the cumulative TSN. We continue
2557 		 * with the last xtsn as long as the gaps are ordered, when
2558 		 * we hit an unordered gap, we re-start from the cumulative
2559 		 * TSN. For the first gap it is always the cumulative TSN.
2560 		 */
2561 		while (xtsn != gapstart) {
2562 			/*
2563 			 * We can't reliably check for reneged chunks
2564 			 * when walking the unordered list, so we don't.
2565 			 * In case the peer reneges then we will end up
2566 			 * sending the reneged chunk via timeout.
2567 			 */
2568 			mp = mp->b_next;
2569 			if (mp == NULL) {
2570 				ump = ump->b_next;
2571 				/*
2572 				 * ump can't be NULL because of the sanity
2573 				 * check above.
2574 				 */
2575 				ASSERT(ump != NULL);
2576 				mp = ump->b_cont;
2577 			}
2578 			/*
2579 			 * mp can't be unsent because of the sanity check
2580 			 * above.
2581 			 */
2582 			ASSERT(SCTP_CHUNK_ISSENT(mp));
2583 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2584 			xtsn = ntohl(sdc->sdh_tsn);
2585 		}
2586 		/*
2587 		 * Now that we have found the chunk with TSN == 'gapstart',
2588 		 * let's walk till we hit the chunk with TSN == 'gapend'.
2589 		 * All intermediate chunks will be marked ACKED, if they
2590 		 * haven't already been.
2591 		 */
2592 		while (SEQ_LEQ(xtsn, gapend)) {
2593 			/*
2594 			 * SACKed
2595 			 */
2596 			SCTP_CHUNK_SET_SACKCNT(mp, 0);
2597 			if (!SCTP_CHUNK_ISACKED(mp)) {
2598 				SCTP_CHUNK_ACKED(mp);
2599 
2600 				fp = SCTP_CHUNK_DEST(mp);
2601 				chunklen = ntohs(sdc->sdh_len);
2602 				ASSERT(fp->sf_suna >= chunklen);
2603 				fp->sf_suna -= chunklen;
2604 				if (fp->sf_suna == 0) {
2605 					/* All outstanding data acked. */
2606 					fp->sf_pba = 0;
2607 					SCTP_FADDR_TIMER_STOP(fp);
2608 				}
2609 				fp->sf_acked += chunklen;
2610 				acked += chunklen;
2611 				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
2612 				ASSERT(sctp->sctp_unacked >= 0);
2613 			}
2614 			/*
2615 			 * Move to the next message in the transmit list
2616 			 * if we are done with all the chunks from the current
2617 			 * message. Note, it is possible to hit the end of the
2618 			 * transmit list here, i.e. if we have already completed
2619 			 * processing the gap block.
2620 			 */
2621 			mp = mp->b_next;
2622 			if (mp == NULL) {
2623 				ump = ump->b_next;
2624 				if (ump == NULL) {
2625 					ASSERT(xtsn == gapend);
2626 					break;
2627 				}
2628 				mp = ump->b_cont;
2629 			}
2630 			/*
2631 			 * Likewise, we can hit an unsent chunk once we have
2632 			 * completed processing the gap block.
2633 			 */
2634 			if (!SCTP_CHUNK_ISSENT(mp)) {
2635 				ASSERT(xtsn == gapend);
2636 				break;
2637 			}
2638 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2639 			xtsn = ntohl(sdc->sdh_tsn);
2640 		}
2641 		/*
2642 		 * We keep track of the last gap we successfully processed
2643 		 * so that we can terminate the walk below for incrementing
2644 		 * the SACK count.
2645 		 */
2646 		if (SEQ_LT(gstart, gapstart))
2647 			gstart = gapstart;
2648 	}
2649 	/*
2650 	 * Check if have incremented the SACK count for all unacked TSNs in
2651 	 * sctp_got_sack(), if so we are done.
2652 	 */
2653 	if (SEQ_LEQ(gstart, fr_xtsn))
2654 		return (acked);
2655 
2656 	ump = umphead;
2657 	mp = mphead;
2658 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2659 	xtsn = ntohl(sdc->sdh_tsn);
2660 	while (SEQ_LT(xtsn, gstart)) {
2661 		/*
2662 		 * We have incremented SACK count for TSNs less than fr_tsn
2663 		 * in sctp_got_sack(), so don't increment them again here.
2664 		 */
2665 		if (SEQ_GT(xtsn, fr_xtsn) && !SCTP_CHUNK_ISACKED(mp)) {
2666 			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
2667 			if (SCTP_CHUNK_SACKCNT(mp) ==
2668 			    sctps->sctps_fast_rxt_thresh) {
2669 				SCTP_CHUNK_REXMIT(sctp, mp);
2670 				sctp->sctp_chk_fast_rexmit = B_TRUE;
2671 				*trysend = 1;
2672 				if (!*fast_recovery) {
2673 					/*
2674 					 * Entering fast recovery.
2675 					 */
2676 					fp = SCTP_CHUNK_DEST(mp);
2677 					fp->sf_ssthresh = fp->sf_cwnd / 2;
2678 					if (fp->sf_ssthresh < 2 * fp->sf_pmss) {
2679 						fp->sf_ssthresh =
2680 						    2 * fp->sf_pmss;
2681 					}
2682 					fp->sf_cwnd = fp->sf_ssthresh;
2683 					fp->sf_pba = 0;
2684 					sctp->sctp_recovery_tsn =
2685 					    sctp->sctp_ltsn - 1;
2686 					*fast_recovery = B_TRUE;
2687 				}
2688 			}
2689 		}
2690 		mp = mp->b_next;
2691 		if (mp == NULL) {
2692 			ump = ump->b_next;
2693 			/* We can't get to the end of the transmit list here */
2694 			ASSERT(ump != NULL);
2695 			mp = ump->b_cont;
2696 		}
2697 		/* We can't hit an unsent chunk here */
2698 		ASSERT(SCTP_CHUNK_ISSENT(mp));
2699 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
2700 		xtsn = ntohl(sdc->sdh_tsn);
2701 	}
2702 	return (acked);
2703 }
2704 
2705 static int
2706 sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch)
2707 {
2708 	sctp_sack_chunk_t	*sc;
2709 	sctp_data_hdr_t		*sdc;
2710 	sctp_sack_frag_t	*ssf;
2711 	mblk_t			*ump;
2712 	mblk_t			*mp;
2713 	mblk_t			*mp1;
2714 	uint32_t		cumtsn;
2715 	uint32_t		xtsn;
2716 	uint32_t		gapstart = 0;
2717 	uint32_t		gapend = 0;
2718 	uint32_t		acked = 0;
2719 	uint16_t		chunklen;
2720 	sctp_faddr_t		*fp;
2721 	int			num_gaps;
2722 	int			trysend = 0;
2723 	int			i;
2724 	boolean_t		fast_recovery = B_FALSE;
2725 	boolean_t		cumack_forward = B_FALSE;
2726 	boolean_t		fwd_tsn = B_FALSE;
2727 	sctp_stack_t		*sctps = sctp->sctp_sctps;
2728 
2729 	BUMP_LOCAL(sctp->sctp_ibchunks);
2730 	BUMP_LOCAL(sctp->sctp_isacks);
2731 	chunklen = ntohs(sch->sch_len);
2732 	if (chunklen < (sizeof (*sch) + sizeof (*sc)))
2733 		return (0);
2734 
2735 	sc = (sctp_sack_chunk_t *)(sch + 1);
2736 	cumtsn = ntohl(sc->ssc_cumtsn);
2737 
2738 	dprint(2, ("got sack cumtsn %x -> %x\n", sctp->sctp_lastack_rxd,
2739 	    cumtsn));
2740 
2741 	/* out of order */
2742 	if (SEQ_LT(cumtsn, sctp->sctp_lastack_rxd))
2743 		return (0);
2744 
2745 	if (SEQ_GT(cumtsn, sctp->sctp_ltsn - 1)) {
2746 		SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2747 		/* Send an ABORT */
2748 		return (-1);
2749 	}
2750 
2751 	/*
2752 	 * Cwnd only done when not in fast recovery mode.
2753 	 */
2754 	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn))
2755 		fast_recovery = B_TRUE;
2756 
2757 	/*
2758 	 * .. and if the cum TSN is not moving ahead on account Forward TSN
2759 	 */
2760 	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_adv_pap))
2761 		fwd_tsn = B_TRUE;
2762 
2763 	if (cumtsn == sctp->sctp_lastack_rxd &&
2764 	    (sctp->sctp_xmit_unacked == NULL ||
2765 	    !SCTP_CHUNK_ABANDONED(sctp->sctp_xmit_unacked))) {
2766 		if (sctp->sctp_xmit_unacked != NULL)
2767 			mp = sctp->sctp_xmit_unacked;
2768 		else if (sctp->sctp_xmit_head != NULL)
2769 			mp = sctp->sctp_xmit_head->b_cont;
2770 		else
2771 			mp = NULL;
2772 		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
2773 		/*
2774 		 * If we were doing a zero win probe and the win
2775 		 * has now opened to at least MSS, re-transmit the
2776 		 * zero win probe via sctp_rexmit_packet().
2777 		 */
2778 		if (mp != NULL && sctp->sctp_zero_win_probe &&
2779 		    ntohl(sc->ssc_a_rwnd) >= sctp->sctp_current->sf_pmss) {
2780 			mblk_t	*pkt;
2781 			uint_t	pkt_len;
2782 			mblk_t	*mp1 = mp;
2783 			mblk_t	*meta = sctp->sctp_xmit_head;
2784 
2785 			/*
2786 			 * Reset the RTO since we have been backing-off
2787 			 * to send the ZWP.
2788 			 */
2789 			fp = sctp->sctp_current;
2790 			fp->sf_rto = fp->sf_srtt + 4 * fp->sf_rttvar;
2791 			SCTP_MAX_RTO(sctp, fp);
2792 			/* Resend the ZWP */
2793 			pkt = sctp_rexmit_packet(sctp, &meta, &mp1, fp,
2794 			    &pkt_len);
2795 			if (pkt == NULL) {
2796 				SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2797 				return (0);
2798 			}
2799 			ASSERT(pkt_len <= fp->sf_pmss);
2800 			sctp->sctp_zero_win_probe = B_FALSE;
2801 			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2802 			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2803 			sctp_set_iplen(sctp, pkt, fp->sf_ixa);
2804 			(void) conn_ip_output(pkt, fp->sf_ixa);
2805 			BUMP_LOCAL(sctp->sctp_opkts);
2806 		}
2807 	} else {
2808 		if (sctp->sctp_zero_win_probe) {
2809 			/*
2810 			 * Reset the RTO since we have been backing-off
2811 			 * to send the ZWP.
2812 			 */
2813 			fp = sctp->sctp_current;
2814 			fp->sf_rto = fp->sf_srtt + 4 * fp->sf_rttvar;
2815 			SCTP_MAX_RTO(sctp, fp);
2816 			sctp->sctp_zero_win_probe = B_FALSE;
2817 			/* This is probably not required */
2818 			if (!sctp->sctp_rexmitting) {
2819 				sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2820 				sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2821 			}
2822 		}
2823 		acked = sctp_cumack(sctp, cumtsn, &mp);
2824 		sctp->sctp_xmit_unacked = mp;
2825 		if (acked > 0) {
2826 			trysend = 1;
2827 			cumack_forward = B_TRUE;
2828 			if (fwd_tsn && SEQ_GEQ(sctp->sctp_lastack_rxd,
2829 			    sctp->sctp_adv_pap)) {
2830 				cumack_forward = B_FALSE;
2831 			}
2832 		}
2833 	}
2834 	num_gaps = ntohs(sc->ssc_numfrags);
2835 	UPDATE_LOCAL(sctp->sctp_gapcnt, num_gaps);
2836 	if (num_gaps == 0 || mp == NULL || !SCTP_CHUNK_ISSENT(mp) ||
2837 	    chunklen < (sizeof (*sch) + sizeof (*sc) +
2838 	    num_gaps * sizeof (*ssf))) {
2839 		goto ret;
2840 	}
2841 #ifdef	DEBUG
2842 	/*
2843 	 * Since we delete any message that has been acked completely,
2844 	 * the unacked chunk must belong to sctp_xmit_head (as
2845 	 * we don't have a back pointer from the mp to the meta data
2846 	 * we do this).
2847 	 */
2848 	{
2849 		mblk_t	*mp2 = sctp->sctp_xmit_head->b_cont;
2850 
2851 		while (mp2 != NULL) {
2852 			if (mp2 == mp)
2853 				break;
2854 			mp2 = mp2->b_next;
2855 		}
2856 		ASSERT(mp2 != NULL);
2857 	}
2858 #endif
2859 	ump = sctp->sctp_xmit_head;
2860 
2861 	/*
2862 	 * Just remember where we started from, in case we need to call
2863 	 * sctp_process_uo_gaps() if the gap blocks are unordered.
2864 	 */
2865 	mp1 = mp;
2866 
2867 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2868 	xtsn = ntohl(sdc->sdh_tsn);
2869 	ASSERT(xtsn == cumtsn + 1);
2870 
2871 	/*
2872 	 * Go through SACK gaps. They are ordered based on start TSN.
2873 	 */
2874 	ssf = (sctp_sack_frag_t *)(sc + 1);
2875 	for (i = 0; i < num_gaps; i++, ssf++) {
2876 		if (gapstart != 0) {
2877 			/* check for unordered gap */
2878 			if (SEQ_LEQ(cumtsn + ntohs(ssf->ssf_start), gapstart)) {
2879 				acked += sctp_process_uo_gaps(sctp,
2880 				    cumtsn, ssf, num_gaps - i,
2881 				    sctp->sctp_xmit_head, mp1,
2882 				    &trysend, &fast_recovery, gapstart);
2883 				if (trysend < 0) {
2884 					SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2885 					return (-1);
2886 				}
2887 				break;
2888 			}
2889 		}
2890 		gapstart = cumtsn + ntohs(ssf->ssf_start);
2891 		gapend = cumtsn + ntohs(ssf->ssf_end);
2892 
2893 		/*
2894 		 * Sanity checks:
2895 		 *
2896 		 * 1. SACK for TSN we have not sent - ABORT
2897 		 * 2. Invalid or spurious gaps, ignore all gaps
2898 		 */
2899 		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
2900 		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
2901 			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2902 			return (-1);
2903 		} else if (SEQ_LT(gapend, gapstart) ||
2904 		    SEQ_LEQ(gapstart, cumtsn)) {
2905 			break;
2906 		}
2907 		/*
2908 		 * Let's start at the current TSN (for the 1st gap we start
2909 		 * from the cumulative TSN, for subsequent ones we start from
2910 		 * where the previous gapend was found - second while loop
2911 		 * below) and walk the transmit list till we find the TSN
2912 		 * corresponding to gapstart. All the unacked chunks till we
2913 		 * get to the chunk with TSN == gapstart will have their
2914 		 * SACKCNT incremented by 1. Note since the gap blocks are
2915 		 * ordered, we won't be incrementing the SACKCNT for an
2916 		 * unacked chunk by more than one while processing the gap
2917 		 * blocks. If the SACKCNT for any unacked chunk exceeds
2918 		 * the fast retransmit threshold, we will fast retransmit
2919 		 * after processing all the gap blocks.
2920 		 */
2921 		ASSERT(SEQ_LEQ(xtsn, gapstart));
2922 		while (xtsn != gapstart) {
2923 			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
2924 			if (SCTP_CHUNK_SACKCNT(mp) ==
2925 			    sctps->sctps_fast_rxt_thresh) {
2926 				SCTP_CHUNK_REXMIT(sctp, mp);
2927 				sctp->sctp_chk_fast_rexmit = B_TRUE;
2928 				trysend = 1;
2929 				if (!fast_recovery) {
2930 					/*
2931 					 * Entering fast recovery.
2932 					 */
2933 					fp = SCTP_CHUNK_DEST(mp);
2934 					fp->sf_ssthresh = fp->sf_cwnd / 2;
2935 					if (fp->sf_ssthresh < 2 * fp->sf_pmss) {
2936 						fp->sf_ssthresh =
2937 						    2 * fp->sf_pmss;
2938 					}
2939 					fp->sf_cwnd = fp->sf_ssthresh;
2940 					fp->sf_pba = 0;
2941 					sctp->sctp_recovery_tsn =
2942 					    sctp->sctp_ltsn - 1;
2943 					fast_recovery = B_TRUE;
2944 				}
2945 			}
2946 
2947 			/*
2948 			 * Peer may have reneged on this chunk, so un-sack
2949 			 * it now. If the peer did renege, we need to
2950 			 * readjust unacked.
2951 			 */
2952 			if (SCTP_CHUNK_ISACKED(mp)) {
2953 				chunklen = ntohs(sdc->sdh_len);
2954 				fp = SCTP_CHUNK_DEST(mp);
2955 				fp->sf_suna += chunklen;
2956 				sctp->sctp_unacked += chunklen - sizeof (*sdc);
2957 				SCTP_CHUNK_CLEAR_ACKED(sctp, mp);
2958 				if (!fp->sf_timer_running) {
2959 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
2960 					    fp->sf_rto);
2961 				}
2962 			}
2963 
2964 			mp = mp->b_next;
2965 			if (mp == NULL) {
2966 				ump = ump->b_next;
2967 				/*
2968 				 * ump can't be NULL given the sanity check
2969 				 * above.  But if it is NULL, it means that
2970 				 * there is a data corruption.  We'd better
2971 				 * panic.
2972 				 */
2973 				if (ump == NULL) {
2974 					panic("Memory corruption detected: gap "
2975 					    "start TSN 0x%x missing from the "
2976 					    "xmit list: %p", gapstart,
2977 					    (void *)sctp);
2978 				}
2979 				mp = ump->b_cont;
2980 			}
2981 			/*
2982 			 * mp can't be unsent given the sanity check above.
2983 			 */
2984 			ASSERT(SCTP_CHUNK_ISSENT(mp));
2985 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2986 			xtsn = ntohl(sdc->sdh_tsn);
2987 		}
2988 		/*
2989 		 * Now that we have found the chunk with TSN == 'gapstart',
2990 		 * let's walk till we hit the chunk with TSN == 'gapend'.
2991 		 * All intermediate chunks will be marked ACKED, if they
2992 		 * haven't already been.
2993 		 */
2994 		while (SEQ_LEQ(xtsn, gapend)) {
2995 			/*
2996 			 * SACKed
2997 			 */
2998 			SCTP_CHUNK_SET_SACKCNT(mp, 0);
2999 			if (!SCTP_CHUNK_ISACKED(mp)) {
3000 				SCTP_CHUNK_ACKED(mp);
3001 
3002 				fp = SCTP_CHUNK_DEST(mp);
3003 				chunklen = ntohs(sdc->sdh_len);
3004 				ASSERT(fp->sf_suna >= chunklen);
3005 				fp->sf_suna -= chunklen;
3006 				if (fp->sf_suna == 0) {
3007 					/* All outstanding data acked. */
3008 					fp->sf_pba = 0;
3009 					SCTP_FADDR_TIMER_STOP(fp);
3010 				}
3011 				fp->sf_acked += chunklen;
3012 				acked += chunklen;
3013 				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
3014 				ASSERT(sctp->sctp_unacked >= 0);
3015 			}
3016 			/* Go to the next chunk of the current message */
3017 			mp = mp->b_next;
3018 			/*
3019 			 * Move to the next message in the transmit list
3020 			 * if we are done with all the chunks from the current
3021 			 * message. Note, it is possible to hit the end of the
3022 			 * transmit list here, i.e. if we have already completed
3023 			 * processing the gap block.  But the TSN must be equal
3024 			 * to the gapend because of the above sanity check.
3025 			 * If it is not equal, it means that some data is
3026 			 * missing.
3027 			 * Also, note that we break here, which means we
3028 			 * continue processing gap blocks, if any. In case of
3029 			 * ordered gap blocks there can't be any following
3030 			 * this (if there is it will fail the sanity check
3031 			 * above). In case of un-ordered gap blocks we will
3032 			 * switch to sctp_process_uo_gaps().  In either case
3033 			 * it should be fine to continue with NULL ump/mp,
3034 			 * but we just reset it to xmit_head.
3035 			 */
3036 			if (mp == NULL) {
3037 				ump = ump->b_next;
3038 				if (ump == NULL) {
3039 					if (xtsn != gapend) {
3040 						panic("Memory corruption "
3041 						    "detected: gap end TSN "
3042 						    "0x%x missing from the "
3043 						    "xmit list: %p", gapend,
3044 						    (void *)sctp);
3045 					}
3046 					ump = sctp->sctp_xmit_head;
3047 					mp = mp1;
3048 					sdc = (sctp_data_hdr_t *)mp->b_rptr;
3049 					xtsn = ntohl(sdc->sdh_tsn);
3050 					break;
3051 				}
3052 				mp = ump->b_cont;
3053 			}
3054 			/*
3055 			 * Likewise, we could hit an unsent chunk once we have
3056 			 * completed processing the gap block. Again, it is
3057 			 * fine to continue processing gap blocks with mp
3058 			 * pointing to the unsent chunk, because if there
3059 			 * are more ordered gap blocks, they will fail the
3060 			 * sanity check, and if there are un-ordered gap blocks,
3061 			 * we will continue processing in sctp_process_uo_gaps()
3062 			 * We just reset the mp to the one we started with.
3063 			 */
3064 			if (!SCTP_CHUNK_ISSENT(mp)) {
3065 				ASSERT(xtsn == gapend);
3066 				ump = sctp->sctp_xmit_head;
3067 				mp = mp1;
3068 				sdc = (sctp_data_hdr_t *)mp->b_rptr;
3069 				xtsn = ntohl(sdc->sdh_tsn);
3070 				break;
3071 			}
3072 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
3073 			xtsn = ntohl(sdc->sdh_tsn);
3074 		}
3075 	}
3076 	if (sctp->sctp_prsctp_aware)
3077 		sctp_check_abandoned_data(sctp, sctp->sctp_current);
3078 	if (sctp->sctp_chk_fast_rexmit)
3079 		sctp_fast_rexmit(sctp);
3080 ret:
3081 	trysend += sctp_set_frwnd(sctp, ntohl(sc->ssc_a_rwnd));
3082 
3083 	/*
3084 	 * If receive window is closed while there is unsent data,
3085 	 * set a timer for doing zero window probes.
3086 	 */
3087 	if (sctp->sctp_frwnd == 0 && sctp->sctp_unacked == 0 &&
3088 	    sctp->sctp_unsent != 0) {
3089 		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
3090 		    sctp->sctp_current->sf_rto);
3091 	}
3092 
3093 	/*
3094 	 * Set cwnd for all destinations.
3095 	 * Congestion window gets increased only when cumulative
3096 	 * TSN moves forward, we're not in fast recovery, and
3097 	 * cwnd has been fully utilized (almost fully, need to allow
3098 	 * some leeway due to non-MSS sized messages).
3099 	 */
3100 	if (sctp->sctp_current->sf_acked == acked) {
3101 		/*
3102 		 * Fast-path, only data sent to sctp_current got acked.
3103 		 */
3104 		fp = sctp->sctp_current;
3105 		if (cumack_forward && !fast_recovery &&
3106 		    (fp->sf_acked + fp->sf_suna > fp->sf_cwnd - fp->sf_pmss)) {
3107 			if (fp->sf_cwnd < fp->sf_ssthresh) {
3108 				/*
3109 				 * Slow start
3110 				 */
3111 				if (fp->sf_acked > fp->sf_pmss) {
3112 					fp->sf_cwnd += fp->sf_pmss;
3113 				} else {
3114 					fp->sf_cwnd += fp->sf_acked;
3115 				}
3116 				fp->sf_cwnd = MIN(fp->sf_cwnd,
3117 				    sctp->sctp_cwnd_max);
3118 			} else {
3119 				/*
3120 				 * Congestion avoidance
3121 				 */
3122 				fp->sf_pba += fp->sf_acked;
3123 				if (fp->sf_pba >= fp->sf_cwnd) {
3124 					fp->sf_pba -= fp->sf_cwnd;
3125 					fp->sf_cwnd += fp->sf_pmss;
3126 					fp->sf_cwnd = MIN(fp->sf_cwnd,
3127 					    sctp->sctp_cwnd_max);
3128 				}
3129 			}
3130 		}
3131 		/*
3132 		 * Limit the burst of transmitted data segments.
3133 		 */
3134 		if (fp->sf_suna + sctps->sctps_maxburst * fp->sf_pmss <
3135 		    fp->sf_cwnd) {
3136 			fp->sf_cwnd = fp->sf_suna + sctps->sctps_maxburst *
3137 			    fp->sf_pmss;
3138 		}
3139 		fp->sf_acked = 0;
3140 		goto check_ss_rxmit;
3141 	}
3142 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
3143 		if (cumack_forward && fp->sf_acked && !fast_recovery &&
3144 		    (fp->sf_acked + fp->sf_suna > fp->sf_cwnd - fp->sf_pmss)) {
3145 			if (fp->sf_cwnd < fp->sf_ssthresh) {
3146 				if (fp->sf_acked > fp->sf_pmss) {
3147 					fp->sf_cwnd += fp->sf_pmss;
3148 				} else {
3149 					fp->sf_cwnd += fp->sf_acked;
3150 				}
3151 				fp->sf_cwnd = MIN(fp->sf_cwnd,
3152 				    sctp->sctp_cwnd_max);
3153 			} else {
3154 				fp->sf_pba += fp->sf_acked;
3155 				if (fp->sf_pba >= fp->sf_cwnd) {
3156 					fp->sf_pba -= fp->sf_cwnd;
3157 					fp->sf_cwnd += fp->sf_pmss;
3158 					fp->sf_cwnd = MIN(fp->sf_cwnd,
3159 					    sctp->sctp_cwnd_max);
3160 				}
3161 			}
3162 		}
3163 		if (fp->sf_suna + sctps->sctps_maxburst * fp->sf_pmss <
3164 		    fp->sf_cwnd) {
3165 			fp->sf_cwnd = fp->sf_suna + sctps->sctps_maxburst *
3166 			    fp->sf_pmss;
3167 		}
3168 		fp->sf_acked = 0;
3169 	}
3170 	fp = sctp->sctp_current;
3171 check_ss_rxmit:
3172 	/*
3173 	 * If this is a SACK following a timeout, check if there are
3174 	 * still unacked chunks (sent before the timeout) that we can
3175 	 * send.
3176 	 */
3177 	if (sctp->sctp_rexmitting) {
3178 		if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_rxt_maxtsn)) {
3179 			/*
3180 			 * As we are in retransmission phase, we may get a
3181 			 * SACK which indicates some new chunks are received
3182 			 * but cum_tsn does not advance.  During this
3183 			 * phase, the other side advances cum_tsn only because
3184 			 * it receives our retransmitted chunks.  Only
3185 			 * this signals that some chunks are still
3186 			 * missing.
3187 			 */
3188 			if (cumack_forward) {
3189 				fp->sf_rxt_unacked -= acked;
3190 				sctp_ss_rexmit(sctp);
3191 			}
3192 		} else {
3193 			sctp->sctp_rexmitting = B_FALSE;
3194 			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
3195 			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
3196 			fp->sf_rxt_unacked = 0;
3197 		}
3198 	}
3199 	return (trysend);
3200 }
3201 
3202 /*
3203  * Returns 0 if the caller should stop processing any more chunks,
3204  * 1 if the caller should skip this chunk and continue processing.
3205  */
3206 static int
3207 sctp_strange_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp)
3208 {
3209 	size_t len;
3210 
3211 	BUMP_LOCAL(sctp->sctp_ibchunks);
3212 	/* check top two bits for action required */
3213 	if (ch->sch_id & 0x40) {	/* also matches 0xc0 */
3214 		len = ntohs(ch->sch_len);
3215 		sctp_add_err(sctp, SCTP_ERR_UNREC_CHUNK, ch, len, fp);
3216 
3217 		if ((ch->sch_id & 0xc0) == 0xc0) {
3218 			/* skip and continue */
3219 			return (1);
3220 		} else {
3221 			/* stop processing */
3222 			return (0);
3223 		}
3224 	}
3225 	if (ch->sch_id & 0x80) {
3226 		/* skip and continue, no error */
3227 		return (1);
3228 	}
3229 	/* top two bits are clear; stop processing and no error */
3230 	return (0);
3231 }
3232 
3233 /*
3234  * Basic sanity checks on all input chunks and parameters: they must
3235  * be of legitimate size for their purported type, and must follow
3236  * ordering conventions as defined in rfc2960.
3237  *
3238  * Returns 1 if the chunk and all encloded params are legitimate,
3239  * 0 otherwise.
3240  */
3241 /*ARGSUSED*/
3242 static int
3243 sctp_check_input(sctp_t *sctp, sctp_chunk_hdr_t *ch, ssize_t len, int first)
3244 {
3245 	sctp_parm_hdr_t	*ph;
3246 	void		*p = NULL;
3247 	ssize_t		clen;
3248 	uint16_t	ch_len;
3249 
3250 	ch_len = ntohs(ch->sch_len);
3251 	if (ch_len > len) {
3252 		return (0);
3253 	}
3254 
3255 	switch (ch->sch_id) {
3256 	case CHUNK_DATA:
3257 		if (ch_len < sizeof (sctp_data_hdr_t)) {
3258 			return (0);
3259 		}
3260 		return (1);
3261 	case CHUNK_INIT:
3262 	case CHUNK_INIT_ACK:
3263 		{
3264 			ssize_t	remlen = len;
3265 
3266 			/*
3267 			 * INIT and INIT-ACK chunks must not be bundled with
3268 			 * any other.
3269 			 */
3270 			if (!first || sctp_next_chunk(ch, &remlen) != NULL ||
3271 			    (ch_len < (sizeof (*ch) +
3272 			    sizeof (sctp_init_chunk_t)))) {
3273 				return (0);
3274 			}
3275 			/* may have params that need checking */
3276 			p = (char *)(ch + 1) + sizeof (sctp_init_chunk_t);
3277 			clen = ch_len - (sizeof (*ch) +
3278 			    sizeof (sctp_init_chunk_t));
3279 		}
3280 		break;
3281 	case CHUNK_SACK:
3282 		if (ch_len < (sizeof (*ch) + sizeof (sctp_sack_chunk_t))) {
3283 			return (0);
3284 		}
3285 		/* dup and gap reports checked by got_sack() */
3286 		return (1);
3287 	case CHUNK_SHUTDOWN:
3288 		if (ch_len < (sizeof (*ch) + sizeof (uint32_t))) {
3289 			return (0);
3290 		}
3291 		return (1);
3292 	case CHUNK_ABORT:
3293 	case CHUNK_ERROR:
3294 		if (ch_len < sizeof (*ch)) {
3295 			return (0);
3296 		}
3297 		/* may have params that need checking */
3298 		p = ch + 1;
3299 		clen = ch_len - sizeof (*ch);
3300 		break;
3301 	case CHUNK_ECNE:
3302 	case CHUNK_CWR:
3303 	case CHUNK_HEARTBEAT:
3304 	case CHUNK_HEARTBEAT_ACK:
3305 	/* Full ASCONF chunk and parameter checks are in asconf.c */
3306 	case CHUNK_ASCONF:
3307 	case CHUNK_ASCONF_ACK:
3308 		if (ch_len < sizeof (*ch)) {
3309 			return (0);
3310 		}
3311 		/* heartbeat data checked by process_heartbeat() */
3312 		return (1);
3313 	case CHUNK_SHUTDOWN_COMPLETE:
3314 		{
3315 			ssize_t remlen = len;
3316 
3317 			/*
3318 			 * SHUTDOWN-COMPLETE chunk must not be bundled with any
3319 			 * other
3320 			 */
3321 			if (!first || sctp_next_chunk(ch, &remlen) != NULL ||
3322 			    ch_len < sizeof (*ch)) {
3323 				return (0);
3324 			}
3325 		}
3326 		return (1);
3327 	case CHUNK_COOKIE:
3328 	case CHUNK_COOKIE_ACK:
3329 	case CHUNK_SHUTDOWN_ACK:
3330 		if (ch_len < sizeof (*ch) || !first) {
3331 			return (0);
3332 		}
3333 		return (1);
3334 	case CHUNK_FORWARD_TSN:
3335 		if (ch_len < (sizeof (*ch) + sizeof (uint32_t)))
3336 			return (0);
3337 		return (1);
3338 	default:
3339 		return (1);	/* handled by strange_chunk() */
3340 	}
3341 
3342 	/* check and byteorder parameters */
3343 	if (clen <= 0) {
3344 		return (1);
3345 	}
3346 	ASSERT(p != NULL);
3347 
3348 	ph = p;
3349 	while (ph != NULL && clen > 0) {
3350 		ch_len = ntohs(ph->sph_len);
3351 		if (ch_len > len || ch_len < sizeof (*ph)) {
3352 			return (0);
3353 		}
3354 		ph = sctp_next_parm(ph, &clen);
3355 	}
3356 
3357 	/* All OK */
3358 	return (1);
3359 }
3360 
3361 static mblk_t *
3362 sctp_check_in_policy(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
3363 {
3364 	boolean_t policy_present;
3365 	ipha_t *ipha;
3366 	ip6_t *ip6h;
3367 	netstack_t	*ns = ipst->ips_netstack;
3368 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3369 
3370 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
3371 		policy_present = ipss->ipsec_inbound_v4_policy_present;
3372 		ipha = (ipha_t *)mp->b_rptr;
3373 		ip6h = NULL;
3374 	} else {
3375 		policy_present = ipss->ipsec_inbound_v6_policy_present;
3376 		ipha = NULL;
3377 		ip6h = (ip6_t *)mp->b_rptr;
3378 	}
3379 
3380 	if (policy_present) {
3381 		/*
3382 		 * The conn_t parameter is NULL because we already know
3383 		 * nobody's home.
3384 		 */
3385 		mp = ipsec_check_global_policy(mp, (conn_t *)NULL,
3386 		    ipha, ip6h, ira, ns);
3387 		if (mp == NULL)
3388 			return (NULL);
3389 	}
3390 	return (mp);
3391 }
3392 
3393 /* Handle out-of-the-blue packets */
3394 void
3395 sctp_ootb_input(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
3396 {
3397 	sctp_t			*sctp;
3398 	sctp_chunk_hdr_t	*ch;
3399 	sctp_hdr_t		*sctph;
3400 	in6_addr_t		src, dst;
3401 	uint_t			ip_hdr_len = ira->ira_ip_hdr_length;
3402 	ssize_t			mlen;
3403 	sctp_stack_t		*sctps;
3404 	boolean_t		secure;
3405 	zoneid_t		zoneid = ira->ira_zoneid;
3406 	uchar_t			*rptr;
3407 
3408 	ASSERT(ira->ira_ill == NULL);
3409 
3410 	secure = ira->ira_flags & IRAF_IPSEC_SECURE;
3411 
3412 	sctps = ipst->ips_netstack->netstack_sctp;
3413 
3414 	SCTPS_BUMP_MIB(sctps, sctpOutOfBlue);
3415 	SCTPS_BUMP_MIB(sctps, sctpInSCTPPkts);
3416 
3417 	if (mp->b_cont != NULL) {
3418 		/*
3419 		 * All subsequent code is vastly simplified if it can
3420 		 * assume a single contiguous chunk of data.
3421 		 */
3422 		if (pullupmsg(mp, -1) == 0) {
3423 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3424 			ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3425 			freemsg(mp);
3426 			return;
3427 		}
3428 	}
3429 
3430 	rptr = mp->b_rptr;
3431 	sctph = ((sctp_hdr_t *)&rptr[ip_hdr_len]);
3432 	if (ira->ira_flags & IRAF_IS_IPV4) {
3433 		ipha_t *ipha;
3434 
3435 		ipha = (ipha_t *)rptr;
3436 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &src);
3437 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst);
3438 	} else {
3439 		ip6_t *ip6h;
3440 
3441 		ip6h = (ip6_t *)rptr;
3442 		src = ip6h->ip6_src;
3443 		dst = ip6h->ip6_dst;
3444 	}
3445 
3446 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
3447 	if ((ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
3448 		dprint(3, ("sctp_ootb_input: invalid packet\n"));
3449 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3450 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3451 		freemsg(mp);
3452 		return;
3453 	}
3454 
3455 	switch (ch->sch_id) {
3456 	case CHUNK_INIT:
3457 		/* no listener; send abort  */
3458 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3459 			return;
3460 		sctp_ootb_send_abort(sctp_init2vtag(ch), 0,
3461 		    NULL, 0, mp, 0, B_TRUE, ira, ipst);
3462 		break;
3463 	case CHUNK_INIT_ACK:
3464 		/* check for changed src addr */
3465 		sctp = sctp_addrlist2sctp(mp, sctph, ch, zoneid, sctps);
3466 		if (sctp != NULL) {
3467 			/* success; proceed to normal path */
3468 			mutex_enter(&sctp->sctp_lock);
3469 			if (sctp->sctp_running) {
3470 				sctp_add_recvq(sctp, mp, B_FALSE, ira);
3471 				mutex_exit(&sctp->sctp_lock);
3472 			} else {
3473 				/*
3474 				 * If the source address is changed, we
3475 				 * don't need to worry too much about
3476 				 * out of order processing.  So we don't
3477 				 * check if the recvq is empty or not here.
3478 				 */
3479 				sctp->sctp_running = B_TRUE;
3480 				mutex_exit(&sctp->sctp_lock);
3481 				sctp_input_data(sctp, mp, ira);
3482 				WAKE_SCTP(sctp);
3483 			}
3484 			SCTP_REFRELE(sctp);
3485 			return;
3486 		}
3487 		/* else bogus init ack; drop it */
3488 		break;
3489 	case CHUNK_SHUTDOWN_ACK:
3490 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3491 			return;
3492 		sctp_ootb_shutdown_ack(mp, ip_hdr_len, ira, ipst);
3493 		return;
3494 	case CHUNK_ERROR:
3495 	case CHUNK_ABORT:
3496 	case CHUNK_COOKIE_ACK:
3497 	case CHUNK_SHUTDOWN_COMPLETE:
3498 		break;
3499 	default:
3500 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3501 			return;
3502 		sctp_ootb_send_abort(sctph->sh_verf, 0,
3503 		    NULL, 0, mp, 0, B_TRUE, ira, ipst);
3504 		break;
3505 	}
3506 	freemsg(mp);
3507 }
3508 
3509 /*
3510  * Handle sctp packets.
3511  * Note that we rele the sctp_t (the caller got a reference on it).
3512  */
3513 void
3514 sctp_input(conn_t *connp, ipha_t *ipha, ip6_t *ip6h, mblk_t *mp,
3515     ip_recv_attr_t *ira)
3516 {
3517 	sctp_t		*sctp = CONN2SCTP(connp);
3518 	boolean_t	secure;
3519 	ill_t		*ill = ira->ira_ill;
3520 	ip_stack_t	*ipst = ill->ill_ipst;
3521 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
3522 	iaflags_t	iraflags = ira->ira_flags;
3523 	ill_t		*rill = ira->ira_rill;
3524 
3525 	secure = iraflags & IRAF_IPSEC_SECURE;
3526 
3527 	/*
3528 	 * We check some fields in conn_t without holding a lock.
3529 	 * This should be fine.
3530 	 */
3531 	if (((iraflags & IRAF_IS_IPV4) ?
3532 	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
3533 	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
3534 	    secure) {
3535 		mp = ipsec_check_inbound_policy(mp, connp, ipha,
3536 		    ip6h, ira);
3537 		if (mp == NULL) {
3538 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3539 			/* Note that mp is NULL */
3540 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3541 			SCTP_REFRELE(sctp);
3542 			return;
3543 		}
3544 	}
3545 
3546 	ira->ira_ill = ira->ira_rill = NULL;
3547 
3548 	mutex_enter(&sctp->sctp_lock);
3549 	if (sctp->sctp_running) {
3550 		sctp_add_recvq(sctp, mp, B_FALSE, ira);
3551 		mutex_exit(&sctp->sctp_lock);
3552 		goto done;
3553 	} else {
3554 		sctp->sctp_running = B_TRUE;
3555 		mutex_exit(&sctp->sctp_lock);
3556 
3557 		mutex_enter(&sctp->sctp_recvq_lock);
3558 		if (sctp->sctp_recvq != NULL) {
3559 			sctp_add_recvq(sctp, mp, B_TRUE, ira);
3560 			mutex_exit(&sctp->sctp_recvq_lock);
3561 			WAKE_SCTP(sctp);
3562 			goto done;
3563 		}
3564 	}
3565 	mutex_exit(&sctp->sctp_recvq_lock);
3566 	if (ira->ira_flags & IRAF_ICMP_ERROR)
3567 		sctp_icmp_error(sctp, mp);
3568 	else
3569 		sctp_input_data(sctp, mp, ira);
3570 	WAKE_SCTP(sctp);
3571 
3572 done:
3573 	SCTP_REFRELE(sctp);
3574 	ira->ira_ill = ill;
3575 	ira->ira_rill = rill;
3576 }
3577 
3578 static void
3579 sctp_process_abort(sctp_t *sctp, sctp_chunk_hdr_t *ch, int err)
3580 {
3581 	sctp_stack_t	*sctps = sctp->sctp_sctps;
3582 
3583 	SCTPS_BUMP_MIB(sctps, sctpAborted);
3584 	BUMP_LOCAL(sctp->sctp_ibchunks);
3585 
3586 	/*
3587 	 * SCTP_COMM_LOST is only sent up if the association is
3588 	 * established (sctp_state >= SCTPS_ESTABLISHED).
3589 	 */
3590 	if (sctp->sctp_state >= SCTPS_ESTABLISHED) {
3591 		sctp_assoc_event(sctp, SCTP_COMM_LOST,
3592 		    ntohs(((sctp_parm_hdr_t *)(ch + 1))->sph_type), ch);
3593 	}
3594 
3595 	sctp_clean_death(sctp, err);
3596 }
3597 
3598 void
3599 sctp_input_data(sctp_t *sctp, mblk_t *mp, ip_recv_attr_t *ira)
3600 {
3601 	sctp_chunk_hdr_t	*ch;
3602 	ssize_t			mlen;
3603 	int			gotdata;
3604 	int			trysend;
3605 	sctp_faddr_t		*fp;
3606 	sctp_init_chunk_t	*iack;
3607 	uint32_t		tsn;
3608 	sctp_data_hdr_t		*sdc;
3609 	ip_pkt_t		ipp;
3610 	in6_addr_t		src;
3611 	in6_addr_t		dst;
3612 	uint_t			ifindex;
3613 	sctp_hdr_t		*sctph;
3614 	uint_t			ip_hdr_len = ira->ira_ip_hdr_length;
3615 	mblk_t			*dups = NULL;
3616 	int			recv_adaptation;
3617 	boolean_t		wake_eager = B_FALSE;
3618 	in6_addr_t		peer_src;
3619 	int64_t			now;
3620 	sctp_stack_t		*sctps = sctp->sctp_sctps;
3621 	ip_stack_t		*ipst = sctps->sctps_netstack->netstack_ip;
3622 	boolean_t		hb_already = B_FALSE;
3623 	cred_t			*cr;
3624 	pid_t			cpid;
3625 	uchar_t			*rptr;
3626 	conn_t			*connp = sctp->sctp_connp;
3627 	boolean_t		shutdown_ack_needed = B_FALSE;
3628 
3629 	ASSERT(DB_TYPE(mp) == M_DATA);
3630 	ASSERT(ira->ira_ill == NULL);
3631 
3632 	if (mp->b_cont != NULL) {
3633 		/*
3634 		 * All subsequent code is vastly simplified if it can
3635 		 * assume a single contiguous chunk of data.
3636 		 */
3637 		if (pullupmsg(mp, -1) == 0) {
3638 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3639 			ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3640 			freemsg(mp);
3641 			return;
3642 		}
3643 	}
3644 
3645 	BUMP_LOCAL(sctp->sctp_ipkts);
3646 	ifindex = ira->ira_ruifindex;
3647 
3648 	rptr = mp->b_rptr;
3649 
3650 	ipp.ipp_fields = 0;
3651 	if (connp->conn_recv_ancillary.crb_all != 0) {
3652 		/*
3653 		 * Record packet information in the ip_pkt_t
3654 		 */
3655 		if (ira->ira_flags & IRAF_IS_IPV4) {
3656 			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipp,
3657 			    B_FALSE);
3658 		} else {
3659 			uint8_t nexthdrp;
3660 
3661 			/*
3662 			 * IPv6 packets can only be received by applications
3663 			 * that are prepared to receive IPv6 addresses.
3664 			 * The IP fanout must ensure this.
3665 			 */
3666 			ASSERT(connp->conn_family == AF_INET6);
3667 
3668 			(void) ip_find_hdr_v6(mp, (ip6_t *)rptr, B_TRUE, &ipp,
3669 			    &nexthdrp);
3670 			ASSERT(nexthdrp == IPPROTO_SCTP);
3671 
3672 			/* Could have caused a pullup? */
3673 			rptr = mp->b_rptr;
3674 		}
3675 	}
3676 
3677 	sctph = ((sctp_hdr_t *)&rptr[ip_hdr_len]);
3678 
3679 	if (ira->ira_flags & IRAF_IS_IPV4) {
3680 		ipha_t *ipha;
3681 
3682 		ipha = (ipha_t *)rptr;
3683 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &src);
3684 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst);
3685 	} else {
3686 		ip6_t *ip6h;
3687 
3688 		ip6h = (ip6_t *)rptr;
3689 		src = ip6h->ip6_src;
3690 		dst = ip6h->ip6_dst;
3691 	}
3692 
3693 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
3694 	ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen);
3695 	if (ch == NULL) {
3696 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3697 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3698 		freemsg(mp);
3699 		return;
3700 	}
3701 
3702 	if (!sctp_check_input(sctp, ch, mlen, 1)) {
3703 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3704 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3705 		goto done;
3706 	}
3707 	/*
3708 	 * Check verfication tag (special handling for INIT,
3709 	 * COOKIE, SHUTDOWN_COMPLETE and SHUTDOWN_ACK chunks).
3710 	 * ABORTs are handled in the chunk processing loop, since
3711 	 * may not appear first. All other checked chunks must
3712 	 * appear first, or will have been dropped by check_input().
3713 	 */
3714 	switch (ch->sch_id) {
3715 	case CHUNK_INIT:
3716 		if (sctph->sh_verf != 0) {
3717 			/* drop it */
3718 			goto done;
3719 		}
3720 		break;
3721 	case CHUNK_SHUTDOWN_COMPLETE:
3722 		if (sctph->sh_verf == sctp->sctp_lvtag)
3723 			break;
3724 		if (sctph->sh_verf == sctp->sctp_fvtag &&
3725 		    SCTP_GET_TBIT(ch)) {
3726 			break;
3727 		}
3728 		/* else drop it */
3729 		goto done;
3730 	case CHUNK_ABORT:
3731 	case CHUNK_COOKIE:
3732 		/* handled below */
3733 		break;
3734 	case CHUNK_SHUTDOWN_ACK:
3735 		if (sctp->sctp_state > SCTPS_BOUND &&
3736 		    sctp->sctp_state < SCTPS_ESTABLISHED) {
3737 			/* treat as OOTB */
3738 			sctp_ootb_shutdown_ack(mp, ip_hdr_len, ira, ipst);
3739 			return;
3740 		}
3741 		/* else fallthru */
3742 	default:
3743 		/*
3744 		 * All other packets must have a valid
3745 		 * verification tag, however if this is a
3746 		 * listener, we use a refined version of
3747 		 * out-of-the-blue logic.
3748 		 */
3749 		if (sctph->sh_verf != sctp->sctp_lvtag &&
3750 		    sctp->sctp_state != SCTPS_LISTEN) {
3751 			/* drop it */
3752 			goto done;
3753 		}
3754 		break;
3755 	}
3756 
3757 	/* Have a valid sctp for this packet */
3758 	fp = sctp_lookup_faddr(sctp, &src);
3759 	dprint(2, ("sctp_dispatch_rput: mp=%p fp=%p sctp=%p\n", (void *)mp,
3760 	    (void *)fp, (void *)sctp));
3761 
3762 	gotdata = 0;
3763 	trysend = 0;
3764 
3765 	now = LBOLT_FASTPATH64;
3766 	/* Process the chunks */
3767 	do {
3768 		dprint(3, ("sctp_dispatch_rput: state=%d, chunk id=%d\n",
3769 		    sctp->sctp_state, (int)(ch->sch_id)));
3770 
3771 		if (ch->sch_id == CHUNK_ABORT) {
3772 			if (sctph->sh_verf != sctp->sctp_lvtag &&
3773 			    sctph->sh_verf != sctp->sctp_fvtag) {
3774 				/* drop it */
3775 				goto done;
3776 			}
3777 		}
3778 
3779 		switch (sctp->sctp_state) {
3780 
3781 		case SCTPS_ESTABLISHED:
3782 		case SCTPS_SHUTDOWN_PENDING:
3783 		case SCTPS_SHUTDOWN_SENT:
3784 			switch (ch->sch_id) {
3785 			case CHUNK_DATA:
3786 				/* 0-length data chunks are not allowed */
3787 				if (ntohs(ch->sch_len) == sizeof (*sdc)) {
3788 					sdc = (sctp_data_hdr_t *)ch;
3789 					tsn = sdc->sdh_tsn;
3790 					sctp_send_abort(sctp, sctp->sctp_fvtag,
3791 					    SCTP_ERR_NO_USR_DATA, (char *)&tsn,
3792 					    sizeof (tsn), mp, 0, B_FALSE, ira);
3793 					sctp_assoc_event(sctp, SCTP_COMM_LOST,
3794 					    0, NULL);
3795 					sctp_clean_death(sctp, ECONNABORTED);
3796 					goto done;
3797 				}
3798 
3799 				ASSERT(fp != NULL);
3800 				sctp->sctp_lastdata = fp;
3801 				sctp_data_chunk(sctp, ch, mp, &dups, fp,
3802 				    &ipp, ira);
3803 				gotdata = 1;
3804 				/* Restart shutdown timer if shutting down */
3805 				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
3806 					/*
3807 					 * If we have exceeded our max
3808 					 * wait bound for waiting for a
3809 					 * shutdown ack from the peer,
3810 					 * abort the association.
3811 					 */
3812 					if (sctps->sctps_shutack_wait_bound !=
3813 					    0 &&
3814 					    TICK_TO_MSEC(now -
3815 					    sctp->sctp_out_time) >
3816 					    sctps->sctps_shutack_wait_bound) {
3817 						sctp_send_abort(sctp,
3818 						    sctp->sctp_fvtag, 0, NULL,
3819 						    0, mp, 0, B_FALSE, ira);
3820 						sctp_assoc_event(sctp,
3821 						    SCTP_COMM_LOST, 0, NULL);
3822 						sctp_clean_death(sctp,
3823 						    ECONNABORTED);
3824 						goto done;
3825 					}
3826 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
3827 					    fp->sf_rto);
3828 				}
3829 				break;
3830 			case CHUNK_SACK:
3831 				ASSERT(fp != NULL);
3832 				/*
3833 				 * Peer is real and alive if it can ack our
3834 				 * data.
3835 				 */
3836 				sctp_faddr_alive(sctp, fp);
3837 				trysend = sctp_got_sack(sctp, ch);
3838 				if (trysend < 0) {
3839 					sctp_send_abort(sctp, sctph->sh_verf,
3840 					    0, NULL, 0, mp, 0, B_FALSE, ira);
3841 					sctp_assoc_event(sctp,
3842 					    SCTP_COMM_LOST, 0, NULL);
3843 					sctp_clean_death(sctp,
3844 					    ECONNABORTED);
3845 					goto done;
3846 				}
3847 				break;
3848 			case CHUNK_HEARTBEAT:
3849 				if (!hb_already) {
3850 					/*
3851 					 * In any one packet, there should
3852 					 * only be one heartbeat chunk.  So
3853 					 * we should not process more than
3854 					 * once.
3855 					 */
3856 					sctp_return_heartbeat(sctp, ch, mp);
3857 					hb_already = B_TRUE;
3858 				}
3859 				break;
3860 			case CHUNK_HEARTBEAT_ACK:
3861 				sctp_process_heartbeat(sctp, ch);
3862 				break;
3863 			case CHUNK_SHUTDOWN:
3864 				sctp_shutdown_event(sctp);
3865 				trysend = sctp_shutdown_received(sctp, ch,
3866 				    B_FALSE, B_FALSE, fp);
3867 				BUMP_LOCAL(sctp->sctp_ibchunks);
3868 				break;
3869 			case CHUNK_SHUTDOWN_ACK:
3870 				BUMP_LOCAL(sctp->sctp_ibchunks);
3871 				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
3872 					sctp_shutdown_complete(sctp);
3873 					SCTPS_BUMP_MIB(sctps, sctpShutdowns);
3874 					sctp_assoc_event(sctp,
3875 					    SCTP_SHUTDOWN_COMP, 0, NULL);
3876 					sctp_clean_death(sctp, 0);
3877 					goto done;
3878 				}
3879 				break;
3880 			case CHUNK_ABORT: {
3881 				sctp_saddr_ipif_t *sp;
3882 
3883 				/* Ignore if delete pending */
3884 				sp = sctp_saddr_lookup(sctp, &dst, 0);
3885 				ASSERT(sp != NULL);
3886 				if (sp->saddr_ipif_delete_pending) {
3887 					BUMP_LOCAL(sctp->sctp_ibchunks);
3888 					break;
3889 				}
3890 
3891 				sctp_process_abort(sctp, ch, ECONNRESET);
3892 				goto done;
3893 			}
3894 			case CHUNK_INIT:
3895 				sctp_send_initack(sctp, sctph, ch, mp, ira);
3896 				break;
3897 			case CHUNK_COOKIE:
3898 				if (sctp_process_cookie(sctp, ch, mp, &iack,
3899 				    sctph, &recv_adaptation, NULL, ira) != -1) {
3900 					sctp_send_cookie_ack(sctp);
3901 					sctp_assoc_event(sctp, SCTP_RESTART,
3902 					    0, NULL);
3903 					if (recv_adaptation) {
3904 						sctp->sctp_recv_adaptation = 1;
3905 						sctp_adaptation_event(sctp);
3906 					}
3907 				} else {
3908 					SCTPS_BUMP_MIB(sctps,
3909 					    sctpInInvalidCookie);
3910 				}
3911 				break;
3912 			case CHUNK_ERROR: {
3913 				int error;
3914 
3915 				BUMP_LOCAL(sctp->sctp_ibchunks);
3916 				error = sctp_handle_error(sctp, sctph, ch, mp,
3917 				    ira);
3918 				if (error != 0) {
3919 					sctp_assoc_event(sctp, SCTP_COMM_LOST,
3920 					    0, NULL);
3921 					sctp_clean_death(sctp, error);
3922 					goto done;
3923 				}
3924 				break;
3925 			}
3926 			case CHUNK_ASCONF:
3927 				ASSERT(fp != NULL);
3928 				sctp_input_asconf(sctp, ch, fp);
3929 				BUMP_LOCAL(sctp->sctp_ibchunks);
3930 				break;
3931 			case CHUNK_ASCONF_ACK:
3932 				ASSERT(fp != NULL);
3933 				sctp_faddr_alive(sctp, fp);
3934 				sctp_input_asconf_ack(sctp, ch, fp);
3935 				BUMP_LOCAL(sctp->sctp_ibchunks);
3936 				break;
3937 			case CHUNK_FORWARD_TSN:
3938 				ASSERT(fp != NULL);
3939 				sctp->sctp_lastdata = fp;
3940 				sctp_process_forward_tsn(sctp, ch, fp,
3941 				    &ipp, ira);
3942 				gotdata = 1;
3943 				BUMP_LOCAL(sctp->sctp_ibchunks);
3944 				break;
3945 			default:
3946 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
3947 					goto nomorechunks;
3948 				} /* else skip and continue processing */
3949 				break;
3950 			}
3951 			break;
3952 
3953 		case SCTPS_LISTEN:
3954 			switch (ch->sch_id) {
3955 			case CHUNK_INIT:
3956 				sctp_send_initack(sctp, sctph, ch, mp, ira);
3957 				break;
3958 			case CHUNK_COOKIE: {
3959 				sctp_t *eager;
3960 
3961 				if (sctp_process_cookie(sctp, ch, mp, &iack,
3962 				    sctph, &recv_adaptation, &peer_src,
3963 				    ira) == -1) {
3964 					SCTPS_BUMP_MIB(sctps,
3965 					    sctpInInvalidCookie);
3966 					goto done;
3967 				}
3968 
3969 				/*
3970 				 * The cookie is good; ensure that
3971 				 * the peer used the verification
3972 				 * tag from the init ack in the header.
3973 				 */
3974 				if (iack->sic_inittag != sctph->sh_verf)
3975 					goto done;
3976 
3977 				eager = sctp_conn_request(sctp, mp, ifindex,
3978 				    ip_hdr_len, iack, ira);
3979 				if (eager == NULL) {
3980 					sctp_send_abort(sctp, sctph->sh_verf,
3981 					    SCTP_ERR_NO_RESOURCES, NULL, 0, mp,
3982 					    0, B_FALSE, ira);
3983 					goto done;
3984 				}
3985 
3986 				/*
3987 				 * If there were extra chunks
3988 				 * bundled with the cookie,
3989 				 * they must be processed
3990 				 * on the eager's queue. We
3991 				 * accomplish this by refeeding
3992 				 * the whole packet into the
3993 				 * state machine on the right
3994 				 * q. The packet (mp) gets
3995 				 * there via the eager's
3996 				 * cookie_mp field (overloaded
3997 				 * with the active open role).
3998 				 * This is picked up when
3999 				 * processing the null bind
4000 				 * request put on the eager's
4001 				 * q by sctp_accept(). We must
4002 				 * first revert the cookie
4003 				 * chunk's length field to network
4004 				 * byteorder so it can be
4005 				 * properly reprocessed on the
4006 				 * eager's queue.
4007 				 */
4008 				SCTPS_BUMP_MIB(sctps, sctpPassiveEstab);
4009 				if (mlen > ntohs(ch->sch_len)) {
4010 					eager->sctp_cookie_mp = dupb(mp);
4011 					/*
4012 					 * If no mem, just let
4013 					 * the peer retransmit.
4014 					 */
4015 				}
4016 				sctp_assoc_event(eager, SCTP_COMM_UP, 0, NULL);
4017 				if (recv_adaptation) {
4018 					eager->sctp_recv_adaptation = 1;
4019 					eager->sctp_rx_adaptation_code =
4020 					    sctp->sctp_rx_adaptation_code;
4021 					sctp_adaptation_event(eager);
4022 				}
4023 
4024 				eager->sctp_active = now;
4025 				sctp_send_cookie_ack(eager);
4026 
4027 				wake_eager = B_TRUE;
4028 
4029 				/*
4030 				 * Process rest of the chunks with eager.
4031 				 */
4032 				sctp = eager;
4033 				fp = sctp_lookup_faddr(sctp, &peer_src);
4034 				/*
4035 				 * Confirm peer's original source.  fp can
4036 				 * only be NULL if peer does not use the
4037 				 * original source as one of its addresses...
4038 				 */
4039 				if (fp == NULL)
4040 					fp = sctp_lookup_faddr(sctp, &src);
4041 				else
4042 					sctp_faddr_alive(sctp, fp);
4043 
4044 				/*
4045 				 * Validate the peer addresses.  It also starts
4046 				 * the heartbeat timer.
4047 				 */
4048 				sctp_validate_peer(sctp);
4049 				break;
4050 			}
4051 			/* Anything else is considered out-of-the-blue */
4052 			case CHUNK_ERROR:
4053 			case CHUNK_ABORT:
4054 			case CHUNK_COOKIE_ACK:
4055 			case CHUNK_SHUTDOWN_COMPLETE:
4056 				BUMP_LOCAL(sctp->sctp_ibchunks);
4057 				goto done;
4058 			default:
4059 				BUMP_LOCAL(sctp->sctp_ibchunks);
4060 				sctp_send_abort(sctp, sctph->sh_verf, 0, NULL,
4061 				    0, mp, 0, B_TRUE, ira);
4062 				goto done;
4063 			}
4064 			break;
4065 
4066 		case SCTPS_COOKIE_WAIT:
4067 			switch (ch->sch_id) {
4068 			case CHUNK_INIT_ACK:
4069 				sctp_stop_faddr_timers(sctp);
4070 				sctp_faddr_alive(sctp, sctp->sctp_current);
4071 				sctp_send_cookie_echo(sctp, ch, mp, ira);
4072 				BUMP_LOCAL(sctp->sctp_ibchunks);
4073 				break;
4074 			case CHUNK_ABORT:
4075 				sctp_process_abort(sctp, ch, ECONNREFUSED);
4076 				goto done;
4077 			case CHUNK_INIT:
4078 				sctp_send_initack(sctp, sctph, ch, mp, ira);
4079 				break;
4080 			case CHUNK_COOKIE:
4081 				cr = ira->ira_cred;
4082 				cpid = ira->ira_cpid;
4083 
4084 				if (sctp_process_cookie(sctp, ch, mp, &iack,
4085 				    sctph, &recv_adaptation, NULL, ira) == -1) {
4086 					SCTPS_BUMP_MIB(sctps,
4087 					    sctpInInvalidCookie);
4088 					break;
4089 				}
4090 				sctp_send_cookie_ack(sctp);
4091 				sctp_stop_faddr_timers(sctp);
4092 				if (!SCTP_IS_DETACHED(sctp)) {
4093 					sctp->sctp_ulp_connected(
4094 					    sctp->sctp_ulpd, 0, cr, cpid);
4095 					sctp_set_ulp_prop(sctp);
4096 
4097 				}
4098 				SCTP_ASSOC_EST(sctps, sctp);
4099 				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4100 				if (sctp->sctp_cookie_mp) {
4101 					freemsg(sctp->sctp_cookie_mp);
4102 					sctp->sctp_cookie_mp = NULL;
4103 				}
4104 
4105 				/* Validate the peer addresses. */
4106 				sctp->sctp_active = now;
4107 				sctp_validate_peer(sctp);
4108 
4109 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4110 				if (recv_adaptation) {
4111 					sctp->sctp_recv_adaptation = 1;
4112 					sctp_adaptation_event(sctp);
4113 				}
4114 				/* Try sending queued data, or ASCONFs */
4115 				trysend = 1;
4116 				break;
4117 			default:
4118 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4119 					goto nomorechunks;
4120 				} /* else skip and continue processing */
4121 				break;
4122 			}
4123 			break;
4124 
4125 		case SCTPS_COOKIE_ECHOED:
4126 			switch (ch->sch_id) {
4127 			case CHUNK_COOKIE_ACK:
4128 				cr = ira->ira_cred;
4129 				cpid = ira->ira_cpid;
4130 
4131 				if (!SCTP_IS_DETACHED(sctp)) {
4132 					sctp->sctp_ulp_connected(
4133 					    sctp->sctp_ulpd, 0, cr, cpid);
4134 					sctp_set_ulp_prop(sctp);
4135 				}
4136 				if (sctp->sctp_unacked == 0)
4137 					sctp_stop_faddr_timers(sctp);
4138 				SCTP_ASSOC_EST(sctps, sctp);
4139 				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4140 				BUMP_LOCAL(sctp->sctp_ibchunks);
4141 				if (sctp->sctp_cookie_mp) {
4142 					freemsg(sctp->sctp_cookie_mp);
4143 					sctp->sctp_cookie_mp = NULL;
4144 				}
4145 				sctp_faddr_alive(sctp, fp);
4146 				/* Validate the peer addresses. */
4147 				sctp->sctp_active = now;
4148 				sctp_validate_peer(sctp);
4149 
4150 				/* Try sending queued data, or ASCONFs */
4151 				trysend = 1;
4152 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4153 				sctp_adaptation_event(sctp);
4154 				break;
4155 			case CHUNK_ABORT:
4156 				sctp_process_abort(sctp, ch, ECONNREFUSED);
4157 				goto done;
4158 			case CHUNK_COOKIE:
4159 				cr = ira->ira_cred;
4160 				cpid = ira->ira_cpid;
4161 
4162 				if (sctp_process_cookie(sctp, ch, mp, &iack,
4163 				    sctph, &recv_adaptation, NULL, ira) == -1) {
4164 					SCTPS_BUMP_MIB(sctps,
4165 					    sctpInInvalidCookie);
4166 					break;
4167 				}
4168 				sctp_send_cookie_ack(sctp);
4169 
4170 				if (!SCTP_IS_DETACHED(sctp)) {
4171 					sctp->sctp_ulp_connected(
4172 					    sctp->sctp_ulpd, 0, cr, cpid);
4173 					sctp_set_ulp_prop(sctp);
4174 
4175 				}
4176 				if (sctp->sctp_unacked == 0)
4177 					sctp_stop_faddr_timers(sctp);
4178 				SCTP_ASSOC_EST(sctps, sctp);
4179 				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4180 				if (sctp->sctp_cookie_mp) {
4181 					freemsg(sctp->sctp_cookie_mp);
4182 					sctp->sctp_cookie_mp = NULL;
4183 				}
4184 				/* Validate the peer addresses. */
4185 				sctp->sctp_active = now;
4186 				sctp_validate_peer(sctp);
4187 
4188 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4189 				if (recv_adaptation) {
4190 					sctp->sctp_recv_adaptation = 1;
4191 					sctp_adaptation_event(sctp);
4192 				}
4193 				/* Try sending queued data, or ASCONFs */
4194 				trysend = 1;
4195 				break;
4196 			case CHUNK_INIT:
4197 				sctp_send_initack(sctp, sctph, ch, mp, ira);
4198 				break;
4199 			case CHUNK_ERROR: {
4200 				sctp_parm_hdr_t *p;
4201 
4202 				BUMP_LOCAL(sctp->sctp_ibchunks);
4203 				/* check for a stale cookie */
4204 				if (ntohs(ch->sch_len) >=
4205 				    (sizeof (*p) + sizeof (*ch)) +
4206 				    sizeof (uint32_t)) {
4207 
4208 					p = (sctp_parm_hdr_t *)(ch + 1);
4209 					if (p->sph_type ==
4210 					    htons(SCTP_ERR_STALE_COOKIE)) {
4211 						SCTPS_BUMP_MIB(sctps,
4212 						    sctpAborted);
4213 						sctp_error_event(sctp,
4214 						    ch, B_FALSE);
4215 						sctp_assoc_event(sctp,
4216 						    SCTP_COMM_LOST, 0, NULL);
4217 						sctp_clean_death(sctp,
4218 						    ECONNREFUSED);
4219 						goto done;
4220 					}
4221 				}
4222 				break;
4223 			}
4224 			case CHUNK_HEARTBEAT:
4225 				if (!hb_already) {
4226 					sctp_return_heartbeat(sctp, ch, mp);
4227 					hb_already = B_TRUE;
4228 				}
4229 				break;
4230 			default:
4231 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4232 					goto nomorechunks;
4233 				} /* else skip and continue processing */
4234 			} /* switch (ch->sch_id) */
4235 			break;
4236 
4237 		case SCTPS_SHUTDOWN_ACK_SENT:
4238 			switch (ch->sch_id) {
4239 			case CHUNK_ABORT:
4240 				/* Pass gathered wisdom to IP for keeping */
4241 				sctp_update_dce(sctp);
4242 				sctp_process_abort(sctp, ch, 0);
4243 				goto done;
4244 			case CHUNK_SHUTDOWN_COMPLETE:
4245 				BUMP_LOCAL(sctp->sctp_ibchunks);
4246 				SCTPS_BUMP_MIB(sctps, sctpShutdowns);
4247 				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
4248 				    NULL);
4249 
4250 				/* Pass gathered wisdom to IP for keeping */
4251 				sctp_update_dce(sctp);
4252 				sctp_clean_death(sctp, 0);
4253 				goto done;
4254 			case CHUNK_SHUTDOWN_ACK:
4255 				sctp_shutdown_complete(sctp);
4256 				BUMP_LOCAL(sctp->sctp_ibchunks);
4257 				SCTPS_BUMP_MIB(sctps, sctpShutdowns);
4258 				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
4259 				    NULL);
4260 				sctp_clean_death(sctp, 0);
4261 				goto done;
4262 			case CHUNK_COOKIE:
4263 				(void) sctp_shutdown_received(sctp, NULL,
4264 				    B_TRUE, B_FALSE, fp);
4265 				BUMP_LOCAL(sctp->sctp_ibchunks);
4266 				break;
4267 			case CHUNK_HEARTBEAT:
4268 				if (!hb_already) {
4269 					sctp_return_heartbeat(sctp, ch, mp);
4270 					hb_already = B_TRUE;
4271 				}
4272 				break;
4273 			default:
4274 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4275 					goto nomorechunks;
4276 				} /* else skip and continue processing */
4277 				break;
4278 			}
4279 			break;
4280 
4281 		case SCTPS_SHUTDOWN_RECEIVED:
4282 			switch (ch->sch_id) {
4283 			case CHUNK_SHUTDOWN:
4284 				trysend = sctp_shutdown_received(sctp, ch,
4285 				    B_FALSE, B_FALSE, fp);
4286 				/*
4287 				 * shutdown_ack_needed may have been set as
4288 				 * mentioned in the case CHUNK_SACK below.
4289 				 * If sctp_shutdown_received() above found
4290 				 * the xmit queue empty the SHUTDOWN ACK chunk
4291 				 * has already been sent (or scheduled to be
4292 				 * sent on the timer) and the SCTP state
4293 				 * changed, so reset shutdown_ack_needed.
4294 				 */
4295 				if (shutdown_ack_needed && (sctp->sctp_state ==
4296 				    SCTPS_SHUTDOWN_ACK_SENT))
4297 					shutdown_ack_needed = B_FALSE;
4298 				break;
4299 			case CHUNK_SACK:
4300 				trysend = sctp_got_sack(sctp, ch);
4301 				if (trysend < 0) {
4302 					sctp_send_abort(sctp, sctph->sh_verf,
4303 					    0, NULL, 0, mp, 0, B_FALSE, ira);
4304 					sctp_assoc_event(sctp,
4305 					    SCTP_COMM_LOST, 0, NULL);
4306 					sctp_clean_death(sctp,
4307 					    ECONNABORTED);
4308 					goto done;
4309 				}
4310 
4311 				/*
4312 				 * All data acknowledgement after a shutdown
4313 				 * should be done with SHUTDOWN chunk.
4314 				 * However some peer SCTP do not conform with
4315 				 * this and can unexpectedly send a SACK chunk.
4316 				 * If all data are acknowledged, set
4317 				 * shutdown_ack_needed here indicating that
4318 				 * SHUTDOWN ACK needs to be sent later by
4319 				 * sctp_send_shutdown_ack().
4320 				 */
4321 				if ((sctp->sctp_xmit_head == NULL) &&
4322 				    (sctp->sctp_xmit_unsent == NULL))
4323 					shutdown_ack_needed = B_TRUE;
4324 				break;
4325 			case CHUNK_ABORT:
4326 				sctp_process_abort(sctp, ch, ECONNRESET);
4327 				goto done;
4328 			case CHUNK_HEARTBEAT:
4329 				if (!hb_already) {
4330 					sctp_return_heartbeat(sctp, ch, mp);
4331 					hb_already = B_TRUE;
4332 				}
4333 				break;
4334 			default:
4335 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4336 					goto nomorechunks;
4337 				} /* else skip and continue processing */
4338 				break;
4339 			}
4340 			break;
4341 
4342 		default:
4343 			/*
4344 			 * The only remaining states are SCTPS_IDLE and
4345 			 * SCTPS_BOUND, and we should not be getting here
4346 			 * for these.
4347 			 */
4348 			ASSERT(0);
4349 		} /* switch (sctp->sctp_state) */
4350 
4351 		ch = sctp_next_chunk(ch, &mlen);
4352 		if (ch != NULL && !sctp_check_input(sctp, ch, mlen, 0))
4353 			goto done;
4354 	} while (ch != NULL);
4355 
4356 	/* Finished processing all chunks in packet */
4357 
4358 nomorechunks:
4359 
4360 	if (shutdown_ack_needed)
4361 		sctp_send_shutdown_ack(sctp, fp, B_FALSE);
4362 
4363 	/* SACK if necessary */
4364 	if (gotdata) {
4365 		boolean_t sack_sent;
4366 
4367 		(sctp->sctp_sack_toggle)++;
4368 		sack_sent = sctp_sack(sctp, dups);
4369 		dups = NULL;
4370 
4371 		/* If a SACK is sent, no need to restart the timer. */
4372 		if (!sack_sent && !sctp->sctp_ack_timer_running) {
4373 			sctp->sctp_ack_timer_running = B_TRUE;
4374 			sctp_timer(sctp, sctp->sctp_ack_mp,
4375 			    MSEC_TO_TICK(sctps->sctps_deferred_ack_interval));
4376 		}
4377 	}
4378 
4379 	if (trysend) {
4380 		sctp_output(sctp, UINT_MAX);
4381 		if (sctp->sctp_cxmit_list != NULL)
4382 			sctp_wput_asconf(sctp, NULL);
4383 	}
4384 	/*
4385 	 * If there is unsent data, make sure a timer is running, check
4386 	 * timer_mp, if sctp_closei_local() ran the timers may be free.
4387 	 */
4388 	if (sctp->sctp_unsent > 0 && !sctp->sctp_current->sf_timer_running &&
4389 	    sctp->sctp_current->sf_timer_mp != NULL) {
4390 		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
4391 		    sctp->sctp_current->sf_rto);
4392 	}
4393 
4394 done:
4395 	if (dups != NULL)
4396 		freeb(dups);
4397 	freemsg(mp);
4398 
4399 	if (sctp->sctp_err_chunks != NULL)
4400 		sctp_process_err(sctp);
4401 
4402 	if (wake_eager) {
4403 		/*
4404 		 * sctp points to newly created control block, need to
4405 		 * release it before exiting.
4406 		 */
4407 		WAKE_SCTP(sctp);
4408 	}
4409 }
4410 
4411 /*
4412  * Some amount of data got removed from ULP's receive queue and we can
4413  * push messages up if we are flow controlled before.  Reset the receive
4414  * window to full capacity (conn_rcvbuf) and check if we should send a
4415  * window update.
4416  */
4417 void
4418 sctp_recvd(sctp_t *sctp, int len)
4419 {
4420 	sctp_stack_t	*sctps = sctp->sctp_sctps;
4421 	conn_t		*connp = sctp->sctp_connp;
4422 	boolean_t	send_sack = B_FALSE;
4423 
4424 	ASSERT(sctp != NULL);
4425 	RUN_SCTP(sctp);
4426 
4427 	sctp->sctp_flowctrld = B_FALSE;
4428 	/* This is the amount of data queued in ULP. */
4429 	sctp->sctp_ulp_rxqueued = connp->conn_rcvbuf - len;
4430 
4431 	if (connp->conn_rcvbuf - sctp->sctp_arwnd >= sctp->sctp_mss)
4432 		send_sack = B_TRUE;
4433 	sctp->sctp_rwnd = connp->conn_rcvbuf;
4434 
4435 	if (sctp->sctp_state >= SCTPS_ESTABLISHED && send_sack) {
4436 		sctp->sctp_force_sack = 1;
4437 		SCTPS_BUMP_MIB(sctps, sctpOutWinUpdate);
4438 		(void) sctp_sack(sctp, NULL);
4439 	}
4440 	WAKE_SCTP(sctp);
4441 }
4442