xref: /illumos-gate/usr/src/uts/common/inet/sctp/sctp_input.c (revision f73e1ebf60792a8bdb2d559097c3131b68c09318)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #include <sys/kmem.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/socket.h>
34 #include <sys/strsun.h>
35 #include <sys/strsubr.h>
36 
37 #include <netinet/in.h>
38 #include <netinet/ip6.h>
39 #include <netinet/tcp_seq.h>
40 #include <netinet/sctp.h>
41 
42 #include <inet/common.h>
43 #include <inet/ip.h>
44 #include <inet/ip_if.h>
45 #include <inet/ip6.h>
46 #include <inet/mib2.h>
47 #include <inet/ipclassifier.h>
48 #include <inet/ipp_common.h>
49 #include <inet/ipsec_impl.h>
50 #include <inet/sctp_ip.h>
51 
52 #include "sctp_impl.h"
53 #include "sctp_asconf.h"
54 #include "sctp_addr.h"
55 
56 static struct kmem_cache *sctp_kmem_set_cache;
57 
58 /*
59  * PR-SCTP comments.
60  *
61  * When we get a valid Forward TSN chunk, we check the fragment list for this
62  * SSN and preceding SSNs and free them. Further, if this Forward TSN causes
63  * the next expected SSN to be present in the stream queue, we deliver any
64  * such stranded messages upstream. We also update the SACK info. appropriately.
65  * When checking for advancing the cumulative ack (in sctp_cumack()) we must
66  * check for abandoned chunks and messages. While traversing the transmit
67  * list if we come across an abandoned chunk, we can skip the message (i.e.
68  * take it out of the (re)transmit list) since this message, and hence this
69  * chunk, has been marked abandoned by sctp_rexmit(). If we come across an
70  * unsent chunk for a message that is now abandoned we need to check if a
71  * Forward TSN needs to be sent, this could be a case where we deferred sending
72  * a Forward TSN in sctp_get_msg_to_send(). Further, after processing a
73  * SACK we check if the Advanced peer ack point can be moved ahead, i.e.
74  * if we can send a Forward TSN via sctp_check_abandoned_data().
75  */
76 void
77 sctp_free_set(sctp_set_t *s)
78 {
79 	sctp_set_t *p;
80 
81 	while (s) {
82 		p = s->next;
83 		kmem_cache_free(sctp_kmem_set_cache, s);
84 		s = p;
85 	}
86 }
87 
88 static void
89 sctp_ack_add(sctp_set_t **head, uint32_t tsn, int *num)
90 {
91 	sctp_set_t *p, *t;
92 
93 	if (head == NULL || num == NULL)
94 		return;
95 
96 	ASSERT(*num >= 0);
97 	ASSERT((*num == 0 && *head == NULL) || (*num > 0 && *head != NULL));
98 
99 	if (*head == NULL) {
100 		*head = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
101 		if (*head == NULL)
102 			return;
103 		(*head)->prev = (*head)->next = NULL;
104 		(*head)->begin = tsn;
105 		(*head)->end = tsn;
106 		*num = 1;
107 		return;
108 	}
109 
110 	ASSERT((*head)->prev == NULL);
111 
112 	/*
113 	 * Handle this special case here so we don't have to check
114 	 * for it each time in the loop.
115 	 */
116 	if (SEQ_LT(tsn + 1, (*head)->begin)) {
117 		/* add a new set, and move the head pointer */
118 		t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
119 		if (t == NULL)
120 			return;
121 		t->next = *head;
122 		t->prev = NULL;
123 		(*head)->prev = t;
124 		t->begin = tsn;
125 		t->end = tsn;
126 		(*num)++;
127 		*head = t;
128 		return;
129 	}
130 
131 	/*
132 	 * We need to handle the following cases, where p points to
133 	 * the current set (as we walk through the loop):
134 	 *
135 	 * 1. tsn is entirely less than p; create a new set before p.
136 	 * 2. tsn borders p from less; coalesce p with tsn.
137 	 * 3. tsn is withing p; do nothing.
138 	 * 4. tsn borders p from greater; coalesce p with tsn.
139 	 * 4a. p may now border p->next from less; if so, coalesce those
140 	 *    two sets.
141 	 * 5. tsn is entirely greater then all sets; add a new set at
142 	 *    the end.
143 	 */
144 	for (p = *head; ; p = p->next) {
145 		if (SEQ_LT(tsn + 1, p->begin)) {
146 			/* 1: add a new set before p. */
147 			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
148 			if (t == NULL)
149 				return;
150 			t->next = p;
151 			t->prev = NULL;
152 			t->begin = tsn;
153 			t->end = tsn;
154 			if (p->prev) {
155 				t->prev = p->prev;
156 				p->prev->next = t;
157 			}
158 			p->prev = t;
159 			(*num)++;
160 			return;
161 		}
162 
163 		if ((tsn + 1) == p->begin) {
164 			/* 2: adjust p->begin */
165 			p->begin = tsn;
166 			return;
167 		}
168 
169 		if (SEQ_GEQ(tsn, p->begin) && SEQ_LEQ(tsn, p->end)) {
170 			/* 3; do nothing */
171 			return;
172 		}
173 
174 		if ((p->end + 1) == tsn) {
175 			/* 4; adjust p->end */
176 			p->end = tsn;
177 
178 			if (p->next != NULL && (tsn + 1) == p->next->begin) {
179 				/* 4a: coalesce p and p->next */
180 				t = p->next;
181 				p->end = t->end;
182 				p->next = t->next;
183 				if (t->next != NULL)
184 					t->next->prev = p;
185 				kmem_cache_free(sctp_kmem_set_cache, t);
186 				(*num)--;
187 			}
188 			return;
189 		}
190 
191 		if (p->next == NULL) {
192 			/* 5: add new set at the end */
193 			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
194 			if (t == NULL)
195 				return;
196 			t->next = NULL;
197 			t->prev = p;
198 			t->begin = tsn;
199 			t->end = tsn;
200 			p->next = t;
201 			(*num)++;
202 			return;
203 		}
204 
205 		if (SEQ_GT(tsn, p->end + 1))
206 			continue;
207 	}
208 }
209 
210 static void
211 sctp_ack_rem(sctp_set_t **head, uint32_t end, int *num)
212 {
213 	sctp_set_t *p, *t;
214 
215 	if (head == NULL || *head == NULL || num == NULL)
216 		return;
217 
218 	/* Nothing to remove */
219 	if (SEQ_LT(end, (*head)->begin))
220 		return;
221 
222 	/* Find out where to start removing sets */
223 	for (p = *head; p->next; p = p->next) {
224 		if (SEQ_LEQ(end, p->end))
225 			break;
226 	}
227 
228 	if (SEQ_LT(end, p->end) && SEQ_GEQ(end, p->begin)) {
229 		/* adjust p */
230 		p->begin = end + 1;
231 		/* all done */
232 		if (p == *head)
233 			return;
234 	} else if (SEQ_GEQ(end, p->end)) {
235 		/* remove this set too */
236 		p = p->next;
237 	}
238 
239 	/* unlink everything before this set */
240 	t = *head;
241 	*head = p;
242 	if (p != NULL && p->prev != NULL) {
243 		p->prev->next = NULL;
244 		p->prev = NULL;
245 	}
246 
247 	sctp_free_set(t);
248 
249 	/* recount the number of sets */
250 	*num = 0;
251 
252 	for (p = *head; p != NULL; p = p->next)
253 		(*num)++;
254 }
255 
256 void
257 sctp_sets_init()
258 {
259 	sctp_kmem_set_cache = kmem_cache_create("sctp_set_cache",
260 	    sizeof (sctp_set_t), 0, NULL, NULL, NULL, NULL,
261 	    NULL, 0);
262 }
263 
264 void
265 sctp_sets_fini()
266 {
267 	kmem_cache_destroy(sctp_kmem_set_cache);
268 }
269 
270 sctp_chunk_hdr_t *
271 sctp_first_chunk(uchar_t *rptr, ssize_t remaining)
272 {
273 	sctp_chunk_hdr_t *ch;
274 	uint16_t ch_len;
275 
276 	if (remaining < sizeof (*ch)) {
277 		return (NULL);
278 	}
279 
280 	ch = (sctp_chunk_hdr_t *)rptr;
281 	ch_len = ntohs(ch->sch_len);
282 
283 	if (ch_len < sizeof (*ch) || remaining < ch_len) {
284 		return (NULL);
285 	}
286 
287 	return (ch);
288 }
289 
290 sctp_chunk_hdr_t *
291 sctp_next_chunk(sctp_chunk_hdr_t *ch, ssize_t *remaining)
292 {
293 	int pad;
294 	uint16_t ch_len;
295 
296 	if (!ch) {
297 		return (NULL);
298 	}
299 
300 	ch_len = ntohs(ch->sch_len);
301 
302 	if ((pad = ch_len & (SCTP_ALIGN - 1)) != 0) {
303 		pad = SCTP_ALIGN - pad;
304 	}
305 
306 	*remaining -= (ch_len + pad);
307 	ch = (sctp_chunk_hdr_t *)((char *)ch + ch_len + pad);
308 
309 	return (sctp_first_chunk((uchar_t *)ch, *remaining));
310 }
311 
312 /*
313  * Attach ancillary data to a received SCTP segments.
314  * If the source address (fp) is not the primary, send up a
315  * unitdata_ind so recvfrom() can populate the msg_name field.
316  * If ancillary data is also requested, we append it to the
317  * unitdata_req. Otherwise, we just send up an optdata_ind.
318  */
319 static int
320 sctp_input_add_ancillary(sctp_t *sctp, mblk_t **mp, sctp_data_hdr_t *dcp,
321     sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
322 {
323 	struct T_unitdata_ind	*tudi;
324 	int			optlen;
325 	int			hdrlen;
326 	uchar_t			*optptr;
327 	struct cmsghdr		*cmsg;
328 	mblk_t			*mp1;
329 	struct sockaddr_in6	sin_buf[1];
330 	struct sockaddr_in6	*sin6;
331 	struct sockaddr_in	*sin4;
332 	crb_t			 addflag;	/* Which pieces to add */
333 	conn_t			*connp = sctp->sctp_connp;
334 
335 	sin4 = NULL;
336 	sin6 = NULL;
337 
338 	optlen = hdrlen = 0;
339 	addflag.crb_all = 0;
340 
341 	/* Figure out address size */
342 	if (connp->conn_family == AF_INET) {
343 		sin4 = (struct sockaddr_in *)sin_buf;
344 		sin4->sin_family = AF_INET;
345 		sin4->sin_port = connp->conn_fport;
346 		IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, sin4->sin_addr.s_addr);
347 		hdrlen = sizeof (*tudi) + sizeof (*sin4);
348 	} else {
349 		sin6 = sin_buf;
350 		sin6->sin6_family = AF_INET6;
351 		sin6->sin6_port = connp->conn_fport;
352 		sin6->sin6_addr = fp->sf_faddr;
353 		hdrlen = sizeof (*tudi) + sizeof (*sin6);
354 	}
355 	/* If app asked to receive send / recv info */
356 	if (sctp->sctp_recvsndrcvinfo)
357 		optlen += sizeof (*cmsg) + sizeof (struct sctp_sndrcvinfo);
358 
359 	if (connp->conn_recv_ancillary.crb_all == 0)
360 		goto noancillary;
361 
362 	if (connp->conn_recv_ancillary.crb_ip_recvpktinfo &&
363 	    ira->ira_ruifindex != sctp->sctp_recvifindex) {
364 		optlen += sizeof (*cmsg) + sizeof (struct in6_pktinfo);
365 		if (hdrlen == 0)
366 			hdrlen = sizeof (struct T_unitdata_ind);
367 		addflag.crb_ip_recvpktinfo = 1;
368 	}
369 	/* If app asked for hoplimit and it has changed ... */
370 	if (connp->conn_recv_ancillary.crb_ipv6_recvhoplimit &&
371 	    ipp->ipp_hoplimit != sctp->sctp_recvhops) {
372 		optlen += sizeof (*cmsg) + sizeof (uint_t);
373 		if (hdrlen == 0)
374 			hdrlen = sizeof (struct T_unitdata_ind);
375 		addflag.crb_ipv6_recvhoplimit = 1;
376 	}
377 	/* If app asked for tclass and it has changed ... */
378 	if (connp->conn_recv_ancillary.crb_ipv6_recvtclass &&
379 	    ipp->ipp_tclass != sctp->sctp_recvtclass) {
380 		optlen += sizeof (struct T_opthdr) + sizeof (uint_t);
381 		if (hdrlen == 0)
382 			hdrlen = sizeof (struct T_unitdata_ind);
383 		addflag.crb_ipv6_recvtclass = 1;
384 	}
385 	/* If app asked for hopbyhop headers and it has changed ... */
386 	if (connp->conn_recv_ancillary.crb_ipv6_recvhopopts &&
387 	    ip_cmpbuf(sctp->sctp_hopopts, sctp->sctp_hopoptslen,
388 	    (ipp->ipp_fields & IPPF_HOPOPTS),
389 	    ipp->ipp_hopopts, ipp->ipp_hopoptslen)) {
390 		optlen += sizeof (*cmsg) + ipp->ipp_hopoptslen -
391 		    sctp->sctp_v6label_len;
392 		if (hdrlen == 0)
393 			hdrlen = sizeof (struct T_unitdata_ind);
394 		addflag.crb_ipv6_recvhopopts = 1;
395 		if (!ip_allocbuf((void **)&sctp->sctp_hopopts,
396 		    &sctp->sctp_hopoptslen,
397 		    (ipp->ipp_fields & IPPF_HOPOPTS),
398 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen))
399 			return (-1);
400 	}
401 	/* If app asked for dst headers before routing headers ... */
402 	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts &&
403 	    ip_cmpbuf(sctp->sctp_rthdrdstopts, sctp->sctp_rthdrdstoptslen,
404 	    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
405 	    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen)) {
406 		optlen += sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
407 		if (hdrlen == 0)
408 			hdrlen = sizeof (struct T_unitdata_ind);
409 		addflag.crb_ipv6_recvrthdrdstopts = 1;
410 		if (!ip_allocbuf((void **)&sctp->sctp_rthdrdstopts,
411 		    &sctp->sctp_rthdrdstoptslen,
412 		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
413 		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen))
414 			return (-1);
415 	}
416 	/* If app asked for routing headers and it has changed ... */
417 	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdr &&
418 	    ip_cmpbuf(sctp->sctp_rthdr, sctp->sctp_rthdrlen,
419 	    (ipp->ipp_fields & IPPF_RTHDR),
420 	    ipp->ipp_rthdr, ipp->ipp_rthdrlen)) {
421 		optlen += sizeof (*cmsg) + ipp->ipp_rthdrlen;
422 		if (hdrlen == 0)
423 			hdrlen = sizeof (struct T_unitdata_ind);
424 		addflag.crb_ipv6_recvrthdr = 1;
425 		if (!ip_allocbuf((void **)&sctp->sctp_rthdr,
426 		    &sctp->sctp_rthdrlen,
427 		    (ipp->ipp_fields & IPPF_RTHDR),
428 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen))
429 			return (-1);
430 	}
431 	/* If app asked for dest headers and it has changed ... */
432 	if (connp->conn_recv_ancillary.crb_ipv6_recvdstopts &&
433 	    ip_cmpbuf(sctp->sctp_dstopts, sctp->sctp_dstoptslen,
434 	    (ipp->ipp_fields & IPPF_DSTOPTS),
435 	    ipp->ipp_dstopts, ipp->ipp_dstoptslen)) {
436 		optlen += sizeof (*cmsg) + ipp->ipp_dstoptslen;
437 		if (hdrlen == 0)
438 			hdrlen = sizeof (struct T_unitdata_ind);
439 		addflag.crb_ipv6_recvdstopts = 1;
440 		if (!ip_allocbuf((void **)&sctp->sctp_dstopts,
441 		    &sctp->sctp_dstoptslen,
442 		    (ipp->ipp_fields & IPPF_DSTOPTS),
443 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen))
444 			return (-1);
445 	}
446 noancillary:
447 	/* Nothing to add */
448 	if (hdrlen == 0)
449 		return (-1);
450 
451 	mp1 = allocb(hdrlen + optlen + sizeof (void *), BPRI_MED);
452 	if (mp1 == NULL)
453 		return (-1);
454 	mp1->b_cont = *mp;
455 	*mp = mp1;
456 	mp1->b_rptr += sizeof (void *);  /* pointer worth of padding */
457 	mp1->b_wptr = mp1->b_rptr + hdrlen + optlen;
458 	DB_TYPE(mp1) = M_PROTO;
459 	tudi = (struct T_unitdata_ind *)mp1->b_rptr;
460 	tudi->PRIM_type = T_UNITDATA_IND;
461 	tudi->SRC_length = sin4 ? sizeof (*sin4) : sizeof (*sin6);
462 	tudi->SRC_offset = sizeof (*tudi);
463 	tudi->OPT_offset = sizeof (*tudi) + tudi->SRC_length;
464 	tudi->OPT_length = optlen;
465 	if (sin4) {
466 		bcopy(sin4, tudi + 1, sizeof (*sin4));
467 	} else {
468 		bcopy(sin6, tudi + 1, sizeof (*sin6));
469 	}
470 	optptr = (uchar_t *)tudi + tudi->OPT_offset;
471 
472 	if (sctp->sctp_recvsndrcvinfo) {
473 		/* XXX need backout method if memory allocation fails. */
474 		struct sctp_sndrcvinfo *sri;
475 
476 		cmsg = (struct cmsghdr *)optptr;
477 		cmsg->cmsg_level = IPPROTO_SCTP;
478 		cmsg->cmsg_type = SCTP_SNDRCV;
479 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*sri);
480 		optptr += sizeof (*cmsg);
481 
482 		sri = (struct sctp_sndrcvinfo *)(cmsg + 1);
483 		ASSERT(OK_32PTR(sri));
484 		sri->sinfo_stream = ntohs(dcp->sdh_sid);
485 		sri->sinfo_ssn = ntohs(dcp->sdh_ssn);
486 		if (SCTP_DATA_GET_UBIT(dcp)) {
487 			sri->sinfo_flags = MSG_UNORDERED;
488 		} else {
489 			sri->sinfo_flags = 0;
490 		}
491 		sri->sinfo_ppid = dcp->sdh_payload_id;
492 		sri->sinfo_context = 0;
493 		sri->sinfo_timetolive = 0;
494 		sri->sinfo_tsn = ntohl(dcp->sdh_tsn);
495 		sri->sinfo_cumtsn = sctp->sctp_ftsn;
496 		sri->sinfo_assoc_id = 0;
497 
498 		optptr += sizeof (*sri);
499 	}
500 
501 	/*
502 	 * If app asked for pktinfo and the index has changed ...
503 	 * Note that the local address never changes for the connection.
504 	 */
505 	if (addflag.crb_ip_recvpktinfo) {
506 		struct in6_pktinfo *pkti;
507 		uint_t ifindex;
508 
509 		ifindex = ira->ira_ruifindex;
510 		cmsg = (struct cmsghdr *)optptr;
511 		cmsg->cmsg_level = IPPROTO_IPV6;
512 		cmsg->cmsg_type = IPV6_PKTINFO;
513 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*pkti);
514 		optptr += sizeof (*cmsg);
515 
516 		pkti = (struct in6_pktinfo *)optptr;
517 		if (connp->conn_family == AF_INET6)
518 			pkti->ipi6_addr = sctp->sctp_ip6h->ip6_src;
519 		else
520 			IN6_IPADDR_TO_V4MAPPED(sctp->sctp_ipha->ipha_src,
521 			    &pkti->ipi6_addr);
522 
523 		pkti->ipi6_ifindex = ifindex;
524 		optptr += sizeof (*pkti);
525 		ASSERT(OK_32PTR(optptr));
526 		/* Save as "last" value */
527 		sctp->sctp_recvifindex = ifindex;
528 	}
529 	/* If app asked for hoplimit and it has changed ... */
530 	if (addflag.crb_ipv6_recvhoplimit) {
531 		cmsg = (struct cmsghdr *)optptr;
532 		cmsg->cmsg_level = IPPROTO_IPV6;
533 		cmsg->cmsg_type = IPV6_HOPLIMIT;
534 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
535 		optptr += sizeof (*cmsg);
536 
537 		*(uint_t *)optptr = ipp->ipp_hoplimit;
538 		optptr += sizeof (uint_t);
539 		ASSERT(OK_32PTR(optptr));
540 		/* Save as "last" value */
541 		sctp->sctp_recvhops = ipp->ipp_hoplimit;
542 	}
543 	/* If app asked for tclass and it has changed ... */
544 	if (addflag.crb_ipv6_recvtclass) {
545 		cmsg = (struct cmsghdr *)optptr;
546 		cmsg->cmsg_level = IPPROTO_IPV6;
547 		cmsg->cmsg_type = IPV6_TCLASS;
548 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
549 		optptr += sizeof (*cmsg);
550 
551 		*(uint_t *)optptr = ipp->ipp_tclass;
552 		optptr += sizeof (uint_t);
553 		ASSERT(OK_32PTR(optptr));
554 		/* Save as "last" value */
555 		sctp->sctp_recvtclass = ipp->ipp_tclass;
556 	}
557 	if (addflag.crb_ipv6_recvhopopts) {
558 		cmsg = (struct cmsghdr *)optptr;
559 		cmsg->cmsg_level = IPPROTO_IPV6;
560 		cmsg->cmsg_type = IPV6_HOPOPTS;
561 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_hopoptslen;
562 		optptr += sizeof (*cmsg);
563 
564 		bcopy(ipp->ipp_hopopts, optptr, ipp->ipp_hopoptslen);
565 		optptr += ipp->ipp_hopoptslen;
566 		ASSERT(OK_32PTR(optptr));
567 		/* Save as last value */
568 		ip_savebuf((void **)&sctp->sctp_hopopts,
569 		    &sctp->sctp_hopoptslen,
570 		    (ipp->ipp_fields & IPPF_HOPOPTS),
571 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen);
572 	}
573 	if (addflag.crb_ipv6_recvrthdrdstopts) {
574 		cmsg = (struct cmsghdr *)optptr;
575 		cmsg->cmsg_level = IPPROTO_IPV6;
576 		cmsg->cmsg_type = IPV6_RTHDRDSTOPTS;
577 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
578 		optptr += sizeof (*cmsg);
579 
580 		bcopy(ipp->ipp_rthdrdstopts, optptr, ipp->ipp_rthdrdstoptslen);
581 		optptr += ipp->ipp_rthdrdstoptslen;
582 		ASSERT(OK_32PTR(optptr));
583 		/* Save as last value */
584 		ip_savebuf((void **)&sctp->sctp_rthdrdstopts,
585 		    &sctp->sctp_rthdrdstoptslen,
586 		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
587 		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen);
588 	}
589 	if (addflag.crb_ipv6_recvrthdr) {
590 		cmsg = (struct cmsghdr *)optptr;
591 		cmsg->cmsg_level = IPPROTO_IPV6;
592 		cmsg->cmsg_type = IPV6_RTHDR;
593 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrlen;
594 		optptr += sizeof (*cmsg);
595 
596 		bcopy(ipp->ipp_rthdr, optptr, ipp->ipp_rthdrlen);
597 		optptr += ipp->ipp_rthdrlen;
598 		ASSERT(OK_32PTR(optptr));
599 		/* Save as last value */
600 		ip_savebuf((void **)&sctp->sctp_rthdr,
601 		    &sctp->sctp_rthdrlen,
602 		    (ipp->ipp_fields & IPPF_RTHDR),
603 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen);
604 	}
605 	if (addflag.crb_ipv6_recvdstopts) {
606 		cmsg = (struct cmsghdr *)optptr;
607 		cmsg->cmsg_level = IPPROTO_IPV6;
608 		cmsg->cmsg_type = IPV6_DSTOPTS;
609 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_dstoptslen;
610 		optptr += sizeof (*cmsg);
611 
612 		bcopy(ipp->ipp_dstopts, optptr, ipp->ipp_dstoptslen);
613 		optptr += ipp->ipp_dstoptslen;
614 		ASSERT(OK_32PTR(optptr));
615 		/* Save as last value */
616 		ip_savebuf((void **)&sctp->sctp_dstopts,
617 		    &sctp->sctp_dstoptslen,
618 		    (ipp->ipp_fields & IPPF_DSTOPTS),
619 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen);
620 	}
621 
622 	ASSERT(optptr == mp1->b_wptr);
623 
624 	return (0);
625 }
626 
627 void
628 sctp_free_reass(sctp_instr_t *sip)
629 {
630 	mblk_t *mp, *mpnext, *mctl;
631 #ifdef	DEBUG
632 	sctp_reass_t	*srp;
633 #endif
634 
635 	for (mp = sip->istr_reass; mp != NULL; mp = mpnext) {
636 		mpnext = mp->b_next;
637 		mp->b_next = NULL;
638 		mp->b_prev = NULL;
639 		if (DB_TYPE(mp) == M_CTL) {
640 			mctl = mp;
641 #ifdef	DEBUG
642 			srp = (sctp_reass_t *)DB_BASE(mctl);
643 			/* Partial delivery can leave empty srp */
644 			ASSERT(mp->b_cont != NULL || srp->sr_got == 0);
645 #endif
646 			mp = mp->b_cont;
647 			mctl->b_cont = NULL;
648 			freeb(mctl);
649 		}
650 		freemsg(mp);
651 	}
652 	sip->istr_reass = NULL;
653 }
654 
655 /*
656  * If the series of data fragments of which dmp is a part is successfully
657  * reassembled, the first mblk in the series is returned. dc is adjusted
658  * to point at the data chunk in the lead mblk, and b_rptr also points to
659  * the data chunk; the following mblk's b_rptr's point at the actual payload.
660  *
661  * If the series is not yet reassembled, NULL is returned. dc is not changed.
662  * XXX should probably move this up into the state machine.
663  */
664 
665 /* Fragment list for un-ordered messages. Partial delivery is not supported */
666 static mblk_t *
667 sctp_uodata_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc)
668 {
669 	mblk_t		*hmp;
670 	mblk_t		*begin = NULL;
671 	mblk_t		*end = NULL;
672 	sctp_data_hdr_t	*qdc;
673 	uint32_t	ntsn;
674 	uint32_t	tsn = ntohl((*dc)->sdh_tsn);
675 #ifdef	DEBUG
676 	mblk_t		*mp1;
677 #endif
678 
679 	/* First frag. */
680 	if (sctp->sctp_uo_frags == NULL) {
681 		sctp->sctp_uo_frags = dmp;
682 		return (NULL);
683 	}
684 	hmp = sctp->sctp_uo_frags;
685 	/*
686 	 * Insert the segment according to the TSN, fragmented unordered
687 	 * chunks are sequenced by TSN.
688 	 */
689 	while (hmp != NULL) {
690 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
691 		ntsn = ntohl(qdc->sdh_tsn);
692 		if (SEQ_GT(ntsn, tsn)) {
693 			if (hmp->b_prev == NULL) {
694 				dmp->b_next = hmp;
695 				hmp->b_prev = dmp;
696 				sctp->sctp_uo_frags = dmp;
697 			} else {
698 				dmp->b_next = hmp;
699 				dmp->b_prev = hmp->b_prev;
700 				hmp->b_prev->b_next = dmp;
701 				hmp->b_prev = dmp;
702 			}
703 			break;
704 		}
705 		if (hmp->b_next == NULL) {
706 			hmp->b_next = dmp;
707 			dmp->b_prev = hmp;
708 			break;
709 		}
710 		hmp = hmp->b_next;
711 	}
712 	/* check if we completed a msg */
713 	if (SCTP_DATA_GET_BBIT(*dc)) {
714 		begin = dmp;
715 	} else if (SCTP_DATA_GET_EBIT(*dc)) {
716 		end = dmp;
717 	}
718 	/*
719 	 * We walk consecutive TSNs backwards till we get a seg. with
720 	 * the B bit
721 	 */
722 	if (begin == NULL) {
723 		for (hmp = dmp->b_prev; hmp != NULL; hmp = hmp->b_prev) {
724 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
725 			ntsn = ntohl(qdc->sdh_tsn);
726 			if ((int32_t)(tsn - ntsn) > 1) {
727 				return (NULL);
728 			}
729 			if (SCTP_DATA_GET_BBIT(qdc)) {
730 				begin = hmp;
731 				break;
732 			}
733 			tsn = ntsn;
734 		}
735 	}
736 	tsn = ntohl((*dc)->sdh_tsn);
737 	/*
738 	 * We walk consecutive TSNs till we get a seg. with the E bit
739 	 */
740 	if (end == NULL) {
741 		for (hmp = dmp->b_next; hmp != NULL; hmp = hmp->b_next) {
742 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
743 			ntsn = ntohl(qdc->sdh_tsn);
744 			if ((int32_t)(ntsn - tsn) > 1) {
745 				return (NULL);
746 			}
747 			if (SCTP_DATA_GET_EBIT(qdc)) {
748 				end = hmp;
749 				break;
750 			}
751 			tsn = ntsn;
752 		}
753 	}
754 	if (begin == NULL || end == NULL) {
755 		return (NULL);
756 	}
757 	/* Got one!, Remove the msg from the list */
758 	if (sctp->sctp_uo_frags == begin) {
759 		ASSERT(begin->b_prev == NULL);
760 		sctp->sctp_uo_frags = end->b_next;
761 		if (end->b_next != NULL)
762 			end->b_next->b_prev = NULL;
763 	} else {
764 		begin->b_prev->b_next = end->b_next;
765 		if (end->b_next != NULL)
766 			end->b_next->b_prev = begin->b_prev;
767 	}
768 	begin->b_prev = NULL;
769 	end->b_next = NULL;
770 
771 	/*
772 	 * Null out b_next and b_prev and chain using b_cont.
773 	 */
774 	dmp = end = begin;
775 	hmp = begin->b_next;
776 	*dc = (sctp_data_hdr_t *)begin->b_rptr;
777 	begin->b_next = NULL;
778 	while (hmp != NULL) {
779 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
780 		hmp->b_rptr = (uchar_t *)(qdc + 1);
781 		end = hmp->b_next;
782 		dmp->b_cont = hmp;
783 		dmp = hmp;
784 
785 		if (end != NULL)
786 			hmp->b_next = NULL;
787 		hmp->b_prev = NULL;
788 		hmp = end;
789 	}
790 	BUMP_LOCAL(sctp->sctp_reassmsgs);
791 #ifdef	DEBUG
792 	mp1 = begin;
793 	while (mp1 != NULL) {
794 		ASSERT(mp1->b_next == NULL);
795 		ASSERT(mp1->b_prev == NULL);
796 		mp1 = mp1->b_cont;
797 	}
798 #endif
799 	return (begin);
800 }
801 
802 /*
803  * Try partial delivery.
804  */
805 static mblk_t *
806 sctp_try_partial_delivery(sctp_t *sctp, mblk_t *hmp, sctp_reass_t *srp,
807     sctp_data_hdr_t **dc)
808 {
809 	mblk_t		*mp;
810 	mblk_t		*dmp;
811 	mblk_t		*qmp;
812 	mblk_t		*prev;
813 	sctp_data_hdr_t	*qdc;
814 	uint32_t	tsn;
815 
816 	ASSERT(DB_TYPE(hmp) == M_CTL);
817 
818 	dprint(4, ("trypartial: got=%d, needed=%d\n",
819 	    (int)(srp->sr_got), (int)(srp->sr_needed)));
820 
821 	mp = hmp->b_cont;
822 	qdc = (sctp_data_hdr_t *)mp->b_rptr;
823 
824 	ASSERT(SCTP_DATA_GET_BBIT(qdc) && srp->sr_hasBchunk);
825 
826 	tsn = ntohl(qdc->sdh_tsn) + 1;
827 
828 	/*
829 	 * This loop has two exit conditions: the
830 	 * end of received chunks has been reached, or
831 	 * there is a break in the sequence. We want
832 	 * to chop the reassembly list as follows (the
833 	 * numbers are TSNs):
834 	 *   10 -> 11 ->	(end of chunks)
835 	 *   10 -> 11 -> | 13   (break in sequence)
836 	 */
837 	prev = mp;
838 	mp = mp->b_cont;
839 	while (mp != NULL) {
840 		qdc = (sctp_data_hdr_t *)mp->b_rptr;
841 		if (ntohl(qdc->sdh_tsn) != tsn)
842 			break;
843 		prev = mp;
844 		mp = mp->b_cont;
845 		tsn++;
846 	}
847 	/*
848 	 * We are sending all the fragments upstream, we have to retain
849 	 * the srp info for further fragments.
850 	 */
851 	if (mp == NULL) {
852 		dmp = hmp->b_cont;
853 		hmp->b_cont = NULL;
854 		srp->sr_nexttsn = tsn;
855 		srp->sr_msglen = 0;
856 		srp->sr_needed = 0;
857 		srp->sr_got = 0;
858 		srp->sr_tail = NULL;
859 	} else {
860 		/*
861 		 * There is a gap then some ordered frags which are not
862 		 * the next deliverable tsn. When the next deliverable
863 		 * frag arrives it will be set as the new list head in
864 		 * sctp_data_frag() by setting the B bit.
865 		 */
866 		dmp = hmp->b_cont;
867 		hmp->b_cont = mp;
868 	}
869 	srp->sr_hasBchunk = B_FALSE;
870 	/*
871 	 * mp now points at the last chunk in the sequence,
872 	 * and prev points to mp's previous in the list.
873 	 * We chop the list at prev. Subsequent fragment
874 	 * deliveries will follow the normal reassembly
875 	 * path unless they too exceed the sctp_pd_point.
876 	 */
877 	prev->b_cont = NULL;
878 	srp->sr_partial_delivered = B_TRUE;
879 
880 	dprint(4, ("trypartial: got some, got=%d, needed=%d\n",
881 	    (int)(srp->sr_got), (int)(srp->sr_needed)));
882 
883 	/*
884 	 * Adjust all mblk's except the lead so their rptr's point to the
885 	 * payload. sctp_data_chunk() will need to process the lead's
886 	 * data chunk section, so leave it's rptr pointing at the data chunk.
887 	 */
888 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
889 	if (srp->sr_tail != NULL) {
890 		srp->sr_got--;
891 		ASSERT(srp->sr_got != 0);
892 		if (srp->sr_needed != 0) {
893 			srp->sr_needed--;
894 			ASSERT(srp->sr_needed != 0);
895 		}
896 		srp->sr_msglen -= ntohs((*dc)->sdh_len);
897 	}
898 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
899 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
900 		qmp->b_rptr = (uchar_t *)(qdc + 1);
901 
902 		/*
903 		 * Deduct the balance from got and needed here, now that
904 		 * we know we are actually delivering these data.
905 		 */
906 		if (srp->sr_tail != NULL) {
907 			srp->sr_got--;
908 			ASSERT(srp->sr_got != 0);
909 			if (srp->sr_needed != 0) {
910 				srp->sr_needed--;
911 				ASSERT(srp->sr_needed != 0);
912 			}
913 			srp->sr_msglen -= ntohs(qdc->sdh_len);
914 		}
915 	}
916 	ASSERT(srp->sr_msglen == 0);
917 	BUMP_LOCAL(sctp->sctp_reassmsgs);
918 
919 	return (dmp);
920 }
921 
922 /*
923  * Handle received fragments for ordered delivery to upper layer protocol.
924  * Manage the per message reassembly queue and if this fragment completes
925  * reassembly of the message, or qualifies the already reassembled data
926  * for partial delivery, prepare the message for delivery upstream.
927  *
928  * tpfinished in the caller remains set only when the incoming fragment
929  * has completed the reassembly of the message associated with its ssn.
930  */
931 static mblk_t *
932 sctp_data_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc, int *error,
933     sctp_instr_t *sip, boolean_t *tpfinished)
934 {
935 	mblk_t		*reassq_curr, *reassq_next, *reassq_prev;
936 	mblk_t		*new_reassq;
937 	mblk_t		*qmp;
938 	mblk_t		*first_mp;
939 	sctp_reass_t	*srp;
940 	sctp_data_hdr_t	*qdc;
941 	sctp_data_hdr_t	*bdc;
942 	sctp_data_hdr_t	*edc;
943 	uint32_t	tsn;
944 	uint16_t	fraglen = 0;
945 
946 	reassq_curr = NULL;
947 	*error = 0;
948 
949 	/*
950 	 * Find the reassembly queue for this data chunk, if none
951 	 * yet exists, a new per message queue will be created and
952 	 * appended to the end of the list of per message queues.
953 	 *
954 	 * sip points on sctp_instr_t representing instream messages
955 	 * as yet undelivered for this stream (sid) of the association.
956 	 */
957 	reassq_next = reassq_prev = sip->istr_reass;
958 	for (; reassq_next != NULL; reassq_next = reassq_next->b_next) {
959 		srp = (sctp_reass_t *)DB_BASE(reassq_next);
960 		if (ntohs((*dc)->sdh_ssn) == srp->sr_ssn) {
961 			reassq_curr = reassq_next;
962 			goto foundit;
963 		} else if (SSN_GT(srp->sr_ssn, ntohs((*dc)->sdh_ssn)))
964 			break;
965 		reassq_prev = reassq_next;
966 	}
967 
968 	/*
969 	 * First fragment of this message received, allocate a M_CTL that
970 	 * will head the reassembly queue for this message. The message
971 	 * and all its fragments are identified by having the same ssn.
972 	 *
973 	 * Arriving fragments will be inserted in tsn order on the
974 	 * reassembly queue for this message (ssn), linked by b_cont.
975 	 */
976 	if ((new_reassq = allocb(sizeof (*srp), BPRI_MED)) == NULL) {
977 		*error = ENOMEM;
978 		return (NULL);
979 	}
980 	DB_TYPE(new_reassq) = M_CTL;
981 	srp = (sctp_reass_t *)DB_BASE(new_reassq);
982 	new_reassq->b_cont = dmp;
983 
984 	/*
985 	 * All per ssn reassembly queues, (one for each message) on
986 	 * this stream are doubly linked by b_next/b_prev back to the
987 	 * instr_reass of the instream structure associated with this
988 	 * stream id, (sip is initialized as sctp->sctp_instr[sid]).
989 	 * Insert the new reassembly queue in the correct (ssn) order.
990 	 */
991 	if (reassq_next != NULL) {
992 		if (sip->istr_reass == reassq_next) {
993 			/* head insertion */
994 			sip->istr_reass = new_reassq;
995 			new_reassq->b_next = reassq_next;
996 			new_reassq->b_prev = NULL;
997 			reassq_next->b_prev = new_reassq;
998 		} else {
999 			/* mid queue insertion */
1000 			reassq_prev->b_next = new_reassq;
1001 			new_reassq->b_prev = reassq_prev;
1002 			new_reassq->b_next = reassq_next;
1003 			reassq_next->b_prev = new_reassq;
1004 		}
1005 	} else {
1006 		/* place new reassembly queue at the end */
1007 		if (sip->istr_reass == NULL) {
1008 			sip->istr_reass = new_reassq;
1009 			new_reassq->b_prev = NULL;
1010 		} else {
1011 			reassq_prev->b_next = new_reassq;
1012 			new_reassq->b_prev = reassq_prev;
1013 		}
1014 		new_reassq->b_next = NULL;
1015 	}
1016 	srp->sr_partial_delivered = B_FALSE;
1017 	srp->sr_ssn = ntohs((*dc)->sdh_ssn);
1018 	srp->sr_hasBchunk = B_FALSE;
1019 empty_srp:
1020 	srp->sr_needed = 0;
1021 	srp->sr_got = 1;
1022 	/* tail always the highest tsn on the reassembly queue for this ssn */
1023 	srp->sr_tail = dmp;
1024 	if (SCTP_DATA_GET_BBIT(*dc)) {
1025 		/* Incoming frag is flagged as the beginning of message */
1026 		srp->sr_msglen = ntohs((*dc)->sdh_len);
1027 		srp->sr_nexttsn = ntohl((*dc)->sdh_tsn) + 1;
1028 		srp->sr_hasBchunk = B_TRUE;
1029 	} else if (srp->sr_partial_delivered &&
1030 	    srp->sr_nexttsn == ntohl((*dc)->sdh_tsn)) {
1031 		/*
1032 		 * The real beginning fragment of the message was already
1033 		 * delivered upward, so this is the earliest frag expected.
1034 		 * Fake the B-bit then see if this frag also completes the
1035 		 * message.
1036 		 */
1037 		SCTP_DATA_SET_BBIT(*dc);
1038 		srp->sr_hasBchunk = B_TRUE;
1039 		srp->sr_msglen = ntohs((*dc)->sdh_len);
1040 		if (SCTP_DATA_GET_EBIT(*dc)) {
1041 			/* This frag is marked as the end of message */
1042 			srp->sr_needed = 1;
1043 			/* Got all fragments of this message now */
1044 			goto frag_done;
1045 		}
1046 		srp->sr_nexttsn++;
1047 	}
1048 
1049 	/* The only fragment of this message currently queued */
1050 	*tpfinished = B_FALSE;
1051 	return (NULL);
1052 foundit:
1053 	/*
1054 	 * This message already has a reassembly queue. Insert the new frag
1055 	 * in the reassembly queue. Try the tail first, on the assumption
1056 	 * that the fragments are arriving in order.
1057 	 */
1058 	qmp = srp->sr_tail;
1059 
	/*
	 * A NULL tail means all existing fragments of the message have
	 * been entirely consumed during a partial delivery.
	 */
1064 	if (qmp == NULL) {
1065 		ASSERT(srp->sr_got == 0 && srp->sr_needed == 0 &&
1066 		    srp->sr_partial_delivered);
1067 		ASSERT(reassq_curr->b_cont == NULL);
1068 		reassq_curr->b_cont = dmp;
1069 		goto empty_srp;
1070 	} else {
1071 		/*
1072 		 * If partial delivery did take place but the next arriving
1073 		 * fragment was not the next to be delivered, or partial
1074 		 * delivery broke off due to a gap, fragments remain on the
1075 		 * tail. The next fragment due to be delivered still has to
1076 		 * be set as the new head of list upon arrival. Fake B-bit
1077 		 * on that frag then see if it also completes the message.
1078 		 */
1079 		if (srp->sr_partial_delivered &&
1080 		    srp->sr_nexttsn == ntohl((*dc)->sdh_tsn)) {
1081 			SCTP_DATA_SET_BBIT(*dc);
1082 			srp->sr_hasBchunk = B_TRUE;
1083 			if (SCTP_DATA_GET_EBIT(*dc)) {
1084 				/* Got all fragments of this message now */
1085 				goto frag_done;
1086 			}
1087 		}
1088 	}
1089 
1090 	/* grab the frag header of already queued tail frag for comparison */
1091 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1092 	ASSERT(qmp->b_cont == NULL);
1093 
1094 	/* check if the frag goes on the tail in order */
1095 	if (SEQ_GT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1096 		qmp->b_cont = dmp;
1097 		srp->sr_tail = dmp;
1098 		dmp->b_cont = NULL;
1099 		if (srp->sr_hasBchunk && srp->sr_nexttsn ==
1100 		    ntohl((*dc)->sdh_tsn)) {
1101 			srp->sr_msglen += ntohs((*dc)->sdh_len);
1102 			srp->sr_nexttsn++;
1103 		}
1104 		goto inserted;
1105 	}
1106 
1107 	/* Next check if we should insert this frag at the beginning */
1108 	qmp = reassq_curr->b_cont;
1109 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1110 	if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1111 		dmp->b_cont = qmp;
1112 		reassq_curr->b_cont = dmp;
1113 		if (SCTP_DATA_GET_BBIT(*dc)) {
1114 			srp->sr_hasBchunk = B_TRUE;
1115 			srp->sr_nexttsn = ntohl((*dc)->sdh_tsn);
1116 		}
1117 		goto preinserted;
1118 	}
1119 
	/* Insert this frag in its correct order in the middle */
1121 	for (;;) {
1122 		/* Tail check above should have caught this */
1123 		ASSERT(qmp->b_cont != NULL);
1124 
1125 		qdc = (sctp_data_hdr_t *)qmp->b_cont->b_rptr;
1126 		if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1127 			/* insert here */
1128 			dmp->b_cont = qmp->b_cont;
1129 			qmp->b_cont = dmp;
1130 			break;
1131 		}
1132 		qmp = qmp->b_cont;
1133 	}
1134 preinserted:
1135 	/*
1136 	 * Need head of message and to be due to deliver, otherwise skip
1137 	 * the recalculation of the message length below.
1138 	 */
1139 	if (!srp->sr_hasBchunk || ntohl((*dc)->sdh_tsn) != srp->sr_nexttsn)
1140 		goto inserted;
1141 	/*
1142 	 * fraglen contains the length of consecutive chunks of fragments.
1143 	 * starting from the chunk we just inserted.
1144 	 */
1145 	tsn = srp->sr_nexttsn;
1146 	for (qmp = dmp; qmp != NULL; qmp = qmp->b_cont) {
1147 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1148 		if (tsn != ntohl(qdc->sdh_tsn))
1149 			break;
1150 		fraglen += ntohs(qdc->sdh_len);
1151 		tsn++;
1152 	}
1153 	srp->sr_nexttsn = tsn;
1154 	srp->sr_msglen += fraglen;
1155 inserted:
1156 	srp->sr_got++;
1157 	first_mp = reassq_curr->b_cont;
1158 	/* Prior to this frag either the beginning or end frag was missing */
1159 	if (srp->sr_needed == 0) {
1160 		/* used to check if we have the first and last fragments */
1161 		bdc = (sctp_data_hdr_t *)first_mp->b_rptr;
1162 		edc = (sctp_data_hdr_t *)srp->sr_tail->b_rptr;
1163 
1164 		/*
1165 		 * If we now have both the beginning and the end of the message,
1166 		 * calculate how many fragments in the complete message.
1167 		 */
1168 		if (SCTP_DATA_GET_BBIT(bdc) && SCTP_DATA_GET_EBIT(edc)) {
1169 			srp->sr_needed = ntohl(edc->sdh_tsn) -
1170 			    ntohl(bdc->sdh_tsn) + 1;
1171 		}
1172 	}
1173 
1174 	/*
1175 	 * Try partial delivery if the message length has exceeded the
1176 	 * partial delivery point. Only do this if we can immediately
1177 	 * deliver the partially assembled message, and only partially
1178 	 * deliver one message at a time (i.e. messages cannot be
1179 	 * intermixed arriving at the upper layer).
1180 	 * sctp_try_partial_delivery() will return a message consisting
1181 	 * of only consecutive fragments.
1182 	 */
1183 	if (srp->sr_needed != srp->sr_got) {
1184 		/* we don't have the full message yet */
1185 		dmp = NULL;
1186 		if (ntohl((*dc)->sdh_tsn) <= sctp->sctp_ftsn &&
1187 		    srp->sr_msglen >= sctp->sctp_pd_point &&
1188 		    srp->sr_ssn == sip->nextseq) {
1189 			dmp = sctp_try_partial_delivery(sctp, reassq_curr,
1190 			    srp, dc);
1191 		}
1192 		*tpfinished = B_FALSE;
1193 		/*
1194 		 * NULL unless a segment of the message now qualified for
1195 		 * partial_delivery and has been prepared for delivery by
1196 		 * sctp_try_partial_delivery().
1197 		 */
1198 		return (dmp);
1199 	}
1200 frag_done:
1201 	/*
1202 	 * Reassembly complete for this message, prepare the data for delivery.
1203 	 * First unlink the reassembly queue for this ssn from the list of
1204 	 * messages in reassembly.
1205 	 */
1206 	if (sip->istr_reass == reassq_curr) {
1207 		sip->istr_reass = reassq_curr->b_next;
1208 		if (reassq_curr->b_next)
1209 			reassq_curr->b_next->b_prev = NULL;
1210 	} else {
1211 		ASSERT(reassq_curr->b_prev != NULL);
1212 		reassq_curr->b_prev->b_next = reassq_curr->b_next;
1213 		if (reassq_curr->b_next)
1214 			reassq_curr->b_next->b_prev = reassq_curr->b_prev;
1215 	}
1216 
1217 	/*
1218 	 * Need to clean up b_prev and b_next as freeb() will
1219 	 * ASSERT that they are unused.
1220 	 */
1221 	reassq_curr->b_next = NULL;
1222 	reassq_curr->b_prev = NULL;
1223 
1224 	dmp = reassq_curr;
1225 	/* point to the head of the reassembled data message */
1226 	dmp = dmp->b_cont;
1227 	reassq_curr->b_cont = NULL;
1228 	freeb(reassq_curr);
1229 	/* Tell our caller that we are returning a complete message. */
1230 	*tpfinished = B_TRUE;
1231 
1232 	/*
1233 	 * Adjust all mblk's except the lead so their rptr's point to the
1234 	 * payload. sctp_data_chunk() will need to process the lead's data
1235 	 * data chunk section, so leave its rptr pointing at the data chunk
1236 	 * header.
1237 	 */
1238 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
1239 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
1240 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1241 		qmp->b_rptr = (uchar_t *)(qdc + 1);
1242 	}
1243 	BUMP_LOCAL(sctp->sctp_reassmsgs);
1244 
1245 	return (dmp);
1246 }
1247 
1248 static void
1249 sctp_add_dup(uint32_t tsn, mblk_t **dups)
1250 {
1251 	mblk_t *mp;
1252 	size_t bsize = SCTP_DUP_MBLK_SZ * sizeof (tsn);
1253 
1254 	if (dups == NULL) {
1255 		return;
1256 	}
1257 
1258 	/* first time? */
1259 	if (*dups == NULL) {
1260 		*dups = allocb(bsize, BPRI_MED);
1261 		if (*dups == NULL) {
1262 			return;
1263 		}
1264 	}
1265 
1266 	mp = *dups;
1267 	if ((mp->b_wptr - mp->b_rptr) >= bsize) {
1268 		/* maximum reached */
1269 		return;
1270 	}
1271 
1272 	/* add the duplicate tsn */
1273 	bcopy(&tsn, mp->b_wptr, sizeof (tsn));
1274 	mp->b_wptr += sizeof (tsn);
1275 	ASSERT((mp->b_wptr - mp->b_rptr) <= bsize);
1276 }
1277 
1278 /*
1279  * All incoming sctp data, complete messages and fragments are handled by
1280  * this function. Unless the U-bit is set in the data chunk it will be
1281  * delivered in order or queued until an in-order delivery can be made.
1282  */
1283 static void
1284 sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
1285     sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
1286 {
1287 	sctp_data_hdr_t *dc;
1288 	mblk_t *dmp, *pmp;
1289 	sctp_instr_t *instr;
1290 	int ubit;
1291 	int sid;
1292 	int isfrag;
1293 	uint16_t ssn;
1294 	uint32_t oftsn;
1295 	boolean_t can_deliver = B_TRUE;
1296 	uint32_t tsn;
1297 	int dlen;
1298 	boolean_t tpfinished = B_TRUE;
1299 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1300 	int	error;
1301 
1302 	/* The following are used multiple times, so we inline them */
1303 #define	SCTP_ACK_IT(sctp, tsn)						\
1304 	if (tsn == sctp->sctp_ftsn) {					\
1305 		dprint(2, ("data_chunk: acking next %x\n", tsn));	\
1306 		(sctp)->sctp_ftsn++;					\
1307 		if ((sctp)->sctp_sack_gaps > 0)				\
1308 			(sctp)->sctp_force_sack = 1;			\
1309 	} else if (SEQ_GT(tsn, sctp->sctp_ftsn)) {			\
1310 		/* Got a gap; record it */				\
1311 		BUMP_LOCAL(sctp->sctp_outseqtsns);			\
1312 		dprint(2, ("data_chunk: acking gap %x\n", tsn));	\
1313 		sctp_ack_add(&sctp->sctp_sack_info, tsn,		\
1314 		    &sctp->sctp_sack_gaps);				\
1315 		sctp->sctp_force_sack = 1;				\
1316 	}
1317 
1318 	dmp = NULL;
1319 
1320 	dc = (sctp_data_hdr_t *)ch;
1321 	tsn = ntohl(dc->sdh_tsn);
1322 
1323 	dprint(3, ("sctp_data_chunk: mp=%p tsn=%x\n", (void *)mp, tsn));
1324 
1325 	/* Check for duplicates */
1326 	if (SEQ_LT(tsn, sctp->sctp_ftsn)) {
1327 		dprint(4, ("sctp_data_chunk: dropping duplicate\n"));
1328 		BUMP_LOCAL(sctp->sctp_idupchunks);
1329 		sctp->sctp_force_sack = 1;
1330 		sctp_add_dup(dc->sdh_tsn, dups);
1331 		return;
1332 	}
1333 
1334 	/* Check for dups of sack'ed data */
1335 	if (sctp->sctp_sack_info != NULL) {
1336 		sctp_set_t *sp;
1337 
1338 		for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1339 			if (SEQ_GEQ(tsn, sp->begin) && SEQ_LEQ(tsn, sp->end)) {
1340 				dprint(4,
1341 				    ("sctp_data_chunk: dropping dup > "
1342 				    "cumtsn\n"));
1343 				BUMP_LOCAL(sctp->sctp_idupchunks);
1344 				sctp->sctp_force_sack = 1;
1345 				sctp_add_dup(dc->sdh_tsn, dups);
1346 				return;
1347 			}
1348 		}
1349 	}
1350 
1351 	/* We can no longer deliver anything up, but still need to handle it. */
1352 	if (SCTP_IS_DETACHED(sctp)) {
1353 		SCTPS_BUMP_MIB(sctps, sctpInClosed);
1354 		can_deliver = B_FALSE;
1355 	}
1356 
1357 	dlen = ntohs(dc->sdh_len) - sizeof (*dc);
1358 
1359 	/*
1360 	 * Check for buffer space. Note if this is the next expected TSN
1361 	 * we have to take it to avoid deadlock because we cannot deliver
1362 	 * later queued TSNs and thus clear buffer space without it.
1363 	 * We drop anything that is purely zero window probe data here.
1364 	 */
1365 	if ((sctp->sctp_rwnd - sctp->sctp_rxqueued < dlen) &&
1366 	    (tsn != sctp->sctp_ftsn || sctp->sctp_rwnd == 0)) {
1367 		/* Drop and SACK, but don't advance the cumulative TSN. */
1368 		sctp->sctp_force_sack = 1;
1369 		dprint(0, ("sctp_data_chunk: exceed rwnd %d rxqueued %d "
1370 		    "dlen %d ssn %d tsn %x\n", sctp->sctp_rwnd,
1371 		    sctp->sctp_rxqueued, dlen, ntohs(dc->sdh_ssn),
1372 		    ntohl(dc->sdh_tsn)));
1373 		return;
1374 	}
1375 
1376 	sid = ntohs(dc->sdh_sid);
1377 
1378 	/* Data received for a stream not negotiated for this association */
1379 	if (sid >= sctp->sctp_num_istr) {
1380 		sctp_bsc_t	inval_parm;
1381 
1382 		/* Will populate the CAUSE block in the ERROR chunk. */
1383 		inval_parm.bsc_sid = dc->sdh_sid;
1384 		/* RESERVED, ignored at the receiving end */
1385 		inval_parm.bsc_pad = 0;
1386 
1387 		/* ack and drop it */
1388 		sctp_add_err(sctp, SCTP_ERR_BAD_SID, (void *)&inval_parm,
1389 		    sizeof (sctp_bsc_t), fp);
1390 		SCTP_ACK_IT(sctp, tsn);
1391 		return;
1392 	}
1393 
1394 	/* unordered delivery OK for this data if ubit set */
1395 	ubit = SCTP_DATA_GET_UBIT(dc);
1396 	ASSERT(sctp->sctp_instr != NULL);
1397 
1398 	/* select per stream structure for this stream from the array */
1399 	instr = &sctp->sctp_instr[sid];
1400 	/* Initialize the stream, if not yet used */
1401 	if (instr->sctp == NULL)
1402 		instr->sctp = sctp;
1403 
1404 	/* Begin and End bit set would mean a complete message */
1405 	isfrag = !(SCTP_DATA_GET_BBIT(dc) && SCTP_DATA_GET_EBIT(dc));
1406 
1407 	/* The ssn of this sctp message and of any fragments in it */
1408 	ssn = ntohs(dc->sdh_ssn);
1409 
1410 	dmp = dupb(mp);
1411 	if (dmp == NULL) {
1412 		/* drop it and don't ack, let the peer retransmit */
1413 		return;
1414 	}
1415 	/*
1416 	 * Past header and payload, note: the underlying buffer may
1417 	 * contain further chunks from the same incoming IP packet,
1418 	 * if so db_ref will be greater than one.
1419 	 */
1420 	dmp->b_wptr = (uchar_t *)ch + ntohs(ch->sch_len);
1421 
1422 	sctp->sctp_rxqueued += dlen;
1423 
1424 	oftsn = sctp->sctp_ftsn;
1425 
1426 	if (isfrag) {
1427 
1428 		error = 0;
1429 		/* fragmented data chunk */
1430 		dmp->b_rptr = (uchar_t *)dc;
1431 		if (ubit) {
1432 			/* prepare data for unordered delivery */
1433 			dmp = sctp_uodata_frag(sctp, dmp, &dc);
1434 #if	DEBUG
1435 			if (dmp != NULL) {
1436 				ASSERT(instr ==
1437 				    &sctp->sctp_instr[sid]);
1438 			}
1439 #endif
1440 		} else {
1441 			/*
1442 			 * Assemble fragments and queue for ordered delivery,
1443 			 * dmp returned is NULL or the head of a complete or
1444 			 * "partial delivery" message. Any returned message
1445 			 * and all its fragments will have the same ssn as the
1446 			 * input fragment currently being handled.
1447 			 */
1448 			dmp = sctp_data_frag(sctp, dmp, &dc, &error, instr,
1449 			    &tpfinished);
1450 		}
1451 		if (error == ENOMEM) {
1452 			/* back out the adjustment made earlier */
1453 			sctp->sctp_rxqueued -= dlen;
1454 			/*
1455 			 * Don't ack the segment,
1456 			 * the peer will retransmit.
1457 			 */
1458 			return;
1459 		}
1460 
1461 		if (dmp == NULL) {
1462 			/*
1463 			 * The frag has been queued for later in-order delivery,
1464 			 * but the cumulative TSN may need to advance, so also
1465 			 * need to perform the gap ack checks at the done label.
1466 			 */
1467 			SCTP_ACK_IT(sctp, tsn);
1468 			DTRACE_PROBE4(sctp_data_frag_queued, sctp_t *, sctp,
1469 			    int, sid, int, tsn, uint16_t, ssn);
1470 			goto done;
1471 		}
1472 	}
1473 
1474 	/*
1475 	 * Unless message is the next for delivery to the ulp, queue complete
1476 	 * message in the correct order for ordered delivery.
1477 	 * Note: tpfinished is true when the incoming chunk contains a complete
1478 	 * message or is the final missing fragment which completed a message.
1479 	 */
1480 	if (!ubit && tpfinished && ssn != instr->nextseq) {
1481 		/* Adjust rptr to point at the data chunk for compares */
1482 		dmp->b_rptr = (uchar_t *)dc;
1483 
1484 		dprint(2,
1485 		    ("data_chunk: inserted %x in pq (ssn %d expected %d)\n",
1486 		    ntohl(dc->sdh_tsn), (int)(ssn), (int)(instr->nextseq)));
1487 
1488 		if (instr->istr_msgs == NULL) {
1489 			instr->istr_msgs = dmp;
1490 			ASSERT(dmp->b_prev == NULL && dmp->b_next == NULL);
1491 		} else {
1492 			mblk_t			*imblk = instr->istr_msgs;
1493 			sctp_data_hdr_t		*idc;
1494 
1495 			/*
1496 			 * XXXNeed to take sequence wraps into account,
1497 			 * ... and a more efficient insertion algo.
1498 			 */
1499 			for (;;) {
1500 				idc = (sctp_data_hdr_t *)imblk->b_rptr;
1501 				if (SSN_GT(ntohs(idc->sdh_ssn),
1502 				    ntohs(dc->sdh_ssn))) {
1503 					if (instr->istr_msgs == imblk) {
1504 						instr->istr_msgs = dmp;
1505 						dmp->b_next = imblk;
1506 						imblk->b_prev = dmp;
1507 					} else {
1508 						ASSERT(imblk->b_prev != NULL);
1509 						imblk->b_prev->b_next = dmp;
1510 						dmp->b_prev = imblk->b_prev;
1511 						imblk->b_prev = dmp;
1512 						dmp->b_next = imblk;
1513 					}
1514 					break;
1515 				}
1516 				if (imblk->b_next == NULL) {
1517 					imblk->b_next = dmp;
1518 					dmp->b_prev = imblk;
1519 					break;
1520 				}
1521 				imblk = imblk->b_next;
1522 			}
1523 		}
1524 		(instr->istr_nmsgs)++;
1525 		(sctp->sctp_istr_nmsgs)++;
1526 		SCTP_ACK_IT(sctp, tsn);
1527 		DTRACE_PROBE4(sctp_pqueue_completemsg, sctp_t *, sctp,
1528 		    int, sid, int, tsn, uint16_t, ssn);
1529 		return;
1530 	}
1531 
1532 	/*
1533 	 * Deliver the data directly. Recalculate dlen now since
1534 	 * we may have just reassembled this data.
1535 	 */
1536 	dlen = dmp->b_wptr - (uchar_t *)dc - sizeof (*dc);
1537 	for (pmp = dmp->b_cont; pmp != NULL; pmp = pmp->b_cont)
1538 		dlen += MBLKL(pmp);
1539 	ASSERT(sctp->sctp_rxqueued >= dlen);
1540 
1541 	/* Deliver the message. */
1542 	sctp->sctp_rxqueued -= dlen;
1543 
1544 	if (can_deliver) {
1545 		/* step past header to the payload */
1546 		dmp->b_rptr = (uchar_t *)(dc + 1);
1547 		if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
1548 		    ipp, ira) == 0) {
1549 			dprint(1, ("sctp_data_chunk: delivering %lu bytes\n",
1550 			    msgdsize(dmp)));
1551 			/*
1552 			 * We overload the meaning of b_flag for SCTP sockfs
1553 			 * internal use, to advise sockfs of partial delivery
1554 			 * semantics.
1555 			 */
1556 			dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA;
1557 			if (sctp->sctp_flowctrld) {
1558 				sctp->sctp_rwnd -= dlen;
1559 				if (sctp->sctp_rwnd < 0)
1560 					sctp->sctp_rwnd = 0;
1561 			}
1562 			if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
1563 			    msgdsize(dmp), 0, &error, NULL) <= 0) {
1564 				sctp->sctp_flowctrld = B_TRUE;
1565 			}
1566 			SCTP_ACK_IT(sctp, tsn);
1567 		} else {
1568 			/* No memory don't ack, the peer will retransmit. */
1569 			freemsg(dmp);
1570 			return;
1571 		}
1572 	} else {
1573 		/* Closed above, ack to peer and free the data */
1574 		freemsg(dmp);
1575 		SCTP_ACK_IT(sctp, tsn);
1576 	}
1577 
1578 	/*
1579 	 * Data now enqueued, may already have been processed and free'd
1580 	 * by the ULP (or we may have just freed it above, if we could not
1581 	 * deliver), so we must not reference it (this is why we saved the
1582 	 * ssn and ubit earlier).
1583 	 */
1584 	if (ubit != 0) {
1585 		BUMP_LOCAL(sctp->sctp_iudchunks);
1586 		goto done;
1587 	}
1588 	BUMP_LOCAL(sctp->sctp_idchunks);
1589 
1590 	/*
1591 	 * There was a partial delivery and it has not finished,
1592 	 * don't pull anything from the pqueues or increment the
1593 	 * nextseq. This msg must complete before starting on
1594 	 * the next ssn and the partial message must have the
1595 	 * same ssn as the next expected message..
1596 	 */
1597 	if (!tpfinished) {
1598 		DTRACE_PROBE4(sctp_partial_delivery, sctp_t *, sctp,
1599 		    int, sid, int, tsn, uint16_t, ssn);
1600 		/*
1601 		 * Verify the partial delivery is part of the
1602 		 * message expected for ordered delivery.
1603 		 */
1604 		if (ssn != instr->nextseq) {
1605 			DTRACE_PROBE4(sctp_partial_delivery_error,
1606 			    sctp_t *, sctp, int, sid, int, tsn,
1607 			    uint16_t, ssn);
1608 			cmn_err(CE_WARN, "sctp partial"
1609 			    " delivery error, sctp 0x%p"
1610 			    " sid = 0x%x ssn != nextseq"
1611 			    " tsn 0x%x ftsn 0x%x"
1612 			    " ssn 0x%x nextseq 0x%x",
1613 			    (void *)sctp, sid,
1614 			    tsn, sctp->sctp_ftsn, ssn,
1615 			    instr->nextseq);
1616 		}
1617 
1618 		ASSERT(ssn == instr->nextseq);
1619 		goto done;
1620 	}
1621 
1622 	if (ssn != instr->nextseq) {
1623 		DTRACE_PROBE4(sctp_inorder_delivery_error,
1624 		    sctp_t *, sctp, int, sid, int, tsn,
1625 		    uint16_t, ssn);
1626 		cmn_err(CE_WARN, "sctp in-order delivery error, sctp 0x%p "
1627 		    "sid = 0x%x ssn != nextseq ssn 0x%x nextseq 0x%x",
1628 		    (void *)sctp, sid, ssn, instr->nextseq);
1629 	}
1630 
1631 	ASSERT(ssn == instr->nextseq);
1632 
1633 	DTRACE_PROBE4(sctp_deliver_completemsg, sctp_t *, sctp, int, sid,
1634 	    int, tsn, uint16_t, ssn);
1635 
1636 	instr->nextseq = ssn + 1;
1637 
1638 	/*
1639 	 * Deliver any successive data chunks waiting in the instr pqueue
1640 	 * for the data just sent up.
1641 	 */
1642 	while (instr->istr_nmsgs > 0) {
1643 		dmp = (mblk_t *)instr->istr_msgs;
1644 		dc = (sctp_data_hdr_t *)dmp->b_rptr;
1645 		ssn = ntohs(dc->sdh_ssn);
1646 		tsn = ntohl(dc->sdh_tsn);
1647 		/* Stop at the first gap in the sequence */
1648 		if (ssn != instr->nextseq)
1649 			break;
1650 
1651 		DTRACE_PROBE4(sctp_deliver_pqueuedmsg, sctp_t *, sctp,
1652 		    int, sid, int, tsn, uint16_t, ssn);
1653 		/*
1654 		 * Ready to deliver all data before the gap
1655 		 * to the upper layer.
1656 		 */
1657 		(instr->istr_nmsgs)--;
1658 		(instr->nextseq)++;
1659 		(sctp->sctp_istr_nmsgs)--;
1660 
1661 		instr->istr_msgs = instr->istr_msgs->b_next;
1662 		if (instr->istr_msgs != NULL)
1663 			instr->istr_msgs->b_prev = NULL;
1664 		dmp->b_next = dmp->b_prev = NULL;
1665 
1666 		dprint(2, ("data_chunk: pulling %x from pq (ssn %d)\n",
1667 		    ntohl(dc->sdh_tsn), (int)ssn));
1668 
1669 		/*
1670 		 * Composite messages indicate this chunk was reassembled,
1671 		 * each b_cont represents another TSN; Follow the chain to
1672 		 * reach the frag with the last tsn in order to advance ftsn
1673 		 * shortly by calling SCTP_ACK_IT().
1674 		 */
1675 		dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
1676 		for (pmp = dmp->b_cont; pmp; pmp = pmp->b_cont)
1677 			dlen += MBLKL(pmp);
1678 
1679 		ASSERT(sctp->sctp_rxqueued >= dlen);
1680 
1681 		sctp->sctp_rxqueued -= dlen;
1682 		if (can_deliver) {
1683 			dmp->b_rptr = (uchar_t *)(dc + 1);
1684 			if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
1685 			    ipp, ira) == 0) {
1686 				dprint(1, ("sctp_data_chunk: delivering %lu "
1687 				    "bytes\n", msgdsize(dmp)));
1688 				/*
1689 				 * Meaning of b_flag overloaded for SCTP sockfs
1690 				 * internal use, advise sockfs of partial
1691 				 * delivery semantics.
1692 				 */
1693 				dmp->b_flag = tpfinished ?
1694 				    0 : SCTP_PARTIAL_DATA;
1695 				if (sctp->sctp_flowctrld) {
1696 					sctp->sctp_rwnd -= dlen;
1697 					if (sctp->sctp_rwnd < 0)
1698 						sctp->sctp_rwnd = 0;
1699 				}
1700 				if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
1701 				    msgdsize(dmp), 0, &error, NULL) <= 0) {
1702 					sctp->sctp_flowctrld = B_TRUE;
1703 				}
1704 				SCTP_ACK_IT(sctp, tsn);
1705 			} else {
1706 				/* don't ack, the peer will retransmit */
1707 				freemsg(dmp);
1708 				return;
1709 			}
1710 		} else {
1711 			/* Closed above, ack and free the data */
1712 			freemsg(dmp);
1713 			SCTP_ACK_IT(sctp, tsn);
1714 		}
1715 	}
1716 
1717 done:
1718 
1719 	/*
1720 	 * If there are gap reports pending, check if advancing
1721 	 * the ftsn here closes a gap. If so, we can advance
1722 	 * ftsn to the end of the set.
1723 	 */
1724 	if (sctp->sctp_sack_info != NULL &&
1725 	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
1726 		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
1727 	}
1728 	/*
1729 	 * If ftsn has moved forward, maybe we can remove gap reports.
1730 	 * NB: dmp may now be NULL, so don't dereference it here.
1731 	 */
1732 	if (oftsn != sctp->sctp_ftsn && sctp->sctp_sack_info != NULL) {
1733 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
1734 		    &sctp->sctp_sack_gaps);
1735 		dprint(2, ("data_chunk: removed acks before %x (num=%d)\n",
1736 		    sctp->sctp_ftsn - 1, sctp->sctp_sack_gaps));
1737 	}
1738 
1739 #ifdef	DEBUG
1740 	if (sctp->sctp_sack_info != NULL) {
1741 		ASSERT(sctp->sctp_ftsn != sctp->sctp_sack_info->begin);
1742 	}
1743 #endif
1744 
1745 #undef	SCTP_ACK_IT
1746 }
1747 
1748 void
1749 sctp_fill_sack(sctp_t *sctp, unsigned char *dst, int sacklen)
1750 {
1751 	sctp_chunk_hdr_t *sch;
1752 	sctp_sack_chunk_t *sc;
1753 	sctp_sack_frag_t *sf;
1754 	uint16_t num_gaps = sctp->sctp_sack_gaps;
1755 	sctp_set_t *sp;
1756 
1757 	/* Chunk hdr */
1758 	sch = (sctp_chunk_hdr_t *)dst;
1759 	sch->sch_id = CHUNK_SACK;
1760 	sch->sch_flags = 0;
1761 	sch->sch_len = htons(sacklen);
1762 
1763 	/* SACK chunk */
1764 	sctp->sctp_lastacked = sctp->sctp_ftsn - 1;
1765 
1766 	sc = (sctp_sack_chunk_t *)(sch + 1);
1767 	sc->ssc_cumtsn = htonl(sctp->sctp_lastacked);
1768 	if (sctp->sctp_rxqueued < sctp->sctp_rwnd) {
1769 		sc->ssc_a_rwnd = htonl(sctp->sctp_rwnd - sctp->sctp_rxqueued);
1770 	} else {
1771 		sc->ssc_a_rwnd = 0;
1772 	}
1773 	/* Remember the last window sent to peer. */
1774 	sctp->sctp_arwnd = sc->ssc_a_rwnd;
1775 	sc->ssc_numfrags = htons(num_gaps);
1776 	sc->ssc_numdups = 0;
1777 
1778 	/* lay in gap reports */
1779 	sf = (sctp_sack_frag_t *)(sc + 1);
1780 	for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1781 		uint16_t offset;
1782 
1783 		/* start */
1784 		if (sp->begin > sctp->sctp_lastacked) {
1785 			offset = (uint16_t)(sp->begin - sctp->sctp_lastacked);
1786 		} else {
1787 			/* sequence number wrap */
1788 			offset = (uint16_t)(UINT32_MAX - sctp->sctp_lastacked +
1789 			    sp->begin);
1790 		}
1791 		sf->ssf_start = htons(offset);
1792 
1793 		/* end */
1794 		if (sp->end >= sp->begin) {
1795 			offset += (uint16_t)(sp->end - sp->begin);
1796 		} else {
1797 			/* sequence number wrap */
1798 			offset += (uint16_t)(UINT32_MAX - sp->begin + sp->end);
1799 		}
1800 		sf->ssf_end = htons(offset);
1801 
1802 		sf++;
1803 		/* This is just for debugging (a la the following assertion) */
1804 		num_gaps--;
1805 	}
1806 
1807 	ASSERT(num_gaps == 0);
1808 
1809 	/* If the SACK timer is running, stop it */
1810 	if (sctp->sctp_ack_timer_running) {
1811 		sctp_timer_stop(sctp->sctp_ack_mp);
1812 		sctp->sctp_ack_timer_running = B_FALSE;
1813 	}
1814 
1815 	BUMP_LOCAL(sctp->sctp_obchunks);
1816 	BUMP_LOCAL(sctp->sctp_osacks);
1817 }
1818 
1819 mblk_t *
1820 sctp_make_sack(sctp_t *sctp, sctp_faddr_t *sendto, mblk_t *dups)
1821 {
1822 	mblk_t *smp;
1823 	size_t slen;
1824 	sctp_chunk_hdr_t *sch;
1825 	sctp_sack_chunk_t *sc;
1826 	int32_t acks_max;
1827 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1828 	uint32_t	dups_len;
1829 	sctp_faddr_t	*fp;
1830 
1831 	ASSERT(sendto != NULL);
1832 
1833 	if (sctp->sctp_force_sack) {
1834 		sctp->sctp_force_sack = 0;
1835 		goto checks_done;
1836 	}
1837 
1838 	acks_max = sctps->sctps_deferred_acks_max;
1839 	if (sctp->sctp_state == SCTPS_ESTABLISHED) {
1840 		if (sctp->sctp_sack_toggle < acks_max) {
1841 			/* no need to SACK right now */
1842 			dprint(2, ("sctp_make_sack: %p no sack (toggle)\n",
1843 			    (void *)sctp));
1844 			return (NULL);
1845 		} else if (sctp->sctp_sack_toggle >= acks_max) {
1846 			sctp->sctp_sack_toggle = 0;
1847 		}
1848 	}
1849 
1850 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1851 		dprint(2, ("sctp_make_sack: %p no sack (already)\n",
1852 		    (void *)sctp));
1853 		return (NULL);
1854 	}
1855 
1856 checks_done:
1857 	dprint(2, ("sctp_make_sack: acking %x\n", sctp->sctp_ftsn - 1));
1858 
1859 	if (dups != NULL)
1860 		dups_len = MBLKL(dups);
1861 	else
1862 		dups_len = 0;
1863 	slen = sizeof (*sch) + sizeof (*sc) +
1864 	    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1865 
1866 	/*
1867 	 * If there are error chunks, check and see if we can send the
1868 	 * SACK chunk and error chunks together in one packet.  If not,
1869 	 * send the error chunks out now.
1870 	 */
1871 	if (sctp->sctp_err_chunks != NULL) {
1872 		fp = SCTP_CHUNK_DEST(sctp->sctp_err_chunks);
1873 		if (sctp->sctp_err_len + slen + dups_len > fp->sf_pmss) {
1874 			if ((smp = sctp_make_mp(sctp, fp, 0)) == NULL) {
1875 				SCTP_KSTAT(sctps, sctp_send_err_failed);
1876 				SCTP_KSTAT(sctps, sctp_send_sack_failed);
1877 				freemsg(sctp->sctp_err_chunks);
1878 				sctp->sctp_err_chunks = NULL;
1879 				sctp->sctp_err_len = 0;
1880 				return (NULL);
1881 			}
1882 			smp->b_cont = sctp->sctp_err_chunks;
1883 			sctp_set_iplen(sctp, smp, fp->sf_ixa);
1884 			(void) conn_ip_output(smp, fp->sf_ixa);
1885 			BUMP_LOCAL(sctp->sctp_opkts);
1886 			sctp->sctp_err_chunks = NULL;
1887 			sctp->sctp_err_len = 0;
1888 		}
1889 	}
1890 	smp = sctp_make_mp(sctp, sendto, slen);
1891 	if (smp == NULL) {
1892 		SCTP_KSTAT(sctps, sctp_send_sack_failed);
1893 		return (NULL);
1894 	}
1895 	sch = (sctp_chunk_hdr_t *)smp->b_wptr;
1896 
1897 	sctp_fill_sack(sctp, smp->b_wptr, slen);
1898 	smp->b_wptr += slen;
1899 	if (dups != NULL) {
1900 		sc = (sctp_sack_chunk_t *)(sch + 1);
1901 		sc->ssc_numdups = htons(MBLKL(dups) / sizeof (uint32_t));
1902 		sch->sch_len = htons(slen + dups_len);
1903 		smp->b_cont = dups;
1904 	}
1905 
1906 	if (sctp->sctp_err_chunks != NULL) {
1907 		linkb(smp, sctp->sctp_err_chunks);
1908 		sctp->sctp_err_chunks = NULL;
1909 		sctp->sctp_err_len = 0;
1910 	}
1911 	return (smp);
1912 }
1913 
1914 /*
1915  * Check and see if we need to send a SACK chunk.  If it is needed,
1916  * send it out.  Return true if a SACK chunk is sent, false otherwise.
1917  */
1918 boolean_t
1919 sctp_sack(sctp_t *sctp, mblk_t *dups)
1920 {
1921 	mblk_t *smp;
1922 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1923 
1924 	/* If we are shutting down, let send_shutdown() bundle the SACK */
1925 	if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
1926 		sctp_send_shutdown(sctp, 0);
1927 	}
1928 
1929 	ASSERT(sctp->sctp_lastdata != NULL);
1930 
1931 	if ((smp = sctp_make_sack(sctp, sctp->sctp_lastdata, dups)) == NULL) {
1932 		/* The caller of sctp_sack() will not free the dups mblk. */
1933 		if (dups != NULL)
1934 			freeb(dups);
1935 		return (B_FALSE);
1936 	}
1937 	dprint(2, ("sctp_sack: sending to %p %x:%x:%x:%x\n",
1938 	    (void *)sctp->sctp_lastdata,
1939 	    SCTP_PRINTADDR(sctp->sctp_lastdata->sf_faddr)));
1940 
1941 	sctp->sctp_active = LBOLT_FASTPATH64;
1942 
1943 	SCTPS_BUMP_MIB(sctps, sctpOutAck);
1944 
1945 	sctp_set_iplen(sctp, smp, sctp->sctp_lastdata->sf_ixa);
1946 	(void) conn_ip_output(smp, sctp->sctp_lastdata->sf_ixa);
1947 	BUMP_LOCAL(sctp->sctp_opkts);
1948 	return (B_TRUE);
1949 }
1950 
1951 /*
1952  * This is called if we have a message that was partially sent and is
1953  * abandoned. The cum TSN will be the last chunk sent for this message,
1954  * subsequent chunks will be marked ABANDONED. We send a Forward TSN
1955  * chunk in this case with the TSN of the last sent chunk so that the
1956  * peer can clean up its fragment list for this message. This message
1957  * will be removed from the transmit list when the peer sends a SACK
1958  * back.
1959  */
1960 int
1961 sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta)
1962 {
1963 	sctp_data_hdr_t	*dh;
1964 	mblk_t		*nmp;
1965 	mblk_t		*head;
1966 	int32_t		unsent = 0;
1967 	mblk_t		*mp1 = meta->b_cont;
1968 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1969 	sctp_faddr_t	*fp = sctp->sctp_current;
1970 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1971 
1972 	dh = (sctp_data_hdr_t *)mp1->b_rptr;
1973 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, ntohl(dh->sdh_tsn))) {
1974 		sctp_ftsn_set_t	*sets = NULL;
1975 		uint_t		nsets = 0;
1976 		uint32_t	seglen = sizeof (uint32_t);
1977 		boolean_t	ubit = SCTP_DATA_GET_UBIT(dh);
1978 
1979 		while (mp1->b_next != NULL && SCTP_CHUNK_ISSENT(mp1->b_next))
1980 			mp1 = mp1->b_next;
1981 		dh = (sctp_data_hdr_t *)mp1->b_rptr;
1982 		sctp->sctp_adv_pap = ntohl(dh->sdh_tsn);
1983 		if (!ubit &&
1984 		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, &seglen)) {
1985 			sctp->sctp_adv_pap = adv_pap;
1986 			return (ENOMEM);
1987 		}
1988 		nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, seglen);
1989 		sctp_free_ftsn_set(sets);
1990 		if (nmp == NULL) {
1991 			sctp->sctp_adv_pap = adv_pap;
1992 			return (ENOMEM);
1993 		}
1994 		head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
1995 		if (head == NULL) {
1996 			sctp->sctp_adv_pap = adv_pap;
1997 			freemsg(nmp);
1998 			SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
1999 			return (ENOMEM);
2000 		}
2001 		SCTP_MSG_SET_ABANDONED(meta);
2002 		sctp_set_iplen(sctp, head, fp->sf_ixa);
2003 		(void) conn_ip_output(head, fp->sf_ixa);
2004 		BUMP_LOCAL(sctp->sctp_opkts);
2005 		if (!fp->sf_timer_running)
2006 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2007 		mp1 = mp1->b_next;
2008 		while (mp1 != NULL) {
2009 			ASSERT(!SCTP_CHUNK_ISSENT(mp1));
2010 			ASSERT(!SCTP_CHUNK_ABANDONED(mp1));
2011 			SCTP_ABANDON_CHUNK(mp1);
2012 			dh = (sctp_data_hdr_t *)mp1->b_rptr;
2013 			unsent += ntohs(dh->sdh_len) - sizeof (*dh);
2014 			mp1 = mp1->b_next;
2015 		}
2016 		ASSERT(sctp->sctp_unsent >= unsent);
2017 		sctp->sctp_unsent -= unsent;
2018 		/*
2019 		 * Update ULP the amount of queued data, which is
2020 		 * sent-unack'ed + unsent.
2021 		 */
2022 		if (!SCTP_IS_DETACHED(sctp))
2023 			SCTP_TXQ_UPDATE(sctp);
2024 		return (0);
2025 	}
2026 	return (-1);
2027 }
2028 
2029 uint32_t
2030 sctp_cumack(sctp_t *sctp, uint32_t tsn, mblk_t **first_unacked)
2031 {
2032 	mblk_t *ump, *nump, *mp = NULL;
2033 	uint16_t chunklen;
2034 	uint32_t xtsn;
2035 	sctp_faddr_t *fp;
2036 	sctp_data_hdr_t *sdc;
2037 	uint32_t cumack_forward = 0;
2038 	sctp_msg_hdr_t	*mhdr;
2039 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2040 
2041 	ump = sctp->sctp_xmit_head;
2042 
2043 	/*
2044 	 * Free messages only when they're completely acked.
2045 	 */
2046 	while (ump != NULL) {
2047 		mhdr = (sctp_msg_hdr_t *)ump->b_rptr;
2048 		for (mp = ump->b_cont; mp != NULL; mp = mp->b_next) {
2049 			if (SCTP_CHUNK_ABANDONED(mp)) {
2050 				ASSERT(SCTP_IS_MSG_ABANDONED(ump));
2051 				mp = NULL;
2052 				break;
2053 			}
2054 			/*
2055 			 * We check for abandoned message if we are PR-SCTP
2056 			 * aware, if this is not the first chunk in the
2057 			 * message (b_cont) and if the message is marked
2058 			 * abandoned.
2059 			 */
2060 			if (!SCTP_CHUNK_ISSENT(mp)) {
2061 				if (sctp->sctp_prsctp_aware &&
2062 				    mp != ump->b_cont &&
2063 				    (SCTP_IS_MSG_ABANDONED(ump) ||
2064 				    SCTP_MSG_TO_BE_ABANDONED(ump, mhdr,
2065 				    sctp))) {
2066 					(void) sctp_check_abandoned_msg(sctp,
2067 					    ump);
2068 				}
2069 				goto cum_ack_done;
2070 			}
2071 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2072 			xtsn = ntohl(sdc->sdh_tsn);
2073 			if (SEQ_GEQ(sctp->sctp_lastack_rxd, xtsn))
2074 				continue;
2075 			if (SEQ_GEQ(tsn, xtsn)) {
2076 				fp = SCTP_CHUNK_DEST(mp);
2077 				chunklen = ntohs(sdc->sdh_len);
2078 
2079 				if (sctp->sctp_out_time != 0 &&
2080 				    xtsn == sctp->sctp_rtt_tsn) {
2081 					/* Got a new RTT measurement */
2082 					sctp_update_rtt(sctp, fp,
2083 					    ddi_get_lbolt64() -
2084 					    sctp->sctp_out_time);
2085 					sctp->sctp_out_time = 0;
2086 				}
2087 				if (SCTP_CHUNK_ISACKED(mp))
2088 					continue;
2089 				SCTP_CHUNK_SET_SACKCNT(mp, 0);
2090 				SCTP_CHUNK_ACKED(mp);
2091 				ASSERT(fp->sf_suna >= chunklen);
2092 				fp->sf_suna -= chunklen;
2093 				fp->sf_acked += chunklen;
2094 				cumack_forward += chunklen;
2095 				ASSERT(sctp->sctp_unacked >=
2096 				    (chunklen - sizeof (*sdc)));
2097 				sctp->sctp_unacked -=
2098 				    (chunklen - sizeof (*sdc));
2099 				if (fp->sf_suna == 0) {
2100 					/* all outstanding data acked */
2101 					fp->sf_pba = 0;
2102 					SCTP_FADDR_TIMER_STOP(fp);
2103 				} else {
2104 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
2105 					    fp->sf_rto);
2106 				}
2107 			} else {
2108 				goto cum_ack_done;
2109 			}
2110 		}
2111 		nump = ump->b_next;
2112 		if (nump != NULL)
2113 			nump->b_prev = NULL;
2114 		if (ump == sctp->sctp_xmit_tail)
2115 			sctp->sctp_xmit_tail = nump;
2116 		if (SCTP_IS_MSG_ABANDONED(ump)) {
2117 			BUMP_LOCAL(sctp->sctp_prsctpdrop);
2118 			ump->b_next = NULL;
2119 			sctp_sendfail_event(sctp, ump, 0, B_TRUE);
2120 		} else {
2121 			sctp_free_msg(ump);
2122 		}
2123 		sctp->sctp_xmit_head = ump = nump;
2124 	}
2125 cum_ack_done:
2126 	*first_unacked = mp;
2127 	if (cumack_forward > 0) {
2128 		SCTPS_BUMP_MIB(sctps, sctpInAck);
2129 		if (SEQ_GT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn)) {
2130 			sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd;
2131 		}
2132 
2133 		/*
2134 		 * Update ULP the amount of queued data, which is
2135 		 * sent-unack'ed + unsent.
2136 		 */
2137 		if (!SCTP_IS_DETACHED(sctp))
2138 			SCTP_TXQ_UPDATE(sctp);
2139 
2140 		/* Time to send a shutdown? */
2141 		if (sctp->sctp_state == SCTPS_SHUTDOWN_PENDING) {
2142 			sctp_send_shutdown(sctp, 0);
2143 		}
2144 		sctp->sctp_xmit_unacked = mp;
2145 	} else {
2146 		/* dup ack */
2147 		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
2148 	}
2149 	sctp->sctp_lastack_rxd = tsn;
2150 	if (SEQ_LT(sctp->sctp_adv_pap, sctp->sctp_lastack_rxd))
2151 		sctp->sctp_adv_pap = sctp->sctp_lastack_rxd;
2152 	ASSERT(sctp->sctp_xmit_head || sctp->sctp_unacked == 0);
2153 
2154 	return (cumack_forward);
2155 }
2156 
2157 static int
2158 sctp_set_frwnd(sctp_t *sctp, uint32_t frwnd)
2159 {
2160 	uint32_t orwnd;
2161 
2162 	if (sctp->sctp_unacked > frwnd) {
2163 		sctp->sctp_frwnd = 0;
2164 		return (0);
2165 	}
2166 	orwnd = sctp->sctp_frwnd;
2167 	sctp->sctp_frwnd = frwnd - sctp->sctp_unacked;
2168 	if (orwnd < sctp->sctp_frwnd) {
2169 		return (1);
2170 	} else {
2171 		return (0);
2172 	}
2173 }
2174 
2175 /*
2176  * For un-ordered messages.
2177  * Walk the sctp->sctp_uo_frag list and remove any fragments with TSN
2178  * less than/equal to ftsn. Fragments for un-ordered messages are
2179  * strictly in sequence (w.r.t TSN).
2180  */
2181 static int
2182 sctp_ftsn_check_uo_frag(sctp_t *sctp, uint32_t ftsn)
2183 {
2184 	mblk_t		*hmp;
2185 	mblk_t		*hmp_next;
2186 	sctp_data_hdr_t	*dc;
2187 	int		dlen = 0;
2188 
2189 	hmp = sctp->sctp_uo_frags;
2190 	while (hmp != NULL) {
2191 		hmp_next = hmp->b_next;
2192 		dc = (sctp_data_hdr_t *)hmp->b_rptr;
2193 		if (SEQ_GT(ntohl(dc->sdh_tsn), ftsn))
2194 			return (dlen);
2195 		sctp->sctp_uo_frags = hmp_next;
2196 		if (hmp_next != NULL)
2197 			hmp_next->b_prev = NULL;
2198 		hmp->b_next = NULL;
2199 		dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2200 		freeb(hmp);
2201 		hmp = hmp_next;
2202 	}
2203 	return (dlen);
2204 }
2205 
2206 /*
2207  * For ordered messages.
2208  * Check for existing fragments for an sid-ssn pair reported as abandoned,
2209  * hence will not receive, in the Forward TSN. If there are fragments, then
2210  * we just nuke them. If and when Partial Delivery API is supported, we
2211  * would need to send a notification to the upper layer about this.
2212  */
2213 static int
2214 sctp_ftsn_check_frag(sctp_t *sctp, uint16_t ssn, sctp_instr_t *sip)
2215 {
2216 	sctp_reass_t	*srp;
2217 	mblk_t		*hmp;
2218 	mblk_t		*dmp;
2219 	mblk_t		*hmp_next;
2220 	sctp_data_hdr_t	*dc;
2221 	int		dlen = 0;
2222 
2223 	hmp = sip->istr_reass;
2224 	while (hmp != NULL) {
2225 		hmp_next = hmp->b_next;
2226 		srp = (sctp_reass_t *)DB_BASE(hmp);
2227 		if (SSN_GT(srp->sr_ssn, ssn))
2228 			return (dlen);
2229 		/*
2230 		 * If we had sent part of this message up, send a partial
2231 		 * delivery event. Since this is ordered delivery, we should
2232 		 * have sent partial message only for the next in sequence,
2233 		 * hence the ASSERT. See comments in sctp_data_chunk() for
2234 		 * trypartial.
2235 		 */
2236 		if (srp->sr_partial_delivered) {
2237 			if (srp->sr_ssn != sip->nextseq)
2238 				cmn_err(CE_WARN, "sctp partial"
2239 				    " delivery notify, sctp 0x%p"
2240 				    " sip = 0x%p ssn != nextseq"
2241 				    " ssn 0x%x nextseq 0x%x",
2242 				    (void *)sctp, (void *)sip,
2243 				    srp->sr_ssn, sip->nextseq);
2244 			ASSERT(sip->nextseq == srp->sr_ssn);
2245 			sctp_partial_delivery_event(sctp);
2246 		}
2247 		/* Take it out of the reass queue */
2248 		sip->istr_reass = hmp_next;
2249 		if (hmp_next != NULL)
2250 			hmp_next->b_prev = NULL;
2251 		hmp->b_next = NULL;
2252 		ASSERT(hmp->b_prev == NULL);
2253 		dmp = hmp;
2254 		ASSERT(DB_TYPE(hmp) == M_CTL);
2255 		dmp = hmp->b_cont;
2256 		hmp->b_cont = NULL;
2257 		freeb(hmp);
2258 		hmp = dmp;
2259 		while (dmp != NULL) {
2260 			dc = (sctp_data_hdr_t *)dmp->b_rptr;
2261 			dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2262 			dmp = dmp->b_cont;
2263 		}
2264 		freemsg(hmp);
2265 		hmp = hmp_next;
2266 	}
2267 	return (dlen);
2268 }
2269 
2270 /*
2271  * Update sctp_ftsn to the cumulative TSN from the Forward TSN chunk. Remove
2272  * any SACK gaps less than the newly updated sctp_ftsn. Walk through the
2273  * sid-ssn pair in the Forward TSN and for each, clean the fragment list
2274  * for this pair, if needed, and check if we can deliver subsequent
2275  * messages, if any, from the instream queue (that were waiting for this
2276  * sid-ssn message to show up). Once we are done try to update the SACK
2277  * info. We could get a duplicate Forward TSN, in which case just send
2278  * a SACK. If any of the sid values in the Forward TSN is invalid,
2279  * send back an "Invalid Stream Identifier" error and continue processing
2280  * the rest.
2281  */
2282 static void
2283 sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp,
2284     ip_pkt_t *ipp, ip_recv_attr_t *ira)
2285 {
2286 	uint32_t	*ftsn = (uint32_t *)(ch + 1);
2287 	ftsn_entry_t	*ftsn_entry;
2288 	sctp_instr_t	*instr;
2289 	boolean_t	can_deliver = B_TRUE;
2290 	size_t		dlen;
2291 	int		flen;
2292 	mblk_t		*dmp;
2293 	mblk_t		*pmp;
2294 	sctp_data_hdr_t	*dc;
2295 	ssize_t		remaining;
2296 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2297 
2298 	*ftsn = ntohl(*ftsn);
2299 	remaining =  ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*ftsn);
2300 
2301 	if (SCTP_IS_DETACHED(sctp)) {
2302 		SCTPS_BUMP_MIB(sctps, sctpInClosed);
2303 		can_deliver = B_FALSE;
2304 	}
2305 	/*
2306 	 * un-ordered messages don't have SID-SSN pair entries, we check
2307 	 * for any fragments (for un-ordered message) to be discarded using
2308 	 * the cumulative FTSN.
2309 	 */
2310 	flen = sctp_ftsn_check_uo_frag(sctp, *ftsn);
2311 	if (flen > 0) {
2312 		ASSERT(sctp->sctp_rxqueued >= flen);
2313 		sctp->sctp_rxqueued -= flen;
2314 	}
2315 	ftsn_entry = (ftsn_entry_t *)(ftsn + 1);
2316 	while (remaining >= sizeof (*ftsn_entry)) {
2317 		ftsn_entry->ftsn_sid = ntohs(ftsn_entry->ftsn_sid);
2318 		ftsn_entry->ftsn_ssn = ntohs(ftsn_entry->ftsn_ssn);
2319 		if (ftsn_entry->ftsn_sid >= sctp->sctp_num_istr) {
2320 			sctp_bsc_t	inval_parm;
2321 
2322 			/* Will populate the CAUSE block in the ERROR chunk. */
2323 			inval_parm.bsc_sid = htons(ftsn_entry->ftsn_sid);
2324 			/* RESERVED, ignored at the receiving end */
2325 			inval_parm.bsc_pad = 0;
2326 
2327 			sctp_add_err(sctp, SCTP_ERR_BAD_SID,
2328 			    (void *)&inval_parm, sizeof (sctp_bsc_t), fp);
2329 			ftsn_entry++;
2330 			remaining -= sizeof (*ftsn_entry);
2331 			continue;
2332 		}
2333 		instr = &sctp->sctp_instr[ftsn_entry->ftsn_sid];
2334 		flen = sctp_ftsn_check_frag(sctp, ftsn_entry->ftsn_ssn, instr);
2335 		/* Indicates frags were nuked, update rxqueued */
2336 		if (flen > 0) {
2337 			ASSERT(sctp->sctp_rxqueued >= flen);
2338 			sctp->sctp_rxqueued -= flen;
2339 		}
2340 		/*
2341 		 * It is possible to receive an FTSN chunk with SSN smaller
2342 		 * than then nextseq if this chunk is a retransmission because
2343 		 * of incomplete processing when it was first processed.
2344 		 */
2345 		if (SSN_GE(ftsn_entry->ftsn_ssn, instr->nextseq))
2346 			instr->nextseq = ftsn_entry->ftsn_ssn + 1;
2347 		while (instr->istr_nmsgs > 0) {
2348 			mblk_t	*next;
2349 
2350 			dmp = (mblk_t *)instr->istr_msgs;
2351 			dc = (sctp_data_hdr_t *)dmp->b_rptr;
2352 			if (ntohs(dc->sdh_ssn) != instr->nextseq)
2353 				break;
2354 
2355 			next = dmp->b_next;
2356 			dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
2357 			for (pmp = dmp->b_cont; pmp != NULL;
2358 			    pmp = pmp->b_cont) {
2359 				dlen += MBLKL(pmp);
2360 			}
2361 			if (can_deliver) {
2362 				int error;
2363 
2364 				dmp->b_rptr = (uchar_t *)(dc + 1);
2365 				dmp->b_next = NULL;
2366 				ASSERT(dmp->b_prev == NULL);
2367 				if (sctp_input_add_ancillary(sctp,
2368 				    &dmp, dc, fp, ipp, ira) == 0) {
2369 					sctp->sctp_rxqueued -= dlen;
2370 					/*
2371 					 * Override b_flag for SCTP sockfs
2372 					 * internal use
2373 					 */
2374 
2375 					dmp->b_flag = 0;
2376 					if (sctp->sctp_flowctrld) {
2377 						sctp->sctp_rwnd -= dlen;
2378 						if (sctp->sctp_rwnd < 0)
2379 							sctp->sctp_rwnd = 0;
2380 					}
2381 					if (sctp->sctp_ulp_recv(
2382 					    sctp->sctp_ulpd, dmp, msgdsize(dmp),
2383 					    0, &error, NULL) <= 0) {
2384 						sctp->sctp_flowctrld = B_TRUE;
2385 					}
2386 				} else {
2387 					/*
2388 					 * We will resume processing when
2389 					 * the FTSN chunk is re-xmitted.
2390 					 */
2391 					dmp->b_rptr = (uchar_t *)dc;
2392 					dmp->b_next = next;
2393 					dprint(0,
2394 					    ("FTSN dequeuing %u failed\n",
2395 					    ntohs(dc->sdh_ssn)));
2396 					return;
2397 				}
2398 			} else {
2399 				sctp->sctp_rxqueued -= dlen;
2400 				ASSERT(dmp->b_prev == NULL);
2401 				dmp->b_next = NULL;
2402 				freemsg(dmp);
2403 			}
2404 			instr->istr_nmsgs--;
2405 			instr->nextseq++;
2406 			sctp->sctp_istr_nmsgs--;
2407 			if (next != NULL)
2408 				next->b_prev = NULL;
2409 			instr->istr_msgs = next;
2410 		}
2411 		ftsn_entry++;
2412 		remaining -= sizeof (*ftsn_entry);
2413 	}
2414 	/* Duplicate FTSN */
2415 	if (*ftsn <= (sctp->sctp_ftsn - 1)) {
2416 		sctp->sctp_force_sack = 1;
2417 		return;
2418 	}
2419 	/* Advance cum TSN to that reported in the Forward TSN chunk */
2420 	sctp->sctp_ftsn = *ftsn + 1;
2421 
2422 	/* Remove all the SACK gaps before the new cum TSN */
2423 	if (sctp->sctp_sack_info != NULL) {
2424 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
2425 		    &sctp->sctp_sack_gaps);
2426 	}
2427 	/*
2428 	 * If there are gap reports pending, check if advancing
2429 	 * the ftsn here closes a gap. If so, we can advance
2430 	 * ftsn to the end of the set.
2431 	 * If ftsn has moved forward, maybe we can remove gap reports.
2432 	 */
2433 	if (sctp->sctp_sack_info != NULL &&
2434 	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
2435 		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
2436 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
2437 		    &sctp->sctp_sack_gaps);
2438 	}
2439 }
2440 
2441 /*
2442  * When we have processed a SACK we check to see if we can advance the
2443  * cumulative TSN if there are abandoned chunks immediately following
2444  * the updated cumulative TSN. If there are, we attempt to send a
2445  * Forward TSN chunk.
2446  */
2447 static void
2448 sctp_check_abandoned_data(sctp_t *sctp, sctp_faddr_t *fp)
2449 {
2450 	mblk_t		*meta = sctp->sctp_xmit_head;
2451 	mblk_t		*mp;
2452 	mblk_t		*nmp;
2453 	uint32_t	seglen;
2454 	uint32_t	adv_pap = sctp->sctp_adv_pap;
2455 
2456 	/*
2457 	 * We only check in the first meta since otherwise we can't
2458 	 * advance the cumulative ack point. We just look for chunks
2459 	 * marked for retransmission, else we might prematurely
2460 	 * send an FTSN for a sent, but unacked, chunk.
2461 	 */
2462 	for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2463 		if (!SCTP_CHUNK_ISSENT(mp))
2464 			return;
2465 		if (SCTP_CHUNK_WANT_REXMIT(mp))
2466 			break;
2467 	}
2468 	if (mp == NULL)
2469 		return;
2470 	sctp_check_adv_ack_pt(sctp, meta, mp);
2471 	if (SEQ_GT(sctp->sctp_adv_pap, adv_pap)) {
2472 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
2473 		if (nmp == NULL) {
2474 			sctp->sctp_adv_pap = adv_pap;
2475 			if (!fp->sf_timer_running)
2476 				SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2477 			return;
2478 		}
2479 		sctp_set_iplen(sctp, nmp, fp->sf_ixa);
2480 		(void) conn_ip_output(nmp, fp->sf_ixa);
2481 		BUMP_LOCAL(sctp->sctp_opkts);
2482 		if (!fp->sf_timer_running)
2483 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2484 	}
2485 }
2486 
2487 /*
2488  * The processing here follows the same logic in sctp_got_sack(), the reason
2489  * we do this separately is because, usually, gap blocks are ordered and
2490  * we can process it in sctp_got_sack(). However if they aren't we would
2491  * need to do some additional non-optimal stuff when we start processing the
2492  * unordered gaps. To that effect sctp_got_sack() does the processing in the
2493  * simple case and this does the same in the more involved case.
2494  */
2495 static uint32_t
2496 sctp_process_uo_gaps(sctp_t *sctp, uint32_t ctsn, sctp_sack_frag_t *ssf,
2497     int num_gaps, mblk_t *umphead, mblk_t *mphead, int *trysend,
2498     boolean_t *fast_recovery, uint32_t fr_xtsn)
2499 {
2500 	uint32_t		xtsn;
2501 	uint32_t		gapstart = 0;
2502 	uint32_t		gapend = 0;
2503 	int			gapcnt;
2504 	uint16_t		chunklen;
2505 	sctp_data_hdr_t		*sdc;
2506 	int			gstart;
2507 	mblk_t			*ump = umphead;
2508 	mblk_t			*mp = mphead;
2509 	sctp_faddr_t		*fp;
2510 	uint32_t		acked = 0;
2511 	sctp_stack_t		*sctps = sctp->sctp_sctps;
2512 
2513 	/*
2514 	 * gstart tracks the last (in the order of TSN) gapstart that
2515 	 * we process in this SACK gaps walk.
2516 	 */
2517 	gstart = ctsn;
2518 
2519 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2520 	xtsn = ntohl(sdc->sdh_tsn);
2521 	for (gapcnt = 0; gapcnt < num_gaps; gapcnt++, ssf++) {
2522 		if (gapstart != 0) {
2523 			/*
2524 			 * If we have reached the end of the transmit list or
2525 			 * hit an unsent chunk or encountered an unordered gap
2526 			 * block start from the ctsn again.
2527 			 */
2528 			if (ump == NULL || !SCTP_CHUNK_ISSENT(mp) ||
2529 			    SEQ_LT(ctsn + ntohs(ssf->ssf_start), xtsn)) {
2530 				ump = umphead;
2531 				mp = mphead;
2532 				sdc = (sctp_data_hdr_t *)mp->b_rptr;
2533 				xtsn = ntohl(sdc->sdh_tsn);
2534 			}
2535 		}
2536 
2537 		gapstart = ctsn + ntohs(ssf->ssf_start);
2538 		gapend = ctsn + ntohs(ssf->ssf_end);
2539 
2540 		/*
2541 		 * Sanity checks:
2542 		 *
2543 		 * 1. SACK for TSN we have not sent - ABORT
2544 		 * 2. Invalid or spurious gaps, ignore all gaps
2545 		 */
2546 		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
2547 		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
2548 			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2549 			*trysend = -1;
2550 			return (acked);
2551 		} else if (SEQ_LT(gapend, gapstart) ||
2552 		    SEQ_LEQ(gapstart, ctsn)) {
2553 			break;
2554 		}
2555 		/*
2556 		 * The xtsn can be the TSN processed for the last gap
2557 		 * (gapend) or it could be the cumulative TSN. We continue
2558 		 * with the last xtsn as long as the gaps are ordered, when
2559 		 * we hit an unordered gap, we re-start from the cumulative
2560 		 * TSN. For the first gap it is always the cumulative TSN.
2561 		 */
2562 		while (xtsn != gapstart) {
2563 			/*
2564 			 * We can't reliably check for reneged chunks
2565 			 * when walking the unordered list, so we don't.
2566 			 * In case the peer reneges then we will end up
2567 			 * sending the reneged chunk via timeout.
2568 			 */
2569 			mp = mp->b_next;
2570 			if (mp == NULL) {
2571 				ump = ump->b_next;
2572 				/*
2573 				 * ump can't be NULL because of the sanity
2574 				 * check above.
2575 				 */
2576 				ASSERT(ump != NULL);
2577 				mp = ump->b_cont;
2578 			}
2579 			/*
2580 			 * mp can't be unsent because of the sanity check
2581 			 * above.
2582 			 */
2583 			ASSERT(SCTP_CHUNK_ISSENT(mp));
2584 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2585 			xtsn = ntohl(sdc->sdh_tsn);
2586 		}
2587 		/*
2588 		 * Now that we have found the chunk with TSN == 'gapstart',
2589 		 * let's walk till we hit the chunk with TSN == 'gapend'.
2590 		 * All intermediate chunks will be marked ACKED, if they
2591 		 * haven't already been.
2592 		 */
2593 		while (SEQ_LEQ(xtsn, gapend)) {
2594 			/*
2595 			 * SACKed
2596 			 */
2597 			SCTP_CHUNK_SET_SACKCNT(mp, 0);
2598 			if (!SCTP_CHUNK_ISACKED(mp)) {
2599 				SCTP_CHUNK_ACKED(mp);
2600 
2601 				fp = SCTP_CHUNK_DEST(mp);
2602 				chunklen = ntohs(sdc->sdh_len);
2603 				ASSERT(fp->sf_suna >= chunklen);
2604 				fp->sf_suna -= chunklen;
2605 				if (fp->sf_suna == 0) {
2606 					/* All outstanding data acked. */
2607 					fp->sf_pba = 0;
2608 					SCTP_FADDR_TIMER_STOP(fp);
2609 				}
2610 				fp->sf_acked += chunklen;
2611 				acked += chunklen;
2612 				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
2613 				ASSERT(sctp->sctp_unacked >= 0);
2614 			}
2615 			/*
2616 			 * Move to the next message in the transmit list
2617 			 * if we are done with all the chunks from the current
2618 			 * message. Note, it is possible to hit the end of the
2619 			 * transmit list here, i.e. if we have already completed
2620 			 * processing the gap block.
2621 			 */
2622 			mp = mp->b_next;
2623 			if (mp == NULL) {
2624 				ump = ump->b_next;
2625 				if (ump == NULL) {
2626 					ASSERT(xtsn == gapend);
2627 					break;
2628 				}
2629 				mp = ump->b_cont;
2630 			}
2631 			/*
2632 			 * Likewise, we can hit an unsent chunk once we have
2633 			 * completed processing the gap block.
2634 			 */
2635 			if (!SCTP_CHUNK_ISSENT(mp)) {
2636 				ASSERT(xtsn == gapend);
2637 				break;
2638 			}
2639 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2640 			xtsn = ntohl(sdc->sdh_tsn);
2641 		}
2642 		/*
2643 		 * We keep track of the last gap we successfully processed
2644 		 * so that we can terminate the walk below for incrementing
2645 		 * the SACK count.
2646 		 */
2647 		if (SEQ_LT(gstart, gapstart))
2648 			gstart = gapstart;
2649 	}
2650 	/*
2651 	 * Check if have incremented the SACK count for all unacked TSNs in
2652 	 * sctp_got_sack(), if so we are done.
2653 	 */
2654 	if (SEQ_LEQ(gstart, fr_xtsn))
2655 		return (acked);
2656 
2657 	ump = umphead;
2658 	mp = mphead;
2659 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2660 	xtsn = ntohl(sdc->sdh_tsn);
2661 	while (SEQ_LT(xtsn, gstart)) {
2662 		/*
2663 		 * We have incremented SACK count for TSNs less than fr_tsn
2664 		 * in sctp_got_sack(), so don't increment them again here.
2665 		 */
2666 		if (SEQ_GT(xtsn, fr_xtsn) && !SCTP_CHUNK_ISACKED(mp)) {
2667 			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
2668 			if (SCTP_CHUNK_SACKCNT(mp) ==
2669 			    sctps->sctps_fast_rxt_thresh) {
2670 				SCTP_CHUNK_REXMIT(sctp, mp);
2671 				sctp->sctp_chk_fast_rexmit = B_TRUE;
2672 				*trysend = 1;
2673 				if (!*fast_recovery) {
2674 					/*
2675 					 * Entering fast recovery.
2676 					 */
2677 					fp = SCTP_CHUNK_DEST(mp);
2678 					fp->sf_ssthresh = fp->sf_cwnd / 2;
2679 					if (fp->sf_ssthresh < 2 * fp->sf_pmss) {
2680 						fp->sf_ssthresh =
2681 						    2 * fp->sf_pmss;
2682 					}
2683 					fp->sf_cwnd = fp->sf_ssthresh;
2684 					fp->sf_pba = 0;
2685 					sctp->sctp_recovery_tsn =
2686 					    sctp->sctp_ltsn - 1;
2687 					*fast_recovery = B_TRUE;
2688 				}
2689 			}
2690 		}
2691 		mp = mp->b_next;
2692 		if (mp == NULL) {
2693 			ump = ump->b_next;
2694 			/* We can't get to the end of the transmit list here */
2695 			ASSERT(ump != NULL);
2696 			mp = ump->b_cont;
2697 		}
2698 		/* We can't hit an unsent chunk here */
2699 		ASSERT(SCTP_CHUNK_ISSENT(mp));
2700 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
2701 		xtsn = ntohl(sdc->sdh_tsn);
2702 	}
2703 	return (acked);
2704 }
2705 
2706 static int
2707 sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch)
2708 {
2709 	sctp_sack_chunk_t	*sc;
2710 	sctp_data_hdr_t		*sdc;
2711 	sctp_sack_frag_t	*ssf;
2712 	mblk_t			*ump;
2713 	mblk_t			*mp;
2714 	mblk_t			*mp1;
2715 	uint32_t		cumtsn;
2716 	uint32_t		xtsn;
2717 	uint32_t		gapstart = 0;
2718 	uint32_t		gapend = 0;
2719 	uint32_t		acked = 0;
2720 	uint16_t		chunklen;
2721 	sctp_faddr_t		*fp;
2722 	int			num_gaps;
2723 	int			trysend = 0;
2724 	int			i;
2725 	boolean_t		fast_recovery = B_FALSE;
2726 	boolean_t		cumack_forward = B_FALSE;
2727 	boolean_t		fwd_tsn = B_FALSE;
2728 	sctp_stack_t		*sctps = sctp->sctp_sctps;
2729 
2730 	BUMP_LOCAL(sctp->sctp_ibchunks);
2731 	BUMP_LOCAL(sctp->sctp_isacks);
2732 	chunklen = ntohs(sch->sch_len);
2733 	if (chunklen < (sizeof (*sch) + sizeof (*sc)))
2734 		return (0);
2735 
2736 	sc = (sctp_sack_chunk_t *)(sch + 1);
2737 	cumtsn = ntohl(sc->ssc_cumtsn);
2738 
2739 	dprint(2, ("got sack cumtsn %x -> %x\n", sctp->sctp_lastack_rxd,
2740 	    cumtsn));
2741 
2742 	/* out of order */
2743 	if (SEQ_LT(cumtsn, sctp->sctp_lastack_rxd))
2744 		return (0);
2745 
2746 	if (SEQ_GT(cumtsn, sctp->sctp_ltsn - 1)) {
2747 		SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2748 		/* Send an ABORT */
2749 		return (-1);
2750 	}
2751 
2752 	/*
2753 	 * Cwnd only done when not in fast recovery mode.
2754 	 */
2755 	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn))
2756 		fast_recovery = B_TRUE;
2757 
2758 	/*
2759 	 * .. and if the cum TSN is not moving ahead on account Forward TSN
2760 	 */
2761 	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_adv_pap))
2762 		fwd_tsn = B_TRUE;
2763 
2764 	if (cumtsn == sctp->sctp_lastack_rxd &&
2765 	    (sctp->sctp_xmit_unacked == NULL ||
2766 	    !SCTP_CHUNK_ABANDONED(sctp->sctp_xmit_unacked))) {
2767 		if (sctp->sctp_xmit_unacked != NULL)
2768 			mp = sctp->sctp_xmit_unacked;
2769 		else if (sctp->sctp_xmit_head != NULL)
2770 			mp = sctp->sctp_xmit_head->b_cont;
2771 		else
2772 			mp = NULL;
2773 		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
2774 		/*
2775 		 * If we were doing a zero win probe and the win
2776 		 * has now opened to at least MSS, re-transmit the
2777 		 * zero win probe via sctp_rexmit_packet().
2778 		 */
2779 		if (mp != NULL && sctp->sctp_zero_win_probe &&
2780 		    ntohl(sc->ssc_a_rwnd) >= sctp->sctp_current->sf_pmss) {
2781 			mblk_t	*pkt;
2782 			uint_t	pkt_len;
2783 			mblk_t	*mp1 = mp;
2784 			mblk_t	*meta = sctp->sctp_xmit_head;
2785 
2786 			/*
2787 			 * Reset the RTO since we have been backing-off
2788 			 * to send the ZWP.
2789 			 */
2790 			fp = sctp->sctp_current;
2791 			fp->sf_rto = fp->sf_srtt + 4 * fp->sf_rttvar;
2792 			SCTP_MAX_RTO(sctp, fp);
2793 			/* Resend the ZWP */
2794 			pkt = sctp_rexmit_packet(sctp, &meta, &mp1, fp,
2795 			    &pkt_len);
2796 			if (pkt == NULL) {
2797 				SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2798 				return (0);
2799 			}
2800 			ASSERT(pkt_len <= fp->sf_pmss);
2801 			sctp->sctp_zero_win_probe = B_FALSE;
2802 			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2803 			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2804 			sctp_set_iplen(sctp, pkt, fp->sf_ixa);
2805 			(void) conn_ip_output(pkt, fp->sf_ixa);
2806 			BUMP_LOCAL(sctp->sctp_opkts);
2807 		}
2808 	} else {
2809 		if (sctp->sctp_zero_win_probe) {
2810 			/*
2811 			 * Reset the RTO since we have been backing-off
2812 			 * to send the ZWP.
2813 			 */
2814 			fp = sctp->sctp_current;
2815 			fp->sf_rto = fp->sf_srtt + 4 * fp->sf_rttvar;
2816 			SCTP_MAX_RTO(sctp, fp);
2817 			sctp->sctp_zero_win_probe = B_FALSE;
2818 			/* This is probably not required */
2819 			if (!sctp->sctp_rexmitting) {
2820 				sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2821 				sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2822 			}
2823 		}
2824 		acked = sctp_cumack(sctp, cumtsn, &mp);
2825 		sctp->sctp_xmit_unacked = mp;
2826 		if (acked > 0) {
2827 			trysend = 1;
2828 			cumack_forward = B_TRUE;
2829 			if (fwd_tsn && SEQ_GEQ(sctp->sctp_lastack_rxd,
2830 			    sctp->sctp_adv_pap)) {
2831 				cumack_forward = B_FALSE;
2832 			}
2833 		}
2834 	}
2835 	num_gaps = ntohs(sc->ssc_numfrags);
2836 	UPDATE_LOCAL(sctp->sctp_gapcnt, num_gaps);
2837 	if (num_gaps == 0 || mp == NULL || !SCTP_CHUNK_ISSENT(mp) ||
2838 	    chunklen < (sizeof (*sch) + sizeof (*sc) +
2839 	    num_gaps * sizeof (*ssf))) {
2840 		goto ret;
2841 	}
2842 #ifdef	DEBUG
2843 	/*
2844 	 * Since we delete any message that has been acked completely,
2845 	 * the unacked chunk must belong to sctp_xmit_head (as
2846 	 * we don't have a back pointer from the mp to the meta data
2847 	 * we do this).
2848 	 */
2849 	{
2850 		mblk_t	*mp2 = sctp->sctp_xmit_head->b_cont;
2851 
2852 		while (mp2 != NULL) {
2853 			if (mp2 == mp)
2854 				break;
2855 			mp2 = mp2->b_next;
2856 		}
2857 		ASSERT(mp2 != NULL);
2858 	}
2859 #endif
2860 	ump = sctp->sctp_xmit_head;
2861 
2862 	/*
2863 	 * Just remember where we started from, in case we need to call
2864 	 * sctp_process_uo_gaps() if the gap blocks are unordered.
2865 	 */
2866 	mp1 = mp;
2867 
2868 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2869 	xtsn = ntohl(sdc->sdh_tsn);
2870 	ASSERT(xtsn == cumtsn + 1);
2871 
2872 	/*
2873 	 * Go through SACK gaps. They are ordered based on start TSN.
2874 	 */
2875 	ssf = (sctp_sack_frag_t *)(sc + 1);
2876 	for (i = 0; i < num_gaps; i++, ssf++) {
2877 		if (gapstart != 0) {
2878 			/* check for unordered gap */
2879 			if (SEQ_LEQ(cumtsn + ntohs(ssf->ssf_start), gapstart)) {
2880 				acked += sctp_process_uo_gaps(sctp,
2881 				    cumtsn, ssf, num_gaps - i,
2882 				    sctp->sctp_xmit_head, mp1,
2883 				    &trysend, &fast_recovery, gapstart);
2884 				if (trysend < 0) {
2885 					SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2886 					return (-1);
2887 				}
2888 				break;
2889 			}
2890 		}
2891 		gapstart = cumtsn + ntohs(ssf->ssf_start);
2892 		gapend = cumtsn + ntohs(ssf->ssf_end);
2893 
2894 		/*
2895 		 * Sanity checks:
2896 		 *
2897 		 * 1. SACK for TSN we have not sent - ABORT
2898 		 * 2. Invalid or spurious gaps, ignore all gaps
2899 		 */
2900 		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
2901 		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
2902 			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2903 			return (-1);
2904 		} else if (SEQ_LT(gapend, gapstart) ||
2905 		    SEQ_LEQ(gapstart, cumtsn)) {
2906 			break;
2907 		}
2908 		/*
2909 		 * Let's start at the current TSN (for the 1st gap we start
2910 		 * from the cumulative TSN, for subsequent ones we start from
2911 		 * where the previous gapend was found - second while loop
2912 		 * below) and walk the transmit list till we find the TSN
2913 		 * corresponding to gapstart. All the unacked chunks till we
2914 		 * get to the chunk with TSN == gapstart will have their
2915 		 * SACKCNT incremented by 1. Note since the gap blocks are
2916 		 * ordered, we won't be incrementing the SACKCNT for an
2917 		 * unacked chunk by more than one while processing the gap
2918 		 * blocks. If the SACKCNT for any unacked chunk exceeds
2919 		 * the fast retransmit threshold, we will fast retransmit
2920 		 * after processing all the gap blocks.
2921 		 */
2922 		ASSERT(SEQ_LEQ(xtsn, gapstart));
2923 		while (xtsn != gapstart) {
2924 			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
2925 			if (SCTP_CHUNK_SACKCNT(mp) ==
2926 			    sctps->sctps_fast_rxt_thresh) {
2927 				SCTP_CHUNK_REXMIT(sctp, mp);
2928 				sctp->sctp_chk_fast_rexmit = B_TRUE;
2929 				trysend = 1;
2930 				if (!fast_recovery) {
2931 					/*
2932 					 * Entering fast recovery.
2933 					 */
2934 					fp = SCTP_CHUNK_DEST(mp);
2935 					fp->sf_ssthresh = fp->sf_cwnd / 2;
2936 					if (fp->sf_ssthresh < 2 * fp->sf_pmss) {
2937 						fp->sf_ssthresh =
2938 						    2 * fp->sf_pmss;
2939 					}
2940 					fp->sf_cwnd = fp->sf_ssthresh;
2941 					fp->sf_pba = 0;
2942 					sctp->sctp_recovery_tsn =
2943 					    sctp->sctp_ltsn - 1;
2944 					fast_recovery = B_TRUE;
2945 				}
2946 			}
2947 
2948 			/*
2949 			 * Peer may have reneged on this chunk, so un-sack
2950 			 * it now. If the peer did renege, we need to
2951 			 * readjust unacked.
2952 			 */
2953 			if (SCTP_CHUNK_ISACKED(mp)) {
2954 				chunklen = ntohs(sdc->sdh_len);
2955 				fp = SCTP_CHUNK_DEST(mp);
2956 				fp->sf_suna += chunklen;
2957 				sctp->sctp_unacked += chunklen - sizeof (*sdc);
2958 				SCTP_CHUNK_CLEAR_ACKED(sctp, mp);
2959 				if (!fp->sf_timer_running) {
2960 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
2961 					    fp->sf_rto);
2962 				}
2963 			}
2964 
2965 			mp = mp->b_next;
2966 			if (mp == NULL) {
2967 				ump = ump->b_next;
2968 				/*
2969 				 * ump can't be NULL given the sanity check
2970 				 * above.  But if it is NULL, it means that
2971 				 * there is a data corruption.  We'd better
2972 				 * panic.
2973 				 */
2974 				if (ump == NULL) {
2975 					panic("Memory corruption detected: gap "
2976 					    "start TSN 0x%x missing from the "
2977 					    "xmit list: %p", gapstart,
2978 					    (void *)sctp);
2979 				}
2980 				mp = ump->b_cont;
2981 			}
2982 			/*
2983 			 * mp can't be unsent given the sanity check above.
2984 			 */
2985 			ASSERT(SCTP_CHUNK_ISSENT(mp));
2986 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2987 			xtsn = ntohl(sdc->sdh_tsn);
2988 		}
2989 		/*
2990 		 * Now that we have found the chunk with TSN == 'gapstart',
2991 		 * let's walk till we hit the chunk with TSN == 'gapend'.
2992 		 * All intermediate chunks will be marked ACKED, if they
2993 		 * haven't already been.
2994 		 */
2995 		while (SEQ_LEQ(xtsn, gapend)) {
2996 			/*
2997 			 * SACKed
2998 			 */
2999 			SCTP_CHUNK_SET_SACKCNT(mp, 0);
3000 			if (!SCTP_CHUNK_ISACKED(mp)) {
3001 				SCTP_CHUNK_ACKED(mp);
3002 
3003 				fp = SCTP_CHUNK_DEST(mp);
3004 				chunklen = ntohs(sdc->sdh_len);
3005 				ASSERT(fp->sf_suna >= chunklen);
3006 				fp->sf_suna -= chunklen;
3007 				if (fp->sf_suna == 0) {
3008 					/* All outstanding data acked. */
3009 					fp->sf_pba = 0;
3010 					SCTP_FADDR_TIMER_STOP(fp);
3011 				}
3012 				fp->sf_acked += chunklen;
3013 				acked += chunklen;
3014 				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
3015 				ASSERT(sctp->sctp_unacked >= 0);
3016 			}
3017 			/* Go to the next chunk of the current message */
3018 			mp = mp->b_next;
3019 			/*
3020 			 * Move to the next message in the transmit list
3021 			 * if we are done with all the chunks from the current
3022 			 * message. Note, it is possible to hit the end of the
3023 			 * transmit list here, i.e. if we have already completed
3024 			 * processing the gap block.  But the TSN must be equal
3025 			 * to the gapend because of the above sanity check.
3026 			 * If it is not equal, it means that some data is
3027 			 * missing.
3028 			 * Also, note that we break here, which means we
3029 			 * continue processing gap blocks, if any. In case of
3030 			 * ordered gap blocks there can't be any following
3031 			 * this (if there is it will fail the sanity check
3032 			 * above). In case of un-ordered gap blocks we will
3033 			 * switch to sctp_process_uo_gaps().  In either case
3034 			 * it should be fine to continue with NULL ump/mp,
3035 			 * but we just reset it to xmit_head.
3036 			 */
3037 			if (mp == NULL) {
3038 				ump = ump->b_next;
3039 				if (ump == NULL) {
3040 					if (xtsn != gapend) {
3041 						panic("Memory corruption "
3042 						    "detected: gap end TSN "
3043 						    "0x%x missing from the "
3044 						    "xmit list: %p", gapend,
3045 						    (void *)sctp);
3046 					}
3047 					ump = sctp->sctp_xmit_head;
3048 					mp = mp1;
3049 					sdc = (sctp_data_hdr_t *)mp->b_rptr;
3050 					xtsn = ntohl(sdc->sdh_tsn);
3051 					break;
3052 				}
3053 				mp = ump->b_cont;
3054 			}
3055 			/*
3056 			 * Likewise, we could hit an unsent chunk once we have
3057 			 * completed processing the gap block. Again, it is
3058 			 * fine to continue processing gap blocks with mp
3059 			 * pointing to the unsent chunk, because if there
3060 			 * are more ordered gap blocks, they will fail the
3061 			 * sanity check, and if there are un-ordered gap blocks,
3062 			 * we will continue processing in sctp_process_uo_gaps()
3063 			 * We just reset the mp to the one we started with.
3064 			 */
3065 			if (!SCTP_CHUNK_ISSENT(mp)) {
3066 				ASSERT(xtsn == gapend);
3067 				ump = sctp->sctp_xmit_head;
3068 				mp = mp1;
3069 				sdc = (sctp_data_hdr_t *)mp->b_rptr;
3070 				xtsn = ntohl(sdc->sdh_tsn);
3071 				break;
3072 			}
3073 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
3074 			xtsn = ntohl(sdc->sdh_tsn);
3075 		}
3076 	}
3077 	if (sctp->sctp_prsctp_aware)
3078 		sctp_check_abandoned_data(sctp, sctp->sctp_current);
3079 	if (sctp->sctp_chk_fast_rexmit)
3080 		sctp_fast_rexmit(sctp);
3081 ret:
3082 	trysend += sctp_set_frwnd(sctp, ntohl(sc->ssc_a_rwnd));
3083 
3084 	/*
3085 	 * If receive window is closed while there is unsent data,
3086 	 * set a timer for doing zero window probes.
3087 	 */
3088 	if (sctp->sctp_frwnd == 0 && sctp->sctp_unacked == 0 &&
3089 	    sctp->sctp_unsent != 0) {
3090 		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
3091 		    sctp->sctp_current->sf_rto);
3092 	}
3093 
3094 	/*
3095 	 * Set cwnd for all destinations.
3096 	 * Congestion window gets increased only when cumulative
3097 	 * TSN moves forward, we're not in fast recovery, and
3098 	 * cwnd has been fully utilized (almost fully, need to allow
3099 	 * some leeway due to non-MSS sized messages).
3100 	 */
3101 	if (sctp->sctp_current->sf_acked == acked) {
3102 		/*
3103 		 * Fast-path, only data sent to sctp_current got acked.
3104 		 */
3105 		fp = sctp->sctp_current;
3106 		if (cumack_forward && !fast_recovery &&
3107 		    (fp->sf_acked + fp->sf_suna > fp->sf_cwnd - fp->sf_pmss)) {
3108 			if (fp->sf_cwnd < fp->sf_ssthresh) {
3109 				/*
3110 				 * Slow start
3111 				 */
3112 				if (fp->sf_acked > fp->sf_pmss) {
3113 					fp->sf_cwnd += fp->sf_pmss;
3114 				} else {
3115 					fp->sf_cwnd += fp->sf_acked;
3116 				}
3117 				fp->sf_cwnd = MIN(fp->sf_cwnd,
3118 				    sctp->sctp_cwnd_max);
3119 			} else {
3120 				/*
3121 				 * Congestion avoidance
3122 				 */
3123 				fp->sf_pba += fp->sf_acked;
3124 				if (fp->sf_pba >= fp->sf_cwnd) {
3125 					fp->sf_pba -= fp->sf_cwnd;
3126 					fp->sf_cwnd += fp->sf_pmss;
3127 					fp->sf_cwnd = MIN(fp->sf_cwnd,
3128 					    sctp->sctp_cwnd_max);
3129 				}
3130 			}
3131 		}
3132 		/*
3133 		 * Limit the burst of transmitted data segments.
3134 		 */
3135 		if (fp->sf_suna + sctps->sctps_maxburst * fp->sf_pmss <
3136 		    fp->sf_cwnd) {
3137 			fp->sf_cwnd = fp->sf_suna + sctps->sctps_maxburst *
3138 			    fp->sf_pmss;
3139 		}
3140 		fp->sf_acked = 0;
3141 		goto check_ss_rxmit;
3142 	}
3143 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
3144 		if (cumack_forward && fp->sf_acked && !fast_recovery &&
3145 		    (fp->sf_acked + fp->sf_suna > fp->sf_cwnd - fp->sf_pmss)) {
3146 			if (fp->sf_cwnd < fp->sf_ssthresh) {
3147 				if (fp->sf_acked > fp->sf_pmss) {
3148 					fp->sf_cwnd += fp->sf_pmss;
3149 				} else {
3150 					fp->sf_cwnd += fp->sf_acked;
3151 				}
3152 				fp->sf_cwnd = MIN(fp->sf_cwnd,
3153 				    sctp->sctp_cwnd_max);
3154 			} else {
3155 				fp->sf_pba += fp->sf_acked;
3156 				if (fp->sf_pba >= fp->sf_cwnd) {
3157 					fp->sf_pba -= fp->sf_cwnd;
3158 					fp->sf_cwnd += fp->sf_pmss;
3159 					fp->sf_cwnd = MIN(fp->sf_cwnd,
3160 					    sctp->sctp_cwnd_max);
3161 				}
3162 			}
3163 		}
3164 		if (fp->sf_suna + sctps->sctps_maxburst * fp->sf_pmss <
3165 		    fp->sf_cwnd) {
3166 			fp->sf_cwnd = fp->sf_suna + sctps->sctps_maxburst *
3167 			    fp->sf_pmss;
3168 		}
3169 		fp->sf_acked = 0;
3170 	}
3171 	fp = sctp->sctp_current;
3172 check_ss_rxmit:
3173 	/*
3174 	 * If this is a SACK following a timeout, check if there are
3175 	 * still unacked chunks (sent before the timeout) that we can
3176 	 * send.
3177 	 */
3178 	if (sctp->sctp_rexmitting) {
3179 		if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_rxt_maxtsn)) {
3180 			/*
3181 			 * As we are in retransmission phase, we may get a
3182 			 * SACK which indicates some new chunks are received
3183 			 * but cum_tsn does not advance.  During this
3184 			 * phase, the other side advances cum_tsn only because
3185 			 * it receives our retransmitted chunks.  Only
3186 			 * this signals that some chunks are still
3187 			 * missing.
3188 			 */
3189 			if (cumack_forward) {
3190 				fp->sf_rxt_unacked -= acked;
3191 				sctp_ss_rexmit(sctp);
3192 			}
3193 		} else {
3194 			sctp->sctp_rexmitting = B_FALSE;
3195 			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
3196 			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
3197 			fp->sf_rxt_unacked = 0;
3198 		}
3199 	}
3200 	return (trysend);
3201 }
3202 
3203 /*
3204  * Returns 0 if the caller should stop processing any more chunks,
3205  * 1 if the caller should skip this chunk and continue processing.
3206  */
3207 static int
3208 sctp_strange_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp)
3209 {
3210 	size_t len;
3211 
3212 	BUMP_LOCAL(sctp->sctp_ibchunks);
3213 	/* check top two bits for action required */
3214 	if (ch->sch_id & 0x40) {	/* also matches 0xc0 */
3215 		len = ntohs(ch->sch_len);
3216 		sctp_add_err(sctp, SCTP_ERR_UNREC_CHUNK, ch, len, fp);
3217 
3218 		if ((ch->sch_id & 0xc0) == 0xc0) {
3219 			/* skip and continue */
3220 			return (1);
3221 		} else {
3222 			/* stop processing */
3223 			return (0);
3224 		}
3225 	}
3226 	if (ch->sch_id & 0x80) {
3227 		/* skip and continue, no error */
3228 		return (1);
3229 	}
3230 	/* top two bits are clear; stop processing and no error */
3231 	return (0);
3232 }
3233 
3234 /*
3235  * Basic sanity checks on all input chunks and parameters: they must
3236  * be of legitimate size for their purported type, and must follow
3237  * ordering conventions as defined in rfc2960.
3238  *
3239  * Returns 1 if the chunk and all encloded params are legitimate,
3240  * 0 otherwise.
3241  */
3242 /*ARGSUSED*/
3243 static int
3244 sctp_check_input(sctp_t *sctp, sctp_chunk_hdr_t *ch, ssize_t len, int first)
3245 {
3246 	sctp_parm_hdr_t	*ph;
3247 	void		*p = NULL;
3248 	ssize_t		clen;
3249 	uint16_t	ch_len;
3250 
3251 	ch_len = ntohs(ch->sch_len);
3252 	if (ch_len > len) {
3253 		return (0);
3254 	}
3255 
3256 	switch (ch->sch_id) {
3257 	case CHUNK_DATA:
3258 		if (ch_len < sizeof (sctp_data_hdr_t)) {
3259 			return (0);
3260 		}
3261 		return (1);
3262 	case CHUNK_INIT:
3263 	case CHUNK_INIT_ACK:
3264 		{
3265 			ssize_t	remlen = len;
3266 
3267 			/*
3268 			 * INIT and INIT-ACK chunks must not be bundled with
3269 			 * any other.
3270 			 */
3271 			if (!first || sctp_next_chunk(ch, &remlen) != NULL ||
3272 			    (ch_len < (sizeof (*ch) +
3273 			    sizeof (sctp_init_chunk_t)))) {
3274 				return (0);
3275 			}
3276 			/* may have params that need checking */
3277 			p = (char *)(ch + 1) + sizeof (sctp_init_chunk_t);
3278 			clen = ch_len - (sizeof (*ch) +
3279 			    sizeof (sctp_init_chunk_t));
3280 		}
3281 		break;
3282 	case CHUNK_SACK:
3283 		if (ch_len < (sizeof (*ch) + sizeof (sctp_sack_chunk_t))) {
3284 			return (0);
3285 		}
3286 		/* dup and gap reports checked by got_sack() */
3287 		return (1);
3288 	case CHUNK_SHUTDOWN:
3289 		if (ch_len < (sizeof (*ch) + sizeof (uint32_t))) {
3290 			return (0);
3291 		}
3292 		return (1);
3293 	case CHUNK_ABORT:
3294 	case CHUNK_ERROR:
3295 		if (ch_len < sizeof (*ch)) {
3296 			return (0);
3297 		}
3298 		/* may have params that need checking */
3299 		p = ch + 1;
3300 		clen = ch_len - sizeof (*ch);
3301 		break;
3302 	case CHUNK_ECNE:
3303 	case CHUNK_CWR:
3304 	case CHUNK_HEARTBEAT:
3305 	case CHUNK_HEARTBEAT_ACK:
3306 	/* Full ASCONF chunk and parameter checks are in asconf.c */
3307 	case CHUNK_ASCONF:
3308 	case CHUNK_ASCONF_ACK:
3309 		if (ch_len < sizeof (*ch)) {
3310 			return (0);
3311 		}
3312 		/* heartbeat data checked by process_heartbeat() */
3313 		return (1);
3314 	case CHUNK_SHUTDOWN_COMPLETE:
3315 		{
3316 			ssize_t remlen = len;
3317 
3318 			/*
3319 			 * SHUTDOWN-COMPLETE chunk must not be bundled with any
3320 			 * other
3321 			 */
3322 			if (!first || sctp_next_chunk(ch, &remlen) != NULL ||
3323 			    ch_len < sizeof (*ch)) {
3324 				return (0);
3325 			}
3326 		}
3327 		return (1);
3328 	case CHUNK_COOKIE:
3329 	case CHUNK_COOKIE_ACK:
3330 	case CHUNK_SHUTDOWN_ACK:
3331 		if (ch_len < sizeof (*ch) || !first) {
3332 			return (0);
3333 		}
3334 		return (1);
3335 	case CHUNK_FORWARD_TSN:
3336 		if (ch_len < (sizeof (*ch) + sizeof (uint32_t)))
3337 			return (0);
3338 		return (1);
3339 	default:
3340 		return (1);	/* handled by strange_chunk() */
3341 	}
3342 
3343 	/* check and byteorder parameters */
3344 	if (clen <= 0) {
3345 		return (1);
3346 	}
3347 	ASSERT(p != NULL);
3348 
3349 	ph = p;
3350 	while (ph != NULL && clen > 0) {
3351 		ch_len = ntohs(ph->sph_len);
3352 		if (ch_len > len || ch_len < sizeof (*ph)) {
3353 			return (0);
3354 		}
3355 		ph = sctp_next_parm(ph, &clen);
3356 	}
3357 
3358 	/* All OK */
3359 	return (1);
3360 }
3361 
3362 static mblk_t *
3363 sctp_check_in_policy(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
3364 {
3365 	boolean_t policy_present;
3366 	ipha_t *ipha;
3367 	ip6_t *ip6h;
3368 	netstack_t	*ns = ipst->ips_netstack;
3369 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3370 
3371 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
3372 		policy_present = ipss->ipsec_inbound_v4_policy_present;
3373 		ipha = (ipha_t *)mp->b_rptr;
3374 		ip6h = NULL;
3375 	} else {
3376 		policy_present = ipss->ipsec_inbound_v6_policy_present;
3377 		ipha = NULL;
3378 		ip6h = (ip6_t *)mp->b_rptr;
3379 	}
3380 
3381 	if (policy_present) {
3382 		/*
3383 		 * The conn_t parameter is NULL because we already know
3384 		 * nobody's home.
3385 		 */
3386 		mp = ipsec_check_global_policy(mp, (conn_t *)NULL,
3387 		    ipha, ip6h, ira, ns);
3388 		if (mp == NULL)
3389 			return (NULL);
3390 	}
3391 	return (mp);
3392 }
3393 
3394 /* Handle out-of-the-blue packets */
3395 void
3396 sctp_ootb_input(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
3397 {
3398 	sctp_t			*sctp;
3399 	sctp_chunk_hdr_t	*ch;
3400 	sctp_hdr_t		*sctph;
3401 	in6_addr_t		src, dst;
3402 	uint_t			ip_hdr_len = ira->ira_ip_hdr_length;
3403 	ssize_t			mlen;
3404 	sctp_stack_t		*sctps;
3405 	boolean_t		secure;
3406 	zoneid_t		zoneid = ira->ira_zoneid;
3407 	uchar_t			*rptr;
3408 
3409 	ASSERT(ira->ira_ill == NULL);
3410 
3411 	secure = ira->ira_flags & IRAF_IPSEC_SECURE;
3412 
3413 	sctps = ipst->ips_netstack->netstack_sctp;
3414 
3415 	SCTPS_BUMP_MIB(sctps, sctpOutOfBlue);
3416 	SCTPS_BUMP_MIB(sctps, sctpInSCTPPkts);
3417 
3418 	if (mp->b_cont != NULL) {
3419 		/*
3420 		 * All subsequent code is vastly simplified if it can
3421 		 * assume a single contiguous chunk of data.
3422 		 */
3423 		if (pullupmsg(mp, -1) == 0) {
3424 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3425 			ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3426 			freemsg(mp);
3427 			return;
3428 		}
3429 	}
3430 
3431 	rptr = mp->b_rptr;
3432 	sctph = ((sctp_hdr_t *)&rptr[ip_hdr_len]);
3433 	if (ira->ira_flags & IRAF_IS_IPV4) {
3434 		ipha_t *ipha;
3435 
3436 		ipha = (ipha_t *)rptr;
3437 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &src);
3438 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst);
3439 	} else {
3440 		ip6_t *ip6h;
3441 
3442 		ip6h = (ip6_t *)rptr;
3443 		src = ip6h->ip6_src;
3444 		dst = ip6h->ip6_dst;
3445 	}
3446 
3447 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
3448 	if ((ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
3449 		dprint(3, ("sctp_ootb_input: invalid packet\n"));
3450 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3451 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3452 		freemsg(mp);
3453 		return;
3454 	}
3455 
3456 	switch (ch->sch_id) {
3457 	case CHUNK_INIT:
3458 		/* no listener; send abort  */
3459 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3460 			return;
3461 		sctp_ootb_send_abort(sctp_init2vtag(ch), 0,
3462 		    NULL, 0, mp, 0, B_TRUE, ira, ipst);
3463 		break;
3464 	case CHUNK_INIT_ACK:
3465 		/* check for changed src addr */
3466 		sctp = sctp_addrlist2sctp(mp, sctph, ch, zoneid, sctps);
3467 		if (sctp != NULL) {
3468 			/* success; proceed to normal path */
3469 			mutex_enter(&sctp->sctp_lock);
3470 			if (sctp->sctp_running) {
3471 				sctp_add_recvq(sctp, mp, B_FALSE, ira);
3472 				mutex_exit(&sctp->sctp_lock);
3473 			} else {
3474 				/*
3475 				 * If the source address is changed, we
3476 				 * don't need to worry too much about
3477 				 * out of order processing.  So we don't
3478 				 * check if the recvq is empty or not here.
3479 				 */
3480 				sctp->sctp_running = B_TRUE;
3481 				mutex_exit(&sctp->sctp_lock);
3482 				sctp_input_data(sctp, mp, ira);
3483 				WAKE_SCTP(sctp);
3484 			}
3485 			SCTP_REFRELE(sctp);
3486 			return;
3487 		}
3488 		/* else bogus init ack; drop it */
3489 		break;
3490 	case CHUNK_SHUTDOWN_ACK:
3491 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3492 			return;
3493 		sctp_ootb_shutdown_ack(mp, ip_hdr_len, ira, ipst);
3494 		return;
3495 	case CHUNK_ERROR:
3496 	case CHUNK_ABORT:
3497 	case CHUNK_COOKIE_ACK:
3498 	case CHUNK_SHUTDOWN_COMPLETE:
3499 		break;
3500 	default:
3501 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3502 			return;
3503 		sctp_ootb_send_abort(sctph->sh_verf, 0,
3504 		    NULL, 0, mp, 0, B_TRUE, ira, ipst);
3505 		break;
3506 	}
3507 	freemsg(mp);
3508 }
3509 
3510 /*
3511  * Handle sctp packets.
3512  * Note that we rele the sctp_t (the caller got a reference on it).
3513  */
3514 void
3515 sctp_input(conn_t *connp, ipha_t *ipha, ip6_t *ip6h, mblk_t *mp,
3516     ip_recv_attr_t *ira)
3517 {
3518 	sctp_t		*sctp = CONN2SCTP(connp);
3519 	boolean_t	secure;
3520 	ill_t		*ill = ira->ira_ill;
3521 	ip_stack_t	*ipst = ill->ill_ipst;
3522 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
3523 	iaflags_t	iraflags = ira->ira_flags;
3524 	ill_t		*rill = ira->ira_rill;
3525 
3526 	secure = iraflags & IRAF_IPSEC_SECURE;
3527 
3528 	/*
3529 	 * We check some fields in conn_t without holding a lock.
3530 	 * This should be fine.
3531 	 */
3532 	if (((iraflags & IRAF_IS_IPV4) ?
3533 	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
3534 	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
3535 	    secure) {
3536 		mp = ipsec_check_inbound_policy(mp, connp, ipha,
3537 		    ip6h, ira);
3538 		if (mp == NULL) {
3539 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3540 			/* Note that mp is NULL */
3541 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3542 			SCTP_REFRELE(sctp);
3543 			return;
3544 		}
3545 	}
3546 
3547 	ira->ira_ill = ira->ira_rill = NULL;
3548 
3549 	mutex_enter(&sctp->sctp_lock);
3550 	if (sctp->sctp_running) {
3551 		sctp_add_recvq(sctp, mp, B_FALSE, ira);
3552 		mutex_exit(&sctp->sctp_lock);
3553 		goto done;
3554 	} else {
3555 		sctp->sctp_running = B_TRUE;
3556 		mutex_exit(&sctp->sctp_lock);
3557 
3558 		mutex_enter(&sctp->sctp_recvq_lock);
3559 		if (sctp->sctp_recvq != NULL) {
3560 			sctp_add_recvq(sctp, mp, B_TRUE, ira);
3561 			mutex_exit(&sctp->sctp_recvq_lock);
3562 			WAKE_SCTP(sctp);
3563 			goto done;
3564 		}
3565 	}
3566 	mutex_exit(&sctp->sctp_recvq_lock);
3567 	if (ira->ira_flags & IRAF_ICMP_ERROR)
3568 		sctp_icmp_error(sctp, mp);
3569 	else
3570 		sctp_input_data(sctp, mp, ira);
3571 	WAKE_SCTP(sctp);
3572 
3573 done:
3574 	SCTP_REFRELE(sctp);
3575 	ira->ira_ill = ill;
3576 	ira->ira_rill = rill;
3577 }
3578 
3579 static void
3580 sctp_process_abort(sctp_t *sctp, sctp_chunk_hdr_t *ch, int err)
3581 {
3582 	sctp_stack_t	*sctps = sctp->sctp_sctps;
3583 
3584 	SCTPS_BUMP_MIB(sctps, sctpAborted);
3585 	BUMP_LOCAL(sctp->sctp_ibchunks);
3586 
3587 	/*
3588 	 * SCTP_COMM_LOST is only sent up if the association is
3589 	 * established (sctp_state >= SCTPS_ESTABLISHED).
3590 	 */
3591 	if (sctp->sctp_state >= SCTPS_ESTABLISHED) {
3592 		sctp_assoc_event(sctp, SCTP_COMM_LOST,
3593 		    ntohs(((sctp_parm_hdr_t *)(ch + 1))->sph_type), ch);
3594 	}
3595 
3596 	sctp_clean_death(sctp, err);
3597 }
3598 
3599 void
3600 sctp_input_data(sctp_t *sctp, mblk_t *mp, ip_recv_attr_t *ira)
3601 {
3602 	sctp_chunk_hdr_t	*ch;
3603 	ssize_t			mlen;
3604 	int			gotdata;
3605 	int			trysend;
3606 	sctp_faddr_t		*fp;
3607 	sctp_init_chunk_t	*iack;
3608 	uint32_t		tsn;
3609 	sctp_data_hdr_t		*sdc;
3610 	ip_pkt_t		ipp;
3611 	in6_addr_t		src;
3612 	in6_addr_t		dst;
3613 	uint_t			ifindex;
3614 	sctp_hdr_t		*sctph;
3615 	uint_t			ip_hdr_len = ira->ira_ip_hdr_length;
3616 	mblk_t			*dups = NULL;
3617 	int			recv_adaptation;
3618 	boolean_t		wake_eager = B_FALSE;
3619 	in6_addr_t		peer_src;
3620 	int64_t			now;
3621 	sctp_stack_t		*sctps = sctp->sctp_sctps;
3622 	ip_stack_t		*ipst = sctps->sctps_netstack->netstack_ip;
3623 	boolean_t		hb_already = B_FALSE;
3624 	cred_t			*cr;
3625 	pid_t			cpid;
3626 	uchar_t			*rptr;
3627 	conn_t			*connp = sctp->sctp_connp;
3628 	boolean_t		shutdown_ack_needed = B_FALSE;
3629 
3630 	ASSERT(DB_TYPE(mp) == M_DATA);
3631 	ASSERT(ira->ira_ill == NULL);
3632 
3633 	if (mp->b_cont != NULL) {
3634 		/*
3635 		 * All subsequent code is vastly simplified if it can
3636 		 * assume a single contiguous chunk of data.
3637 		 */
3638 		if (pullupmsg(mp, -1) == 0) {
3639 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3640 			ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3641 			freemsg(mp);
3642 			return;
3643 		}
3644 	}
3645 
3646 	BUMP_LOCAL(sctp->sctp_ipkts);
3647 	ifindex = ira->ira_ruifindex;
3648 
3649 	rptr = mp->b_rptr;
3650 
3651 	ipp.ipp_fields = 0;
3652 	if (connp->conn_recv_ancillary.crb_all != 0) {
3653 		/*
3654 		 * Record packet information in the ip_pkt_t
3655 		 */
3656 		if (ira->ira_flags & IRAF_IS_IPV4) {
3657 			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipp,
3658 			    B_FALSE);
3659 		} else {
3660 			uint8_t nexthdrp;
3661 
3662 			/*
3663 			 * IPv6 packets can only be received by applications
3664 			 * that are prepared to receive IPv6 addresses.
3665 			 * The IP fanout must ensure this.
3666 			 */
3667 			ASSERT(connp->conn_family == AF_INET6);
3668 
3669 			(void) ip_find_hdr_v6(mp, (ip6_t *)rptr, B_TRUE, &ipp,
3670 			    &nexthdrp);
3671 			ASSERT(nexthdrp == IPPROTO_SCTP);
3672 
3673 			/* Could have caused a pullup? */
3674 			rptr = mp->b_rptr;
3675 		}
3676 	}
3677 
3678 	sctph = ((sctp_hdr_t *)&rptr[ip_hdr_len]);
3679 
3680 	if (ira->ira_flags & IRAF_IS_IPV4) {
3681 		ipha_t *ipha;
3682 
3683 		ipha = (ipha_t *)rptr;
3684 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &src);
3685 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst);
3686 	} else {
3687 		ip6_t *ip6h;
3688 
3689 		ip6h = (ip6_t *)rptr;
3690 		src = ip6h->ip6_src;
3691 		dst = ip6h->ip6_dst;
3692 	}
3693 
3694 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
3695 	ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen);
3696 	if (ch == NULL) {
3697 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3698 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3699 		freemsg(mp);
3700 		return;
3701 	}
3702 
3703 	if (!sctp_check_input(sctp, ch, mlen, 1)) {
3704 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3705 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3706 		goto done;
3707 	}
3708 	/*
3709 	 * Check verfication tag (special handling for INIT,
3710 	 * COOKIE, SHUTDOWN_COMPLETE and SHUTDOWN_ACK chunks).
3711 	 * ABORTs are handled in the chunk processing loop, since
3712 	 * may not appear first. All other checked chunks must
3713 	 * appear first, or will have been dropped by check_input().
3714 	 */
3715 	switch (ch->sch_id) {
3716 	case CHUNK_INIT:
3717 		if (sctph->sh_verf != 0) {
3718 			/* drop it */
3719 			goto done;
3720 		}
3721 		break;
3722 	case CHUNK_SHUTDOWN_COMPLETE:
3723 		if (sctph->sh_verf == sctp->sctp_lvtag)
3724 			break;
3725 		if (sctph->sh_verf == sctp->sctp_fvtag &&
3726 		    SCTP_GET_TBIT(ch)) {
3727 			break;
3728 		}
3729 		/* else drop it */
3730 		goto done;
3731 	case CHUNK_ABORT:
3732 	case CHUNK_COOKIE:
3733 		/* handled below */
3734 		break;
3735 	case CHUNK_SHUTDOWN_ACK:
3736 		if (sctp->sctp_state > SCTPS_BOUND &&
3737 		    sctp->sctp_state < SCTPS_ESTABLISHED) {
3738 			/* treat as OOTB */
3739 			sctp_ootb_shutdown_ack(mp, ip_hdr_len, ira, ipst);
3740 			return;
3741 		}
3742 		/* else fallthru */
3743 	default:
3744 		/*
3745 		 * All other packets must have a valid
3746 		 * verification tag, however if this is a
3747 		 * listener, we use a refined version of
3748 		 * out-of-the-blue logic.
3749 		 */
3750 		if (sctph->sh_verf != sctp->sctp_lvtag &&
3751 		    sctp->sctp_state != SCTPS_LISTEN) {
3752 			/* drop it */
3753 			goto done;
3754 		}
3755 		break;
3756 	}
3757 
3758 	/* Have a valid sctp for this packet */
3759 	fp = sctp_lookup_faddr(sctp, &src);
3760 	dprint(2, ("sctp_dispatch_rput: mp=%p fp=%p sctp=%p\n", (void *)mp,
3761 	    (void *)fp, (void *)sctp));
3762 
3763 	gotdata = 0;
3764 	trysend = 0;
3765 
3766 	now = LBOLT_FASTPATH64;
3767 	/* Process the chunks */
3768 	do {
3769 		dprint(3, ("sctp_dispatch_rput: state=%d, chunk id=%d\n",
3770 		    sctp->sctp_state, (int)(ch->sch_id)));
3771 
3772 		if (ch->sch_id == CHUNK_ABORT) {
3773 			if (sctph->sh_verf != sctp->sctp_lvtag &&
3774 			    sctph->sh_verf != sctp->sctp_fvtag) {
3775 				/* drop it */
3776 				goto done;
3777 			}
3778 		}
3779 
3780 		switch (sctp->sctp_state) {
3781 
3782 		case SCTPS_ESTABLISHED:
3783 		case SCTPS_SHUTDOWN_PENDING:
3784 		case SCTPS_SHUTDOWN_SENT:
3785 			switch (ch->sch_id) {
3786 			case CHUNK_DATA:
3787 				/* 0-length data chunks are not allowed */
3788 				if (ntohs(ch->sch_len) == sizeof (*sdc)) {
3789 					sdc = (sctp_data_hdr_t *)ch;
3790 					tsn = sdc->sdh_tsn;
3791 					sctp_send_abort(sctp, sctp->sctp_fvtag,
3792 					    SCTP_ERR_NO_USR_DATA, (char *)&tsn,
3793 					    sizeof (tsn), mp, 0, B_FALSE, ira);
3794 					sctp_assoc_event(sctp, SCTP_COMM_LOST,
3795 					    0, NULL);
3796 					sctp_clean_death(sctp, ECONNABORTED);
3797 					goto done;
3798 				}
3799 
3800 				ASSERT(fp != NULL);
3801 				sctp->sctp_lastdata = fp;
3802 				sctp_data_chunk(sctp, ch, mp, &dups, fp,
3803 				    &ipp, ira);
3804 				gotdata = 1;
3805 				/* Restart shutdown timer if shutting down */
3806 				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
3807 					/*
3808 					 * If we have exceeded our max
3809 					 * wait bound for waiting for a
3810 					 * shutdown ack from the peer,
3811 					 * abort the association.
3812 					 */
3813 					if (sctps->sctps_shutack_wait_bound !=
3814 					    0 &&
3815 					    TICK_TO_MSEC(now -
3816 					    sctp->sctp_out_time) >
3817 					    sctps->sctps_shutack_wait_bound) {
3818 						sctp_send_abort(sctp,
3819 						    sctp->sctp_fvtag, 0, NULL,
3820 						    0, mp, 0, B_FALSE, ira);
3821 						sctp_assoc_event(sctp,
3822 						    SCTP_COMM_LOST, 0, NULL);
3823 						sctp_clean_death(sctp,
3824 						    ECONNABORTED);
3825 						goto done;
3826 					}
3827 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
3828 					    fp->sf_rto);
3829 				}
3830 				break;
3831 			case CHUNK_SACK:
3832 				ASSERT(fp != NULL);
3833 				/*
3834 				 * Peer is real and alive if it can ack our
3835 				 * data.
3836 				 */
3837 				sctp_faddr_alive(sctp, fp);
3838 				trysend = sctp_got_sack(sctp, ch);
3839 				if (trysend < 0) {
3840 					sctp_send_abort(sctp, sctph->sh_verf,
3841 					    0, NULL, 0, mp, 0, B_FALSE, ira);
3842 					sctp_assoc_event(sctp,
3843 					    SCTP_COMM_LOST, 0, NULL);
3844 					sctp_clean_death(sctp,
3845 					    ECONNABORTED);
3846 					goto done;
3847 				}
3848 				break;
3849 			case CHUNK_HEARTBEAT:
3850 				if (!hb_already) {
3851 					/*
3852 					 * In any one packet, there should
3853 					 * only be one heartbeat chunk.  So
3854 					 * we should not process more than
3855 					 * once.
3856 					 */
3857 					sctp_return_heartbeat(sctp, ch, mp);
3858 					hb_already = B_TRUE;
3859 				}
3860 				break;
3861 			case CHUNK_HEARTBEAT_ACK:
3862 				sctp_process_heartbeat(sctp, ch);
3863 				break;
3864 			case CHUNK_SHUTDOWN:
3865 				sctp_shutdown_event(sctp);
3866 				trysend = sctp_shutdown_received(sctp, ch,
3867 				    B_FALSE, B_FALSE, fp);
3868 				BUMP_LOCAL(sctp->sctp_ibchunks);
3869 				break;
3870 			case CHUNK_SHUTDOWN_ACK:
3871 				BUMP_LOCAL(sctp->sctp_ibchunks);
3872 				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
3873 					sctp_shutdown_complete(sctp);
3874 					SCTPS_BUMP_MIB(sctps, sctpShutdowns);
3875 					sctp_assoc_event(sctp,
3876 					    SCTP_SHUTDOWN_COMP, 0, NULL);
3877 					sctp_clean_death(sctp, 0);
3878 					goto done;
3879 				}
3880 				break;
3881 			case CHUNK_ABORT: {
3882 				sctp_saddr_ipif_t *sp;
3883 
3884 				/* Ignore if delete pending */
3885 				sp = sctp_saddr_lookup(sctp, &dst, 0);
3886 				ASSERT(sp != NULL);
3887 				if (sp->saddr_ipif_delete_pending) {
3888 					BUMP_LOCAL(sctp->sctp_ibchunks);
3889 					break;
3890 				}
3891 
3892 				sctp_process_abort(sctp, ch, ECONNRESET);
3893 				goto done;
3894 			}
3895 			case CHUNK_INIT:
3896 				sctp_send_initack(sctp, sctph, ch, mp, ira);
3897 				break;
3898 			case CHUNK_COOKIE:
3899 				if (sctp_process_cookie(sctp, ch, mp, &iack,
3900 				    sctph, &recv_adaptation, NULL, ira) != -1) {
3901 					sctp_send_cookie_ack(sctp);
3902 					sctp_assoc_event(sctp, SCTP_RESTART,
3903 					    0, NULL);
3904 					if (recv_adaptation) {
3905 						sctp->sctp_recv_adaptation = 1;
3906 						sctp_adaptation_event(sctp);
3907 					}
3908 				} else {
3909 					SCTPS_BUMP_MIB(sctps,
3910 					    sctpInInvalidCookie);
3911 				}
3912 				break;
3913 			case CHUNK_ERROR: {
3914 				int error;
3915 
3916 				BUMP_LOCAL(sctp->sctp_ibchunks);
3917 				error = sctp_handle_error(sctp, sctph, ch, mp,
3918 				    ira);
3919 				if (error != 0) {
3920 					sctp_assoc_event(sctp, SCTP_COMM_LOST,
3921 					    0, NULL);
3922 					sctp_clean_death(sctp, error);
3923 					goto done;
3924 				}
3925 				break;
3926 			}
3927 			case CHUNK_ASCONF:
3928 				ASSERT(fp != NULL);
3929 				sctp_input_asconf(sctp, ch, fp);
3930 				BUMP_LOCAL(sctp->sctp_ibchunks);
3931 				break;
3932 			case CHUNK_ASCONF_ACK:
3933 				ASSERT(fp != NULL);
3934 				sctp_faddr_alive(sctp, fp);
3935 				sctp_input_asconf_ack(sctp, ch, fp);
3936 				BUMP_LOCAL(sctp->sctp_ibchunks);
3937 				break;
3938 			case CHUNK_FORWARD_TSN:
3939 				ASSERT(fp != NULL);
3940 				sctp->sctp_lastdata = fp;
3941 				sctp_process_forward_tsn(sctp, ch, fp,
3942 				    &ipp, ira);
3943 				gotdata = 1;
3944 				BUMP_LOCAL(sctp->sctp_ibchunks);
3945 				break;
3946 			default:
3947 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
3948 					goto nomorechunks;
3949 				} /* else skip and continue processing */
3950 				break;
3951 			}
3952 			break;
3953 
3954 		case SCTPS_LISTEN:
3955 			switch (ch->sch_id) {
3956 			case CHUNK_INIT:
3957 				sctp_send_initack(sctp, sctph, ch, mp, ira);
3958 				break;
3959 			case CHUNK_COOKIE: {
3960 				sctp_t *eager;
3961 
3962 				if (sctp_process_cookie(sctp, ch, mp, &iack,
3963 				    sctph, &recv_adaptation, &peer_src,
3964 				    ira) == -1) {
3965 					SCTPS_BUMP_MIB(sctps,
3966 					    sctpInInvalidCookie);
3967 					goto done;
3968 				}
3969 
3970 				/*
3971 				 * The cookie is good; ensure that
3972 				 * the peer used the verification
3973 				 * tag from the init ack in the header.
3974 				 */
3975 				if (iack->sic_inittag != sctph->sh_verf)
3976 					goto done;
3977 
3978 				eager = sctp_conn_request(sctp, mp, ifindex,
3979 				    ip_hdr_len, iack, ira);
3980 				if (eager == NULL) {
3981 					sctp_send_abort(sctp, sctph->sh_verf,
3982 					    SCTP_ERR_NO_RESOURCES, NULL, 0, mp,
3983 					    0, B_FALSE, ira);
3984 					goto done;
3985 				}
3986 
3987 				/*
3988 				 * If there were extra chunks
3989 				 * bundled with the cookie,
3990 				 * they must be processed
3991 				 * on the eager's queue. We
3992 				 * accomplish this by refeeding
3993 				 * the whole packet into the
3994 				 * state machine on the right
3995 				 * q. The packet (mp) gets
3996 				 * there via the eager's
3997 				 * cookie_mp field (overloaded
3998 				 * with the active open role).
3999 				 * This is picked up when
4000 				 * processing the null bind
4001 				 * request put on the eager's
4002 				 * q by sctp_accept(). We must
4003 				 * first revert the cookie
4004 				 * chunk's length field to network
4005 				 * byteorder so it can be
4006 				 * properly reprocessed on the
4007 				 * eager's queue.
4008 				 */
4009 				SCTPS_BUMP_MIB(sctps, sctpPassiveEstab);
4010 				if (mlen > ntohs(ch->sch_len)) {
4011 					eager->sctp_cookie_mp = dupb(mp);
4012 					/*
4013 					 * If no mem, just let
4014 					 * the peer retransmit.
4015 					 */
4016 				}
4017 				sctp_assoc_event(eager, SCTP_COMM_UP, 0, NULL);
4018 				if (recv_adaptation) {
4019 					eager->sctp_recv_adaptation = 1;
4020 					eager->sctp_rx_adaptation_code =
4021 					    sctp->sctp_rx_adaptation_code;
4022 					sctp_adaptation_event(eager);
4023 				}
4024 
4025 				eager->sctp_active = now;
4026 				sctp_send_cookie_ack(eager);
4027 
4028 				wake_eager = B_TRUE;
4029 
4030 				/*
4031 				 * Process rest of the chunks with eager.
4032 				 */
4033 				sctp = eager;
4034 				fp = sctp_lookup_faddr(sctp, &peer_src);
4035 				/*
4036 				 * Confirm peer's original source.  fp can
4037 				 * only be NULL if peer does not use the
4038 				 * original source as one of its addresses...
4039 				 */
4040 				if (fp == NULL)
4041 					fp = sctp_lookup_faddr(sctp, &src);
4042 				else
4043 					sctp_faddr_alive(sctp, fp);
4044 
4045 				/*
4046 				 * Validate the peer addresses.  It also starts
4047 				 * the heartbeat timer.
4048 				 */
4049 				sctp_validate_peer(sctp);
4050 				break;
4051 			}
4052 			/* Anything else is considered out-of-the-blue */
4053 			case CHUNK_ERROR:
4054 			case CHUNK_ABORT:
4055 			case CHUNK_COOKIE_ACK:
4056 			case CHUNK_SHUTDOWN_COMPLETE:
4057 				BUMP_LOCAL(sctp->sctp_ibchunks);
4058 				goto done;
4059 			default:
4060 				BUMP_LOCAL(sctp->sctp_ibchunks);
4061 				sctp_send_abort(sctp, sctph->sh_verf, 0, NULL,
4062 				    0, mp, 0, B_TRUE, ira);
4063 				goto done;
4064 			}
4065 			break;
4066 
4067 		case SCTPS_COOKIE_WAIT:
4068 			switch (ch->sch_id) {
4069 			case CHUNK_INIT_ACK:
4070 				sctp_stop_faddr_timers(sctp);
4071 				sctp_faddr_alive(sctp, sctp->sctp_current);
4072 				sctp_send_cookie_echo(sctp, ch, mp, ira);
4073 				BUMP_LOCAL(sctp->sctp_ibchunks);
4074 				break;
4075 			case CHUNK_ABORT:
4076 				sctp_process_abort(sctp, ch, ECONNREFUSED);
4077 				goto done;
4078 			case CHUNK_INIT:
4079 				sctp_send_initack(sctp, sctph, ch, mp, ira);
4080 				break;
4081 			case CHUNK_COOKIE:
4082 				cr = ira->ira_cred;
4083 				cpid = ira->ira_cpid;
4084 
4085 				if (sctp_process_cookie(sctp, ch, mp, &iack,
4086 				    sctph, &recv_adaptation, NULL, ira) == -1) {
4087 					SCTPS_BUMP_MIB(sctps,
4088 					    sctpInInvalidCookie);
4089 					break;
4090 				}
4091 				sctp_send_cookie_ack(sctp);
4092 				sctp_stop_faddr_timers(sctp);
4093 				if (!SCTP_IS_DETACHED(sctp)) {
4094 					sctp->sctp_ulp_connected(
4095 					    sctp->sctp_ulpd, 0, cr, cpid);
4096 					sctp_set_ulp_prop(sctp);
4097 
4098 				}
4099 				SCTP_ASSOC_EST(sctps, sctp);
4100 				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4101 				if (sctp->sctp_cookie_mp) {
4102 					freemsg(sctp->sctp_cookie_mp);
4103 					sctp->sctp_cookie_mp = NULL;
4104 				}
4105 
4106 				/* Validate the peer addresses. */
4107 				sctp->sctp_active = now;
4108 				sctp_validate_peer(sctp);
4109 
4110 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4111 				if (recv_adaptation) {
4112 					sctp->sctp_recv_adaptation = 1;
4113 					sctp_adaptation_event(sctp);
4114 				}
4115 				/* Try sending queued data, or ASCONFs */
4116 				trysend = 1;
4117 				break;
4118 			default:
4119 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4120 					goto nomorechunks;
4121 				} /* else skip and continue processing */
4122 				break;
4123 			}
4124 			break;
4125 
4126 		case SCTPS_COOKIE_ECHOED:
4127 			switch (ch->sch_id) {
4128 			case CHUNK_COOKIE_ACK:
4129 				cr = ira->ira_cred;
4130 				cpid = ira->ira_cpid;
4131 
4132 				if (!SCTP_IS_DETACHED(sctp)) {
4133 					sctp->sctp_ulp_connected(
4134 					    sctp->sctp_ulpd, 0, cr, cpid);
4135 					sctp_set_ulp_prop(sctp);
4136 				}
4137 				if (sctp->sctp_unacked == 0)
4138 					sctp_stop_faddr_timers(sctp);
4139 				SCTP_ASSOC_EST(sctps, sctp);
4140 				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4141 				BUMP_LOCAL(sctp->sctp_ibchunks);
4142 				if (sctp->sctp_cookie_mp) {
4143 					freemsg(sctp->sctp_cookie_mp);
4144 					sctp->sctp_cookie_mp = NULL;
4145 				}
4146 				sctp_faddr_alive(sctp, fp);
4147 				/* Validate the peer addresses. */
4148 				sctp->sctp_active = now;
4149 				sctp_validate_peer(sctp);
4150 
4151 				/* Try sending queued data, or ASCONFs */
4152 				trysend = 1;
4153 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4154 				sctp_adaptation_event(sctp);
4155 				break;
4156 			case CHUNK_ABORT:
4157 				sctp_process_abort(sctp, ch, ECONNREFUSED);
4158 				goto done;
4159 			case CHUNK_COOKIE:
4160 				cr = ira->ira_cred;
4161 				cpid = ira->ira_cpid;
4162 
4163 				if (sctp_process_cookie(sctp, ch, mp, &iack,
4164 				    sctph, &recv_adaptation, NULL, ira) == -1) {
4165 					SCTPS_BUMP_MIB(sctps,
4166 					    sctpInInvalidCookie);
4167 					break;
4168 				}
4169 				sctp_send_cookie_ack(sctp);
4170 
4171 				if (!SCTP_IS_DETACHED(sctp)) {
4172 					sctp->sctp_ulp_connected(
4173 					    sctp->sctp_ulpd, 0, cr, cpid);
4174 					sctp_set_ulp_prop(sctp);
4175 
4176 				}
4177 				if (sctp->sctp_unacked == 0)
4178 					sctp_stop_faddr_timers(sctp);
4179 				SCTP_ASSOC_EST(sctps, sctp);
4180 				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4181 				if (sctp->sctp_cookie_mp) {
4182 					freemsg(sctp->sctp_cookie_mp);
4183 					sctp->sctp_cookie_mp = NULL;
4184 				}
4185 				/* Validate the peer addresses. */
4186 				sctp->sctp_active = now;
4187 				sctp_validate_peer(sctp);
4188 
4189 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4190 				if (recv_adaptation) {
4191 					sctp->sctp_recv_adaptation = 1;
4192 					sctp_adaptation_event(sctp);
4193 				}
4194 				/* Try sending queued data, or ASCONFs */
4195 				trysend = 1;
4196 				break;
4197 			case CHUNK_INIT:
4198 				sctp_send_initack(sctp, sctph, ch, mp, ira);
4199 				break;
4200 			case CHUNK_ERROR: {
4201 				sctp_parm_hdr_t *p;
4202 
4203 				BUMP_LOCAL(sctp->sctp_ibchunks);
4204 				/* check for a stale cookie */
4205 				if (ntohs(ch->sch_len) >=
4206 				    (sizeof (*p) + sizeof (*ch)) +
4207 				    sizeof (uint32_t)) {
4208 
4209 					p = (sctp_parm_hdr_t *)(ch + 1);
4210 					if (p->sph_type ==
4211 					    htons(SCTP_ERR_STALE_COOKIE)) {
4212 						SCTPS_BUMP_MIB(sctps,
4213 						    sctpAborted);
4214 						sctp_error_event(sctp,
4215 						    ch, B_FALSE);
4216 						sctp_assoc_event(sctp,
4217 						    SCTP_COMM_LOST, 0, NULL);
4218 						sctp_clean_death(sctp,
4219 						    ECONNREFUSED);
4220 						goto done;
4221 					}
4222 				}
4223 				break;
4224 			}
4225 			case CHUNK_HEARTBEAT:
4226 				if (!hb_already) {
4227 					sctp_return_heartbeat(sctp, ch, mp);
4228 					hb_already = B_TRUE;
4229 				}
4230 				break;
4231 			default:
4232 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4233 					goto nomorechunks;
4234 				} /* else skip and continue processing */
4235 			} /* switch (ch->sch_id) */
4236 			break;
4237 
4238 		case SCTPS_SHUTDOWN_ACK_SENT:
4239 			switch (ch->sch_id) {
4240 			case CHUNK_ABORT:
4241 				/* Pass gathered wisdom to IP for keeping */
4242 				sctp_update_dce(sctp);
4243 				sctp_process_abort(sctp, ch, 0);
4244 				goto done;
4245 			case CHUNK_SHUTDOWN_COMPLETE:
4246 				BUMP_LOCAL(sctp->sctp_ibchunks);
4247 				SCTPS_BUMP_MIB(sctps, sctpShutdowns);
4248 				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
4249 				    NULL);
4250 
4251 				/* Pass gathered wisdom to IP for keeping */
4252 				sctp_update_dce(sctp);
4253 				sctp_clean_death(sctp, 0);
4254 				goto done;
4255 			case CHUNK_SHUTDOWN_ACK:
4256 				sctp_shutdown_complete(sctp);
4257 				BUMP_LOCAL(sctp->sctp_ibchunks);
4258 				SCTPS_BUMP_MIB(sctps, sctpShutdowns);
4259 				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
4260 				    NULL);
4261 				sctp_clean_death(sctp, 0);
4262 				goto done;
4263 			case CHUNK_COOKIE:
4264 				(void) sctp_shutdown_received(sctp, NULL,
4265 				    B_TRUE, B_FALSE, fp);
4266 				BUMP_LOCAL(sctp->sctp_ibchunks);
4267 				break;
4268 			case CHUNK_HEARTBEAT:
4269 				if (!hb_already) {
4270 					sctp_return_heartbeat(sctp, ch, mp);
4271 					hb_already = B_TRUE;
4272 				}
4273 				break;
4274 			default:
4275 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4276 					goto nomorechunks;
4277 				} /* else skip and continue processing */
4278 				break;
4279 			}
4280 			break;
4281 
4282 		case SCTPS_SHUTDOWN_RECEIVED:
4283 			switch (ch->sch_id) {
4284 			case CHUNK_SHUTDOWN:
4285 				trysend = sctp_shutdown_received(sctp, ch,
4286 				    B_FALSE, B_FALSE, fp);
4287 				/*
4288 				 * shutdown_ack_needed may have been set as
4289 				 * mentioned in the case CHUNK_SACK below.
4290 				 * If sctp_shutdown_received() above found
4291 				 * the xmit queue empty the SHUTDOWN ACK chunk
4292 				 * has already been sent (or scheduled to be
4293 				 * sent on the timer) and the SCTP state
4294 				 * changed, so reset shutdown_ack_needed.
4295 				 */
4296 				if (shutdown_ack_needed && (sctp->sctp_state ==
4297 				    SCTPS_SHUTDOWN_ACK_SENT))
4298 					shutdown_ack_needed = B_FALSE;
4299 				break;
4300 			case CHUNK_SACK:
4301 				trysend = sctp_got_sack(sctp, ch);
4302 				if (trysend < 0) {
4303 					sctp_send_abort(sctp, sctph->sh_verf,
4304 					    0, NULL, 0, mp, 0, B_FALSE, ira);
4305 					sctp_assoc_event(sctp,
4306 					    SCTP_COMM_LOST, 0, NULL);
4307 					sctp_clean_death(sctp,
4308 					    ECONNABORTED);
4309 					goto done;
4310 				}
4311 
4312 				/*
4313 				 * All data acknowledgement after a shutdown
4314 				 * should be done with SHUTDOWN chunk.
4315 				 * However some peer SCTP do not conform with
4316 				 * this and can unexpectedly send a SACK chunk.
4317 				 * If all data are acknowledged, set
4318 				 * shutdown_ack_needed here indicating that
4319 				 * SHUTDOWN ACK needs to be sent later by
4320 				 * sctp_send_shutdown_ack().
4321 				 */
4322 				if ((sctp->sctp_xmit_head == NULL) &&
4323 				    (sctp->sctp_xmit_unsent == NULL))
4324 					shutdown_ack_needed = B_TRUE;
4325 				break;
4326 			case CHUNK_ABORT:
4327 				sctp_process_abort(sctp, ch, ECONNRESET);
4328 				goto done;
4329 			case CHUNK_HEARTBEAT:
4330 				if (!hb_already) {
4331 					sctp_return_heartbeat(sctp, ch, mp);
4332 					hb_already = B_TRUE;
4333 				}
4334 				break;
4335 			default:
4336 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4337 					goto nomorechunks;
4338 				} /* else skip and continue processing */
4339 				break;
4340 			}
4341 			break;
4342 
4343 		default:
4344 			/*
4345 			 * The only remaining states are SCTPS_IDLE and
4346 			 * SCTPS_BOUND, and we should not be getting here
4347 			 * for these.
4348 			 */
4349 			ASSERT(0);
4350 		} /* switch (sctp->sctp_state) */
4351 
4352 		ch = sctp_next_chunk(ch, &mlen);
4353 		if (ch != NULL && !sctp_check_input(sctp, ch, mlen, 0))
4354 			goto done;
4355 	} while (ch != NULL);
4356 
4357 	/* Finished processing all chunks in packet */
4358 
4359 nomorechunks:
4360 
4361 	if (shutdown_ack_needed)
4362 		sctp_send_shutdown_ack(sctp, fp, B_FALSE);
4363 
4364 	/* SACK if necessary */
4365 	if (gotdata) {
4366 		boolean_t sack_sent;
4367 
4368 		(sctp->sctp_sack_toggle)++;
4369 		sack_sent = sctp_sack(sctp, dups);
4370 		dups = NULL;
4371 
4372 		/* If a SACK is sent, no need to restart the timer. */
4373 		if (!sack_sent && !sctp->sctp_ack_timer_running) {
4374 			sctp->sctp_ack_timer_running = B_TRUE;
4375 			sctp_timer(sctp, sctp->sctp_ack_mp,
4376 			    MSEC_TO_TICK(sctps->sctps_deferred_ack_interval));
4377 		}
4378 	}
4379 
4380 	if (trysend) {
4381 		sctp_output(sctp, UINT_MAX);
4382 		if (sctp->sctp_cxmit_list != NULL)
4383 			sctp_wput_asconf(sctp, NULL);
4384 	}
4385 	/*
4386 	 * If there is unsent data, make sure a timer is running, check
4387 	 * timer_mp, if sctp_closei_local() ran the timers may be free.
4388 	 */
4389 	if (sctp->sctp_unsent > 0 && !sctp->sctp_current->sf_timer_running &&
4390 	    sctp->sctp_current->sf_timer_mp != NULL) {
4391 		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
4392 		    sctp->sctp_current->sf_rto);
4393 	}
4394 
4395 done:
4396 	if (dups != NULL)
4397 		freeb(dups);
4398 	freemsg(mp);
4399 
4400 	if (sctp->sctp_err_chunks != NULL)
4401 		sctp_process_err(sctp);
4402 
4403 	if (wake_eager) {
4404 		/*
4405 		 * sctp points to newly created control block, need to
4406 		 * release it before exiting.
4407 		 */
4408 		WAKE_SCTP(sctp);
4409 	}
4410 }
4411 
4412 /*
4413  * Some amount of data got removed from ULP's receive queue and we can
4414  * push messages up if we are flow controlled before.  Reset the receive
4415  * window to full capacity (conn_rcvbuf) and check if we should send a
4416  * window update.
4417  */
4418 void
4419 sctp_recvd(sctp_t *sctp, int len)
4420 {
4421 	sctp_stack_t	*sctps = sctp->sctp_sctps;
4422 	conn_t		*connp = sctp->sctp_connp;
4423 	boolean_t	send_sack = B_FALSE;
4424 
4425 	ASSERT(sctp != NULL);
4426 	RUN_SCTP(sctp);
4427 
4428 	sctp->sctp_flowctrld = B_FALSE;
4429 	/* This is the amount of data queued in ULP. */
4430 	sctp->sctp_ulp_rxqueued = connp->conn_rcvbuf - len;
4431 
4432 	if (connp->conn_rcvbuf - sctp->sctp_arwnd >= sctp->sctp_mss)
4433 		send_sack = B_TRUE;
4434 	sctp->sctp_rwnd = connp->conn_rcvbuf;
4435 
4436 	if (sctp->sctp_state >= SCTPS_ESTABLISHED && send_sack) {
4437 		sctp->sctp_force_sack = 1;
4438 		SCTPS_BUMP_MIB(sctps, sctpOutWinUpdate);
4439 		(void) sctp_sack(sctp, NULL);
4440 	}
4441 	WAKE_SCTP(sctp);
4442 }
4443