xref: /titanic_50/usr/src/uts/common/inet/sctp/sctp_input.c (revision 16e76cdd6e3cfaac7d91c3b0644ee1bc6cf52347)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #include <sys/kmem.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/socket.h>
34 #include <sys/strsun.h>
35 #include <sys/strsubr.h>
36 
37 #include <netinet/in.h>
38 #include <netinet/ip6.h>
39 #include <netinet/tcp_seq.h>
40 #include <netinet/sctp.h>
41 
42 #include <inet/common.h>
43 #include <inet/ip.h>
44 #include <inet/ip_if.h>
45 #include <inet/ip6.h>
46 #include <inet/mib2.h>
47 #include <inet/ipclassifier.h>
48 #include <inet/ipp_common.h>
49 #include <inet/ipsec_impl.h>
50 #include <inet/sctp_ip.h>
51 
52 #include "sctp_impl.h"
53 #include "sctp_asconf.h"
54 #include "sctp_addr.h"
55 
/*
 * kmem cache backing every sctp_set_t node.  It is created in
 * sctp_sets_init() and destroyed in sctp_sets_fini(); sctp_ack_add()
 * allocates from it and sctp_free_set()/sctp_ack_add() return nodes to it.
 */
56 static struct kmem_cache *sctp_kmem_set_cache;
57 
58 /*
59  * PR-SCTP comments.
60  *
61  * When we get a valid Forward TSN chunk, we check the fragment list for this
62  * SSN and preceding SSNs and free them. Further, if this Forward TSN causes
63  * the next expected SSN to be present in the stream queue, we deliver any
64  * such stranded messages upstream. We also update the SACK info. appropriately.
65  * When checking for advancing the cumulative ack (in sctp_cumack()) we must
66  * check for abandoned chunks and messages. While traversing the transmit
67  * list, if we come across an abandoned chunk, we can skip the message (i.e.
68  * take it out of the (re)transmit list) since this message, and hence this
69  * chunk, has been marked abandoned by sctp_rexmit(). If we come across an
70  * unsent chunk for a message that is now abandoned, we need to check if a
71  * Forward TSN needs to be sent; this could be a case where we deferred sending
72  * a Forward TSN in sctp_get_msg_to_send(). Further, after processing a
73  * SACK we check if the Advanced peer ack point can be moved ahead, i.e.
74  * if we can send a Forward TSN via sctp_check_abandoned_data().
75  */
76 void
77 sctp_free_set(sctp_set_t *s)
78 {
79 	sctp_set_t *p;
80 
81 	while (s) {
82 		p = s->next;
83 		kmem_cache_free(sctp_kmem_set_cache, s);
84 		s = p;
85 	}
86 }
87 
88 static void
89 sctp_ack_add(sctp_set_t **head, uint32_t tsn, int *num)
90 {
91 	sctp_set_t *p, *t;
92 
93 	if (head == NULL || num == NULL)
94 		return;
95 
96 	ASSERT(*num >= 0);
97 	ASSERT((*num == 0 && *head == NULL) || (*num > 0 && *head != NULL));
98 
99 	if (*head == NULL) {
100 		*head = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
101 		if (*head == NULL)
102 			return;
103 		(*head)->prev = (*head)->next = NULL;
104 		(*head)->begin = tsn;
105 		(*head)->end = tsn;
106 		*num = 1;
107 		return;
108 	}
109 
110 	ASSERT((*head)->prev == NULL);
111 
112 	/*
113 	 * Handle this special case here so we don't have to check
114 	 * for it each time in the loop.
115 	 */
116 	if (SEQ_LT(tsn + 1, (*head)->begin)) {
117 		/* add a new set, and move the head pointer */
118 		t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
119 		if (t == NULL)
120 			return;
121 		t->next = *head;
122 		t->prev = NULL;
123 		(*head)->prev = t;
124 		t->begin = tsn;
125 		t->end = tsn;
126 		(*num)++;
127 		*head = t;
128 		return;
129 	}
130 
131 	/*
132 	 * We need to handle the following cases, where p points to
133 	 * the current set (as we walk through the loop):
134 	 *
135 	 * 1. tsn is entirely less than p; create a new set before p.
136 	 * 2. tsn borders p from less; coalesce p with tsn.
137 	 * 3. tsn is withing p; do nothing.
138 	 * 4. tsn borders p from greater; coalesce p with tsn.
139 	 * 4a. p may now border p->next from less; if so, coalesce those
140 	 *    two sets.
141 	 * 5. tsn is entirely greater then all sets; add a new set at
142 	 *    the end.
143 	 */
144 	for (p = *head; ; p = p->next) {
145 		if (SEQ_LT(tsn + 1, p->begin)) {
146 			/* 1: add a new set before p. */
147 			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
148 			if (t == NULL)
149 				return;
150 			t->next = p;
151 			t->prev = NULL;
152 			t->begin = tsn;
153 			t->end = tsn;
154 			if (p->prev) {
155 				t->prev = p->prev;
156 				p->prev->next = t;
157 			}
158 			p->prev = t;
159 			(*num)++;
160 			return;
161 		}
162 
163 		if ((tsn + 1) == p->begin) {
164 			/* 2: adjust p->begin */
165 			p->begin = tsn;
166 			return;
167 		}
168 
169 		if (SEQ_GEQ(tsn, p->begin) && SEQ_LEQ(tsn, p->end)) {
170 			/* 3; do nothing */
171 			return;
172 		}
173 
174 		if ((p->end + 1) == tsn) {
175 			/* 4; adjust p->end */
176 			p->end = tsn;
177 
178 			if (p->next != NULL && (tsn + 1) == p->next->begin) {
179 				/* 4a: coalesce p and p->next */
180 				t = p->next;
181 				p->end = t->end;
182 				p->next = t->next;
183 				if (t->next != NULL)
184 					t->next->prev = p;
185 				kmem_cache_free(sctp_kmem_set_cache, t);
186 				(*num)--;
187 			}
188 			return;
189 		}
190 
191 		if (p->next == NULL) {
192 			/* 5: add new set at the end */
193 			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
194 			if (t == NULL)
195 				return;
196 			t->next = NULL;
197 			t->prev = p;
198 			t->begin = tsn;
199 			t->end = tsn;
200 			p->next = t;
201 			(*num)++;
202 			return;
203 		}
204 
205 		if (SEQ_GT(tsn, p->end + 1))
206 			continue;
207 	}
208 }
209 
210 static void
211 sctp_ack_rem(sctp_set_t **head, uint32_t end, int *num)
212 {
213 	sctp_set_t *p, *t;
214 
215 	if (head == NULL || *head == NULL || num == NULL)
216 		return;
217 
218 	/* Nothing to remove */
219 	if (SEQ_LT(end, (*head)->begin))
220 		return;
221 
222 	/* Find out where to start removing sets */
223 	for (p = *head; p->next; p = p->next) {
224 		if (SEQ_LEQ(end, p->end))
225 			break;
226 	}
227 
228 	if (SEQ_LT(end, p->end) && SEQ_GEQ(end, p->begin)) {
229 		/* adjust p */
230 		p->begin = end + 1;
231 		/* all done */
232 		if (p == *head)
233 			return;
234 	} else if (SEQ_GEQ(end, p->end)) {
235 		/* remove this set too */
236 		p = p->next;
237 	}
238 
239 	/* unlink everything before this set */
240 	t = *head;
241 	*head = p;
242 	if (p != NULL && p->prev != NULL) {
243 		p->prev->next = NULL;
244 		p->prev = NULL;
245 	}
246 
247 	sctp_free_set(t);
248 
249 	/* recount the number of sets */
250 	*num = 0;
251 
252 	for (p = *head; p != NULL; p = p->next)
253 		(*num)++;
254 }
255 
256 void
257 sctp_sets_init()
258 {
259 	sctp_kmem_set_cache = kmem_cache_create("sctp_set_cache",
260 	    sizeof (sctp_set_t), 0, NULL, NULL, NULL, NULL,
261 	    NULL, 0);
262 }
263 
264 void
265 sctp_sets_fini()
266 {
267 	kmem_cache_destroy(sctp_kmem_set_cache);
268 }
269 
270 sctp_chunk_hdr_t *
271 sctp_first_chunk(uchar_t *rptr, ssize_t remaining)
272 {
273 	sctp_chunk_hdr_t *ch;
274 	uint16_t ch_len;
275 
276 	if (remaining < sizeof (*ch)) {
277 		return (NULL);
278 	}
279 
280 	ch = (sctp_chunk_hdr_t *)rptr;
281 	ch_len = ntohs(ch->sch_len);
282 
283 	if (ch_len < sizeof (*ch) || remaining < ch_len) {
284 		return (NULL);
285 	}
286 
287 	return (ch);
288 }
289 
290 sctp_chunk_hdr_t *
291 sctp_next_chunk(sctp_chunk_hdr_t *ch, ssize_t *remaining)
292 {
293 	int pad;
294 	uint16_t ch_len;
295 
296 	if (!ch) {
297 		return (NULL);
298 	}
299 
300 	ch_len = ntohs(ch->sch_len);
301 
302 	if ((pad = ch_len & (SCTP_ALIGN - 1)) != 0) {
303 		pad = SCTP_ALIGN - pad;
304 	}
305 
306 	*remaining -= (ch_len + pad);
307 	ch = (sctp_chunk_hdr_t *)((char *)ch + ch_len + pad);
308 
309 	return (sctp_first_chunk((uchar_t *)ch, *remaining));
310 }
311 
312 /*
313  * Attach ancillary data to a received SCTP segments.
314  * If the source address (fp) is not the primary, send up a
315  * unitdata_ind so recvfrom() can populate the msg_name field.
316  * If ancillary data is also requested, we append it to the
317  * unitdata_req. Otherwise, we just send up an optdata_ind.
318  */
319 static int
320 sctp_input_add_ancillary(sctp_t *sctp, mblk_t **mp, sctp_data_hdr_t *dcp,
321     sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
322 {
323 	struct T_unitdata_ind	*tudi;
324 	int			optlen;
325 	int			hdrlen;
326 	uchar_t			*optptr;
327 	struct cmsghdr		*cmsg;
328 	mblk_t			*mp1;
329 	struct sockaddr_in6	sin_buf[1];
330 	struct sockaddr_in6	*sin6;
331 	struct sockaddr_in	*sin4;
332 	crb_t			 addflag;	/* Which pieces to add */
333 	conn_t			*connp = sctp->sctp_connp;
334 
335 	sin4 = NULL;
336 	sin6 = NULL;
337 
338 	optlen = hdrlen = 0;
339 	addflag.crb_all = 0;
340 
341 	/* Figure out address size */
342 	if (connp->conn_family == AF_INET) {
343 		sin4 = (struct sockaddr_in *)sin_buf;
344 		sin4->sin_family = AF_INET;
345 		sin4->sin_port = connp->conn_fport;
346 		IN6_V4MAPPED_TO_IPADDR(&fp->faddr, sin4->sin_addr.s_addr);
347 		hdrlen = sizeof (*tudi) + sizeof (*sin4);
348 	} else {
349 		sin6 = sin_buf;
350 		sin6->sin6_family = AF_INET6;
351 		sin6->sin6_port = connp->conn_fport;
352 		sin6->sin6_addr = fp->faddr;
353 		hdrlen = sizeof (*tudi) + sizeof (*sin6);
354 	}
355 	/* If app asked to receive send / recv info */
356 	if (sctp->sctp_recvsndrcvinfo)
357 		optlen += sizeof (*cmsg) + sizeof (struct sctp_sndrcvinfo);
358 
359 	if (connp->conn_recv_ancillary.crb_all == 0)
360 		goto noancillary;
361 
362 	if (connp->conn_recv_ancillary.crb_ip_recvpktinfo &&
363 	    ira->ira_ruifindex != sctp->sctp_recvifindex) {
364 		optlen += sizeof (*cmsg) + sizeof (struct in6_pktinfo);
365 		if (hdrlen == 0)
366 			hdrlen = sizeof (struct T_unitdata_ind);
367 		addflag.crb_ip_recvpktinfo = 1;
368 	}
369 	/* If app asked for hoplimit and it has changed ... */
370 	if (connp->conn_recv_ancillary.crb_ipv6_recvhoplimit &&
371 	    ipp->ipp_hoplimit != sctp->sctp_recvhops) {
372 		optlen += sizeof (*cmsg) + sizeof (uint_t);
373 		if (hdrlen == 0)
374 			hdrlen = sizeof (struct T_unitdata_ind);
375 		addflag.crb_ipv6_recvhoplimit = 1;
376 	}
377 	/* If app asked for tclass and it has changed ... */
378 	if (connp->conn_recv_ancillary.crb_ipv6_recvtclass &&
379 	    ipp->ipp_tclass != sctp->sctp_recvtclass) {
380 		optlen += sizeof (struct T_opthdr) + sizeof (uint_t);
381 		if (hdrlen == 0)
382 			hdrlen = sizeof (struct T_unitdata_ind);
383 		addflag.crb_ipv6_recvtclass = 1;
384 	}
385 	/* If app asked for hopbyhop headers and it has changed ... */
386 	if (connp->conn_recv_ancillary.crb_ipv6_recvhopopts &&
387 	    ip_cmpbuf(sctp->sctp_hopopts, sctp->sctp_hopoptslen,
388 	    (ipp->ipp_fields & IPPF_HOPOPTS),
389 	    ipp->ipp_hopopts, ipp->ipp_hopoptslen)) {
390 		optlen += sizeof (*cmsg) + ipp->ipp_hopoptslen -
391 		    sctp->sctp_v6label_len;
392 		if (hdrlen == 0)
393 			hdrlen = sizeof (struct T_unitdata_ind);
394 		addflag.crb_ipv6_recvhopopts = 1;
395 		if (!ip_allocbuf((void **)&sctp->sctp_hopopts,
396 		    &sctp->sctp_hopoptslen,
397 		    (ipp->ipp_fields & IPPF_HOPOPTS),
398 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen))
399 			return (-1);
400 	}
401 	/* If app asked for dst headers before routing headers ... */
402 	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts &&
403 	    ip_cmpbuf(sctp->sctp_rthdrdstopts, sctp->sctp_rthdrdstoptslen,
404 	    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
405 	    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen)) {
406 		optlen += sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
407 		if (hdrlen == 0)
408 			hdrlen = sizeof (struct T_unitdata_ind);
409 		addflag.crb_ipv6_recvrthdrdstopts = 1;
410 		if (!ip_allocbuf((void **)&sctp->sctp_rthdrdstopts,
411 		    &sctp->sctp_rthdrdstoptslen,
412 		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
413 		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen))
414 			return (-1);
415 	}
416 	/* If app asked for routing headers and it has changed ... */
417 	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdr &&
418 	    ip_cmpbuf(sctp->sctp_rthdr, sctp->sctp_rthdrlen,
419 	    (ipp->ipp_fields & IPPF_RTHDR),
420 	    ipp->ipp_rthdr, ipp->ipp_rthdrlen)) {
421 		optlen += sizeof (*cmsg) + ipp->ipp_rthdrlen;
422 		if (hdrlen == 0)
423 			hdrlen = sizeof (struct T_unitdata_ind);
424 		addflag.crb_ipv6_recvrthdr = 1;
425 		if (!ip_allocbuf((void **)&sctp->sctp_rthdr,
426 		    &sctp->sctp_rthdrlen,
427 		    (ipp->ipp_fields & IPPF_RTHDR),
428 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen))
429 			return (-1);
430 	}
431 	/* If app asked for dest headers and it has changed ... */
432 	if (connp->conn_recv_ancillary.crb_ipv6_recvdstopts &&
433 	    ip_cmpbuf(sctp->sctp_dstopts, sctp->sctp_dstoptslen,
434 	    (ipp->ipp_fields & IPPF_DSTOPTS),
435 	    ipp->ipp_dstopts, ipp->ipp_dstoptslen)) {
436 		optlen += sizeof (*cmsg) + ipp->ipp_dstoptslen;
437 		if (hdrlen == 0)
438 			hdrlen = sizeof (struct T_unitdata_ind);
439 		addflag.crb_ipv6_recvdstopts = 1;
440 		if (!ip_allocbuf((void **)&sctp->sctp_dstopts,
441 		    &sctp->sctp_dstoptslen,
442 		    (ipp->ipp_fields & IPPF_DSTOPTS),
443 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen))
444 			return (-1);
445 	}
446 noancillary:
447 	/* Nothing to add */
448 	if (hdrlen == 0)
449 		return (-1);
450 
451 	mp1 = allocb(hdrlen + optlen + sizeof (void *), BPRI_MED);
452 	if (mp1 == NULL)
453 		return (-1);
454 	mp1->b_cont = *mp;
455 	*mp = mp1;
456 	mp1->b_rptr += sizeof (void *);  /* pointer worth of padding */
457 	mp1->b_wptr = mp1->b_rptr + hdrlen + optlen;
458 	DB_TYPE(mp1) = M_PROTO;
459 	tudi = (struct T_unitdata_ind *)mp1->b_rptr;
460 	tudi->PRIM_type = T_UNITDATA_IND;
461 	tudi->SRC_length = sin4 ? sizeof (*sin4) : sizeof (*sin6);
462 	tudi->SRC_offset = sizeof (*tudi);
463 	tudi->OPT_offset = sizeof (*tudi) + tudi->SRC_length;
464 	tudi->OPT_length = optlen;
465 	if (sin4) {
466 		bcopy(sin4, tudi + 1, sizeof (*sin4));
467 	} else {
468 		bcopy(sin6, tudi + 1, sizeof (*sin6));
469 	}
470 	optptr = (uchar_t *)tudi + tudi->OPT_offset;
471 
472 	if (sctp->sctp_recvsndrcvinfo) {
473 		/* XXX need backout method if memory allocation fails. */
474 		struct sctp_sndrcvinfo *sri;
475 
476 		cmsg = (struct cmsghdr *)optptr;
477 		cmsg->cmsg_level = IPPROTO_SCTP;
478 		cmsg->cmsg_type = SCTP_SNDRCV;
479 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*sri);
480 		optptr += sizeof (*cmsg);
481 
482 		sri = (struct sctp_sndrcvinfo *)(cmsg + 1);
483 		ASSERT(OK_32PTR(sri));
484 		sri->sinfo_stream = ntohs(dcp->sdh_sid);
485 		sri->sinfo_ssn = ntohs(dcp->sdh_ssn);
486 		if (SCTP_DATA_GET_UBIT(dcp)) {
487 			sri->sinfo_flags = MSG_UNORDERED;
488 		} else {
489 			sri->sinfo_flags = 0;
490 		}
491 		sri->sinfo_ppid = dcp->sdh_payload_id;
492 		sri->sinfo_context = 0;
493 		sri->sinfo_timetolive = 0;
494 		sri->sinfo_tsn = ntohl(dcp->sdh_tsn);
495 		sri->sinfo_cumtsn = sctp->sctp_ftsn;
496 		sri->sinfo_assoc_id = 0;
497 
498 		optptr += sizeof (*sri);
499 	}
500 
501 	/*
502 	 * If app asked for pktinfo and the index has changed ...
503 	 * Note that the local address never changes for the connection.
504 	 */
505 	if (addflag.crb_ip_recvpktinfo) {
506 		struct in6_pktinfo *pkti;
507 		uint_t ifindex;
508 
509 		ifindex = ira->ira_ruifindex;
510 		cmsg = (struct cmsghdr *)optptr;
511 		cmsg->cmsg_level = IPPROTO_IPV6;
512 		cmsg->cmsg_type = IPV6_PKTINFO;
513 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*pkti);
514 		optptr += sizeof (*cmsg);
515 
516 		pkti = (struct in6_pktinfo *)optptr;
517 		if (connp->conn_family == AF_INET6)
518 			pkti->ipi6_addr = sctp->sctp_ip6h->ip6_src;
519 		else
520 			IN6_IPADDR_TO_V4MAPPED(sctp->sctp_ipha->ipha_src,
521 			    &pkti->ipi6_addr);
522 
523 		pkti->ipi6_ifindex = ifindex;
524 		optptr += sizeof (*pkti);
525 		ASSERT(OK_32PTR(optptr));
526 		/* Save as "last" value */
527 		sctp->sctp_recvifindex = ifindex;
528 	}
529 	/* If app asked for hoplimit and it has changed ... */
530 	if (addflag.crb_ipv6_recvhoplimit) {
531 		cmsg = (struct cmsghdr *)optptr;
532 		cmsg->cmsg_level = IPPROTO_IPV6;
533 		cmsg->cmsg_type = IPV6_HOPLIMIT;
534 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
535 		optptr += sizeof (*cmsg);
536 
537 		*(uint_t *)optptr = ipp->ipp_hoplimit;
538 		optptr += sizeof (uint_t);
539 		ASSERT(OK_32PTR(optptr));
540 		/* Save as "last" value */
541 		sctp->sctp_recvhops = ipp->ipp_hoplimit;
542 	}
543 	/* If app asked for tclass and it has changed ... */
544 	if (addflag.crb_ipv6_recvtclass) {
545 		cmsg = (struct cmsghdr *)optptr;
546 		cmsg->cmsg_level = IPPROTO_IPV6;
547 		cmsg->cmsg_type = IPV6_TCLASS;
548 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
549 		optptr += sizeof (*cmsg);
550 
551 		*(uint_t *)optptr = ipp->ipp_tclass;
552 		optptr += sizeof (uint_t);
553 		ASSERT(OK_32PTR(optptr));
554 		/* Save as "last" value */
555 		sctp->sctp_recvtclass = ipp->ipp_tclass;
556 	}
557 	if (addflag.crb_ipv6_recvhopopts) {
558 		cmsg = (struct cmsghdr *)optptr;
559 		cmsg->cmsg_level = IPPROTO_IPV6;
560 		cmsg->cmsg_type = IPV6_HOPOPTS;
561 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_hopoptslen;
562 		optptr += sizeof (*cmsg);
563 
564 		bcopy(ipp->ipp_hopopts, optptr, ipp->ipp_hopoptslen);
565 		optptr += ipp->ipp_hopoptslen;
566 		ASSERT(OK_32PTR(optptr));
567 		/* Save as last value */
568 		ip_savebuf((void **)&sctp->sctp_hopopts,
569 		    &sctp->sctp_hopoptslen,
570 		    (ipp->ipp_fields & IPPF_HOPOPTS),
571 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen);
572 	}
573 	if (addflag.crb_ipv6_recvrthdrdstopts) {
574 		cmsg = (struct cmsghdr *)optptr;
575 		cmsg->cmsg_level = IPPROTO_IPV6;
576 		cmsg->cmsg_type = IPV6_RTHDRDSTOPTS;
577 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
578 		optptr += sizeof (*cmsg);
579 
580 		bcopy(ipp->ipp_rthdrdstopts, optptr, ipp->ipp_rthdrdstoptslen);
581 		optptr += ipp->ipp_rthdrdstoptslen;
582 		ASSERT(OK_32PTR(optptr));
583 		/* Save as last value */
584 		ip_savebuf((void **)&sctp->sctp_rthdrdstopts,
585 		    &sctp->sctp_rthdrdstoptslen,
586 		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
587 		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen);
588 	}
589 	if (addflag.crb_ipv6_recvrthdr) {
590 		cmsg = (struct cmsghdr *)optptr;
591 		cmsg->cmsg_level = IPPROTO_IPV6;
592 		cmsg->cmsg_type = IPV6_RTHDR;
593 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrlen;
594 		optptr += sizeof (*cmsg);
595 
596 		bcopy(ipp->ipp_rthdr, optptr, ipp->ipp_rthdrlen);
597 		optptr += ipp->ipp_rthdrlen;
598 		ASSERT(OK_32PTR(optptr));
599 		/* Save as last value */
600 		ip_savebuf((void **)&sctp->sctp_rthdr,
601 		    &sctp->sctp_rthdrlen,
602 		    (ipp->ipp_fields & IPPF_RTHDR),
603 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen);
604 	}
605 	if (addflag.crb_ipv6_recvdstopts) {
606 		cmsg = (struct cmsghdr *)optptr;
607 		cmsg->cmsg_level = IPPROTO_IPV6;
608 		cmsg->cmsg_type = IPV6_DSTOPTS;
609 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_dstoptslen;
610 		optptr += sizeof (*cmsg);
611 
612 		bcopy(ipp->ipp_dstopts, optptr, ipp->ipp_dstoptslen);
613 		optptr += ipp->ipp_dstoptslen;
614 		ASSERT(OK_32PTR(optptr));
615 		/* Save as last value */
616 		ip_savebuf((void **)&sctp->sctp_dstopts,
617 		    &sctp->sctp_dstoptslen,
618 		    (ipp->ipp_fields & IPPF_DSTOPTS),
619 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen);
620 	}
621 
622 	ASSERT(optptr == mp1->b_wptr);
623 
624 	return (0);
625 }
626 
627 void
628 sctp_free_reass(sctp_instr_t *sip)
629 {
630 	mblk_t *mp, *mpnext, *mctl;
631 #ifdef	DEBUG
632 	sctp_reass_t	*srp;
633 #endif
634 
635 	for (mp = sip->istr_reass; mp != NULL; mp = mpnext) {
636 		mpnext = mp->b_next;
637 		mp->b_next = NULL;
638 		mp->b_prev = NULL;
639 		if (DB_TYPE(mp) == M_CTL) {
640 			mctl = mp;
641 #ifdef	DEBUG
642 			srp = (sctp_reass_t *)DB_BASE(mctl);
643 			/* Partial delivery can leave empty srp */
644 			ASSERT(mp->b_cont != NULL || srp->got == 0);
645 #endif
646 			mp = mp->b_cont;
647 			mctl->b_cont = NULL;
648 			freeb(mctl);
649 		}
650 		freemsg(mp);
651 	}
652 	sip->istr_reass = NULL;
653 }
654 
655 /*
656  * If the series of data fragments of which dmp is a part is successfully
657  * reassembled, the first mblk in the series is returned. dc is adjusted
658  * to point at the data chunk in the lead mblk, and b_rptr also points to
659  * the data chunk; the following mblk's b_rptr's point at the actual payload.
660  *
661  * If the series is not yet reassembled, NULL is returned. dc is not changed.
662  * XXX should probably move this up into the state machine.
663  */
664 
665 /* Fragment list for un-ordered messages. Partial delivery is not supported */
666 static mblk_t *
667 sctp_uodata_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc)
668 {
669 	mblk_t		*hmp;
670 	mblk_t		*begin = NULL;
671 	mblk_t		*end = NULL;
672 	sctp_data_hdr_t	*qdc;
673 	uint32_t	ntsn;
674 	uint32_t	tsn = ntohl((*dc)->sdh_tsn);
675 #ifdef	DEBUG
676 	mblk_t		*mp1;
677 #endif
678 
679 	/* First frag. */
680 	if (sctp->sctp_uo_frags == NULL) {
681 		sctp->sctp_uo_frags = dmp;
682 		return (NULL);
683 	}
684 	hmp = sctp->sctp_uo_frags;
685 	/*
686 	 * Insert the segment according to the TSN, fragmented unordered
687 	 * chunks are sequenced by TSN.
688 	 */
689 	while (hmp != NULL) {
690 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
691 		ntsn = ntohl(qdc->sdh_tsn);
692 		if (SEQ_GT(ntsn, tsn)) {
693 			if (hmp->b_prev == NULL) {
694 				dmp->b_next = hmp;
695 				hmp->b_prev = dmp;
696 				sctp->sctp_uo_frags = dmp;
697 			} else {
698 				dmp->b_next = hmp;
699 				dmp->b_prev = hmp->b_prev;
700 				hmp->b_prev->b_next = dmp;
701 				hmp->b_prev = dmp;
702 			}
703 			break;
704 		}
705 		if (hmp->b_next == NULL) {
706 			hmp->b_next = dmp;
707 			dmp->b_prev = hmp;
708 			break;
709 		}
710 		hmp = hmp->b_next;
711 	}
712 	/* check if we completed a msg */
713 	if (SCTP_DATA_GET_BBIT(*dc)) {
714 		begin = dmp;
715 	} else if (SCTP_DATA_GET_EBIT(*dc)) {
716 		end = dmp;
717 	}
718 	/*
719 	 * We walk consecutive TSNs backwards till we get a seg. with
720 	 * the B bit
721 	 */
722 	if (begin == NULL) {
723 		for (hmp = dmp->b_prev; hmp != NULL; hmp = hmp->b_prev) {
724 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
725 			ntsn = ntohl(qdc->sdh_tsn);
726 			if ((int32_t)(tsn - ntsn) > 1) {
727 				return (NULL);
728 			}
729 			if (SCTP_DATA_GET_BBIT(qdc)) {
730 				begin = hmp;
731 				break;
732 			}
733 			tsn = ntsn;
734 		}
735 	}
736 	tsn = ntohl((*dc)->sdh_tsn);
737 	/*
738 	 * We walk consecutive TSNs till we get a seg. with the E bit
739 	 */
740 	if (end == NULL) {
741 		for (hmp = dmp->b_next; hmp != NULL; hmp = hmp->b_next) {
742 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
743 			ntsn = ntohl(qdc->sdh_tsn);
744 			if ((int32_t)(ntsn - tsn) > 1) {
745 				return (NULL);
746 			}
747 			if (SCTP_DATA_GET_EBIT(qdc)) {
748 				end = hmp;
749 				break;
750 			}
751 			tsn = ntsn;
752 		}
753 	}
754 	if (begin == NULL || end == NULL) {
755 		return (NULL);
756 	}
757 	/* Got one!, Remove the msg from the list */
758 	if (sctp->sctp_uo_frags == begin) {
759 		ASSERT(begin->b_prev == NULL);
760 		sctp->sctp_uo_frags = end->b_next;
761 		if (end->b_next != NULL)
762 			end->b_next->b_prev = NULL;
763 	} else {
764 		begin->b_prev->b_next = end->b_next;
765 		if (end->b_next != NULL)
766 			end->b_next->b_prev = begin->b_prev;
767 	}
768 	begin->b_prev = NULL;
769 	end->b_next = NULL;
770 
771 	/*
772 	 * Null out b_next and b_prev and chain using b_cont.
773 	 */
774 	dmp = end = begin;
775 	hmp = begin->b_next;
776 	*dc = (sctp_data_hdr_t *)begin->b_rptr;
777 	begin->b_next = NULL;
778 	while (hmp != NULL) {
779 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
780 		hmp->b_rptr = (uchar_t *)(qdc + 1);
781 		end = hmp->b_next;
782 		dmp->b_cont = hmp;
783 		dmp = hmp;
784 
785 		if (end != NULL)
786 			hmp->b_next = NULL;
787 		hmp->b_prev = NULL;
788 		hmp = end;
789 	}
790 	BUMP_LOCAL(sctp->sctp_reassmsgs);
791 #ifdef	DEBUG
792 	mp1 = begin;
793 	while (mp1 != NULL) {
794 		ASSERT(mp1->b_next == NULL);
795 		ASSERT(mp1->b_prev == NULL);
796 		mp1 = mp1->b_cont;
797 	}
798 #endif
799 	return (begin);
800 }
801 
802 /*
803  * Try partial delivery.
804  */
805 static mblk_t *
806 sctp_try_partial_delivery(sctp_t *sctp, mblk_t *hmp, sctp_reass_t *srp,
807     sctp_data_hdr_t **dc)
808 {
809 	mblk_t		*mp;
810 	mblk_t		*dmp;
811 	mblk_t		*qmp;
812 	mblk_t		*prev;
813 	sctp_data_hdr_t	*qdc;
814 	uint32_t	tsn;
815 
816 	ASSERT(DB_TYPE(hmp) == M_CTL);
817 
818 	dprint(4, ("trypartial: got=%d, needed=%d\n",
819 	    (int)(srp->got), (int)(srp->needed)));
820 
821 	mp = hmp->b_cont;
822 	qdc = (sctp_data_hdr_t *)mp->b_rptr;
823 
824 	ASSERT(SCTP_DATA_GET_BBIT(qdc) && srp->hasBchunk);
825 
826 	tsn = ntohl(qdc->sdh_tsn) + 1;
827 
828 	/*
829 	 * This loop has two exit conditions: the
830 	 * end of received chunks has been reached, or
831 	 * there is a break in the sequence. We want
832 	 * to chop the reassembly list as follows (the
833 	 * numbers are TSNs):
834 	 *   10 -> 11 -> 	(end of chunks)
835 	 *   10 -> 11 -> | 13   (break in sequence)
836 	 */
837 	prev = mp;
838 	mp = mp->b_cont;
839 	while (mp != NULL) {
840 		qdc = (sctp_data_hdr_t *)mp->b_rptr;
841 		if (ntohl(qdc->sdh_tsn) != tsn)
842 			break;
843 		prev = mp;
844 		mp = mp->b_cont;
845 		tsn++;
846 	}
847 	/*
848 	 * We are sending all the fragments upstream, we have to retain
849 	 * the srp info for further fragments.
850 	 */
851 	if (mp == NULL) {
852 		dmp = hmp->b_cont;
853 		hmp->b_cont = NULL;
854 		srp->nexttsn = tsn;
855 		srp->msglen = 0;
856 		srp->needed = 0;
857 		srp->got = 0;
858 		srp->tail = NULL;
859 	} else {
860 		/*
861 		 * There is a gap then some ordered frags which are not
862 		 * the next deliverable tsn. When the next deliverable
863 		 * frag arrives it will be set as the new list head in
864 		 * sctp_data_frag() by setting the B bit.
865 		 */
866 		dmp = hmp->b_cont;
867 		hmp->b_cont = mp;
868 	}
869 	srp->hasBchunk = B_FALSE;
870 	/*
871 	 * mp now points at the last chunk in the sequence,
872 	 * and prev points to mp's previous in the list.
873 	 * We chop the list at prev. Subsequent fragment
874 	 * deliveries will follow the normal reassembly
875 	 * path unless they too exceed the sctp_pd_point.
876 	 */
877 	prev->b_cont = NULL;
878 	srp->partial_delivered = B_TRUE;
879 
880 	dprint(4, ("trypartial: got some, got=%d, needed=%d\n",
881 	    (int)(srp->got), (int)(srp->needed)));
882 
883 	/*
884 	 * Adjust all mblk's except the lead so their rptr's point to the
885 	 * payload. sctp_data_chunk() will need to process the lead's
886 	 * data chunk section, so leave it's rptr pointing at the data chunk.
887 	 */
888 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
889 	if (srp->tail != NULL) {
890 		srp->got--;
891 		ASSERT(srp->got != 0);
892 		if (srp->needed != 0) {
893 			srp->needed--;
894 			ASSERT(srp->needed != 0);
895 		}
896 		srp->msglen -= ntohs((*dc)->sdh_len);
897 	}
898 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
899 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
900 		qmp->b_rptr = (uchar_t *)(qdc + 1);
901 
902 		/*
903 		 * Deduct the balance from got and needed here, now that
904 		 * we know we are actually delivering these data.
905 		 */
906 		if (srp->tail != NULL) {
907 			srp->got--;
908 			ASSERT(srp->got != 0);
909 			if (srp->needed != 0) {
910 				srp->needed--;
911 				ASSERT(srp->needed != 0);
912 			}
913 			srp->msglen -= ntohs(qdc->sdh_len);
914 		}
915 	}
916 	ASSERT(srp->msglen == 0);
917 	BUMP_LOCAL(sctp->sctp_reassmsgs);
918 
919 	return (dmp);
920 }
921 
922 /*
923  * Handle received fragments for ordered delivery to upper layer protocol.
924  * Manage the per message reassembly queue and if this fragment completes
925  * reassembly of the message, or qualifies the already reassembled data
926  * for partial delivery, prepare the message for delivery upstream.
927  *
928  * tpfinished in the caller remains set only when the incoming fragment
929  * has completed the reassembly of the message associated with its ssn.
930  */
931 static mblk_t *
932 sctp_data_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc, int *error,
933     sctp_instr_t *sip, boolean_t *tpfinished)
934 {
935 	mblk_t		*reassq_curr, *reassq_next, *reassq_prev;
936 	mblk_t		*new_reassq;
937 	mblk_t		*qmp;
938 	mblk_t		*first_mp;
939 	sctp_reass_t	*srp;
940 	sctp_data_hdr_t	*qdc;
941 	sctp_data_hdr_t	*bdc;
942 	sctp_data_hdr_t	*edc;
943 	uint32_t	tsn;
944 	uint16_t	fraglen = 0;
945 
946 	*error = 0;
947 
948 	/*
949 	 * Find the reassembly queue for this data chunk, if none
950 	 * yet exists, a new per message queue will be created and
951 	 * appended to the end of the list of per message queues.
952 	 *
953 	 * sip points on sctp_instr_t representing instream messages
954 	 * as yet undelivered for this stream (sid) of the association.
955 	 */
956 	reassq_next = reassq_prev = sip->istr_reass;
957 	for (; reassq_next != NULL; reassq_next = reassq_next->b_next) {
958 		srp = (sctp_reass_t *)DB_BASE(reassq_next);
959 		if (ntohs((*dc)->sdh_ssn) == srp->ssn) {
960 			reassq_curr = reassq_next;
961 			goto foundit;
962 		} else if (SSN_GT(srp->ssn, ntohs((*dc)->sdh_ssn)))
963 			break;
964 		reassq_prev = reassq_next;
965 	}
966 
967 	/*
968 	 * First fragment of this message received, allocate a M_CTL that
969 	 * will head the reassembly queue for this message. The message
970 	 * and all its fragments are identified by having the same ssn.
971 	 *
972 	 * Arriving fragments will be inserted in tsn order on the
973 	 * reassembly queue for this message (ssn), linked by b_cont.
974 	 */
975 	if ((new_reassq = allocb(sizeof (*srp), BPRI_MED)) == NULL) {
976 		*error = ENOMEM;
977 		return (NULL);
978 	}
979 	DB_TYPE(new_reassq) = M_CTL;
980 	srp = (sctp_reass_t *)DB_BASE(new_reassq);
981 	new_reassq->b_cont = dmp;
982 
983 	/*
984 	 * All per ssn reassembly queues, (one for each message) on
985 	 * this stream are doubly linked by b_next/b_prev back to the
986 	 * instr_reass of the instream structure associated with this
987 	 * stream id, (sip is initialized as sctp->sctp_instr[sid]).
988 	 * Insert the new reassembly queue in the correct (ssn) order.
989 	 */
990 	if (reassq_next != NULL) {
991 		if (sip->istr_reass == reassq_next) {
992 			/* head insertion */
993 			sip->istr_reass = new_reassq;
994 			new_reassq->b_next = reassq_next;
995 			new_reassq->b_prev = NULL;
996 			reassq_next->b_prev = new_reassq;
997 		} else {
998 			/* mid queue insertion */
999 			reassq_prev->b_next = new_reassq;
1000 			new_reassq->b_prev = reassq_prev;
1001 			new_reassq->b_next = reassq_next;
1002 			reassq_next->b_prev = new_reassq;
1003 		}
1004 	} else {
1005 		/* place new reassembly queue at the end */
1006 		if (sip->istr_reass == NULL) {
1007 			sip->istr_reass = new_reassq;
1008 			new_reassq->b_prev = NULL;
1009 		} else {
1010 			reassq_prev->b_next = new_reassq;
1011 			new_reassq->b_prev = reassq_prev;
1012 		}
1013 		new_reassq->b_next = NULL;
1014 	}
1015 	srp->partial_delivered = B_FALSE;
1016 	srp->ssn = ntohs((*dc)->sdh_ssn);
1017 	srp->hasBchunk = B_FALSE;
1018 empty_srp:
1019 	srp->needed = 0;
1020 	srp->got = 1;
1021 	/* tail always the highest tsn on the reassembly queue for this ssn */
1022 	srp->tail = dmp;
1023 	if (SCTP_DATA_GET_BBIT(*dc)) {
1024 		/* Incoming frag is flagged as the beginning of message */
1025 		srp->msglen = ntohs((*dc)->sdh_len);
1026 		srp->nexttsn = ntohl((*dc)->sdh_tsn) + 1;
1027 		srp->hasBchunk = B_TRUE;
1028 	} else if (srp->partial_delivered &&
1029 	    srp->nexttsn == ntohl((*dc)->sdh_tsn)) {
1030 		/*
1031 		 * The real beginning fragment of the message was already
1032 		 * delivered upward, so this is the earliest frag expected.
1033 		 * Fake the B-bit then see if this frag also completes the
1034 		 * message.
1035 		 */
1036 		SCTP_DATA_SET_BBIT(*dc);
1037 		srp->hasBchunk = B_TRUE;
1038 		srp->msglen = ntohs((*dc)->sdh_len);
1039 		if (SCTP_DATA_GET_EBIT(*dc)) {
1040 			/* This frag is marked as the end of message */
1041 			srp->needed = 1;
1042 			/* Got all fragments of this message now */
1043 			goto frag_done;
1044 		}
1045 		srp->nexttsn++;
1046 	}
1047 
1048 	/* The only fragment of this message currently queued */
1049 	*tpfinished = B_FALSE;
1050 	return (NULL);
1051 foundit:
1052 	/*
1053 	 * This message already has a reassembly queue. Insert the new frag
1054 	 * in the reassembly queue. Try the tail first, on the assumption
1055 	 * that the fragments are arriving in order.
1056 	 */
1057 	qmp = srp->tail;
1058 
1059 	/*
1060 	 * A NULL tail means all existing fragments of the message have
1061 	 * been entirely consumed during a partial delivery.
1062 	 */
1063 	if (qmp == NULL) {
1064 		ASSERT(srp->got == 0 && srp->needed == 0 &&
1065 		    srp->partial_delivered);
1066 		ASSERT(reassq_curr->b_cont == NULL);
1067 		reassq_curr->b_cont = dmp;
1068 		goto empty_srp;
1069 	} else {
1070 		/*
1071 		 * If partial delivery did take place but the next arriving
1072 		 * fragment was not the next to be delivered, or partial
1073 		 * delivery broke off due to a gap, fragments remain on the
1074 		 * tail. The next fragment due to be delivered still has to
1075 		 * be set as the new head of list upon arrival. Fake B-bit
1076 		 * on that frag then see if it also completes the message.
1077 		 */
1078 		if (srp->partial_delivered &&
1079 		    srp->nexttsn == ntohl((*dc)->sdh_tsn)) {
1080 			SCTP_DATA_SET_BBIT(*dc);
1081 			srp->hasBchunk = B_TRUE;
1082 			if (SCTP_DATA_GET_EBIT(*dc)) {
1083 				/* Got all fragments of this message now */
1084 				goto frag_done;
1085 			}
1086 		}
1087 	}
1088 
1089 	/* grab the frag header of already queued tail frag for comparison */
1090 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1091 	ASSERT(qmp->b_cont == NULL);
1092 
1093 	/* check if the frag goes on the tail in order */
1094 	if (SEQ_GT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1095 		qmp->b_cont = dmp;
1096 		srp->tail = dmp;
1097 		dmp->b_cont = NULL;
1098 		if (srp->hasBchunk && srp->nexttsn == ntohl((*dc)->sdh_tsn)) {
1099 			srp->msglen += ntohs((*dc)->sdh_len);
1100 			srp->nexttsn++;
1101 		}
1102 		goto inserted;
1103 	}
1104 
1105 	/* Next check if we should insert this frag at the beginning */
1106 	qmp = reassq_curr->b_cont;
1107 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1108 	if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1109 		dmp->b_cont = qmp;
1110 		reassq_curr->b_cont = dmp;
1111 		if (SCTP_DATA_GET_BBIT(*dc)) {
1112 			srp->hasBchunk = B_TRUE;
1113 			srp->nexttsn = ntohl((*dc)->sdh_tsn);
1114 		}
1115 		goto preinserted;
1116 	}
1117 
1118 	/* Insert this frag in its correct order in the middle */
1119 	for (;;) {
1120 		/* Tail check above should have caught this */
1121 		ASSERT(qmp->b_cont != NULL);
1122 
1123 		qdc = (sctp_data_hdr_t *)qmp->b_cont->b_rptr;
1124 		if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1125 			/* insert here */
1126 			dmp->b_cont = qmp->b_cont;
1127 			qmp->b_cont = dmp;
1128 			break;
1129 		}
1130 		qmp = qmp->b_cont;
1131 	}
1132 preinserted:
1133 	/*
1134 	 * Need head of message and to be due to deliver, otherwise skip
1135 	 * the recalculation of the message length below.
1136 	 */
1137 	if (!srp->hasBchunk || ntohl((*dc)->sdh_tsn) != srp->nexttsn)
1138 		goto inserted;
1139 	/*
1140 	 * fraglen contains the length of consecutive chunks of fragments.
1141 	 * starting from the chunk we just inserted.
1142 	 */
1143 	tsn = srp->nexttsn;
1144 	for (qmp = dmp; qmp != NULL; qmp = qmp->b_cont) {
1145 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1146 		if (tsn != ntohl(qdc->sdh_tsn))
1147 			break;
1148 		fraglen += ntohs(qdc->sdh_len);
1149 		tsn++;
1150 	}
1151 	srp->nexttsn = tsn;
1152 	srp->msglen += fraglen;
1153 inserted:
1154 	srp->got++;
1155 	first_mp = reassq_curr->b_cont;
1156 	/* Prior to this frag either the beginning or end frag was missing */
1157 	if (srp->needed == 0) {
1158 		/* used to check if we have the first and last fragments */
1159 		bdc = (sctp_data_hdr_t *)first_mp->b_rptr;
1160 		edc = (sctp_data_hdr_t *)srp->tail->b_rptr;
1161 
1162 		/*
1163 		 * If we now have both the beginning and the end of the message,
1164 		 * calculate how many fragments in the complete message.
1165 		 */
1166 		if (SCTP_DATA_GET_BBIT(bdc) && SCTP_DATA_GET_EBIT(edc)) {
1167 			srp->needed = ntohl(edc->sdh_tsn) -
1168 			    ntohl(bdc->sdh_tsn) + 1;
1169 		}
1170 	}
1171 
1172 	/*
1173 	 * Try partial delivery if the message length has exceeded the
1174 	 * partial delivery point. Only do this if we can immediately
1175 	 * deliver the partially assembled message, and only partially
1176 	 * deliver one message at a time (i.e. messages cannot be
1177 	 * intermixed arriving at the upper layer).
1178 	 * sctp_try_partial_delivery() will return a message consisting
1179 	 * of only consecutive fragments.
1180 	 */
1181 	if (srp->needed != srp->got) {
1182 		/* we don't have the full message yet */
1183 		dmp = NULL;
1184 		if (ntohl((*dc)->sdh_tsn) <= sctp->sctp_ftsn &&
1185 		    srp->msglen >= sctp->sctp_pd_point &&
1186 		    srp->ssn == sip->nextseq) {
1187 			dmp = sctp_try_partial_delivery(sctp, reassq_curr,
1188 			    srp, dc);
1189 		}
1190 		*tpfinished = B_FALSE;
1191 		/*
1192 		 * NULL unless a segment of the message now qualified for
1193 		 * partial_delivery and has been prepared for delivery by
1194 		 * sctp_try_partial_delivery().
1195 		 */
1196 		return (dmp);
1197 	}
1198 frag_done:
1199 	/*
1200 	 * Reassembly complete for this message, prepare the data for delivery.
1201 	 * First unlink the reassembly queue for this ssn from the list of
1202 	 * messages in reassembly.
1203 	 */
1204 	if (sip->istr_reass == reassq_curr) {
1205 		sip->istr_reass = reassq_curr->b_next;
1206 		if (reassq_curr->b_next)
1207 			reassq_curr->b_next->b_prev = NULL;
1208 	} else {
1209 		ASSERT(reassq_curr->b_prev != NULL);
1210 		reassq_curr->b_prev->b_next = reassq_curr->b_next;
1211 		if (reassq_curr->b_next)
1212 			reassq_curr->b_next->b_prev = reassq_curr->b_prev;
1213 	}
1214 
1215 	/*
1216 	 * Need to clean up b_prev and b_next as freeb() will
1217 	 * ASSERT that they are unused.
1218 	 */
1219 	reassq_curr->b_next = NULL;
1220 	reassq_curr->b_prev = NULL;
1221 
1222 	dmp = reassq_curr;
1223 	/* point to the head of the reassembled data message */
1224 	dmp = dmp->b_cont;
1225 	reassq_curr->b_cont = NULL;
1226 	freeb(reassq_curr);
1227 	/* Tell our caller that we are returning a complete message. */
1228 	*tpfinished = B_TRUE;
1229 
1230 	/*
1231 	 * Adjust all mblk's except the lead so their rptr's point to the
1232 	 * payload. sctp_data_chunk() will need to process the lead's
1233 	 * data chunk section, so leave its rptr pointing at the data chunk
1234 	 * header.
1235 	 */
1236 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
1237 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
1238 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1239 		qmp->b_rptr = (uchar_t *)(qdc + 1);
1240 	}
1241 	BUMP_LOCAL(sctp->sctp_reassmsgs);
1242 
1243 	return (dmp);
1244 }
1245 
1246 static void
1247 sctp_add_dup(uint32_t tsn, mblk_t **dups)
1248 {
1249 	mblk_t *mp;
1250 	size_t bsize = SCTP_DUP_MBLK_SZ * sizeof (tsn);
1251 
1252 	if (dups == NULL) {
1253 		return;
1254 	}
1255 
1256 	/* first time? */
1257 	if (*dups == NULL) {
1258 		*dups = allocb(bsize, BPRI_MED);
1259 		if (*dups == NULL) {
1260 			return;
1261 		}
1262 	}
1263 
1264 	mp = *dups;
1265 	if ((mp->b_wptr - mp->b_rptr) >= bsize) {
1266 		/* maximum reached */
1267 		return;
1268 	}
1269 
1270 	/* add the duplicate tsn */
1271 	bcopy(&tsn, mp->b_wptr, sizeof (tsn));
1272 	mp->b_wptr += sizeof (tsn);
1273 	ASSERT((mp->b_wptr - mp->b_rptr) <= bsize);
1274 }
1275 
1276 /*
1277  * All incoming sctp data, complete messages and fragments are handled by
1278  * this function. Unless the U-bit is set in the data chunk it will be
1279  * delivered in order or queued until an in-order delivery can be made.
 *
 * sctp is the association; ch is the header of the DATA chunk and mp the
 * mblk it arrived in (mp is dupb()'ed, this function does not free mp
 * itself).  dups is handed to sctp_add_dup() to collect duplicate TSNs,
 * fp is passed to sctp_add_err() and sctp_input_add_ancillary(), and
 * ipp/ira are only passed on to sctp_input_add_ancillary().
 *
 * The chunk is dropped without being acked when it is a duplicate, when
 * it does not fit in the receive window, or when an allocation fails.
 * On every other path SCTP_ACK_IT() is applied to its TSN, including the
 * case of an unknown stream id, where the data is discarded and an error
 * cause is queued.
1280  */
1281 static void
1282 sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
1283     sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
1284 {
1285 	sctp_data_hdr_t *dc;
1286 	mblk_t *dmp, *pmp;
1287 	sctp_instr_t *instr;
1288 	int ubit;
1289 	int sid;
1290 	int isfrag;
1291 	uint16_t ssn;
1292 	uint32_t oftsn;
1293 	boolean_t can_deliver = B_TRUE;
1294 	uint32_t tsn;
1295 	int dlen;
1296 	boolean_t tpfinished = B_TRUE;
1297 	int32_t new_rwnd;
1298 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1299 	int	error;
1300 
1301 	/* The following are used multiple times, so we inline them */
	/*
	 * SCTP_ACK_IT(): account for a TSN that is being accepted.  If it
	 * is the next expected TSN (sctp_ftsn), ftsn is advanced and a SACK
	 * is forced when gap reports are outstanding; if it lies beyond
	 * ftsn it is added to the gap list (sctp_sack_info) and a SACK is
	 * forced.  The macro expands to a bare if/else-if, so it is only
	 * safe where a complete statement is expected.
	 */
1302 #define	SCTP_ACK_IT(sctp, tsn)						\
1303 	if (tsn == sctp->sctp_ftsn) {					\
1304 		dprint(2, ("data_chunk: acking next %x\n", tsn));	\
1305 		(sctp)->sctp_ftsn++;					\
1306 		if ((sctp)->sctp_sack_gaps > 0)				\
1307 			(sctp)->sctp_force_sack = 1;			\
1308 	} else if (SEQ_GT(tsn, sctp->sctp_ftsn)) {			\
1309 		/* Got a gap; record it */				\
1310 		BUMP_LOCAL(sctp->sctp_outseqtsns);			\
1311 		dprint(2, ("data_chunk: acking gap %x\n", tsn));	\
1312 		sctp_ack_add(&sctp->sctp_sack_info, tsn,		\
1313 		    &sctp->sctp_sack_gaps);				\
1314 		sctp->sctp_force_sack = 1;				\
1315 	}
1316 
1317 	dmp = NULL;
1318 
1319 	dc = (sctp_data_hdr_t *)ch;
1320 	tsn = ntohl(dc->sdh_tsn);
1321 
1322 	dprint(3, ("sctp_data_chunk: mp=%p tsn=%x\n", (void *)mp, tsn));
1323 
1324 	/* Check for duplicates */
	/* The TSN is recorded as it appears in the chunk (dc->sdh_tsn). */
1325 	if (SEQ_LT(tsn, sctp->sctp_ftsn)) {
1326 		dprint(4, ("sctp_data_chunk: dropping duplicate\n"));
1327 		BUMP_LOCAL(sctp->sctp_idupchunks);
1328 		sctp->sctp_force_sack = 1;
1329 		sctp_add_dup(dc->sdh_tsn, dups);
1330 		return;
1331 	}
1332 
1333 	/* Check for dups of sack'ed data */
1334 	if (sctp->sctp_sack_info != NULL) {
1335 		sctp_set_t *sp;
1336 
1337 		for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1338 			if (SEQ_GEQ(tsn, sp->begin) && SEQ_LEQ(tsn, sp->end)) {
1339 				dprint(4,
1340 				    ("sctp_data_chunk: dropping dup > "
1341 				    "cumtsn\n"));
1342 				BUMP_LOCAL(sctp->sctp_idupchunks);
1343 				sctp->sctp_force_sack = 1;
1344 				sctp_add_dup(dc->sdh_tsn, dups);
1345 				return;
1346 			}
1347 		}
1348 	}
1349 
1350 	/* We can no longer deliver anything up, but still need to handle it. */
1351 	if (SCTP_IS_DETACHED(sctp)) {
1352 		BUMP_MIB(&sctps->sctps_mib, sctpInClosed);
1353 		can_deliver = B_FALSE;
1354 	}
1355 
	/*
	 * Payload length of this chunk.
	 * NOTE(review): sdh_len is not checked against sizeof (*dc) here;
	 * presumably the caller has validated the chunk length - confirm.
	 */
1356 	dlen = ntohs(dc->sdh_len) - sizeof (*dc);
1357 
1358 	/*
1359 	 * Check for buffer space. Note if this is the next expected TSN
1360 	 * we have to take it to avoid deadlock because we cannot deliver
1361 	 * later queued TSNs and thus clear buffer space without it.
1362 	 * We drop anything that is purely zero window probe data here.
1363 	 */
1364 	if ((sctp->sctp_rwnd - sctp->sctp_rxqueued < dlen) &&
1365 	    (tsn != sctp->sctp_ftsn || sctp->sctp_rwnd == 0)) {
1366 		/* Drop and SACK, but don't advance the cumulative TSN. */
1367 		sctp->sctp_force_sack = 1;
1368 		dprint(0, ("sctp_data_chunk: exceed rwnd %d rxqueued %d "
1369 		    "dlen %d ssn %d tsn %x\n", sctp->sctp_rwnd,
1370 		    sctp->sctp_rxqueued, dlen, ntohs(dc->sdh_ssn),
1371 		    ntohl(dc->sdh_tsn)));
1372 		return;
1373 	}
1374 
1375 	sid = ntohs(dc->sdh_sid);
1376 
1377 	/* Data received for a stream not negotiated for this association */
1378 	if (sid >= sctp->sctp_num_istr) {
1379 		sctp_bsc_t	inval_parm;
1380 
1381 		/* Will populate the CAUSE block in the ERROR chunk. */
1382 		inval_parm.bsc_sid = dc->sdh_sid;
1383 		/* RESERVED, ignored at the receiving end */
1384 		inval_parm.bsc_pad = 0;
1385 
1386 		/* ack and drop it */
1387 		sctp_add_err(sctp, SCTP_ERR_BAD_SID, (void *)&inval_parm,
1388 		    sizeof (sctp_bsc_t), fp);
1389 		SCTP_ACK_IT(sctp, tsn);
1390 		return;
1391 	}
1392 
1393 	/* unordered delivery OK for this data if ubit set */
1394 	ubit = SCTP_DATA_GET_UBIT(dc);
1395 	ASSERT(sctp->sctp_instr != NULL);
1396 
1397 	/* select per stream structure for this stream from the array */
1398 	instr = &sctp->sctp_instr[sid];
1399 	/* Initialize the stream, if not yet used */
1400 	if (instr->sctp == NULL)
1401 		instr->sctp = sctp;
1402 
1403 	/* Begin and End bit set would mean a complete message */
1404 	isfrag = !(SCTP_DATA_GET_BBIT(dc) && SCTP_DATA_GET_EBIT(dc));
1405 
1406 	/* The ssn of this sctp message and of any fragments in it */
1407 	ssn = ntohs(dc->sdh_ssn);
1408 
	/* Work on our own reference to the data; mp itself is left alone. */
1409 	dmp = dupb(mp);
1410 	if (dmp == NULL) {
1411 		/* drop it and don't ack, let the peer retransmit */
1412 		return;
1413 	}
1414 	/*
1415 	 * Past header and payload, note: the underlying buffer may
1416 	 * contain further chunks from the same incoming IP packet,
1417 	 * if so db_ref will be greater than one.
1418 	 */
1419 	dmp->b_wptr = (uchar_t *)ch + ntohs(ch->sch_len);
1420 
	/*
	 * Count the payload as queued. It is subtracted again below once
	 * the data is handed up or freed, or on the ENOMEM back-out.
	 */
1421 	sctp->sctp_rxqueued += dlen;
1422 
	/* Remember ftsn so the code at done: can tell whether it moved. */
1423 	oftsn = sctp->sctp_ftsn;
1424 
1425 	if (isfrag) {
1426 
1427 		error = 0;
1428 		/* fragmented data chunk */
1429 		dmp->b_rptr = (uchar_t *)dc;
1430 		if (ubit) {
1431 			/* prepare data for unordered delivery */
1432 			dmp = sctp_uodata_frag(sctp, dmp, &dc);
1433 #if	DEBUG
1434 			if (dmp != NULL) {
1435 				ASSERT(instr ==
1436 				    &sctp->sctp_instr[sid]);
1437 			}
1438 #endif
1439 		} else {
1440 			/*
1441 			 * Assemble fragments and queue for ordered delivery,
1442 			 * dmp returned is NULL or the head of a complete or
1443 			 * "partial delivery" message. Any returned message
1444 			 * and all its fragments will have the same ssn as the
1445 			 * input fragment currently being handled.
1446 			 */
1447 			dmp = sctp_data_frag(sctp, dmp, &dc, &error, instr,
1448 			    &tpfinished);
1449 		}
		/*
		 * NOTE(review): dmp has been overwritten with the return
		 * value above. The allocb() failure path of the reassembly
		 * code visible in this file sets ENOMEM and returns NULL
		 * without freeing the fragment it was given, so the
		 * dupb()'ed mblk looks like it is leaked on this path -
		 * TODO confirm against sctp_data_frag().
		 */
1450 		if (error == ENOMEM) {
1451 			/* back out the adjustment made earlier */
1452 			sctp->sctp_rxqueued -= dlen;
1453 			/*
1454 			 * Don't ack the segment,
1455 			 * the peer will retransmit.
1456 			 */
1457 			return;
1458 		}
1459 
1460 		if (dmp == NULL) {
1461 			/*
1462 			 * The frag has been queued for later in-order delivery,
1463 			 * but the cumulative TSN may need to advance, so also
1464 			 * need to perform the gap ack checks at the done label.
1465 			 */
1466 			SCTP_ACK_IT(sctp, tsn);
1467 			DTRACE_PROBE4(sctp_data_frag_queued, sctp_t *, sctp,
1468 			    int, sid, int, tsn, uint16_t, ssn);
1469 			goto done;
1470 		}
1471 	}
1472 
1473 	/*
1474 	 * Unless message is the next for delivery to the ulp, queue complete
1475 	 * message in the correct order for ordered delivery.
1476 	 * Note: tpfinished is true when the incoming chunk contains a complete
1477 	 * message or is the final missing fragment which completed a message.
1478 	 */
1479 	if (!ubit && tpfinished && ssn != instr->nextseq) {
1480 		/* Adjust rptr to point at the data chunk for compares */
1481 		dmp->b_rptr = (uchar_t *)dc;
1482 
1483 		dprint(2,
1484 		    ("data_chunk: inserted %x in pq (ssn %d expected %d)\n",
1485 		    ntohl(dc->sdh_tsn), (int)(ssn), (int)(instr->nextseq)));
1486 
1487 		if (instr->istr_msgs == NULL) {
1488 			instr->istr_msgs = dmp;
1489 			ASSERT(dmp->b_prev == NULL && dmp->b_next == NULL);
1490 		} else {
1491 			mblk_t			*imblk = instr->istr_msgs;
1492 			sctp_data_hdr_t		*idc;
1493 
1494 			/*
1495 			 * XXXNeed to take sequence wraps into account,
1496 			 * ... and a more efficient insertion algo.
1497 			 */
			/*
			 * Walk the b_next chain and link dmp in front of
			 * the first queued message with a greater ssn, or
			 * at the tail if there is none.
			 */
1498 			for (;;) {
1499 				idc = (sctp_data_hdr_t *)imblk->b_rptr;
1500 				if (SSN_GT(ntohs(idc->sdh_ssn),
1501 				    ntohs(dc->sdh_ssn))) {
1502 					if (instr->istr_msgs == imblk) {
1503 						instr->istr_msgs = dmp;
1504 						dmp->b_next = imblk;
1505 						imblk->b_prev = dmp;
1506 					} else {
1507 						ASSERT(imblk->b_prev != NULL);
1508 						imblk->b_prev->b_next = dmp;
1509 						dmp->b_prev = imblk->b_prev;
1510 						imblk->b_prev = dmp;
1511 						dmp->b_next = imblk;
1512 					}
1513 					break;
1514 				}
1515 				if (imblk->b_next == NULL) {
1516 					imblk->b_next = dmp;
1517 					dmp->b_prev = imblk;
1518 					break;
1519 				}
1520 				imblk = imblk->b_next;
1521 			}
1522 		}
1523 		(instr->istr_nmsgs)++;
1524 		(sctp->sctp_istr_nmsgs)++;
1525 		SCTP_ACK_IT(sctp, tsn);
1526 		DTRACE_PROBE4(sctp_pqueue_completemsg, sctp_t *, sctp,
1527 		    int, sid, int, tsn, uint16_t, ssn);
1528 		return;
1529 	}
1530 
1531 	/*
1532 	 * Deliver the data directly. Recalculate dlen now since
1533 	 * we may have just reassembled this data.
1534 	 */
1535 	dlen = dmp->b_wptr - (uchar_t *)dc - sizeof (*dc);
1536 	for (pmp = dmp->b_cont; pmp != NULL; pmp = pmp->b_cont)
1537 		dlen += MBLKL(pmp);
1538 	ASSERT(sctp->sctp_rxqueued >= dlen);
1539 
1540 	/* Deliver the message. */
1541 	sctp->sctp_rxqueued -= dlen;
1542 
1543 	if (can_deliver) {
1544 
1545 		/* step past header to the payload */
1546 		dmp->b_rptr = (uchar_t *)(dc + 1);
1547 		if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
1548 		    ipp, ira) == 0) {
1549 			dprint(1, ("sctp_data_chunk: delivering %lu bytes\n",
1550 			    msgdsize(dmp)));
1551 			sctp->sctp_rwnd -= dlen;
1552 			/*
1553 			 * We overload the meaning of b_flag for SCTP sockfs
1554 			 * internal use, to advise sockfs of partial delivery
1555 			 * semantics.
1556 			 */
1557 			dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA;
1558 			new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
1559 			    msgdsize(dmp), 0, &error, NULL);
1560 			/*
1561 			 * Since we always deliver the next TSN data chunk,
1562 			 * we may buffer a little more than allowed. In
1563 			 * that case, just mark the window as 0.
1564 			 */
1565 			if (new_rwnd < 0)
1566 				sctp->sctp_rwnd = 0;
1567 			else if (new_rwnd > sctp->sctp_rwnd)
1568 				sctp->sctp_rwnd = new_rwnd;
1569 			SCTP_ACK_IT(sctp, tsn);
1570 		} else {
1571 			/* No memory don't ack, the peer will retransmit. */
1572 			freemsg(dmp);
1573 			return;
1574 		}
1575 	} else {
1576 		/* Closed above, ack to peer and free the data */
1577 		freemsg(dmp);
1578 		SCTP_ACK_IT(sctp, tsn);
1579 	}
1580 
1581 	/*
1582 	 * Data now enqueued, may already have been processed and free'd
1583 	 * by the ULP (or we may have just freed it above, if we could not
1584 	 * deliver), so we must not reference it (this is why we saved the
1585 	 * ssn and ubit earlier).
1586 	 */
1587 	if (ubit != 0) {
1588 		BUMP_LOCAL(sctp->sctp_iudchunks);
1589 		goto done;
1590 	}
1591 	BUMP_LOCAL(sctp->sctp_idchunks);
1592 
1593 	/*
1594 	 * There was a partial delivery and it has not finished,
1595 	 * don't pull anything from the pqueues or increment the
1596 	 * nextseq. This msg must complete before starting on
1597 	 * the next ssn and the partial message must have the
1598 	 * same ssn as the next expected message..
1599 	 */
1600 	if (!tpfinished) {
1601 		DTRACE_PROBE4(sctp_partial_delivery, sctp_t *, sctp,
1602 		    int, sid, int, tsn, uint16_t, ssn);
1603 		/*
1604 		 * Verify the partial delivery is part of the
1605 		 * message expected for ordered delivery.
1606 		 */
1607 		if (ssn != instr->nextseq) {
1608 			DTRACE_PROBE4(sctp_partial_delivery_error,
1609 			    sctp_t *, sctp, int, sid, int, tsn,
1610 			    uint16_t, ssn);
1611 			cmn_err(CE_WARN, "sctp partial"
1612 			    " delivery error, sctp 0x%p"
1613 			    " sid = 0x%x ssn != nextseq"
1614 			    " tsn 0x%x ftsn 0x%x"
1615 			    " ssn 0x%x nextseq 0x%x",
1616 			    (void *)sctp, sid,
1617 			    tsn, sctp->sctp_ftsn, ssn,
1618 			    instr->nextseq);
1619 		}
1620 
1621 		ASSERT(ssn == instr->nextseq);
1622 		goto done;
1623 	}
1624 
1625 	if (ssn != instr->nextseq) {
1626 		DTRACE_PROBE4(sctp_inorder_delivery_error,
1627 		    sctp_t *, sctp, int, sid, int, tsn,
1628 		    uint16_t, ssn);
1629 		cmn_err(CE_WARN, "sctp in-order delivery error, sctp 0x%p "
1630 		    "sid = 0x%x ssn != nextseq ssn 0x%x nextseq 0x%x",
1631 		    (void *)sctp, sid, ssn, instr->nextseq);
1632 	}
1633 
1634 	ASSERT(ssn == instr->nextseq);
1635 
1636 	DTRACE_PROBE4(sctp_deliver_completemsg, sctp_t *, sctp, int, sid,
1637 	    int, tsn, uint16_t, ssn);
1638 
	/* A complete ordered message went up; expect the following ssn. */
1639 	instr->nextseq = ssn + 1;
1640 
1641 	/*
1642 	 * Deliver any successive data chunks waiting in the instr pqueue
1643 	 * for the data just sent up.
1644 	 */
1645 	while (instr->istr_nmsgs > 0) {
1646 		dmp = (mblk_t *)instr->istr_msgs;
1647 		dc = (sctp_data_hdr_t *)dmp->b_rptr;
1648 		ssn = ntohs(dc->sdh_ssn);
1649 		tsn = ntohl(dc->sdh_tsn);
1650 		/* Stop at the first gap in the sequence */
1651 		if (ssn != instr->nextseq)
1652 			break;
1653 
1654 		DTRACE_PROBE4(sctp_deliver_pqueuedmsg, sctp_t *, sctp,
1655 		    int, sid, int, tsn, uint16_t, ssn);
1656 		/*
1657 		 * Ready to deliver all data before the gap
1658 		 * to the upper layer.
1659 		 */
1660 		(instr->istr_nmsgs)--;
1661 		(instr->nextseq)++;
1662 		(sctp->sctp_istr_nmsgs)--;
1663 
1664 		instr->istr_msgs = instr->istr_msgs->b_next;
1665 		if (instr->istr_msgs != NULL)
1666 			instr->istr_msgs->b_prev = NULL;
1667 		dmp->b_next = dmp->b_prev = NULL;
1668 
1669 		dprint(2, ("data_chunk: pulling %x from pq (ssn %d)\n",
1670 		    ntohl(dc->sdh_tsn), (int)ssn));
1671 
1672 		/*
1673 		 * Composite messages indicate this chunk was reassembled,
1674 		 * each b_cont represents another TSN; Follow the chain to
1675 		 * reach the frag with the last tsn in order to advance ftsn
1676 		 * shortly by calling SCTP_ACK_IT().
1677 		 */
		/*
		 * NOTE(review): the loop below only accumulates dlen; tsn
		 * stays that of the head chunk read above, which is what
		 * SCTP_ACK_IT() is given further down. Every message on
		 * this queue was already passed to SCTP_ACK_IT() when it
		 * was inserted.
		 */
1678 		dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
1679 		for (pmp = dmp->b_cont; pmp; pmp = pmp->b_cont)
1680 			dlen += MBLKL(pmp);
1681 
1682 		ASSERT(sctp->sctp_rxqueued >= dlen);
1683 
1684 		sctp->sctp_rxqueued -= dlen;
1685 		if (can_deliver) {
1686 			dmp->b_rptr = (uchar_t *)(dc + 1);
1687 			if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
1688 			    ipp, ira) == 0) {
1689 				dprint(1, ("sctp_data_chunk: delivering %lu "
1690 				    "bytes\n", msgdsize(dmp)));
1691 				sctp->sctp_rwnd -= dlen;
1692 				/*
1693 				 * Meaning of b_flag overloaded for SCTP sockfs
1694 				 * internal use, advise sockfs of partial
1695 				 * delivery semantics.
1696 				 */
				/*
				 * tpfinished is always true here: the
				 * !tpfinished case jumped to done before
				 * this loop, so b_flag is set to 0.
				 */
1697 				dmp->b_flag = tpfinished ?
1698 				    0 : SCTP_PARTIAL_DATA;
1699 				new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd,
1700 				    dmp, msgdsize(dmp), 0, &error, NULL);
1701 				if (new_rwnd < 0)
1702 					sctp->sctp_rwnd = 0;
1703 				else if (new_rwnd > sctp->sctp_rwnd)
1704 					sctp->sctp_rwnd = new_rwnd;
1705 				SCTP_ACK_IT(sctp, tsn);
1706 			} else {
1707 				/* don't ack, the peer will retransmit */
1708 				freemsg(dmp);
1709 				return;
1710 			}
1711 		} else {
1712 			/* Closed above, ack and free the data */
1713 			freemsg(dmp);
1714 			SCTP_ACK_IT(sctp, tsn);
1715 		}
1716 	}
1717 
1718 done:
1719 
1720 	/*
1721 	 * If there are gap reports pending, check if advancing
1722 	 * the ftsn here closes a gap. If so, we can advance
1723 	 * ftsn to the end of the set.
1724 	 */
1725 	if (sctp->sctp_sack_info != NULL &&
1726 	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
1727 		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
1728 	}
1729 	/*
1730 	 * If ftsn has moved forward, maybe we can remove gap reports.
1731 	 * NB: dmp may now be NULL, so don't dereference it here.
1732 	 */
1733 	if (oftsn != sctp->sctp_ftsn && sctp->sctp_sack_info != NULL) {
1734 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
1735 		    &sctp->sctp_sack_gaps);
1736 		dprint(2, ("data_chunk: removed acks before %x (num=%d)\n",
1737 		    sctp->sctp_ftsn - 1, sctp->sctp_sack_gaps));
1738 	}
1739 
1740 #ifdef	DEBUG
1741 	if (sctp->sctp_sack_info != NULL) {
1742 		ASSERT(sctp->sctp_ftsn != sctp->sctp_sack_info->begin);
1743 	}
1744 #endif
1745 
1746 #undef	SCTP_ACK_IT
1747 }
1748 
1749 void
1750 sctp_fill_sack(sctp_t *sctp, unsigned char *dst, int sacklen)
1751 {
1752 	sctp_chunk_hdr_t *sch;
1753 	sctp_sack_chunk_t *sc;
1754 	sctp_sack_frag_t *sf;
1755 	uint16_t num_gaps = sctp->sctp_sack_gaps;
1756 	sctp_set_t *sp;
1757 
1758 	/* Chunk hdr */
1759 	sch = (sctp_chunk_hdr_t *)dst;
1760 	sch->sch_id = CHUNK_SACK;
1761 	sch->sch_flags = 0;
1762 	sch->sch_len = htons(sacklen);
1763 
1764 	/* SACK chunk */
1765 	sctp->sctp_lastacked = sctp->sctp_ftsn - 1;
1766 
1767 	sc = (sctp_sack_chunk_t *)(sch + 1);
1768 	sc->ssc_cumtsn = htonl(sctp->sctp_lastacked);
1769 	if (sctp->sctp_rxqueued < sctp->sctp_rwnd) {
1770 		sc->ssc_a_rwnd = htonl(sctp->sctp_rwnd - sctp->sctp_rxqueued);
1771 	} else {
1772 		sc->ssc_a_rwnd = 0;
1773 	}
1774 	sc->ssc_numfrags = htons(num_gaps);
1775 	sc->ssc_numdups = 0;
1776 
1777 	/* lay in gap reports */
1778 	sf = (sctp_sack_frag_t *)(sc + 1);
1779 	for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1780 		uint16_t offset;
1781 
1782 		/* start */
1783 		if (sp->begin > sctp->sctp_lastacked) {
1784 			offset = (uint16_t)(sp->begin - sctp->sctp_lastacked);
1785 		} else {
1786 			/* sequence number wrap */
1787 			offset = (uint16_t)(UINT32_MAX - sctp->sctp_lastacked +
1788 			    sp->begin);
1789 		}
1790 		sf->ssf_start = htons(offset);
1791 
1792 		/* end */
1793 		if (sp->end >= sp->begin) {
1794 			offset += (uint16_t)(sp->end - sp->begin);
1795 		} else {
1796 			/* sequence number wrap */
1797 			offset += (uint16_t)(UINT32_MAX - sp->begin + sp->end);
1798 		}
1799 		sf->ssf_end = htons(offset);
1800 
1801 		sf++;
1802 		/* This is just for debugging (a la the following assertion) */
1803 		num_gaps--;
1804 	}
1805 
1806 	ASSERT(num_gaps == 0);
1807 
1808 	/* If the SACK timer is running, stop it */
1809 	if (sctp->sctp_ack_timer_running) {
1810 		sctp_timer_stop(sctp->sctp_ack_mp);
1811 		sctp->sctp_ack_timer_running = B_FALSE;
1812 	}
1813 
1814 	BUMP_LOCAL(sctp->sctp_obchunks);
1815 	BUMP_LOCAL(sctp->sctp_osacks);
1816 }
1817 
1818 mblk_t *
1819 sctp_make_sack(sctp_t *sctp, sctp_faddr_t *sendto, mblk_t *dups)
1820 {
1821 	mblk_t *smp;
1822 	size_t slen;
1823 	sctp_chunk_hdr_t *sch;
1824 	sctp_sack_chunk_t *sc;
1825 	int32_t acks_max;
1826 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1827 	uint32_t	dups_len;
1828 	sctp_faddr_t	*fp;
1829 
1830 	ASSERT(sendto != NULL);
1831 
1832 	if (sctp->sctp_force_sack) {
1833 		sctp->sctp_force_sack = 0;
1834 		goto checks_done;
1835 	}
1836 
1837 	acks_max = sctps->sctps_deferred_acks_max;
1838 	if (sctp->sctp_state == SCTPS_ESTABLISHED) {
1839 		if (sctp->sctp_sack_toggle < acks_max) {
1840 			/* no need to SACK right now */
1841 			dprint(2, ("sctp_make_sack: %p no sack (toggle)\n",
1842 			    (void *)sctp));
1843 			return (NULL);
1844 		} else if (sctp->sctp_sack_toggle >= acks_max) {
1845 			sctp->sctp_sack_toggle = 0;
1846 		}
1847 	}
1848 
1849 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1850 		dprint(2, ("sctp_make_sack: %p no sack (already)\n",
1851 		    (void *)sctp));
1852 		return (NULL);
1853 	}
1854 
1855 checks_done:
1856 	dprint(2, ("sctp_make_sack: acking %x\n", sctp->sctp_ftsn - 1));
1857 
1858 	if (dups != NULL)
1859 		dups_len = MBLKL(dups);
1860 	else
1861 		dups_len = 0;
1862 	slen = sizeof (*sch) + sizeof (*sc) +
1863 	    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1864 
1865 	/*
1866 	 * If there are error chunks, check and see if we can send the
1867 	 * SACK chunk and error chunks together in one packet.  If not,
1868 	 * send the error chunks out now.
1869 	 */
1870 	if (sctp->sctp_err_chunks != NULL) {
1871 		fp = SCTP_CHUNK_DEST(sctp->sctp_err_chunks);
1872 		if (sctp->sctp_err_len + slen + dups_len > fp->sfa_pmss) {
1873 			if ((smp = sctp_make_mp(sctp, fp, 0)) == NULL) {
1874 				SCTP_KSTAT(sctps, sctp_send_err_failed);
1875 				SCTP_KSTAT(sctps, sctp_send_sack_failed);
1876 				freemsg(sctp->sctp_err_chunks);
1877 				sctp->sctp_err_chunks = NULL;
1878 				sctp->sctp_err_len = 0;
1879 				return (NULL);
1880 			}
1881 			smp->b_cont = sctp->sctp_err_chunks;
1882 			sctp_set_iplen(sctp, smp, fp->ixa);
1883 			(void) conn_ip_output(smp, fp->ixa);
1884 			BUMP_LOCAL(sctp->sctp_opkts);
1885 			sctp->sctp_err_chunks = NULL;
1886 			sctp->sctp_err_len = 0;
1887 		}
1888 	}
1889 	smp = sctp_make_mp(sctp, sendto, slen);
1890 	if (smp == NULL) {
1891 		SCTP_KSTAT(sctps, sctp_send_sack_failed);
1892 		return (NULL);
1893 	}
1894 	sch = (sctp_chunk_hdr_t *)smp->b_wptr;
1895 
1896 	sctp_fill_sack(sctp, smp->b_wptr, slen);
1897 	smp->b_wptr += slen;
1898 	if (dups != NULL) {
1899 		sc = (sctp_sack_chunk_t *)(sch + 1);
1900 		sc->ssc_numdups = htons(MBLKL(dups) / sizeof (uint32_t));
1901 		sch->sch_len = htons(slen + dups_len);
1902 		smp->b_cont = dups;
1903 	}
1904 
1905 	if (sctp->sctp_err_chunks != NULL) {
1906 		linkb(smp, sctp->sctp_err_chunks);
1907 		sctp->sctp_err_chunks = NULL;
1908 		sctp->sctp_err_len = 0;
1909 	}
1910 	return (smp);
1911 }
1912 
1913 /*
1914  * Check and see if we need to send a SACK chunk.  If it is needed,
1915  * send it out.  Return true if a SACK chunk is sent, false otherwise.
1916  */
1917 boolean_t
1918 sctp_sack(sctp_t *sctp, mblk_t *dups)
1919 {
1920 	mblk_t *smp;
1921 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1922 
1923 	/* If we are shutting down, let send_shutdown() bundle the SACK */
1924 	if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
1925 		sctp_send_shutdown(sctp, 0);
1926 	}
1927 
1928 	ASSERT(sctp->sctp_lastdata != NULL);
1929 
1930 	if ((smp = sctp_make_sack(sctp, sctp->sctp_lastdata, dups)) == NULL) {
1931 		/* The caller of sctp_sack() will not free the dups mblk. */
1932 		if (dups != NULL)
1933 			freeb(dups);
1934 		return (B_FALSE);
1935 	}
1936 	dprint(2, ("sctp_sack: sending to %p %x:%x:%x:%x\n",
1937 	    (void *)sctp->sctp_lastdata,
1938 	    SCTP_PRINTADDR(sctp->sctp_lastdata->faddr)));
1939 
1940 	sctp->sctp_active = ddi_get_lbolt64();
1941 
1942 	BUMP_MIB(&sctps->sctps_mib, sctpOutAck);
1943 
1944 	sctp_set_iplen(sctp, smp, sctp->sctp_lastdata->ixa);
1945 	(void) conn_ip_output(smp, sctp->sctp_lastdata->ixa);
1946 	BUMP_LOCAL(sctp->sctp_opkts);
1947 	return (B_TRUE);
1948 }
1949 
1950 /*
1951  * This is called if we have a message that was partially sent and is
1952  * abandoned. The cum TSN will be the last chunk sent for this message,
1953  * subsequent chunks will be marked ABANDONED. We send a Forward TSN
1954  * chunk in this case with the TSN of the last sent chunk so that the
1955  * peer can clean up its fragment list for this message. This message
1956  * will be removed from the transmit list when the peer sends a SACK
1957  * back.
1958  */
1959 int
1960 sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta)
1961 {
1962 	sctp_data_hdr_t	*dh;
1963 	mblk_t		*nmp;
1964 	mblk_t		*head;
1965 	int32_t		unsent = 0;
1966 	mblk_t		*mp1 = meta->b_cont;
1967 	uint32_t	adv_pap = sctp->sctp_adv_pap;
1968 	sctp_faddr_t	*fp = sctp->sctp_current;
1969 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1970 
1971 	dh = (sctp_data_hdr_t *)mp1->b_rptr;
1972 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, ntohl(dh->sdh_tsn))) {
1973 		sctp_ftsn_set_t	*sets = NULL;
1974 		uint_t		nsets = 0;
1975 		uint32_t	seglen = sizeof (uint32_t);
1976 		boolean_t	ubit = SCTP_DATA_GET_UBIT(dh);
1977 
1978 		while (mp1->b_next != NULL && SCTP_CHUNK_ISSENT(mp1->b_next))
1979 			mp1 = mp1->b_next;
1980 		dh = (sctp_data_hdr_t *)mp1->b_rptr;
1981 		sctp->sctp_adv_pap = ntohl(dh->sdh_tsn);
1982 		if (!ubit &&
1983 		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, &seglen)) {
1984 			sctp->sctp_adv_pap = adv_pap;
1985 			return (ENOMEM);
1986 		}
1987 		nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, seglen);
1988 		sctp_free_ftsn_set(sets);
1989 		if (nmp == NULL) {
1990 			sctp->sctp_adv_pap = adv_pap;
1991 			return (ENOMEM);
1992 		}
1993 		head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
1994 		if (head == NULL) {
1995 			sctp->sctp_adv_pap = adv_pap;
1996 			freemsg(nmp);
1997 			SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
1998 			return (ENOMEM);
1999 		}
2000 		SCTP_MSG_SET_ABANDONED(meta);
2001 		sctp_set_iplen(sctp, head, fp->ixa);
2002 		(void) conn_ip_output(head, fp->ixa);
2003 		BUMP_LOCAL(sctp->sctp_opkts);
2004 		if (!fp->timer_running)
2005 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
2006 		mp1 = mp1->b_next;
2007 		while (mp1 != NULL) {
2008 			ASSERT(!SCTP_CHUNK_ISSENT(mp1));
2009 			ASSERT(!SCTP_CHUNK_ABANDONED(mp1));
2010 			SCTP_ABANDON_CHUNK(mp1);
2011 			dh = (sctp_data_hdr_t *)mp1->b_rptr;
2012 			unsent += ntohs(dh->sdh_len) - sizeof (*dh);
2013 			mp1 = mp1->b_next;
2014 		}
2015 		ASSERT(sctp->sctp_unsent >= unsent);
2016 		sctp->sctp_unsent -= unsent;
2017 		/*
2018 		 * Update ULP the amount of queued data, which is
2019 		 * sent-unack'ed + unsent.
2020 		 */
2021 		if (!SCTP_IS_DETACHED(sctp))
2022 			SCTP_TXQ_UPDATE(sctp);
2023 		return (0);
2024 	}
2025 	return (-1);
2026 }
2027 
2028 uint32_t
2029 sctp_cumack(sctp_t *sctp, uint32_t tsn, mblk_t **first_unacked)
2030 {
2031 	mblk_t *ump, *nump, *mp = NULL;
2032 	uint16_t chunklen;
2033 	uint32_t xtsn;
2034 	sctp_faddr_t *fp;
2035 	sctp_data_hdr_t *sdc;
2036 	uint32_t cumack_forward = 0;
2037 	sctp_msg_hdr_t	*mhdr;
2038 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2039 
2040 	ump = sctp->sctp_xmit_head;
2041 
2042 	/*
2043 	 * Free messages only when they're completely acked.
2044 	 */
2045 	while (ump != NULL) {
2046 		mhdr = (sctp_msg_hdr_t *)ump->b_rptr;
2047 		for (mp = ump->b_cont; mp != NULL; mp = mp->b_next) {
2048 			if (SCTP_CHUNK_ABANDONED(mp)) {
2049 				ASSERT(SCTP_IS_MSG_ABANDONED(ump));
2050 				mp = NULL;
2051 				break;
2052 			}
2053 			/*
2054 			 * We check for abandoned message if we are PR-SCTP
2055 			 * aware, if this is not the first chunk in the
2056 			 * message (b_cont) and if the message is marked
2057 			 * abandoned.
2058 			 */
2059 			if (!SCTP_CHUNK_ISSENT(mp)) {
2060 				if (sctp->sctp_prsctp_aware &&
2061 				    mp != ump->b_cont &&
2062 				    (SCTP_IS_MSG_ABANDONED(ump) ||
2063 				    SCTP_MSG_TO_BE_ABANDONED(ump, mhdr,
2064 				    sctp))) {
2065 					(void) sctp_check_abandoned_msg(sctp,
2066 					    ump);
2067 				}
2068 				goto cum_ack_done;
2069 			}
2070 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2071 			xtsn = ntohl(sdc->sdh_tsn);
2072 			if (SEQ_GEQ(sctp->sctp_lastack_rxd, xtsn))
2073 				continue;
2074 			if (SEQ_GEQ(tsn, xtsn)) {
2075 				fp = SCTP_CHUNK_DEST(mp);
2076 				chunklen = ntohs(sdc->sdh_len);
2077 
2078 				if (sctp->sctp_out_time != 0 &&
2079 				    xtsn == sctp->sctp_rtt_tsn) {
2080 					/* Got a new RTT measurement */
2081 					sctp_update_rtt(sctp, fp,
2082 					    ddi_get_lbolt64() -
2083 					    sctp->sctp_out_time);
2084 					sctp->sctp_out_time = 0;
2085 				}
2086 				if (SCTP_CHUNK_ISACKED(mp))
2087 					continue;
2088 				SCTP_CHUNK_SET_SACKCNT(mp, 0);
2089 				SCTP_CHUNK_ACKED(mp);
2090 				ASSERT(fp->suna >= chunklen);
2091 				fp->suna -= chunklen;
2092 				fp->acked += chunklen;
2093 				cumack_forward += chunklen;
2094 				ASSERT(sctp->sctp_unacked >=
2095 				    (chunklen - sizeof (*sdc)));
2096 				sctp->sctp_unacked -=
2097 				    (chunklen - sizeof (*sdc));
2098 				if (fp->suna == 0) {
2099 					/* all outstanding data acked */
2100 					fp->pba = 0;
2101 					SCTP_FADDR_TIMER_STOP(fp);
2102 				} else {
2103 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
2104 					    fp->rto);
2105 				}
2106 			} else {
2107 				goto cum_ack_done;
2108 			}
2109 		}
2110 		nump = ump->b_next;
2111 		if (nump != NULL)
2112 			nump->b_prev = NULL;
2113 		if (ump == sctp->sctp_xmit_tail)
2114 			sctp->sctp_xmit_tail = nump;
2115 		if (SCTP_IS_MSG_ABANDONED(ump)) {
2116 			BUMP_LOCAL(sctp->sctp_prsctpdrop);
2117 			ump->b_next = NULL;
2118 			sctp_sendfail_event(sctp, ump, 0, B_TRUE);
2119 		} else {
2120 			sctp_free_msg(ump);
2121 		}
2122 		sctp->sctp_xmit_head = ump = nump;
2123 	}
2124 cum_ack_done:
2125 	*first_unacked = mp;
2126 	if (cumack_forward > 0) {
2127 		BUMP_MIB(&sctps->sctps_mib, sctpInAck);
2128 		if (SEQ_GT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn)) {
2129 			sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd;
2130 		}
2131 
2132 		/*
2133 		 * Update ULP the amount of queued data, which is
2134 		 * sent-unack'ed + unsent.
2135 		 */
2136 		if (!SCTP_IS_DETACHED(sctp))
2137 			SCTP_TXQ_UPDATE(sctp);
2138 
2139 		/* Time to send a shutdown? */
2140 		if (sctp->sctp_state == SCTPS_SHUTDOWN_PENDING) {
2141 			sctp_send_shutdown(sctp, 0);
2142 		}
2143 		sctp->sctp_xmit_unacked = mp;
2144 	} else {
2145 		/* dup ack */
2146 		BUMP_MIB(&sctps->sctps_mib, sctpInDupAck);
2147 	}
2148 	sctp->sctp_lastack_rxd = tsn;
2149 	if (SEQ_LT(sctp->sctp_adv_pap, sctp->sctp_lastack_rxd))
2150 		sctp->sctp_adv_pap = sctp->sctp_lastack_rxd;
2151 	ASSERT(sctp->sctp_xmit_head || sctp->sctp_unacked == 0);
2152 
2153 	return (cumack_forward);
2154 }
2155 
2156 static int
2157 sctp_set_frwnd(sctp_t *sctp, uint32_t frwnd)
2158 {
2159 	uint32_t orwnd;
2160 
2161 	if (sctp->sctp_unacked > frwnd) {
2162 		sctp->sctp_frwnd = 0;
2163 		return (0);
2164 	}
2165 	orwnd = sctp->sctp_frwnd;
2166 	sctp->sctp_frwnd = frwnd - sctp->sctp_unacked;
2167 	if (orwnd < sctp->sctp_frwnd) {
2168 		return (1);
2169 	} else {
2170 		return (0);
2171 	}
2172 }
2173 
2174 /*
2175  * For un-ordered messages.
2176  * Walk the sctp->sctp_uo_frag list and remove any fragments with TSN
2177  * less than/equal to ftsn. Fragments for un-ordered messages are
2178  * strictly in sequence (w.r.t TSN).
2179  */
2180 static int
2181 sctp_ftsn_check_uo_frag(sctp_t *sctp, uint32_t ftsn)
2182 {
2183 	mblk_t		*hmp;
2184 	mblk_t		*hmp_next;
2185 	sctp_data_hdr_t	*dc;
2186 	int		dlen = 0;
2187 
2188 	hmp = sctp->sctp_uo_frags;
2189 	while (hmp != NULL) {
2190 		hmp_next = hmp->b_next;
2191 		dc = (sctp_data_hdr_t *)hmp->b_rptr;
2192 		if (SEQ_GT(ntohl(dc->sdh_tsn), ftsn))
2193 			return (dlen);
2194 		sctp->sctp_uo_frags = hmp_next;
2195 		if (hmp_next != NULL)
2196 			hmp_next->b_prev = NULL;
2197 		hmp->b_next = NULL;
2198 		dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2199 		freeb(hmp);
2200 		hmp = hmp_next;
2201 	}
2202 	return (dlen);
2203 }
2204 
2205 /*
2206  * For ordered messages.
2207  * Check for existing fragments for an sid-ssn pair reported as abandoned,
2208  * hence will not receive, in the Forward TSN. If there are fragments, then
2209  * we just nuke them. If and when Partial Delivery API is supported, we
2210  * would need to send a notification to the upper layer about this.
2211  */
2212 static int
2213 sctp_ftsn_check_frag(sctp_t *sctp, uint16_t ssn, sctp_instr_t *sip)
2214 {
2215 	sctp_reass_t	*srp;
2216 	mblk_t		*hmp;
2217 	mblk_t		*dmp;
2218 	mblk_t		*hmp_next;
2219 	sctp_data_hdr_t	*dc;
2220 	int		dlen = 0;
2221 
2222 	hmp = sip->istr_reass;
2223 	while (hmp != NULL) {
2224 		hmp_next = hmp->b_next;
2225 		srp = (sctp_reass_t *)DB_BASE(hmp);
2226 		if (SSN_GT(srp->ssn, ssn))
2227 			return (dlen);
2228 		/*
2229 		 * If we had sent part of this message up, send a partial
2230 		 * delivery event. Since this is ordered delivery, we should
2231 		 * have sent partial message only for the next in sequence,
2232 		 * hence the ASSERT. See comments in sctp_data_chunk() for
2233 		 * trypartial.
2234 		 */
2235 		if (srp->partial_delivered) {
2236 			if (srp->ssn != sip->nextseq)
2237 				cmn_err(CE_WARN, "sctp partial"
2238 				    " delivery notify, sctp 0x%p"
2239 				    " sip = 0x%p ssn != nextseq"
2240 				    " ssn 0x%x nextseq 0x%x",
2241 				    (void *)sctp, (void *)sip,
2242 				    srp->ssn, sip->nextseq);
2243 			ASSERT(sip->nextseq == srp->ssn);
2244 			sctp_partial_delivery_event(sctp);
2245 		}
2246 		/* Take it out of the reass queue */
2247 		sip->istr_reass = hmp_next;
2248 		if (hmp_next != NULL)
2249 			hmp_next->b_prev = NULL;
2250 		hmp->b_next = NULL;
2251 		ASSERT(hmp->b_prev == NULL);
2252 		dmp = hmp;
2253 		ASSERT(DB_TYPE(hmp) == M_CTL);
2254 		dmp = hmp->b_cont;
2255 		hmp->b_cont = NULL;
2256 		freeb(hmp);
2257 		hmp = dmp;
2258 		while (dmp != NULL) {
2259 			dc = (sctp_data_hdr_t *)dmp->b_rptr;
2260 			dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2261 			dmp = dmp->b_cont;
2262 		}
2263 		freemsg(hmp);
2264 		hmp = hmp_next;
2265 	}
2266 	return (dlen);
2267 }
2268 
2269 /*
2270  * Update sctp_ftsn to the cumulative TSN from the Forward TSN chunk. Remove
2271  * any SACK gaps less than the newly updated sctp_ftsn. Walk through the
2272  * sid-ssn pair in the Forward TSN and for each, clean the fragment list
2273  * for this pair, if needed, and check if we can deliver subsequent
2274  * messages, if any, from the instream queue (that were waiting for this
2275  * sid-ssn message to show up). Once we are done try to update the SACK
2276  * info. We could get a duplicate Forward TSN, in which case just send
2277  * a SACK. If any of the sid values in the Forward TSN is invalid,
2278  * send back an "Invalid Stream Identifier" error and continue processing
2279  * the rest.
2280  */
2281 static void
2282 sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp,
2283     ip_pkt_t *ipp, ip_recv_attr_t *ira)
2284 {
2285 	uint32_t	*ftsn = (uint32_t *)(ch + 1);
2286 	ftsn_entry_t	*ftsn_entry;
2287 	sctp_instr_t	*instr;
2288 	boolean_t	can_deliver = B_TRUE;
2289 	size_t		dlen;
2290 	int		flen;
2291 	mblk_t		*dmp;
2292 	mblk_t		*pmp;
2293 	sctp_data_hdr_t	*dc;
2294 	ssize_t		remaining;
2295 	sctp_stack_t	*sctps = sctp->sctp_sctps;
2296 
2297 	*ftsn = ntohl(*ftsn);
2298 	remaining =  ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*ftsn);
2299 
2300 	if (SCTP_IS_DETACHED(sctp)) {
2301 		BUMP_MIB(&sctps->sctps_mib, sctpInClosed);
2302 		can_deliver = B_FALSE;
2303 	}
2304 	/*
2305 	 * un-ordered messages don't have SID-SSN pair entries, we check
2306 	 * for any fragments (for un-ordered message) to be discarded using
2307 	 * the cumulative FTSN.
2308 	 */
2309 	flen = sctp_ftsn_check_uo_frag(sctp, *ftsn);
2310 	if (flen > 0) {
2311 		ASSERT(sctp->sctp_rxqueued >= flen);
2312 		sctp->sctp_rxqueued -= flen;
2313 	}
2314 	ftsn_entry = (ftsn_entry_t *)(ftsn + 1);
2315 	while (remaining >= sizeof (*ftsn_entry)) {
2316 		ftsn_entry->ftsn_sid = ntohs(ftsn_entry->ftsn_sid);
2317 		ftsn_entry->ftsn_ssn = ntohs(ftsn_entry->ftsn_ssn);
2318 		if (ftsn_entry->ftsn_sid >= sctp->sctp_num_istr) {
2319 			sctp_bsc_t	inval_parm;
2320 
2321 			/* Will populate the CAUSE block in the ERROR chunk. */
2322 			inval_parm.bsc_sid = htons(ftsn_entry->ftsn_sid);
2323 			/* RESERVED, ignored at the receiving end */
2324 			inval_parm.bsc_pad = 0;
2325 
2326 			sctp_add_err(sctp, SCTP_ERR_BAD_SID,
2327 			    (void *)&inval_parm, sizeof (sctp_bsc_t), fp);
2328 			ftsn_entry++;
2329 			remaining -= sizeof (*ftsn_entry);
2330 			continue;
2331 		}
2332 		instr = &sctp->sctp_instr[ftsn_entry->ftsn_sid];
2333 		flen = sctp_ftsn_check_frag(sctp, ftsn_entry->ftsn_ssn, instr);
2334 		/* Indicates frags were nuked, update rxqueued */
2335 		if (flen > 0) {
2336 			ASSERT(sctp->sctp_rxqueued >= flen);
2337 			sctp->sctp_rxqueued -= flen;
2338 		}
2339 		/*
2340 		 * It is possible to receive an FTSN chunk with SSN smaller
2341 		 * than then nextseq if this chunk is a retransmission because
2342 		 * of incomplete processing when it was first processed.
2343 		 */
2344 		if (SSN_GE(ftsn_entry->ftsn_ssn, instr->nextseq))
2345 			instr->nextseq = ftsn_entry->ftsn_ssn + 1;
2346 		while (instr->istr_nmsgs > 0) {
2347 			mblk_t	*next;
2348 
2349 			dmp = (mblk_t *)instr->istr_msgs;
2350 			dc = (sctp_data_hdr_t *)dmp->b_rptr;
2351 			if (ntohs(dc->sdh_ssn) != instr->nextseq)
2352 				break;
2353 
2354 			next = dmp->b_next;
2355 			dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
2356 			for (pmp = dmp->b_cont; pmp != NULL;
2357 			    pmp = pmp->b_cont) {
2358 				dlen += MBLKL(pmp);
2359 			}
2360 			if (can_deliver) {
2361 				int32_t	nrwnd;
2362 				int error;
2363 
2364 				dmp->b_rptr = (uchar_t *)(dc + 1);
2365 				dmp->b_next = NULL;
2366 				ASSERT(dmp->b_prev == NULL);
2367 				if (sctp_input_add_ancillary(sctp,
2368 				    &dmp, dc, fp, ipp, ira) == 0) {
2369 					sctp->sctp_rxqueued -= dlen;
2370 					sctp->sctp_rwnd -= dlen;
2371 					/*
2372 					 * Override b_flag for SCTP sockfs
2373 					 * internal use
2374 					 */
2375 
2376 					dmp->b_flag = 0;
2377 					nrwnd = sctp->sctp_ulp_recv(
2378 					    sctp->sctp_ulpd, dmp, msgdsize(dmp),
2379 					    0, &error, NULL);
2380 					if (nrwnd < 0)
2381 						sctp->sctp_rwnd = 0;
2382 					else if (nrwnd > sctp->sctp_rwnd)
2383 						sctp->sctp_rwnd = nrwnd;
2384 				} else {
2385 					/*
2386 					 * We will resume processing when
2387 					 * the FTSN chunk is re-xmitted.
2388 					 */
2389 					dmp->b_rptr = (uchar_t *)dc;
2390 					dmp->b_next = next;
2391 					dprint(0,
2392 					    ("FTSN dequeuing %u failed\n",
2393 					    ntohs(dc->sdh_ssn)));
2394 					return;
2395 				}
2396 			} else {
2397 				sctp->sctp_rxqueued -= dlen;
2398 				ASSERT(dmp->b_prev == NULL);
2399 				dmp->b_next = NULL;
2400 				freemsg(dmp);
2401 			}
2402 			instr->istr_nmsgs--;
2403 			instr->nextseq++;
2404 			sctp->sctp_istr_nmsgs--;
2405 			if (next != NULL)
2406 				next->b_prev = NULL;
2407 			instr->istr_msgs = next;
2408 		}
2409 		ftsn_entry++;
2410 		remaining -= sizeof (*ftsn_entry);
2411 	}
2412 	/* Duplicate FTSN */
2413 	if (*ftsn <= (sctp->sctp_ftsn - 1)) {
2414 		sctp->sctp_force_sack = 1;
2415 		return;
2416 	}
2417 	/* Advance cum TSN to that reported in the Forward TSN chunk */
2418 	sctp->sctp_ftsn = *ftsn + 1;
2419 
2420 	/* Remove all the SACK gaps before the new cum TSN */
2421 	if (sctp->sctp_sack_info != NULL) {
2422 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
2423 		    &sctp->sctp_sack_gaps);
2424 	}
2425 	/*
2426 	 * If there are gap reports pending, check if advancing
2427 	 * the ftsn here closes a gap. If so, we can advance
2428 	 * ftsn to the end of the set.
2429 	 * If ftsn has moved forward, maybe we can remove gap reports.
2430 	 */
2431 	if (sctp->sctp_sack_info != NULL &&
2432 	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
2433 		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
2434 		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
2435 		    &sctp->sctp_sack_gaps);
2436 	}
2437 }
2438 
2439 /*
2440  * When we have processed a SACK we check to see if we can advance the
2441  * cumulative TSN if there are abandoned chunks immediately following
2442  * the updated cumulative TSN. If there are, we attempt to send a
2443  * Forward TSN chunk.
2444  */
2445 static void
2446 sctp_check_abandoned_data(sctp_t *sctp, sctp_faddr_t *fp)
2447 {
2448 	mblk_t		*meta = sctp->sctp_xmit_head;
2449 	mblk_t		*mp;
2450 	mblk_t		*nmp;
2451 	uint32_t	seglen;
2452 	uint32_t	adv_pap = sctp->sctp_adv_pap;
2453 
2454 	/*
2455 	 * We only check in the first meta since otherwise we can't
2456 	 * advance the cumulative ack point. We just look for chunks
2457 	 * marked for retransmission, else we might prematurely
2458 	 * send an FTSN for a sent, but unacked, chunk.
2459 	 */
2460 	for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2461 		if (!SCTP_CHUNK_ISSENT(mp))
2462 			return;
2463 		if (SCTP_CHUNK_WANT_REXMIT(mp))
2464 			break;
2465 	}
2466 	if (mp == NULL)
2467 		return;
2468 	sctp_check_adv_ack_pt(sctp, meta, mp);
2469 	if (SEQ_GT(sctp->sctp_adv_pap, adv_pap)) {
2470 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
2471 		if (nmp == NULL) {
2472 			sctp->sctp_adv_pap = adv_pap;
2473 			if (!fp->timer_running)
2474 				SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
2475 			return;
2476 		}
2477 		sctp_set_iplen(sctp, nmp, fp->ixa);
2478 		(void) conn_ip_output(nmp, fp->ixa);
2479 		BUMP_LOCAL(sctp->sctp_opkts);
2480 		if (!fp->timer_running)
2481 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
2482 	}
2483 }
2484 
2485 /*
2486  * The processing here follows the same logic in sctp_got_sack(), the reason
2487  * we do this separately is because, usually, gap blocks are ordered and
2488  * we can process it in sctp_got_sack(). However if they aren't we would
2489  * need to do some additional non-optimal stuff when we start processing the
2490  * unordered gaps. To that effect sctp_got_sack() does the processing in the
2491  * simple case and this does the same in the more involved case.
2492  */
2493 static uint32_t
2494 sctp_process_uo_gaps(sctp_t *sctp, uint32_t ctsn, sctp_sack_frag_t *ssf,
2495     int num_gaps, mblk_t *umphead, mblk_t *mphead, int *trysend,
2496     boolean_t *fast_recovery, uint32_t fr_xtsn)
2497 {
2498 	uint32_t		xtsn;
2499 	uint32_t		gapstart = 0;
2500 	uint32_t		gapend = 0;
2501 	int			gapcnt;
2502 	uint16_t		chunklen;
2503 	sctp_data_hdr_t		*sdc;
2504 	int			gstart;
2505 	mblk_t			*ump = umphead;
2506 	mblk_t			*mp = mphead;
2507 	sctp_faddr_t		*fp;
2508 	uint32_t		acked = 0;
2509 	sctp_stack_t		*sctps = sctp->sctp_sctps;
2510 
2511 	/*
2512 	 * gstart tracks the last (in the order of TSN) gapstart that
2513 	 * we process in this SACK gaps walk.
2514 	 */
2515 	gstart = ctsn;
2516 
2517 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2518 	xtsn = ntohl(sdc->sdh_tsn);
2519 	for (gapcnt = 0; gapcnt < num_gaps; gapcnt++, ssf++) {
2520 		if (gapstart != 0) {
2521 			/*
2522 			 * If we have reached the end of the transmit list or
2523 			 * hit an unsent chunk or encountered an unordered gap
2524 			 * block start from the ctsn again.
2525 			 */
2526 			if (ump == NULL || !SCTP_CHUNK_ISSENT(mp) ||
2527 			    SEQ_LT(ctsn + ntohs(ssf->ssf_start), xtsn)) {
2528 				ump = umphead;
2529 				mp = mphead;
2530 				sdc = (sctp_data_hdr_t *)mp->b_rptr;
2531 				xtsn = ntohl(sdc->sdh_tsn);
2532 			}
2533 		}
2534 
2535 		gapstart = ctsn + ntohs(ssf->ssf_start);
2536 		gapend = ctsn + ntohs(ssf->ssf_end);
2537 
2538 		/*
2539 		 * Sanity checks:
2540 		 *
2541 		 * 1. SACK for TSN we have not sent - ABORT
2542 		 * 2. Invalid or spurious gaps, ignore all gaps
2543 		 */
2544 		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
2545 		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
2546 			BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent);
2547 			*trysend = -1;
2548 			return (acked);
2549 		} else if (SEQ_LT(gapend, gapstart) ||
2550 		    SEQ_LEQ(gapstart, ctsn)) {
2551 			break;
2552 		}
2553 		/*
2554 		 * The xtsn can be the TSN processed for the last gap
2555 		 * (gapend) or it could be the cumulative TSN. We continue
2556 		 * with the last xtsn as long as the gaps are ordered, when
2557 		 * we hit an unordered gap, we re-start from the cumulative
2558 		 * TSN. For the first gap it is always the cumulative TSN.
2559 		 */
2560 		while (xtsn != gapstart) {
2561 			/*
2562 			 * We can't reliably check for reneged chunks
2563 			 * when walking the unordered list, so we don't.
2564 			 * In case the peer reneges then we will end up
2565 			 * sending the reneged chunk via timeout.
2566 			 */
2567 			mp = mp->b_next;
2568 			if (mp == NULL) {
2569 				ump = ump->b_next;
2570 				/*
2571 				 * ump can't be NULL because of the sanity
2572 				 * check above.
2573 				 */
2574 				ASSERT(ump != NULL);
2575 				mp = ump->b_cont;
2576 			}
2577 			/*
2578 			 * mp can't be unsent because of the sanity check
2579 			 * above.
2580 			 */
2581 			ASSERT(SCTP_CHUNK_ISSENT(mp));
2582 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2583 			xtsn = ntohl(sdc->sdh_tsn);
2584 		}
2585 		/*
2586 		 * Now that we have found the chunk with TSN == 'gapstart',
2587 		 * let's walk till we hit the chunk with TSN == 'gapend'.
2588 		 * All intermediate chunks will be marked ACKED, if they
2589 		 * haven't already been.
2590 		 */
2591 		while (SEQ_LEQ(xtsn, gapend)) {
2592 			/*
2593 			 * SACKed
2594 			 */
2595 			SCTP_CHUNK_SET_SACKCNT(mp, 0);
2596 			if (!SCTP_CHUNK_ISACKED(mp)) {
2597 				SCTP_CHUNK_ACKED(mp);
2598 
2599 				fp = SCTP_CHUNK_DEST(mp);
2600 				chunklen = ntohs(sdc->sdh_len);
2601 				ASSERT(fp->suna >= chunklen);
2602 				fp->suna -= chunklen;
2603 				if (fp->suna == 0) {
2604 					/* All outstanding data acked. */
2605 					fp->pba = 0;
2606 					SCTP_FADDR_TIMER_STOP(fp);
2607 				}
2608 				fp->acked += chunklen;
2609 				acked += chunklen;
2610 				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
2611 				ASSERT(sctp->sctp_unacked >= 0);
2612 			}
2613 			/*
2614 			 * Move to the next message in the transmit list
2615 			 * if we are done with all the chunks from the current
2616 			 * message. Note, it is possible to hit the end of the
2617 			 * transmit list here, i.e. if we have already completed
2618 			 * processing the gap block.
2619 			 */
2620 			mp = mp->b_next;
2621 			if (mp == NULL) {
2622 				ump = ump->b_next;
2623 				if (ump == NULL) {
2624 					ASSERT(xtsn == gapend);
2625 					break;
2626 				}
2627 				mp = ump->b_cont;
2628 			}
2629 			/*
2630 			 * Likewise, we can hit an unsent chunk once we have
2631 			 * completed processing the gap block.
2632 			 */
2633 			if (!SCTP_CHUNK_ISSENT(mp)) {
2634 				ASSERT(xtsn == gapend);
2635 				break;
2636 			}
2637 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2638 			xtsn = ntohl(sdc->sdh_tsn);
2639 		}
2640 		/*
2641 		 * We keep track of the last gap we successfully processed
2642 		 * so that we can terminate the walk below for incrementing
2643 		 * the SACK count.
2644 		 */
2645 		if (SEQ_LT(gstart, gapstart))
2646 			gstart = gapstart;
2647 	}
2648 	/*
2649 	 * Check if have incremented the SACK count for all unacked TSNs in
2650 	 * sctp_got_sack(), if so we are done.
2651 	 */
2652 	if (SEQ_LEQ(gstart, fr_xtsn))
2653 		return (acked);
2654 
2655 	ump = umphead;
2656 	mp = mphead;
2657 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2658 	xtsn = ntohl(sdc->sdh_tsn);
2659 	while (SEQ_LT(xtsn, gstart)) {
2660 		/*
2661 		 * We have incremented SACK count for TSNs less than fr_tsn
2662 		 * in sctp_got_sack(), so don't increment them again here.
2663 		 */
2664 		if (SEQ_GT(xtsn, fr_xtsn) && !SCTP_CHUNK_ISACKED(mp)) {
2665 			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
2666 			if (SCTP_CHUNK_SACKCNT(mp) ==
2667 			    sctps->sctps_fast_rxt_thresh) {
2668 				SCTP_CHUNK_REXMIT(sctp, mp);
2669 				sctp->sctp_chk_fast_rexmit = B_TRUE;
2670 				*trysend = 1;
2671 				if (!*fast_recovery) {
2672 					/*
2673 					 * Entering fast recovery.
2674 					 */
2675 					fp = SCTP_CHUNK_DEST(mp);
2676 					fp->ssthresh = fp->cwnd / 2;
2677 					if (fp->ssthresh < 2 * fp->sfa_pmss) {
2678 						fp->ssthresh =
2679 						    2 * fp->sfa_pmss;
2680 					}
2681 					fp->cwnd = fp->ssthresh;
2682 					fp->pba = 0;
2683 					sctp->sctp_recovery_tsn =
2684 					    sctp->sctp_ltsn - 1;
2685 					*fast_recovery = B_TRUE;
2686 				}
2687 			}
2688 		}
2689 		mp = mp->b_next;
2690 		if (mp == NULL) {
2691 			ump = ump->b_next;
2692 			/* We can't get to the end of the transmit list here */
2693 			ASSERT(ump != NULL);
2694 			mp = ump->b_cont;
2695 		}
2696 		/* We can't hit an unsent chunk here */
2697 		ASSERT(SCTP_CHUNK_ISSENT(mp));
2698 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
2699 		xtsn = ntohl(sdc->sdh_tsn);
2700 	}
2701 	return (acked);
2702 }
2703 
2704 static int
2705 sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch)
2706 {
2707 	sctp_sack_chunk_t	*sc;
2708 	sctp_data_hdr_t		*sdc;
2709 	sctp_sack_frag_t	*ssf;
2710 	mblk_t			*ump;
2711 	mblk_t			*mp;
2712 	mblk_t			*mp1;
2713 	uint32_t		cumtsn;
2714 	uint32_t		xtsn;
2715 	uint32_t		gapstart = 0;
2716 	uint32_t		gapend = 0;
2717 	uint32_t		acked = 0;
2718 	uint16_t		chunklen;
2719 	sctp_faddr_t		*fp;
2720 	int			num_gaps;
2721 	int			trysend = 0;
2722 	int			i;
2723 	boolean_t		fast_recovery = B_FALSE;
2724 	boolean_t		cumack_forward = B_FALSE;
2725 	boolean_t		fwd_tsn = B_FALSE;
2726 	sctp_stack_t		*sctps = sctp->sctp_sctps;
2727 
2728 	BUMP_LOCAL(sctp->sctp_ibchunks);
2729 	BUMP_LOCAL(sctp->sctp_isacks);
2730 	chunklen = ntohs(sch->sch_len);
2731 	if (chunklen < (sizeof (*sch) + sizeof (*sc)))
2732 		return (0);
2733 
2734 	sc = (sctp_sack_chunk_t *)(sch + 1);
2735 	cumtsn = ntohl(sc->ssc_cumtsn);
2736 
2737 	dprint(2, ("got sack cumtsn %x -> %x\n", sctp->sctp_lastack_rxd,
2738 	    cumtsn));
2739 
2740 	/* out of order */
2741 	if (SEQ_LT(cumtsn, sctp->sctp_lastack_rxd))
2742 		return (0);
2743 
2744 	if (SEQ_GT(cumtsn, sctp->sctp_ltsn - 1)) {
2745 		BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent);
2746 		/* Send an ABORT */
2747 		return (-1);
2748 	}
2749 
2750 	/*
2751 	 * Cwnd only done when not in fast recovery mode.
2752 	 */
2753 	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn))
2754 		fast_recovery = B_TRUE;
2755 
2756 	/*
2757 	 * .. and if the cum TSN is not moving ahead on account Forward TSN
2758 	 */
2759 	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_adv_pap))
2760 		fwd_tsn = B_TRUE;
2761 
2762 	if (cumtsn == sctp->sctp_lastack_rxd &&
2763 	    (sctp->sctp_xmit_unacked == NULL ||
2764 	    !SCTP_CHUNK_ABANDONED(sctp->sctp_xmit_unacked))) {
2765 		if (sctp->sctp_xmit_unacked != NULL)
2766 			mp = sctp->sctp_xmit_unacked;
2767 		else if (sctp->sctp_xmit_head != NULL)
2768 			mp = sctp->sctp_xmit_head->b_cont;
2769 		else
2770 			mp = NULL;
2771 		BUMP_MIB(&sctps->sctps_mib, sctpInDupAck);
2772 		/*
2773 		 * If we were doing a zero win probe and the win
2774 		 * has now opened to at least MSS, re-transmit the
2775 		 * zero win probe via sctp_rexmit_packet().
2776 		 */
2777 		if (mp != NULL && sctp->sctp_zero_win_probe &&
2778 		    ntohl(sc->ssc_a_rwnd) >= sctp->sctp_current->sfa_pmss) {
2779 			mblk_t	*pkt;
2780 			uint_t	pkt_len;
2781 			mblk_t	*mp1 = mp;
2782 			mblk_t	*meta = sctp->sctp_xmit_head;
2783 
2784 			/*
2785 			 * Reset the RTO since we have been backing-off
2786 			 * to send the ZWP.
2787 			 */
2788 			fp = sctp->sctp_current;
2789 			fp->rto = fp->srtt + 4 * fp->rttvar;
2790 			SCTP_MAX_RTO(sctp, fp);
2791 			/* Resend the ZWP */
2792 			pkt = sctp_rexmit_packet(sctp, &meta, &mp1, fp,
2793 			    &pkt_len);
2794 			if (pkt == NULL) {
2795 				SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2796 				return (0);
2797 			}
2798 			ASSERT(pkt_len <= fp->sfa_pmss);
2799 			sctp->sctp_zero_win_probe = B_FALSE;
2800 			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2801 			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2802 			sctp_set_iplen(sctp, pkt, fp->ixa);
2803 			(void) conn_ip_output(pkt, fp->ixa);
2804 			BUMP_LOCAL(sctp->sctp_opkts);
2805 		}
2806 	} else {
2807 		if (sctp->sctp_zero_win_probe) {
2808 			/*
2809 			 * Reset the RTO since we have been backing-off
2810 			 * to send the ZWP.
2811 			 */
2812 			fp = sctp->sctp_current;
2813 			fp->rto = fp->srtt + 4 * fp->rttvar;
2814 			SCTP_MAX_RTO(sctp, fp);
2815 			sctp->sctp_zero_win_probe = B_FALSE;
2816 			/* This is probably not required */
2817 			if (!sctp->sctp_rexmitting) {
2818 				sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2819 				sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2820 			}
2821 		}
2822 		acked = sctp_cumack(sctp, cumtsn, &mp);
2823 		sctp->sctp_xmit_unacked = mp;
2824 		if (acked > 0) {
2825 			trysend = 1;
2826 			cumack_forward = B_TRUE;
2827 			if (fwd_tsn && SEQ_GEQ(sctp->sctp_lastack_rxd,
2828 			    sctp->sctp_adv_pap)) {
2829 				cumack_forward = B_FALSE;
2830 			}
2831 		}
2832 	}
2833 	num_gaps = ntohs(sc->ssc_numfrags);
2834 	UPDATE_LOCAL(sctp->sctp_gapcnt, num_gaps);
2835 	if (num_gaps == 0 || mp == NULL || !SCTP_CHUNK_ISSENT(mp) ||
2836 	    chunklen < (sizeof (*sch) + sizeof (*sc) +
2837 	    num_gaps * sizeof (*ssf))) {
2838 		goto ret;
2839 	}
2840 #ifdef	DEBUG
2841 	/*
2842 	 * Since we delete any message that has been acked completely,
2843 	 * the unacked chunk must belong to sctp_xmit_head (as
2844 	 * we don't have a back pointer from the mp to the meta data
2845 	 * we do this).
2846 	 */
2847 	{
2848 		mblk_t	*mp2 = sctp->sctp_xmit_head->b_cont;
2849 
2850 		while (mp2 != NULL) {
2851 			if (mp2 == mp)
2852 				break;
2853 			mp2 = mp2->b_next;
2854 		}
2855 		ASSERT(mp2 != NULL);
2856 	}
2857 #endif
2858 	ump = sctp->sctp_xmit_head;
2859 
2860 	/*
2861 	 * Just remember where we started from, in case we need to call
2862 	 * sctp_process_uo_gaps() if the gap blocks are unordered.
2863 	 */
2864 	mp1 = mp;
2865 
2866 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2867 	xtsn = ntohl(sdc->sdh_tsn);
2868 	ASSERT(xtsn == cumtsn + 1);
2869 
2870 	/*
2871 	 * Go through SACK gaps. They are ordered based on start TSN.
2872 	 */
2873 	ssf = (sctp_sack_frag_t *)(sc + 1);
2874 	for (i = 0; i < num_gaps; i++, ssf++) {
2875 		if (gapstart != 0) {
2876 			/* check for unordered gap */
2877 			if (SEQ_LEQ(cumtsn + ntohs(ssf->ssf_start), gapstart)) {
2878 				acked += sctp_process_uo_gaps(sctp,
2879 				    cumtsn, ssf, num_gaps - i,
2880 				    sctp->sctp_xmit_head, mp1,
2881 				    &trysend, &fast_recovery, gapstart);
2882 				if (trysend < 0) {
2883 					BUMP_MIB(&sctps->sctps_mib,
2884 					    sctpInAckUnsent);
2885 					return (-1);
2886 				}
2887 				break;
2888 			}
2889 		}
2890 		gapstart = cumtsn + ntohs(ssf->ssf_start);
2891 		gapend = cumtsn + ntohs(ssf->ssf_end);
2892 
2893 		/*
2894 		 * Sanity checks:
2895 		 *
2896 		 * 1. SACK for TSN we have not sent - ABORT
2897 		 * 2. Invalid or spurious gaps, ignore all gaps
2898 		 */
2899 		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
2900 		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
2901 			BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent);
2902 			return (-1);
2903 		} else if (SEQ_LT(gapend, gapstart) ||
2904 		    SEQ_LEQ(gapstart, cumtsn)) {
2905 			break;
2906 		}
2907 		/*
2908 		 * Let's start at the current TSN (for the 1st gap we start
2909 		 * from the cumulative TSN, for subsequent ones we start from
2910 		 * where the previous gapend was found - second while loop
2911 		 * below) and walk the transmit list till we find the TSN
2912 		 * corresponding to gapstart. All the unacked chunks till we
2913 		 * get to the chunk with TSN == gapstart will have their
2914 		 * SACKCNT incremented by 1. Note since the gap blocks are
2915 		 * ordered, we won't be incrementing the SACKCNT for an
2916 		 * unacked chunk by more than one while processing the gap
2917 		 * blocks. If the SACKCNT for any unacked chunk exceeds
2918 		 * the fast retransmit threshold, we will fast retransmit
2919 		 * after processing all the gap blocks.
2920 		 */
2921 		ASSERT(SEQ_LEQ(xtsn, gapstart));
2922 		while (xtsn != gapstart) {
2923 			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
2924 			if (SCTP_CHUNK_SACKCNT(mp) ==
2925 			    sctps->sctps_fast_rxt_thresh) {
2926 				SCTP_CHUNK_REXMIT(sctp, mp);
2927 				sctp->sctp_chk_fast_rexmit = B_TRUE;
2928 				trysend = 1;
2929 				if (!fast_recovery) {
2930 					/*
2931 					 * Entering fast recovery.
2932 					 */
2933 					fp = SCTP_CHUNK_DEST(mp);
2934 					fp->ssthresh = fp->cwnd / 2;
2935 					if (fp->ssthresh < 2 * fp->sfa_pmss) {
2936 						fp->ssthresh =
2937 						    2 * fp->sfa_pmss;
2938 					}
2939 					fp->cwnd = fp->ssthresh;
2940 					fp->pba = 0;
2941 					sctp->sctp_recovery_tsn =
2942 					    sctp->sctp_ltsn - 1;
2943 					fast_recovery = B_TRUE;
2944 				}
2945 			}
2946 
2947 			/*
2948 			 * Peer may have reneged on this chunk, so un-sack
2949 			 * it now. If the peer did renege, we need to
2950 			 * readjust unacked.
2951 			 */
2952 			if (SCTP_CHUNK_ISACKED(mp)) {
2953 				chunklen = ntohs(sdc->sdh_len);
2954 				fp = SCTP_CHUNK_DEST(mp);
2955 				fp->suna += chunklen;
2956 				sctp->sctp_unacked += chunklen - sizeof (*sdc);
2957 				SCTP_CHUNK_CLEAR_ACKED(sctp, mp);
2958 				if (!fp->timer_running) {
2959 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
2960 					    fp->rto);
2961 				}
2962 			}
2963 
2964 			mp = mp->b_next;
2965 			if (mp == NULL) {
2966 				ump = ump->b_next;
2967 				/*
2968 				 * ump can't be NULL given the sanity check
2969 				 * above.  But if it is NULL, it means that
2970 				 * there is a data corruption.  We'd better
2971 				 * panic.
2972 				 */
2973 				if (ump == NULL) {
2974 					panic("Memory corruption detected: gap "
2975 					    "start TSN 0x%x missing from the "
2976 					    "xmit list: %p", gapstart,
2977 					    (void *)sctp);
2978 				}
2979 				mp = ump->b_cont;
2980 			}
2981 			/*
2982 			 * mp can't be unsent given the sanity check above.
2983 			 */
2984 			ASSERT(SCTP_CHUNK_ISSENT(mp));
2985 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2986 			xtsn = ntohl(sdc->sdh_tsn);
2987 		}
2988 		/*
2989 		 * Now that we have found the chunk with TSN == 'gapstart',
2990 		 * let's walk till we hit the chunk with TSN == 'gapend'.
2991 		 * All intermediate chunks will be marked ACKED, if they
2992 		 * haven't already been.
2993 		 */
2994 		while (SEQ_LEQ(xtsn, gapend)) {
2995 			/*
2996 			 * SACKed
2997 			 */
2998 			SCTP_CHUNK_SET_SACKCNT(mp, 0);
2999 			if (!SCTP_CHUNK_ISACKED(mp)) {
3000 				SCTP_CHUNK_ACKED(mp);
3001 
3002 				fp = SCTP_CHUNK_DEST(mp);
3003 				chunklen = ntohs(sdc->sdh_len);
3004 				ASSERT(fp->suna >= chunklen);
3005 				fp->suna -= chunklen;
3006 				if (fp->suna == 0) {
3007 					/* All outstanding data acked. */
3008 					fp->pba = 0;
3009 					SCTP_FADDR_TIMER_STOP(fp);
3010 				}
3011 				fp->acked += chunklen;
3012 				acked += chunklen;
3013 				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
3014 				ASSERT(sctp->sctp_unacked >= 0);
3015 			}
3016 			/* Go to the next chunk of the current message */
3017 			mp = mp->b_next;
3018 			/*
3019 			 * Move to the next message in the transmit list
3020 			 * if we are done with all the chunks from the current
3021 			 * message. Note, it is possible to hit the end of the
3022 			 * transmit list here, i.e. if we have already completed
3023 			 * processing the gap block.  But the TSN must be equal
3024 			 * to the gapend because of the above sanity check.
3025 			 * If it is not equal, it means that some data is
3026 			 * missing.
3027 			 * Also, note that we break here, which means we
3028 			 * continue processing gap blocks, if any. In case of
3029 			 * ordered gap blocks there can't be any following
3030 			 * this (if there is it will fail the sanity check
3031 			 * above). In case of un-ordered gap blocks we will
3032 			 * switch to sctp_process_uo_gaps().  In either case
3033 			 * it should be fine to continue with NULL ump/mp,
3034 			 * but we just reset it to xmit_head.
3035 			 */
3036 			if (mp == NULL) {
3037 				ump = ump->b_next;
3038 				if (ump == NULL) {
3039 					if (xtsn != gapend) {
3040 						panic("Memory corruption "
3041 						    "detected: gap end TSN "
3042 						    "0x%x missing from the "
3043 						    "xmit list: %p", gapend,
3044 						    (void *)sctp);
3045 					}
3046 					ump = sctp->sctp_xmit_head;
3047 					mp = mp1;
3048 					sdc = (sctp_data_hdr_t *)mp->b_rptr;
3049 					xtsn = ntohl(sdc->sdh_tsn);
3050 					break;
3051 				}
3052 				mp = ump->b_cont;
3053 			}
3054 			/*
3055 			 * Likewise, we could hit an unsent chunk once we have
3056 			 * completed processing the gap block. Again, it is
3057 			 * fine to continue processing gap blocks with mp
3058 			 * pointing to the unsent chunk, because if there
3059 			 * are more ordered gap blocks, they will fail the
3060 			 * sanity check, and if there are un-ordered gap blocks,
3061 			 * we will continue processing in sctp_process_uo_gaps()
3062 			 * We just reset the mp to the one we started with.
3063 			 */
3064 			if (!SCTP_CHUNK_ISSENT(mp)) {
3065 				ASSERT(xtsn == gapend);
3066 				ump = sctp->sctp_xmit_head;
3067 				mp = mp1;
3068 				sdc = (sctp_data_hdr_t *)mp->b_rptr;
3069 				xtsn = ntohl(sdc->sdh_tsn);
3070 				break;
3071 			}
3072 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
3073 			xtsn = ntohl(sdc->sdh_tsn);
3074 		}
3075 	}
3076 	if (sctp->sctp_prsctp_aware)
3077 		sctp_check_abandoned_data(sctp, sctp->sctp_current);
3078 	if (sctp->sctp_chk_fast_rexmit)
3079 		sctp_fast_rexmit(sctp);
3080 ret:
3081 	trysend += sctp_set_frwnd(sctp, ntohl(sc->ssc_a_rwnd));
3082 
3083 	/*
3084 	 * If receive window is closed while there is unsent data,
3085 	 * set a timer for doing zero window probes.
3086 	 */
3087 	if (sctp->sctp_frwnd == 0 && sctp->sctp_unacked == 0 &&
3088 	    sctp->sctp_unsent != 0) {
3089 		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
3090 		    sctp->sctp_current->rto);
3091 	}
3092 
3093 	/*
3094 	 * Set cwnd for all destinations.
3095 	 * Congestion window gets increased only when cumulative
3096 	 * TSN moves forward, we're not in fast recovery, and
3097 	 * cwnd has been fully utilized (almost fully, need to allow
3098 	 * some leeway due to non-MSS sized messages).
3099 	 */
3100 	if (sctp->sctp_current->acked == acked) {
3101 		/*
3102 		 * Fast-path, only data sent to sctp_current got acked.
3103 		 */
3104 		fp = sctp->sctp_current;
3105 		if (cumack_forward && !fast_recovery &&
3106 		    (fp->acked + fp->suna > fp->cwnd - fp->sfa_pmss)) {
3107 			if (fp->cwnd < fp->ssthresh) {
3108 				/*
3109 				 * Slow start
3110 				 */
3111 				if (fp->acked > fp->sfa_pmss) {
3112 					fp->cwnd += fp->sfa_pmss;
3113 				} else {
3114 					fp->cwnd += fp->acked;
3115 				}
3116 				fp->cwnd = MIN(fp->cwnd, sctp->sctp_cwnd_max);
3117 			} else {
3118 				/*
3119 				 * Congestion avoidance
3120 				 */
3121 				fp->pba += fp->acked;
3122 				if (fp->pba >= fp->cwnd) {
3123 					fp->pba -= fp->cwnd;
3124 					fp->cwnd += fp->sfa_pmss;
3125 					fp->cwnd = MIN(fp->cwnd,
3126 					    sctp->sctp_cwnd_max);
3127 				}
3128 			}
3129 		}
3130 		/*
3131 		 * Limit the burst of transmitted data segments.
3132 		 */
3133 		if (fp->suna + sctps->sctps_maxburst * fp->sfa_pmss <
3134 		    fp->cwnd) {
3135 			fp->cwnd = fp->suna + sctps->sctps_maxburst *
3136 			    fp->sfa_pmss;
3137 		}
3138 		fp->acked = 0;
3139 		goto check_ss_rxmit;
3140 	}
3141 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
3142 		if (cumack_forward && fp->acked && !fast_recovery &&
3143 		    (fp->acked + fp->suna > fp->cwnd - fp->sfa_pmss)) {
3144 			if (fp->cwnd < fp->ssthresh) {
3145 				if (fp->acked > fp->sfa_pmss) {
3146 					fp->cwnd += fp->sfa_pmss;
3147 				} else {
3148 					fp->cwnd += fp->acked;
3149 				}
3150 				fp->cwnd = MIN(fp->cwnd, sctp->sctp_cwnd_max);
3151 			} else {
3152 				fp->pba += fp->acked;
3153 				if (fp->pba >= fp->cwnd) {
3154 					fp->pba -= fp->cwnd;
3155 					fp->cwnd += fp->sfa_pmss;
3156 					fp->cwnd = MIN(fp->cwnd,
3157 					    sctp->sctp_cwnd_max);
3158 				}
3159 			}
3160 		}
3161 		if (fp->suna + sctps->sctps_maxburst * fp->sfa_pmss <
3162 		    fp->cwnd) {
3163 			fp->cwnd = fp->suna + sctps->sctps_maxburst *
3164 			    fp->sfa_pmss;
3165 		}
3166 		fp->acked = 0;
3167 	}
3168 	fp = sctp->sctp_current;
3169 check_ss_rxmit:
3170 	/*
3171 	 * If this is a SACK following a timeout, check if there are
3172 	 * still unacked chunks (sent before the timeout) that we can
3173 	 * send.
3174 	 */
3175 	if (sctp->sctp_rexmitting) {
3176 		if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_rxt_maxtsn)) {
3177 			/*
3178 			 * As we are in retransmission phase, we may get a
3179 			 * SACK which indicates some new chunks are received
3180 			 * but cum_tsn does not advance.  During this
3181 			 * phase, the other side advances cum_tsn only because
3182 			 * it receives our retransmitted chunks.  Only
3183 			 * this signals that some chunks are still
3184 			 * missing.
3185 			 */
3186 			if (cumack_forward) {
3187 				fp->rxt_unacked -= acked;
3188 				sctp_ss_rexmit(sctp);
3189 			}
3190 		} else {
3191 			sctp->sctp_rexmitting = B_FALSE;
3192 			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
3193 			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
3194 			fp->rxt_unacked = 0;
3195 		}
3196 	}
3197 	return (trysend);
3198 }
3199 
3200 /*
3201  * Returns 0 if the caller should stop processing any more chunks,
3202  * 1 if the caller should skip this chunk and continue processing.
3203  */
3204 static int
3205 sctp_strange_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp)
3206 {
3207 	size_t len;
3208 
3209 	BUMP_LOCAL(sctp->sctp_ibchunks);
3210 	/* check top two bits for action required */
3211 	if (ch->sch_id & 0x40) {	/* also matches 0xc0 */
3212 		len = ntohs(ch->sch_len);
3213 		sctp_add_err(sctp, SCTP_ERR_UNREC_CHUNK, ch, len, fp);
3214 
3215 		if ((ch->sch_id & 0xc0) == 0xc0) {
3216 			/* skip and continue */
3217 			return (1);
3218 		} else {
3219 			/* stop processing */
3220 			return (0);
3221 		}
3222 	}
3223 	if (ch->sch_id & 0x80) {
3224 		/* skip and continue, no error */
3225 		return (1);
3226 	}
3227 	/* top two bits are clear; stop processing and no error */
3228 	return (0);
3229 }
3230 
3231 /*
3232  * Basic sanity checks on all input chunks and parameters: they must
3233  * be of legitimate size for their purported type, and must follow
3234  * ordering conventions as defined in rfc2960.
3235  *
3236  * Returns 1 if the chunk and all encloded params are legitimate,
3237  * 0 otherwise.
3238  */
3239 /*ARGSUSED*/
3240 static int
3241 sctp_check_input(sctp_t *sctp, sctp_chunk_hdr_t *ch, ssize_t len, int first)
3242 {
3243 	sctp_parm_hdr_t	*ph;
3244 	void		*p = NULL;
3245 	ssize_t		clen;
3246 	uint16_t	ch_len;
3247 
3248 	ch_len = ntohs(ch->sch_len);
3249 	if (ch_len > len) {
3250 		return (0);
3251 	}
3252 
3253 	switch (ch->sch_id) {
3254 	case CHUNK_DATA:
3255 		if (ch_len < sizeof (sctp_data_hdr_t)) {
3256 			return (0);
3257 		}
3258 		return (1);
3259 	case CHUNK_INIT:
3260 	case CHUNK_INIT_ACK:
3261 		{
3262 			ssize_t	remlen = len;
3263 
3264 			/*
3265 			 * INIT and INIT-ACK chunks must not be bundled with
3266 			 * any other.
3267 			 */
3268 			if (!first || sctp_next_chunk(ch, &remlen) != NULL ||
3269 			    (ch_len < (sizeof (*ch) +
3270 			    sizeof (sctp_init_chunk_t)))) {
3271 				return (0);
3272 			}
3273 			/* may have params that need checking */
3274 			p = (char *)(ch + 1) + sizeof (sctp_init_chunk_t);
3275 			clen = ch_len - (sizeof (*ch) +
3276 			    sizeof (sctp_init_chunk_t));
3277 		}
3278 		break;
3279 	case CHUNK_SACK:
3280 		if (ch_len < (sizeof (*ch) + sizeof (sctp_sack_chunk_t))) {
3281 			return (0);
3282 		}
3283 		/* dup and gap reports checked by got_sack() */
3284 		return (1);
3285 	case CHUNK_SHUTDOWN:
3286 		if (ch_len < (sizeof (*ch) + sizeof (uint32_t))) {
3287 			return (0);
3288 		}
3289 		return (1);
3290 	case CHUNK_ABORT:
3291 	case CHUNK_ERROR:
3292 		if (ch_len < sizeof (*ch)) {
3293 			return (0);
3294 		}
3295 		/* may have params that need checking */
3296 		p = ch + 1;
3297 		clen = ch_len - sizeof (*ch);
3298 		break;
3299 	case CHUNK_ECNE:
3300 	case CHUNK_CWR:
3301 	case CHUNK_HEARTBEAT:
3302 	case CHUNK_HEARTBEAT_ACK:
3303 	/* Full ASCONF chunk and parameter checks are in asconf.c */
3304 	case CHUNK_ASCONF:
3305 	case CHUNK_ASCONF_ACK:
3306 		if (ch_len < sizeof (*ch)) {
3307 			return (0);
3308 		}
3309 		/* heartbeat data checked by process_heartbeat() */
3310 		return (1);
3311 	case CHUNK_SHUTDOWN_COMPLETE:
3312 		{
3313 			ssize_t remlen = len;
3314 
3315 			/*
3316 			 * SHUTDOWN-COMPLETE chunk must not be bundled with any
3317 			 * other
3318 			 */
3319 			if (!first || sctp_next_chunk(ch, &remlen) != NULL ||
3320 			    ch_len < sizeof (*ch)) {
3321 				return (0);
3322 			}
3323 		}
3324 		return (1);
3325 	case CHUNK_COOKIE:
3326 	case CHUNK_COOKIE_ACK:
3327 	case CHUNK_SHUTDOWN_ACK:
3328 		if (ch_len < sizeof (*ch) || !first) {
3329 			return (0);
3330 		}
3331 		return (1);
3332 	case CHUNK_FORWARD_TSN:
3333 		if (ch_len < (sizeof (*ch) + sizeof (uint32_t)))
3334 			return (0);
3335 		return (1);
3336 	default:
3337 		return (1);	/* handled by strange_chunk() */
3338 	}
3339 
3340 	/* check and byteorder parameters */
3341 	if (clen <= 0) {
3342 		return (1);
3343 	}
3344 	ASSERT(p != NULL);
3345 
3346 	ph = p;
3347 	while (ph != NULL && clen > 0) {
3348 		ch_len = ntohs(ph->sph_len);
3349 		if (ch_len > len || ch_len < sizeof (*ph)) {
3350 			return (0);
3351 		}
3352 		ph = sctp_next_parm(ph, &clen);
3353 	}
3354 
3355 	/* All OK */
3356 	return (1);
3357 }
3358 
3359 static mblk_t *
3360 sctp_check_in_policy(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
3361 {
3362 	boolean_t policy_present;
3363 	ipha_t *ipha;
3364 	ip6_t *ip6h;
3365 	netstack_t	*ns = ipst->ips_netstack;
3366 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3367 
3368 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
3369 		policy_present = ipss->ipsec_inbound_v4_policy_present;
3370 		ipha = (ipha_t *)mp->b_rptr;
3371 		ip6h = NULL;
3372 	} else {
3373 		policy_present = ipss->ipsec_inbound_v6_policy_present;
3374 		ipha = NULL;
3375 		ip6h = (ip6_t *)mp->b_rptr;
3376 	}
3377 
3378 	if (policy_present) {
3379 		/*
3380 		 * The conn_t parameter is NULL because we already know
3381 		 * nobody's home.
3382 		 */
3383 		mp = ipsec_check_global_policy(mp, (conn_t *)NULL,
3384 		    ipha, ip6h, ira, ns);
3385 		if (mp == NULL)
3386 			return (NULL);
3387 	}
3388 	return (mp);
3389 }
3390 
3391 /* Handle out-of-the-blue packets */
3392 void
3393 sctp_ootb_input(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
3394 {
3395 	sctp_t			*sctp;
3396 	sctp_chunk_hdr_t	*ch;
3397 	sctp_hdr_t		*sctph;
3398 	in6_addr_t		src, dst;
3399 	uint_t			ip_hdr_len = ira->ira_ip_hdr_length;
3400 	ssize_t			mlen;
3401 	sctp_stack_t		*sctps;
3402 	boolean_t		secure;
3403 	zoneid_t		zoneid = ira->ira_zoneid;
3404 	uchar_t			*rptr;
3405 
3406 	ASSERT(ira->ira_ill == NULL);
3407 
3408 	secure = ira->ira_flags & IRAF_IPSEC_SECURE;
3409 
3410 	sctps = ipst->ips_netstack->netstack_sctp;
3411 
3412 	BUMP_MIB(&sctps->sctps_mib, sctpOutOfBlue);
3413 	BUMP_MIB(&sctps->sctps_mib, sctpInSCTPPkts);
3414 
3415 	if (mp->b_cont != NULL) {
3416 		/*
3417 		 * All subsequent code is vastly simplified if it can
3418 		 * assume a single contiguous chunk of data.
3419 		 */
3420 		if (pullupmsg(mp, -1) == 0) {
3421 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3422 			ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3423 			freemsg(mp);
3424 			return;
3425 		}
3426 	}
3427 
3428 	rptr = mp->b_rptr;
3429 	sctph = ((sctp_hdr_t *)&rptr[ip_hdr_len]);
3430 	if (ira->ira_flags & IRAF_IS_IPV4) {
3431 		ipha_t *ipha;
3432 
3433 		ipha = (ipha_t *)rptr;
3434 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &src);
3435 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst);
3436 	} else {
3437 		ip6_t *ip6h;
3438 
3439 		ip6h = (ip6_t *)rptr;
3440 		src = ip6h->ip6_src;
3441 		dst = ip6h->ip6_dst;
3442 	}
3443 
3444 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
3445 	if ((ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
3446 		dprint(3, ("sctp_ootb_input: invalid packet\n"));
3447 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3448 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3449 		freemsg(mp);
3450 		return;
3451 	}
3452 
3453 	switch (ch->sch_id) {
3454 	case CHUNK_INIT:
3455 		/* no listener; send abort  */
3456 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3457 			return;
3458 		sctp_ootb_send_abort(sctp_init2vtag(ch), 0,
3459 		    NULL, 0, mp, 0, B_TRUE, ira, ipst);
3460 		break;
3461 	case CHUNK_INIT_ACK:
3462 		/* check for changed src addr */
3463 		sctp = sctp_addrlist2sctp(mp, sctph, ch, zoneid, sctps);
3464 		if (sctp != NULL) {
3465 			/* success; proceed to normal path */
3466 			mutex_enter(&sctp->sctp_lock);
3467 			if (sctp->sctp_running) {
3468 				sctp_add_recvq(sctp, mp, B_FALSE, ira);
3469 				mutex_exit(&sctp->sctp_lock);
3470 			} else {
3471 				/*
3472 				 * If the source address is changed, we
3473 				 * don't need to worry too much about
3474 				 * out of order processing.  So we don't
3475 				 * check if the recvq is empty or not here.
3476 				 */
3477 				sctp->sctp_running = B_TRUE;
3478 				mutex_exit(&sctp->sctp_lock);
3479 				sctp_input_data(sctp, mp, ira);
3480 				WAKE_SCTP(sctp);
3481 			}
3482 			SCTP_REFRELE(sctp);
3483 			return;
3484 		}
3485 		/* else bogus init ack; drop it */
3486 		break;
3487 	case CHUNK_SHUTDOWN_ACK:
3488 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3489 			return;
3490 		sctp_ootb_shutdown_ack(mp, ip_hdr_len, ira, ipst);
3491 		return;
3492 	case CHUNK_ERROR:
3493 	case CHUNK_ABORT:
3494 	case CHUNK_COOKIE_ACK:
3495 	case CHUNK_SHUTDOWN_COMPLETE:
3496 		break;
3497 	default:
3498 		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
3499 			return;
3500 		sctp_ootb_send_abort(sctph->sh_verf, 0,
3501 		    NULL, 0, mp, 0, B_TRUE, ira, ipst);
3502 		break;
3503 	}
3504 	freemsg(mp);
3505 }
3506 
3507 /*
3508  * Handle sctp packets.
3509  * Note that we rele the sctp_t (the caller got a reference on it).
3510  */
3511 void
3512 sctp_input(conn_t *connp, ipha_t *ipha, ip6_t *ip6h, mblk_t *mp,
3513     ip_recv_attr_t *ira)
3514 {
3515 	sctp_t		*sctp = CONN2SCTP(connp);
3516 	boolean_t	secure;
3517 	ill_t		*ill = ira->ira_ill;
3518 	ip_stack_t	*ipst = ill->ill_ipst;
3519 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
3520 	iaflags_t	iraflags = ira->ira_flags;
3521 	ill_t		*rill = ira->ira_rill;
3522 
3523 	secure = iraflags & IRAF_IPSEC_SECURE;
3524 
3525 	/*
3526 	 * We check some fields in conn_t without holding a lock.
3527 	 * This should be fine.
3528 	 */
3529 	if (((iraflags & IRAF_IS_IPV4) ?
3530 	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
3531 	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
3532 	    secure) {
3533 		mp = ipsec_check_inbound_policy(mp, connp, ipha,
3534 		    ip6h, ira);
3535 		if (mp == NULL) {
3536 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3537 			/* Note that mp is NULL */
3538 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3539 			SCTP_REFRELE(sctp);
3540 			return;
3541 		}
3542 	}
3543 
3544 	ira->ira_ill = ira->ira_rill = NULL;
3545 
3546 	mutex_enter(&sctp->sctp_lock);
3547 	if (sctp->sctp_running) {
3548 		sctp_add_recvq(sctp, mp, B_FALSE, ira);
3549 		mutex_exit(&sctp->sctp_lock);
3550 		goto done;
3551 	} else {
3552 		sctp->sctp_running = B_TRUE;
3553 		mutex_exit(&sctp->sctp_lock);
3554 
3555 		mutex_enter(&sctp->sctp_recvq_lock);
3556 		if (sctp->sctp_recvq != NULL) {
3557 			sctp_add_recvq(sctp, mp, B_TRUE, ira);
3558 			mutex_exit(&sctp->sctp_recvq_lock);
3559 			WAKE_SCTP(sctp);
3560 			goto done;
3561 		}
3562 	}
3563 	mutex_exit(&sctp->sctp_recvq_lock);
3564 	if (ira->ira_flags & IRAF_ICMP_ERROR)
3565 		sctp_icmp_error(sctp, mp);
3566 	else
3567 		sctp_input_data(sctp, mp, ira);
3568 	WAKE_SCTP(sctp);
3569 
3570 done:
3571 	SCTP_REFRELE(sctp);
3572 	ira->ira_ill = ill;
3573 	ira->ira_rill = rill;
3574 }
3575 
3576 static void
3577 sctp_process_abort(sctp_t *sctp, sctp_chunk_hdr_t *ch, int err)
3578 {
3579 	sctp_stack_t	*sctps = sctp->sctp_sctps;
3580 
3581 	BUMP_MIB(&sctps->sctps_mib, sctpAborted);
3582 	BUMP_LOCAL(sctp->sctp_ibchunks);
3583 
3584 	sctp_assoc_event(sctp, SCTP_COMM_LOST,
3585 	    ntohs(((sctp_parm_hdr_t *)(ch + 1))->sph_type), ch);
3586 	sctp_clean_death(sctp, err);
3587 }
3588 
3589 void
3590 sctp_input_data(sctp_t *sctp, mblk_t *mp, ip_recv_attr_t *ira)
3591 {
3592 	sctp_chunk_hdr_t	*ch;
3593 	ssize_t			mlen;
3594 	int			gotdata;
3595 	int			trysend;
3596 	sctp_faddr_t		*fp;
3597 	sctp_init_chunk_t	*iack;
3598 	uint32_t		tsn;
3599 	sctp_data_hdr_t		*sdc;
3600 	ip_pkt_t		ipp;
3601 	in6_addr_t		src;
3602 	in6_addr_t		dst;
3603 	uint_t			ifindex;
3604 	sctp_hdr_t		*sctph;
3605 	uint_t			ip_hdr_len = ira->ira_ip_hdr_length;
3606 	mblk_t			*dups = NULL;
3607 	int			recv_adaptation;
3608 	boolean_t		wake_eager = B_FALSE;
3609 	in6_addr_t		peer_src;
3610 	int64_t			now;
3611 	sctp_stack_t		*sctps = sctp->sctp_sctps;
3612 	ip_stack_t		*ipst = sctps->sctps_netstack->netstack_ip;
3613 	boolean_t		hb_already = B_FALSE;
3614 	cred_t			*cr;
3615 	pid_t			cpid;
3616 	uchar_t			*rptr;
3617 	conn_t			*connp = sctp->sctp_connp;
3618 	boolean_t		shutdown_ack_needed = B_FALSE;
3619 
3620 	ASSERT(DB_TYPE(mp) == M_DATA);
3621 	ASSERT(ira->ira_ill == NULL);
3622 
3623 	if (mp->b_cont != NULL) {
3624 		/*
3625 		 * All subsequent code is vastly simplified if it can
3626 		 * assume a single contiguous chunk of data.
3627 		 */
3628 		if (pullupmsg(mp, -1) == 0) {
3629 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3630 			ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3631 			freemsg(mp);
3632 			return;
3633 		}
3634 	}
3635 
3636 	BUMP_LOCAL(sctp->sctp_ipkts);
3637 	ifindex = ira->ira_ruifindex;
3638 
3639 	rptr = mp->b_rptr;
3640 
3641 	ipp.ipp_fields = 0;
3642 	if (connp->conn_recv_ancillary.crb_all != 0) {
3643 		/*
3644 		 * Record packet information in the ip_pkt_t
3645 		 */
3646 		if (ira->ira_flags & IRAF_IS_IPV4) {
3647 			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipp,
3648 			    B_FALSE);
3649 		} else {
3650 			uint8_t nexthdrp;
3651 
3652 			/*
3653 			 * IPv6 packets can only be received by applications
3654 			 * that are prepared to receive IPv6 addresses.
3655 			 * The IP fanout must ensure this.
3656 			 */
3657 			ASSERT(connp->conn_family == AF_INET6);
3658 
3659 			(void) ip_find_hdr_v6(mp, (ip6_t *)rptr, B_TRUE, &ipp,
3660 			    &nexthdrp);
3661 			ASSERT(nexthdrp == IPPROTO_SCTP);
3662 
3663 			/* Could have caused a pullup? */
3664 			rptr = mp->b_rptr;
3665 		}
3666 	}
3667 
3668 	sctph = ((sctp_hdr_t *)&rptr[ip_hdr_len]);
3669 
3670 	if (ira->ira_flags & IRAF_IS_IPV4) {
3671 		ipha_t *ipha;
3672 
3673 		ipha = (ipha_t *)rptr;
3674 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &src);
3675 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst);
3676 	} else {
3677 		ip6_t *ip6h;
3678 
3679 		ip6h = (ip6_t *)rptr;
3680 		src = ip6h->ip6_src;
3681 		dst = ip6h->ip6_dst;
3682 	}
3683 
3684 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
3685 	ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen);
3686 	if (ch == NULL) {
3687 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3688 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3689 		freemsg(mp);
3690 		return;
3691 	}
3692 
3693 	if (!sctp_check_input(sctp, ch, mlen, 1)) {
3694 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3695 		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3696 		goto done;
3697 	}
3698 	/*
3699 	 * Check verfication tag (special handling for INIT,
3700 	 * COOKIE, SHUTDOWN_COMPLETE and SHUTDOWN_ACK chunks).
3701 	 * ABORTs are handled in the chunk processing loop, since
3702 	 * may not appear first. All other checked chunks must
3703 	 * appear first, or will have been dropped by check_input().
3704 	 */
3705 	switch (ch->sch_id) {
3706 	case CHUNK_INIT:
3707 		if (sctph->sh_verf != 0) {
3708 			/* drop it */
3709 			goto done;
3710 		}
3711 		break;
3712 	case CHUNK_SHUTDOWN_COMPLETE:
3713 		if (sctph->sh_verf == sctp->sctp_lvtag)
3714 			break;
3715 		if (sctph->sh_verf == sctp->sctp_fvtag &&
3716 		    SCTP_GET_TBIT(ch)) {
3717 			break;
3718 		}
3719 		/* else drop it */
3720 		goto done;
3721 	case CHUNK_ABORT:
3722 	case CHUNK_COOKIE:
3723 		/* handled below */
3724 		break;
3725 	case CHUNK_SHUTDOWN_ACK:
3726 		if (sctp->sctp_state > SCTPS_BOUND &&
3727 		    sctp->sctp_state < SCTPS_ESTABLISHED) {
3728 			/* treat as OOTB */
3729 			sctp_ootb_shutdown_ack(mp, ip_hdr_len, ira, ipst);
3730 			return;
3731 		}
3732 		/* else fallthru */
3733 	default:
3734 		/*
3735 		 * All other packets must have a valid
3736 		 * verification tag, however if this is a
3737 		 * listener, we use a refined version of
3738 		 * out-of-the-blue logic.
3739 		 */
3740 		if (sctph->sh_verf != sctp->sctp_lvtag &&
3741 		    sctp->sctp_state != SCTPS_LISTEN) {
3742 			/* drop it */
3743 			goto done;
3744 		}
3745 		break;
3746 	}
3747 
3748 	/* Have a valid sctp for this packet */
3749 	fp = sctp_lookup_faddr(sctp, &src);
3750 	dprint(2, ("sctp_dispatch_rput: mp=%p fp=%p sctp=%p\n", (void *)mp,
3751 	    (void *)fp, (void *)sctp));
3752 
3753 	gotdata = 0;
3754 	trysend = 0;
3755 
3756 	now = ddi_get_lbolt64();
3757 	/* Process the chunks */
3758 	do {
3759 		dprint(3, ("sctp_dispatch_rput: state=%d, chunk id=%d\n",
3760 		    sctp->sctp_state, (int)(ch->sch_id)));
3761 
3762 		if (ch->sch_id == CHUNK_ABORT) {
3763 			if (sctph->sh_verf != sctp->sctp_lvtag &&
3764 			    sctph->sh_verf != sctp->sctp_fvtag) {
3765 				/* drop it */
3766 				goto done;
3767 			}
3768 		}
3769 
3770 		switch (sctp->sctp_state) {
3771 
3772 		case SCTPS_ESTABLISHED:
3773 		case SCTPS_SHUTDOWN_PENDING:
3774 		case SCTPS_SHUTDOWN_SENT:
3775 			switch (ch->sch_id) {
3776 			case CHUNK_DATA:
3777 				/* 0-length data chunks are not allowed */
3778 				if (ntohs(ch->sch_len) == sizeof (*sdc)) {
3779 					sdc = (sctp_data_hdr_t *)ch;
3780 					tsn = sdc->sdh_tsn;
3781 					sctp_send_abort(sctp, sctp->sctp_fvtag,
3782 					    SCTP_ERR_NO_USR_DATA, (char *)&tsn,
3783 					    sizeof (tsn), mp, 0, B_FALSE, ira);
3784 					sctp_assoc_event(sctp, SCTP_COMM_LOST,
3785 					    0, NULL);
3786 					sctp_clean_death(sctp, ECONNABORTED);
3787 					goto done;
3788 				}
3789 
3790 				ASSERT(fp != NULL);
3791 				sctp->sctp_lastdata = fp;
3792 				sctp_data_chunk(sctp, ch, mp, &dups, fp,
3793 				    &ipp, ira);
3794 				gotdata = 1;
3795 				/* Restart shutdown timer if shutting down */
3796 				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
3797 					/*
3798 					 * If we have exceeded our max
3799 					 * wait bound for waiting for a
3800 					 * shutdown ack from the peer,
3801 					 * abort the association.
3802 					 */
3803 					if (sctps->sctps_shutack_wait_bound !=
3804 					    0 &&
3805 					    TICK_TO_MSEC(now -
3806 					    sctp->sctp_out_time) >
3807 					    sctps->sctps_shutack_wait_bound) {
3808 						sctp_send_abort(sctp,
3809 						    sctp->sctp_fvtag, 0, NULL,
3810 						    0, mp, 0, B_FALSE, ira);
3811 						sctp_assoc_event(sctp,
3812 						    SCTP_COMM_LOST, 0, NULL);
3813 						sctp_clean_death(sctp,
3814 						    ECONNABORTED);
3815 						goto done;
3816 					}
3817 					SCTP_FADDR_TIMER_RESTART(sctp, fp,
3818 					    fp->rto);
3819 				}
3820 				break;
3821 			case CHUNK_SACK:
3822 				ASSERT(fp != NULL);
3823 				/*
3824 				 * Peer is real and alive if it can ack our
3825 				 * data.
3826 				 */
3827 				sctp_faddr_alive(sctp, fp);
3828 				trysend = sctp_got_sack(sctp, ch);
3829 				if (trysend < 0) {
3830 					sctp_send_abort(sctp, sctph->sh_verf,
3831 					    0, NULL, 0, mp, 0, B_FALSE, ira);
3832 					sctp_assoc_event(sctp,
3833 					    SCTP_COMM_LOST, 0, NULL);
3834 					sctp_clean_death(sctp,
3835 					    ECONNABORTED);
3836 					goto done;
3837 				}
3838 				break;
3839 			case CHUNK_HEARTBEAT:
3840 				if (!hb_already) {
3841 					/*
3842 					 * In any one packet, there should
3843 					 * only be one heartbeat chunk.  So
3844 					 * we should not process more than
3845 					 * once.
3846 					 */
3847 					sctp_return_heartbeat(sctp, ch, mp);
3848 					hb_already = B_TRUE;
3849 				}
3850 				break;
3851 			case CHUNK_HEARTBEAT_ACK:
3852 				sctp_process_heartbeat(sctp, ch);
3853 				break;
3854 			case CHUNK_SHUTDOWN:
3855 				sctp_shutdown_event(sctp);
3856 				trysend = sctp_shutdown_received(sctp, ch,
3857 				    B_FALSE, B_FALSE, fp);
3858 				BUMP_LOCAL(sctp->sctp_ibchunks);
3859 				break;
3860 			case CHUNK_SHUTDOWN_ACK:
3861 				BUMP_LOCAL(sctp->sctp_ibchunks);
3862 				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
3863 					sctp_shutdown_complete(sctp);
3864 					BUMP_MIB(&sctps->sctps_mib,
3865 					    sctpShutdowns);
3866 					sctp_assoc_event(sctp,
3867 					    SCTP_SHUTDOWN_COMP, 0, NULL);
3868 					sctp_clean_death(sctp, 0);
3869 					goto done;
3870 				}
3871 				break;
3872 			case CHUNK_ABORT: {
3873 				sctp_saddr_ipif_t *sp;
3874 
3875 				/* Ignore if delete pending */
3876 				sp = sctp_saddr_lookup(sctp, &dst, 0);
3877 				ASSERT(sp != NULL);
3878 				if (sp->saddr_ipif_delete_pending) {
3879 					BUMP_LOCAL(sctp->sctp_ibchunks);
3880 					break;
3881 				}
3882 
3883 				sctp_process_abort(sctp, ch, ECONNRESET);
3884 				goto done;
3885 			}
3886 			case CHUNK_INIT:
3887 				sctp_send_initack(sctp, sctph, ch, mp, ira);
3888 				break;
3889 			case CHUNK_COOKIE:
3890 				if (sctp_process_cookie(sctp, ch, mp, &iack,
3891 				    sctph, &recv_adaptation, NULL, ira) != -1) {
3892 					sctp_send_cookie_ack(sctp);
3893 					sctp_assoc_event(sctp, SCTP_RESTART,
3894 					    0, NULL);
3895 					if (recv_adaptation) {
3896 						sctp->sctp_recv_adaptation = 1;
3897 						sctp_adaptation_event(sctp);
3898 					}
3899 				} else {
3900 					BUMP_MIB(&sctps->sctps_mib,
3901 					    sctpInInvalidCookie);
3902 				}
3903 				break;
3904 			case CHUNK_ERROR: {
3905 				int error;
3906 
3907 				BUMP_LOCAL(sctp->sctp_ibchunks);
3908 				error = sctp_handle_error(sctp, sctph, ch, mp,
3909 				    ira);
3910 				if (error != 0) {
3911 					sctp_assoc_event(sctp, SCTP_COMM_LOST,
3912 					    0, NULL);
3913 					sctp_clean_death(sctp, error);
3914 					goto done;
3915 				}
3916 				break;
3917 			}
3918 			case CHUNK_ASCONF:
3919 				ASSERT(fp != NULL);
3920 				sctp_input_asconf(sctp, ch, fp);
3921 				BUMP_LOCAL(sctp->sctp_ibchunks);
3922 				break;
3923 			case CHUNK_ASCONF_ACK:
3924 				ASSERT(fp != NULL);
3925 				sctp_faddr_alive(sctp, fp);
3926 				sctp_input_asconf_ack(sctp, ch, fp);
3927 				BUMP_LOCAL(sctp->sctp_ibchunks);
3928 				break;
3929 			case CHUNK_FORWARD_TSN:
3930 				ASSERT(fp != NULL);
3931 				sctp->sctp_lastdata = fp;
3932 				sctp_process_forward_tsn(sctp, ch, fp,
3933 				    &ipp, ira);
3934 				gotdata = 1;
3935 				BUMP_LOCAL(sctp->sctp_ibchunks);
3936 				break;
3937 			default:
3938 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
3939 					goto nomorechunks;
3940 				} /* else skip and continue processing */
3941 				break;
3942 			}
3943 			break;
3944 
3945 		case SCTPS_LISTEN:
3946 			switch (ch->sch_id) {
3947 			case CHUNK_INIT:
3948 				sctp_send_initack(sctp, sctph, ch, mp, ira);
3949 				break;
3950 			case CHUNK_COOKIE: {
3951 				sctp_t *eager;
3952 
3953 				if (sctp_process_cookie(sctp, ch, mp, &iack,
3954 				    sctph, &recv_adaptation, &peer_src,
3955 				    ira) == -1) {
3956 					BUMP_MIB(&sctps->sctps_mib,
3957 					    sctpInInvalidCookie);
3958 					goto done;
3959 				}
3960 
3961 				/*
3962 				 * The cookie is good; ensure that
3963 				 * the peer used the verification
3964 				 * tag from the init ack in the header.
3965 				 */
3966 				if (iack->sic_inittag != sctph->sh_verf)
3967 					goto done;
3968 
3969 				eager = sctp_conn_request(sctp, mp, ifindex,
3970 				    ip_hdr_len, iack, ira);
3971 				if (eager == NULL) {
3972 					sctp_send_abort(sctp, sctph->sh_verf,
3973 					    SCTP_ERR_NO_RESOURCES, NULL, 0, mp,
3974 					    0, B_FALSE, ira);
3975 					goto done;
3976 				}
3977 
3978 				/*
3979 				 * If there were extra chunks
3980 				 * bundled with the cookie,
3981 				 * they must be processed
3982 				 * on the eager's queue. We
3983 				 * accomplish this by refeeding
3984 				 * the whole packet into the
3985 				 * state machine on the right
3986 				 * q. The packet (mp) gets
3987 				 * there via the eager's
3988 				 * cookie_mp field (overloaded
3989 				 * with the active open role).
3990 				 * This is picked up when
3991 				 * processing the null bind
3992 				 * request put on the eager's
3993 				 * q by sctp_accept(). We must
3994 				 * first revert the cookie
3995 				 * chunk's length field to network
3996 				 * byteorder so it can be
3997 				 * properly reprocessed on the
3998 				 * eager's queue.
3999 				 */
4000 				BUMP_MIB(&sctps->sctps_mib, sctpPassiveEstab);
4001 				if (mlen > ntohs(ch->sch_len)) {
4002 					eager->sctp_cookie_mp = dupb(mp);
4003 					/*
4004 					 * If no mem, just let
4005 					 * the peer retransmit.
4006 					 */
4007 				}
4008 				sctp_assoc_event(eager, SCTP_COMM_UP, 0, NULL);
4009 				if (recv_adaptation) {
4010 					eager->sctp_recv_adaptation = 1;
4011 					eager->sctp_rx_adaptation_code =
4012 					    sctp->sctp_rx_adaptation_code;
4013 					sctp_adaptation_event(eager);
4014 				}
4015 
4016 				eager->sctp_active = now;
4017 				sctp_send_cookie_ack(eager);
4018 
4019 				wake_eager = B_TRUE;
4020 
4021 				/*
4022 				 * Process rest of the chunks with eager.
4023 				 */
4024 				sctp = eager;
4025 				fp = sctp_lookup_faddr(sctp, &peer_src);
4026 				/*
4027 				 * Confirm peer's original source.  fp can
4028 				 * only be NULL if peer does not use the
4029 				 * original source as one of its addresses...
4030 				 */
4031 				if (fp == NULL)
4032 					fp = sctp_lookup_faddr(sctp, &src);
4033 				else
4034 					sctp_faddr_alive(sctp, fp);
4035 
4036 				/*
4037 				 * Validate the peer addresses.  It also starts
4038 				 * the heartbeat timer.
4039 				 */
4040 				sctp_validate_peer(sctp);
4041 				break;
4042 			}
4043 			/* Anything else is considered out-of-the-blue */
4044 			case CHUNK_ERROR:
4045 			case CHUNK_ABORT:
4046 			case CHUNK_COOKIE_ACK:
4047 			case CHUNK_SHUTDOWN_COMPLETE:
4048 				BUMP_LOCAL(sctp->sctp_ibchunks);
4049 				goto done;
4050 			default:
4051 				BUMP_LOCAL(sctp->sctp_ibchunks);
4052 				sctp_send_abort(sctp, sctph->sh_verf, 0, NULL,
4053 				    0, mp, 0, B_TRUE, ira);
4054 				goto done;
4055 			}
4056 			break;
4057 
4058 		case SCTPS_COOKIE_WAIT:
4059 			switch (ch->sch_id) {
4060 			case CHUNK_INIT_ACK:
4061 				sctp_stop_faddr_timers(sctp);
4062 				sctp_faddr_alive(sctp, sctp->sctp_current);
4063 				sctp_send_cookie_echo(sctp, ch, mp, ira);
4064 				BUMP_LOCAL(sctp->sctp_ibchunks);
4065 				break;
4066 			case CHUNK_ABORT:
4067 				sctp_process_abort(sctp, ch, ECONNREFUSED);
4068 				goto done;
4069 			case CHUNK_INIT:
4070 				sctp_send_initack(sctp, sctph, ch, mp, ira);
4071 				break;
4072 			case CHUNK_COOKIE:
4073 				cr = ira->ira_cred;
4074 				cpid = ira->ira_cpid;
4075 
4076 				if (sctp_process_cookie(sctp, ch, mp, &iack,
4077 				    sctph, &recv_adaptation, NULL, ira) == -1) {
4078 					BUMP_MIB(&sctps->sctps_mib,
4079 					    sctpInInvalidCookie);
4080 					break;
4081 				}
4082 				sctp_send_cookie_ack(sctp);
4083 				sctp_stop_faddr_timers(sctp);
4084 				if (!SCTP_IS_DETACHED(sctp)) {
4085 					sctp->sctp_ulp_connected(
4086 					    sctp->sctp_ulpd, 0, cr, cpid);
4087 					sctp_set_ulp_prop(sctp);
4088 
4089 				}
4090 				sctp->sctp_state = SCTPS_ESTABLISHED;
4091 				sctp->sctp_assoc_start_time =
4092 				    (uint32_t)ddi_get_lbolt();
4093 				BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab);
4094 				if (sctp->sctp_cookie_mp) {
4095 					freemsg(sctp->sctp_cookie_mp);
4096 					sctp->sctp_cookie_mp = NULL;
4097 				}
4098 
4099 				/* Validate the peer addresses. */
4100 				sctp->sctp_active = now;
4101 				sctp_validate_peer(sctp);
4102 
4103 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4104 				if (recv_adaptation) {
4105 					sctp->sctp_recv_adaptation = 1;
4106 					sctp_adaptation_event(sctp);
4107 				}
4108 				/* Try sending queued data, or ASCONFs */
4109 				trysend = 1;
4110 				break;
4111 			default:
4112 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4113 					goto nomorechunks;
4114 				} /* else skip and continue processing */
4115 				break;
4116 			}
4117 			break;
4118 
4119 		case SCTPS_COOKIE_ECHOED:
4120 			switch (ch->sch_id) {
4121 			case CHUNK_COOKIE_ACK:
4122 				cr = ira->ira_cred;
4123 				cpid = ira->ira_cpid;
4124 
4125 				if (!SCTP_IS_DETACHED(sctp)) {
4126 					sctp->sctp_ulp_connected(
4127 					    sctp->sctp_ulpd, 0, cr, cpid);
4128 					sctp_set_ulp_prop(sctp);
4129 				}
4130 				if (sctp->sctp_unacked == 0)
4131 					sctp_stop_faddr_timers(sctp);
4132 				sctp->sctp_state = SCTPS_ESTABLISHED;
4133 				sctp->sctp_assoc_start_time =
4134 				    (uint32_t)ddi_get_lbolt();
4135 				BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab);
4136 				BUMP_LOCAL(sctp->sctp_ibchunks);
4137 				if (sctp->sctp_cookie_mp) {
4138 					freemsg(sctp->sctp_cookie_mp);
4139 					sctp->sctp_cookie_mp = NULL;
4140 				}
4141 				sctp_faddr_alive(sctp, fp);
4142 				/* Validate the peer addresses. */
4143 				sctp->sctp_active = now;
4144 				sctp_validate_peer(sctp);
4145 
4146 				/* Try sending queued data, or ASCONFs */
4147 				trysend = 1;
4148 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4149 				sctp_adaptation_event(sctp);
4150 				break;
4151 			case CHUNK_ABORT:
4152 				sctp_process_abort(sctp, ch, ECONNREFUSED);
4153 				goto done;
4154 			case CHUNK_COOKIE:
4155 				cr = ira->ira_cred;
4156 				cpid = ira->ira_cpid;
4157 
4158 				if (sctp_process_cookie(sctp, ch, mp, &iack,
4159 				    sctph, &recv_adaptation, NULL, ira) == -1) {
4160 					BUMP_MIB(&sctps->sctps_mib,
4161 					    sctpInInvalidCookie);
4162 					break;
4163 				}
4164 				sctp_send_cookie_ack(sctp);
4165 
4166 				if (!SCTP_IS_DETACHED(sctp)) {
4167 					sctp->sctp_ulp_connected(
4168 					    sctp->sctp_ulpd, 0, cr, cpid);
4169 					sctp_set_ulp_prop(sctp);
4170 
4171 				}
4172 				if (sctp->sctp_unacked == 0)
4173 					sctp_stop_faddr_timers(sctp);
4174 				sctp->sctp_state = SCTPS_ESTABLISHED;
4175 				sctp->sctp_assoc_start_time =
4176 				    (uint32_t)ddi_get_lbolt();
4177 				BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab);
4178 				if (sctp->sctp_cookie_mp) {
4179 					freemsg(sctp->sctp_cookie_mp);
4180 					sctp->sctp_cookie_mp = NULL;
4181 				}
4182 				/* Validate the peer addresses. */
4183 				sctp->sctp_active = now;
4184 				sctp_validate_peer(sctp);
4185 
4186 				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4187 				if (recv_adaptation) {
4188 					sctp->sctp_recv_adaptation = 1;
4189 					sctp_adaptation_event(sctp);
4190 				}
4191 				/* Try sending queued data, or ASCONFs */
4192 				trysend = 1;
4193 				break;
4194 			case CHUNK_INIT:
4195 				sctp_send_initack(sctp, sctph, ch, mp, ira);
4196 				break;
4197 			case CHUNK_ERROR: {
4198 				sctp_parm_hdr_t *p;
4199 
4200 				BUMP_LOCAL(sctp->sctp_ibchunks);
4201 				/* check for a stale cookie */
4202 				if (ntohs(ch->sch_len) >=
4203 				    (sizeof (*p) + sizeof (*ch)) +
4204 				    sizeof (uint32_t)) {
4205 
4206 					p = (sctp_parm_hdr_t *)(ch + 1);
4207 					if (p->sph_type ==
4208 					    htons(SCTP_ERR_STALE_COOKIE)) {
4209 						BUMP_MIB(&sctps->sctps_mib,
4210 						    sctpAborted);
4211 						sctp_error_event(sctp,
4212 						    ch, B_FALSE);
4213 						sctp_assoc_event(sctp,
4214 						    SCTP_COMM_LOST, 0, NULL);
4215 						sctp_clean_death(sctp,
4216 						    ECONNREFUSED);
4217 						goto done;
4218 					}
4219 				}
4220 				break;
4221 			}
4222 			case CHUNK_HEARTBEAT:
4223 				if (!hb_already) {
4224 					sctp_return_heartbeat(sctp, ch, mp);
4225 					hb_already = B_TRUE;
4226 				}
4227 				break;
4228 			default:
4229 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4230 					goto nomorechunks;
4231 				} /* else skip and continue processing */
4232 			} /* switch (ch->sch_id) */
4233 			break;
4234 
4235 		case SCTPS_SHUTDOWN_ACK_SENT:
4236 			switch (ch->sch_id) {
4237 			case CHUNK_ABORT:
4238 				/* Pass gathered wisdom to IP for keeping */
4239 				sctp_update_dce(sctp);
4240 				sctp_process_abort(sctp, ch, 0);
4241 				goto done;
4242 			case CHUNK_SHUTDOWN_COMPLETE:
4243 				BUMP_LOCAL(sctp->sctp_ibchunks);
4244 				BUMP_MIB(&sctps->sctps_mib, sctpShutdowns);
4245 				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
4246 				    NULL);
4247 
4248 				/* Pass gathered wisdom to IP for keeping */
4249 				sctp_update_dce(sctp);
4250 				sctp_clean_death(sctp, 0);
4251 				goto done;
4252 			case CHUNK_SHUTDOWN_ACK:
4253 				sctp_shutdown_complete(sctp);
4254 				BUMP_LOCAL(sctp->sctp_ibchunks);
4255 				BUMP_MIB(&sctps->sctps_mib, sctpShutdowns);
4256 				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
4257 				    NULL);
4258 				sctp_clean_death(sctp, 0);
4259 				goto done;
4260 			case CHUNK_COOKIE:
4261 				(void) sctp_shutdown_received(sctp, NULL,
4262 				    B_TRUE, B_FALSE, fp);
4263 				BUMP_LOCAL(sctp->sctp_ibchunks);
4264 				break;
4265 			case CHUNK_HEARTBEAT:
4266 				if (!hb_already) {
4267 					sctp_return_heartbeat(sctp, ch, mp);
4268 					hb_already = B_TRUE;
4269 				}
4270 				break;
4271 			default:
4272 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4273 					goto nomorechunks;
4274 				} /* else skip and continue processing */
4275 				break;
4276 			}
4277 			break;
4278 
4279 		case SCTPS_SHUTDOWN_RECEIVED:
4280 			switch (ch->sch_id) {
4281 			case CHUNK_SHUTDOWN:
4282 				trysend = sctp_shutdown_received(sctp, ch,
4283 				    B_FALSE, B_FALSE, fp);
4284 				/*
4285 				 * shutdown_ack_needed may have been set as
4286 				 * mentioned in the case CHUNK_SACK below.
4287 				 * If sctp_shutdown_received() above found
4288 				 * the xmit queue empty the SHUTDOWN ACK chunk
4289 				 * has already been sent (or scheduled to be
4290 				 * sent on the timer) and the SCTP state
4291 				 * changed, so reset shutdown_ack_needed.
4292 				 */
4293 				if (shutdown_ack_needed && (sctp->sctp_state ==
4294 				    SCTPS_SHUTDOWN_ACK_SENT))
4295 					shutdown_ack_needed = B_FALSE;
4296 				break;
4297 			case CHUNK_SACK:
4298 				trysend = sctp_got_sack(sctp, ch);
4299 				if (trysend < 0) {
4300 					sctp_send_abort(sctp, sctph->sh_verf,
4301 					    0, NULL, 0, mp, 0, B_FALSE, ira);
4302 					sctp_assoc_event(sctp,
4303 					    SCTP_COMM_LOST, 0, NULL);
4304 					sctp_clean_death(sctp,
4305 					    ECONNABORTED);
4306 					goto done;
4307 				}
4308 
4309 				/*
4310 				 * All data acknowledgement after a shutdown
4311 				 * should be done with SHUTDOWN chunk.
4312 				 * However some peer SCTP do not conform with
4313 				 * this and can unexpectedly send a SACK chunk.
4314 				 * If all data are acknowledged, set
4315 				 * shutdown_ack_needed here indicating that
4316 				 * SHUTDOWN ACK needs to be sent later by
4317 				 * sctp_send_shutdown_ack().
4318 				 */
4319 				if ((sctp->sctp_xmit_head == NULL) &&
4320 				    (sctp->sctp_xmit_unsent == NULL))
4321 					shutdown_ack_needed = B_TRUE;
4322 				break;
4323 			case CHUNK_ABORT:
4324 				sctp_process_abort(sctp, ch, ECONNRESET);
4325 				goto done;
4326 			case CHUNK_HEARTBEAT:
4327 				if (!hb_already) {
4328 					sctp_return_heartbeat(sctp, ch, mp);
4329 					hb_already = B_TRUE;
4330 				}
4331 				break;
4332 			default:
4333 				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4334 					goto nomorechunks;
4335 				} /* else skip and continue processing */
4336 				break;
4337 			}
4338 			break;
4339 
4340 		default:
4341 			/*
4342 			 * The only remaining states are SCTPS_IDLE and
4343 			 * SCTPS_BOUND, and we should not be getting here
4344 			 * for these.
4345 			 */
4346 			ASSERT(0);
4347 		} /* switch (sctp->sctp_state) */
4348 
4349 		ch = sctp_next_chunk(ch, &mlen);
4350 		if (ch != NULL && !sctp_check_input(sctp, ch, mlen, 0))
4351 			goto done;
4352 	} while (ch != NULL);
4353 
4354 	/* Finished processing all chunks in packet */
4355 
4356 nomorechunks:
4357 
4358 	if (shutdown_ack_needed)
4359 		sctp_send_shutdown_ack(sctp, fp, B_FALSE);
4360 
4361 	/* SACK if necessary */
4362 	if (gotdata) {
4363 		boolean_t sack_sent;
4364 
4365 		(sctp->sctp_sack_toggle)++;
4366 		sack_sent = sctp_sack(sctp, dups);
4367 		dups = NULL;
4368 
4369 		/* If a SACK is sent, no need to restart the timer. */
4370 		if (!sack_sent && !sctp->sctp_ack_timer_running) {
4371 			sctp->sctp_ack_timer_running = B_TRUE;
4372 			sctp_timer(sctp, sctp->sctp_ack_mp,
4373 			    MSEC_TO_TICK(sctps->sctps_deferred_ack_interval));
4374 		}
4375 	}
4376 
4377 	if (trysend) {
4378 		sctp_output(sctp, UINT_MAX);
4379 		if (sctp->sctp_cxmit_list != NULL)
4380 			sctp_wput_asconf(sctp, NULL);
4381 	}
4382 	/*
4383 	 * If there is unsent data, make sure a timer is running, check
4384 	 * timer_mp, if sctp_closei_local() ran the timers may be free.
4385 	 */
4386 	if (sctp->sctp_unsent > 0 && !sctp->sctp_current->timer_running &&
4387 	    sctp->sctp_current->timer_mp != NULL) {
4388 		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
4389 		    sctp->sctp_current->rto);
4390 	}
4391 
4392 done:
4393 	if (dups != NULL)
4394 		freeb(dups);
4395 	freemsg(mp);
4396 
4397 	if (sctp->sctp_err_chunks != NULL)
4398 		sctp_process_err(sctp);
4399 
4400 	if (wake_eager) {
4401 		/*
4402 		 * sctp points to newly created control block, need to
4403 		 * release it before exiting.
4404 		 */
4405 		WAKE_SCTP(sctp);
4406 	}
4407 }
4408 
4409 /*
4410  * Some amount of data got removed from rx q.
4411  * Check if we should send a window update.
4412  *
4413  * Due to way sctp_rwnd updates are made, ULP can give reports out-of-order.
4414  * To keep from dropping incoming data due to this, we only update
4415  * sctp_rwnd when if it's larger than what we've reported to peer earlier.
4416  */
4417 void
4418 sctp_recvd(sctp_t *sctp, int len)
4419 {
4420 	int32_t old, new;
4421 	sctp_stack_t	*sctps = sctp->sctp_sctps;
4422 
4423 	ASSERT(sctp != NULL);
4424 	RUN_SCTP(sctp);
4425 
4426 	if (len < sctp->sctp_rwnd) {
4427 		WAKE_SCTP(sctp);
4428 		return;
4429 	}
4430 
4431 	old = sctp->sctp_rwnd - sctp->sctp_rxqueued;
4432 	new = len - sctp->sctp_rxqueued;
4433 	sctp->sctp_rwnd = len;
4434 
4435 	if (sctp->sctp_state >= SCTPS_ESTABLISHED &&
4436 	    ((old <= new >> 1) || (old < sctp->sctp_mss))) {
4437 		sctp->sctp_force_sack = 1;
4438 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinUpdate);
4439 		(void) sctp_sack(sctp, NULL);
4440 	}
4441 	WAKE_SCTP(sctp);
4442 }
4443