xref: /titanic_50/usr/src/uts/common/inet/sctp/sctp_hash.c (revision a307732568c3d861c38b0342ae32434226d10e94)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved
24  */
25 
26 #include <sys/socket.h>
27 #include <sys/ddi.h>
28 #include <sys/sunddi.h>
29 #include <sys/tsol/tndb.h>
30 #include <sys/tsol/tnet.h>
31 
32 #include <netinet/in.h>
33 #include <netinet/ip6.h>
34 
35 #include <inet/common.h>
36 #include <inet/ip.h>
37 #include <inet/ip6.h>
38 #include <inet/ipclassifier.h>
39 #include <inet/ipsec_impl.h>
40 #include <inet/ipp_common.h>
41 #include <inet/sctp_ip.h>
42 
43 #include "sctp_impl.h"
44 #include "sctp_addr.h"
45 
/*
 * Default association hash size.  The size must be a power of 2.
 * sctp_hash_init() also uses this value as the smallest connection
 * fanout size it will accept.
 */
#define	SCTP_CONN_HASH_SIZE	8192

/*
 * Requested size of the connection fanout table.  sctp_hash_init() copies
 * it into each stack instance, rounds it up to a power of two if needed
 * and raises it to SCTP_CONN_HASH_SIZE when it is smaller than that.
 */
uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */
50 
/*
 * Cluster networking hook for traversing the current association list.
 * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
 *
 * cl_sctp_walk_list() is the exported entry point; it visits every
 * netstack and hands each one's SCTP stack to cl_sctp_walk_list_stack(),
 * which walks the associations of that single stack instance.
 */
int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
    boolean_t);
static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
    void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
60 
61 void
62 sctp_hash_init(sctp_stack_t *sctps)
63 {
64 	int i;
65 
66 	/* Start with /etc/system value */
67 	sctps->sctps_conn_hash_size = sctp_conn_hash_size;
68 
69 	if (sctps->sctps_conn_hash_size & (sctps->sctps_conn_hash_size - 1)) {
70 		/* Not a power of two. Round up to nearest power of two */
71 		for (i = 0; i < 31; i++) {
72 			if (sctps->sctps_conn_hash_size < (1 << i))
73 				break;
74 		}
75 		sctps->sctps_conn_hash_size = 1 << i;
76 	}
77 	if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
78 		sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
79 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
80 		    sctps->sctps_conn_hash_size);
81 	}
82 	sctps->sctps_conn_fanout =
83 	    (sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
84 	    sizeof (sctp_tf_t), KM_SLEEP);
85 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
86 		mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
87 		    MUTEX_DEFAULT, NULL);
88 	}
89 	sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
90 	    sizeof (sctp_tf_t),	KM_SLEEP);
91 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
92 		mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
93 		    MUTEX_DEFAULT, NULL);
94 	}
95 	sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
96 	    sizeof (sctp_tf_t),	KM_SLEEP);
97 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
98 		mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
99 		    MUTEX_DEFAULT, NULL);
100 	}
101 }
102 
103 void
104 sctp_hash_destroy(sctp_stack_t *sctps)
105 {
106 	int i;
107 
108 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
109 		mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
110 	}
111 	kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
112 	    sizeof (sctp_tf_t));
113 	sctps->sctps_conn_fanout = NULL;
114 
115 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
116 		mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
117 	}
118 	kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
119 	    sizeof (sctp_tf_t));
120 	sctps->sctps_listen_fanout = NULL;
121 
122 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
123 		mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
124 	}
125 	kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
126 	    sizeof (sctp_tf_t));
127 	sctps->sctps_bind_fanout = NULL;
128 }
129 
130 /*
131  * Exported routine for extracting active SCTP associations.
132  * Like TCP, we terminate the walk if the callback returns non-zero.
133  *
134  * Need to walk all sctp_stack_t instances since this clustering
135  * interface is assumed global for all instances
136  */
137 int
138 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
139     void *arg, boolean_t cansleep)
140 {
141 	netstack_handle_t nh;
142 	netstack_t *ns;
143 	int ret = 0;
144 
145 	netstack_next_init(&nh);
146 	while ((ns = netstack_next(&nh)) != NULL) {
147 		ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
148 		    ns->netstack_sctp);
149 		netstack_rele(ns);
150 	}
151 	netstack_next_fini(&nh);
152 	return (ret);
153 }
154 
155 static int
156 cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
157     void *arg, boolean_t cansleep, sctp_stack_t *sctps)
158 {
159 	sctp_t		*sctp;
160 	sctp_t		*sctp_prev;
161 	cl_sctp_info_t	cl_sctpi;
162 	uchar_t		*slist;
163 	uchar_t		*flist;
164 
165 	sctp_prev = NULL;
166 	mutex_enter(&sctps->sctps_g_lock);
167 	sctp = list_head(&sctps->sctps_g_list);
168 	while (sctp != NULL) {
169 		size_t	ssize;
170 		size_t	fsize;
171 
172 		mutex_enter(&sctp->sctp_reflock);
173 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
174 			mutex_exit(&sctp->sctp_reflock);
175 			sctp = list_next(&sctps->sctps_g_list, sctp);
176 			continue;
177 		}
178 		sctp->sctp_refcnt++;
179 		mutex_exit(&sctp->sctp_reflock);
180 		mutex_exit(&sctps->sctps_g_lock);
181 		if (sctp_prev != NULL)
182 			SCTP_REFRELE(sctp_prev);
183 		RUN_SCTP(sctp);
184 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
185 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
186 
187 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
188 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
189 		if (slist == NULL || flist == NULL) {
190 			WAKE_SCTP(sctp);
191 			if (slist != NULL)
192 				kmem_free(slist, ssize);
193 			if (flist != NULL)
194 				kmem_free(flist, fsize);
195 			SCTP_REFRELE(sctp);
196 			return (1);
197 		}
198 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
199 		sctp_get_saddr_list(sctp, slist, ssize);
200 		sctp_get_faddr_list(sctp, flist, fsize);
201 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
202 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
203 		cl_sctpi.cl_sctpi_family = sctp->sctp_connp->conn_family;
204 		if (cl_sctpi.cl_sctpi_family == AF_INET)
205 			cl_sctpi.cl_sctpi_ipversion = IPV4_VERSION;
206 		else
207 			cl_sctpi.cl_sctpi_ipversion = IPV6_VERSION;
208 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
209 		cl_sctpi.cl_sctpi_lport = sctp->sctp_connp->conn_lport;
210 		cl_sctpi.cl_sctpi_fport = sctp->sctp_connp->conn_fport;
211 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
212 		WAKE_SCTP(sctp);
213 		cl_sctpi.cl_sctpi_laddrp = slist;
214 		cl_sctpi.cl_sctpi_faddrp = flist;
215 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
216 			kmem_free(slist, ssize);
217 			kmem_free(flist, fsize);
218 			SCTP_REFRELE(sctp);
219 			return (1);
220 		}
221 		/* list will be freed by cl_callback */
222 		sctp_prev = sctp;
223 		mutex_enter(&sctps->sctps_g_lock);
224 		sctp = list_next(&sctps->sctps_g_list, sctp);
225 	}
226 	mutex_exit(&sctps->sctps_g_lock);
227 	if (sctp_prev != NULL)
228 		SCTP_REFRELE(sctp_prev);
229 	return (0);
230 }
231 
232 sctp_t *
233 sctp_conn_match(in6_addr_t **faddrpp, uint32_t nfaddr, in6_addr_t *laddr,
234     uint32_t ports, zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
235 {
236 	sctp_tf_t		*tf;
237 	sctp_t			*sctp;
238 	sctp_faddr_t		*fp;
239 	conn_t			*connp;
240 	in6_addr_t		**faddrs, **endaddrs = &faddrpp[nfaddr];
241 
242 	tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
243 	mutex_enter(&tf->tf_lock);
244 
245 	for (sctp = tf->tf_sctp; sctp != NULL; sctp =
246 	    sctp->sctp_conn_hash_next) {
247 		connp = sctp->sctp_connp;
248 		if (ports != connp->conn_ports)
249 			continue;
250 		if (!(connp->conn_zoneid == zoneid ||
251 		    connp->conn_allzones ||
252 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
253 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
254 		    (iraflags & IRAF_TX_SHARED_ADDR))))
255 			continue;
256 
257 		/* check for faddr match */
258 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
259 			for (faddrs = faddrpp; faddrs < endaddrs; faddrs++) {
260 				if (IN6_ARE_ADDR_EQUAL(*faddrs, &fp->faddr)) {
261 					/* check for laddr match */
262 					if (sctp_saddr_lookup(sctp, laddr, 0)
263 					    != NULL) {
264 						SCTP_REFHOLD(sctp);
265 						mutex_exit(&tf->tf_lock);
266 						return (sctp);
267 					}
268 				}
269 			}
270 		}
271 
272 		/* no match; continue to the next in the chain */
273 	}
274 
275 	mutex_exit(&tf->tf_lock);
276 	return (sctp);
277 }
278 
279 static sctp_t *
280 listen_match(in6_addr_t *laddr, uint32_t ports, zoneid_t zoneid,
281     iaflags_t iraflags, sctp_stack_t *sctps)
282 {
283 	sctp_t			*sctp;
284 	sctp_tf_t		*tf;
285 	uint16_t		lport;
286 	conn_t			*connp;
287 
288 	lport = ((uint16_t *)&ports)[1];
289 
290 	tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
291 	mutex_enter(&tf->tf_lock);
292 
293 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
294 		connp = sctp->sctp_connp;
295 		if (lport != connp->conn_lport)
296 			continue;
297 
298 		if (!(connp->conn_zoneid == zoneid ||
299 		    connp->conn_allzones ||
300 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
301 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
302 		    (iraflags & IRAF_TX_SHARED_ADDR))))
303 			continue;
304 
305 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
306 			SCTP_REFHOLD(sctp);
307 			goto done;
308 		}
309 		/* no match; continue to the next in the chain */
310 	}
311 
312 done:
313 	mutex_exit(&tf->tf_lock);
314 	return (sctp);
315 }
316 
317 /* called by ipsec_sctp_pol */
318 conn_t *
319 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
320     zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
321 {
322 	sctp_t *sctp;
323 
324 	sctp = sctp_conn_match(&src, 1, dst, ports, zoneid, iraflags, sctps);
325 	if (sctp == NULL) {
326 		/* Not in conn fanout; check listen fanout */
327 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
328 		if (sctp == NULL)
329 			return (NULL);
330 	}
331 	return (sctp->sctp_connp);
332 }
333 
334 /*
335  * This is called from sctp_fanout() with IP header src & dst addresses.
336  * First call sctp_conn_match() to get a match by passing in src & dst
337  * addresses from IP header.
338  * However sctp_conn_match() can return no match under condition such as :
339  * A host can send an INIT ACK from a different address than the INIT was sent
340  * to (in a multi-homed env).
341  * According to RFC4960, a host can send additional addresses in an INIT
342  * ACK chunk.
343  * Therefore extract all addresses from the INIT ACK chunk, pass to
344  * sctp_conn_match() to get a match.
345  */
346 static sctp_t *
347 sctp_lookup_by_faddrs(mblk_t *mp, sctp_hdr_t *sctph, in6_addr_t *srcp,
348     in6_addr_t *dstp, uint32_t ports, zoneid_t zoneid, sctp_stack_t *sctps,
349     iaflags_t iraflags)
350 {
351 	sctp_t			*sctp;
352 	sctp_chunk_hdr_t	*ich;
353 	sctp_init_chunk_t	*iack;
354 	sctp_parm_hdr_t		*ph;
355 	ssize_t			mlen, remaining;
356 	uint16_t		param_type, addr_len = PARM_ADDR4_LEN;
357 	in6_addr_t		src;
358 	in6_addr_t		**addrbuf = NULL, **faddrpp = NULL;
359 	boolean_t		isv4;
360 	uint32_t		totaddr, nfaddr = 0;
361 
362 	/*
363 	 * If we get a match with the passed-in IP header src & dst addresses,
364 	 * quickly return the matched sctp.
365 	 */
366 	if ((sctp = sctp_conn_match(&srcp, 1, dstp, ports, zoneid, iraflags,
367 	    sctps)) != NULL) {
368 		return (sctp);
369 	}
370 
371 	/*
372 	 * Currently sctph is set to NULL in icmp error fanout case
373 	 * (ip_fanout_sctp()).
374 	 * The above sctp_conn_match() should handle that, otherwise
375 	 * return no match found.
376 	 */
377 	if (sctph == NULL)
378 		return (NULL);
379 
380 	/*
381 	 * Do a pullup again in case the previous one was partially successful,
382 	 * so try to complete the pullup here and have a single contiguous
383 	 * chunk for processing of entire INIT ACK chunk below.
384 	 */
385 	if (mp->b_cont != NULL) {
386 		if (pullupmsg(mp, -1) == 0) {
387 			return (NULL);
388 		}
389 	}
390 
391 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
392 	if ((ich = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
393 		return (NULL);
394 	}
395 
396 	if (ich->sch_id == CHUNK_INIT_ACK) {
397 		remaining = ntohs(ich->sch_len) - sizeof (*ich) -
398 		    sizeof (*iack);
399 		if (remaining < sizeof (*ph)) {
400 			return (NULL);
401 		}
402 
403 		isv4 = (iraflags & IRAF_IS_IPV4) ? B_TRUE : B_FALSE;
404 		if (!isv4)
405 			addr_len = PARM_ADDR6_LEN;
406 		totaddr = remaining/addr_len;
407 
408 		iack = (sctp_init_chunk_t *)(ich + 1);
409 		ph = (sctp_parm_hdr_t *)(iack + 1);
410 
411 		addrbuf = (in6_addr_t **)
412 		    kmem_zalloc(totaddr * sizeof (in6_addr_t *), KM_NOSLEEP);
413 		if (addrbuf == NULL)
414 			return (NULL);
415 		faddrpp = addrbuf;
416 
417 		while (ph != NULL) {
418 			/*
419 			 * According to RFC4960 :
420 			 * All integer fields in an SCTP packet MUST be
421 			 * transmitted in network byte order,
422 			 * unless otherwise stated.
423 			 * Therefore convert the param type to host byte order.
424 			 * Also do not add src address present in IP header
425 			 * as it has already been thru sctp_conn_match() above.
426 			 */
427 			param_type = ntohs(ph->sph_type);
428 			switch (param_type) {
429 			case PARM_ADDR4:
430 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
431 				    (ph + 1), &src);
432 				if (IN6_ARE_ADDR_EQUAL(&src, srcp))
433 					break;
434 				*faddrpp = (in6_addr_t *)
435 				    kmem_zalloc(sizeof (in6_addr_t),
436 				    KM_NOSLEEP);
437 				if (*faddrpp == NULL)
438 					break;
439 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
440 				    (ph + 1), *faddrpp);
441 				nfaddr++;
442 				faddrpp++;
443 				break;
444 			case PARM_ADDR6:
445 				*faddrpp = (in6_addr_t *)(ph + 1);
446 				if (IN6_ARE_ADDR_EQUAL(*faddrpp, srcp))
447 					break;
448 				nfaddr++;
449 				faddrpp++;
450 				break;
451 			default:
452 				break;
453 			}
454 			ph = sctp_next_parm(ph, &remaining);
455 		}
456 
457 		ASSERT(nfaddr < totaddr);
458 
459 		if (nfaddr > 0) {
460 			sctp = sctp_conn_match(addrbuf, nfaddr, dstp, ports,
461 			    zoneid, iraflags, sctps);
462 
463 			if (isv4) {
464 				for (faddrpp = addrbuf; nfaddr > 0;
465 				    faddrpp++, nfaddr--) {
466 					if (IN6_IS_ADDR_V4MAPPED(*faddrpp)) {
467 						kmem_free(*faddrpp,
468 						    sizeof (in6_addr_t));
469 					}
470 				}
471 			}
472 		}
473 		kmem_free(addrbuf, totaddr * sizeof (in6_addr_t *));
474 	}
475 	return (sctp);
476 }
477 
478 /*
479  * Fanout to a sctp instance.
480  */
481 conn_t *
482 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
483     ip_recv_attr_t *ira, mblk_t *mp, sctp_stack_t *sctps, sctp_hdr_t *sctph)
484 {
485 	zoneid_t zoneid = ira->ira_zoneid;
486 	iaflags_t iraflags = ira->ira_flags;
487 	sctp_t *sctp;
488 
489 	sctp = sctp_lookup_by_faddrs(mp, sctph, src, dst, ports, zoneid,
490 	    sctps, iraflags);
491 	if (sctp == NULL) {
492 		/* Not in conn fanout; check listen fanout */
493 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
494 		if (sctp == NULL)
495 			return (NULL);
496 		/*
497 		 * On systems running trusted extensions, check if dst
498 		 * should accept the packet. "IPV6_VERSION" indicates
499 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
500 		 * IPv6 addresses are supported.
501 		 */
502 		if ((iraflags & IRAF_SYSTEM_LABELED) &&
503 		    !tsol_receive_local(mp, dst, IPV6_VERSION, ira,
504 		    sctp->sctp_connp)) {
505 			DTRACE_PROBE3(
506 			    tx__ip__log__info__classify__sctp,
507 			    char *,
508 			    "connp(1) could not receive mp(2)",
509 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
510 			SCTP_REFRELE(sctp);
511 			return (NULL);
512 		}
513 	}
514 	/*
515 	 * For labeled systems, there's no need to check the
516 	 * label here.  It's known to be good as we checked
517 	 * before allowing the connection to become bound.
518 	 */
519 	return (sctp->sctp_connp);
520 }
521 
522 /*
523  * Fanout for ICMP errors for SCTP
524  * The caller puts <fport, lport> in the ports parameter.
525  */
526 void
527 ip_fanout_sctp(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, uint32_t ports,
528     ip_recv_attr_t *ira)
529 {
530 	sctp_t		*sctp;
531 	conn_t		*connp;
532 	in6_addr_t	map_src, map_dst;
533 	in6_addr_t	*src, *dst;
534 	boolean_t	secure;
535 	ill_t		*ill = ira->ira_ill;
536 	ip_stack_t	*ipst = ill->ill_ipst;
537 	netstack_t	*ns = ipst->ips_netstack;
538 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
539 	sctp_stack_t	*sctps = ns->netstack_sctp;
540 	iaflags_t	iraflags = ira->ira_flags;
541 	ill_t		*rill = ira->ira_rill;
542 
543 	ASSERT(iraflags & IRAF_ICMP_ERROR);
544 
545 	secure = iraflags & IRAF_IPSEC_SECURE;
546 
547 	/* Assume IP provides aligned packets - otherwise toss */
548 	if (!OK_32PTR(mp->b_rptr)) {
549 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
550 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
551 		freemsg(mp);
552 		return;
553 	}
554 
555 	if (!(iraflags & IRAF_IS_IPV4)) {
556 		src = &ip6h->ip6_src;
557 		dst = &ip6h->ip6_dst;
558 	} else {
559 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
560 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
561 		src = &map_src;
562 		dst = &map_dst;
563 	}
564 	connp = sctp_fanout(src, dst, ports, ira, mp, sctps, NULL);
565 	if (connp == NULL) {
566 		ip_fanout_sctp_raw(mp, ipha, ip6h, ports, ira);
567 		return;
568 	}
569 	sctp = CONN2SCTP(connp);
570 
571 	/*
572 	 * We check some fields in conn_t without holding a lock.
573 	 * This should be fine.
574 	 */
575 	if (((iraflags & IRAF_IS_IPV4) ?
576 	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
577 	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
578 	    secure) {
579 		mp = ipsec_check_inbound_policy(mp, connp, ipha,
580 		    ip6h, ira);
581 		if (mp == NULL) {
582 			SCTP_REFRELE(sctp);
583 			return;
584 		}
585 	}
586 
587 	ira->ira_ill = ira->ira_rill = NULL;
588 
589 	mutex_enter(&sctp->sctp_lock);
590 	if (sctp->sctp_running) {
591 		sctp_add_recvq(sctp, mp, B_FALSE, ira);
592 		mutex_exit(&sctp->sctp_lock);
593 	} else {
594 		sctp->sctp_running = B_TRUE;
595 		mutex_exit(&sctp->sctp_lock);
596 
597 		mutex_enter(&sctp->sctp_recvq_lock);
598 		if (sctp->sctp_recvq != NULL) {
599 			sctp_add_recvq(sctp, mp, B_TRUE, ira);
600 			mutex_exit(&sctp->sctp_recvq_lock);
601 			WAKE_SCTP(sctp);
602 		} else {
603 			mutex_exit(&sctp->sctp_recvq_lock);
604 			if (ira->ira_flags & IRAF_ICMP_ERROR) {
605 				sctp_icmp_error(sctp, mp);
606 			} else {
607 				sctp_input_data(sctp, mp, ira);
608 			}
609 			WAKE_SCTP(sctp);
610 		}
611 	}
612 	SCTP_REFRELE(sctp);
613 	ira->ira_ill = ill;
614 	ira->ira_rill = rill;
615 }
616 
617 void
618 sctp_conn_hash_remove(sctp_t *sctp)
619 {
620 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
621 
622 	if (!tf) {
623 		return;
624 	}
625 	/*
626 	 * On a clustered note send this notification to the clustering
627 	 * subsystem.
628 	 */
629 	if (cl_sctp_disconnect != NULL) {
630 		(*cl_sctp_disconnect)(sctp->sctp_connp->conn_family,
631 		    (cl_sctp_handle_t)sctp);
632 	}
633 
634 	mutex_enter(&tf->tf_lock);
635 	ASSERT(tf->tf_sctp);
636 	if (tf->tf_sctp == sctp) {
637 		tf->tf_sctp = sctp->sctp_conn_hash_next;
638 		if (sctp->sctp_conn_hash_next) {
639 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
640 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
641 		}
642 	} else {
643 		ASSERT(sctp->sctp_conn_hash_prev);
644 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
645 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
646 		    sctp->sctp_conn_hash_next;
647 
648 		if (sctp->sctp_conn_hash_next) {
649 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
650 			    == sctp);
651 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
652 			    sctp->sctp_conn_hash_prev;
653 		}
654 	}
655 	sctp->sctp_conn_hash_next = NULL;
656 	sctp->sctp_conn_hash_prev = NULL;
657 	sctp->sctp_conn_tfp = NULL;
658 	mutex_exit(&tf->tf_lock);
659 }
660 
661 void
662 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
663 {
664 	if (sctp->sctp_conn_tfp) {
665 		sctp_conn_hash_remove(sctp);
666 	}
667 
668 	if (!caller_holds_lock) {
669 		mutex_enter(&tf->tf_lock);
670 	} else {
671 		ASSERT(MUTEX_HELD(&tf->tf_lock));
672 	}
673 
674 	sctp->sctp_conn_hash_next = tf->tf_sctp;
675 	if (tf->tf_sctp) {
676 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
677 	}
678 	sctp->sctp_conn_hash_prev = NULL;
679 	tf->tf_sctp = sctp;
680 	sctp->sctp_conn_tfp = tf;
681 	if (!caller_holds_lock) {
682 		mutex_exit(&tf->tf_lock);
683 	}
684 }
685 
686 void
687 sctp_listen_hash_remove(sctp_t *sctp)
688 {
689 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
690 	conn_t	*connp = sctp->sctp_connp;
691 
692 	if (!tf) {
693 		return;
694 	}
695 	/*
696 	 * On a clustered note send this notification to the clustering
697 	 * subsystem.
698 	 */
699 	if (cl_sctp_unlisten != NULL) {
700 		uchar_t	*slist;
701 		ssize_t	ssize;
702 
703 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
704 		slist = kmem_alloc(ssize, KM_SLEEP);
705 		sctp_get_saddr_list(sctp, slist, ssize);
706 		(*cl_sctp_unlisten)(connp->conn_family, slist,
707 		    sctp->sctp_nsaddrs, connp->conn_lport);
708 		/* list will be freed by the clustering module */
709 	}
710 
711 	mutex_enter(&tf->tf_lock);
712 	ASSERT(tf->tf_sctp);
713 	if (tf->tf_sctp == sctp) {
714 		tf->tf_sctp = sctp->sctp_listen_hash_next;
715 		if (sctp->sctp_listen_hash_next != NULL) {
716 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
717 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
718 		}
719 	} else {
720 		ASSERT(sctp->sctp_listen_hash_prev);
721 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
722 		    sctp);
723 		ASSERT(sctp->sctp_listen_hash_next == NULL ||
724 		    sctp->sctp_listen_hash_next->sctp_listen_hash_prev == sctp);
725 
726 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
727 		    sctp->sctp_listen_hash_next;
728 
729 		if (sctp->sctp_listen_hash_next != NULL) {
730 			sctp_t *next = sctp->sctp_listen_hash_next;
731 
732 			ASSERT(next->sctp_listen_hash_prev == sctp);
733 			next->sctp_listen_hash_prev =
734 			    sctp->sctp_listen_hash_prev;
735 		}
736 	}
737 	sctp->sctp_listen_hash_next = NULL;
738 	sctp->sctp_listen_hash_prev = NULL;
739 	sctp->sctp_listen_tfp = NULL;
740 	mutex_exit(&tf->tf_lock);
741 }
742 
743 void
744 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
745 {
746 	conn_t	*connp = sctp->sctp_connp;
747 
748 	if (sctp->sctp_listen_tfp) {
749 		sctp_listen_hash_remove(sctp);
750 	}
751 
752 	mutex_enter(&tf->tf_lock);
753 	sctp->sctp_listen_hash_next = tf->tf_sctp;
754 	if (tf->tf_sctp) {
755 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
756 	}
757 	sctp->sctp_listen_hash_prev = NULL;
758 	tf->tf_sctp = sctp;
759 	sctp->sctp_listen_tfp = tf;
760 	mutex_exit(&tf->tf_lock);
761 	/*
762 	 * On a clustered note send this notification to the clustering
763 	 * subsystem.
764 	 */
765 	if (cl_sctp_listen != NULL) {
766 		uchar_t	*slist;
767 		ssize_t	ssize;
768 
769 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
770 		slist = kmem_alloc(ssize, KM_SLEEP);
771 		sctp_get_saddr_list(sctp, slist, ssize);
772 		(*cl_sctp_listen)(connp->conn_family, slist,
773 		    sctp->sctp_nsaddrs, connp->conn_lport);
774 		/* list will be freed by the clustering module */
775 	}
776 }
777 
778 /*
779  * Hash list insertion routine for sctp_t structures.
780  * Inserts entries with the ones bound to a specific IP address first
781  * followed by those bound to INADDR_ANY.
782  */
783 void
784 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
785 {
786 	sctp_t	**sctpp;
787 	sctp_t	*sctpnext;
788 
789 	if (sctp->sctp_ptpbhn != NULL) {
790 		ASSERT(!caller_holds_lock);
791 		sctp_bind_hash_remove(sctp);
792 	}
793 	sctpp = &tbf->tf_sctp;
794 	if (!caller_holds_lock) {
795 		mutex_enter(&tbf->tf_lock);
796 	} else {
797 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
798 	}
799 	sctpnext = sctpp[0];
800 	if (sctpnext) {
801 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
802 	}
803 	sctp->sctp_bind_hash = sctpnext;
804 	sctp->sctp_ptpbhn = sctpp;
805 	sctpp[0] = sctp;
806 	/* For sctp_*_hash_remove */
807 	sctp->sctp_bind_lockp = &tbf->tf_lock;
808 	if (!caller_holds_lock)
809 		mutex_exit(&tbf->tf_lock);
810 }
811 
812 /*
813  * Hash list removal routine for sctp_t structures.
814  */
815 void
816 sctp_bind_hash_remove(sctp_t *sctp)
817 {
818 	sctp_t	*sctpnext;
819 	kmutex_t *lockp;
820 
821 	lockp = sctp->sctp_bind_lockp;
822 
823 	if (sctp->sctp_ptpbhn == NULL)
824 		return;
825 
826 	ASSERT(lockp != NULL);
827 	mutex_enter(lockp);
828 	if (sctp->sctp_ptpbhn) {
829 		sctpnext = sctp->sctp_bind_hash;
830 		if (sctpnext) {
831 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
832 			sctp->sctp_bind_hash = NULL;
833 		}
834 		*sctp->sctp_ptpbhn = sctpnext;
835 		sctp->sctp_ptpbhn = NULL;
836 	}
837 	mutex_exit(lockp);
838 	sctp->sctp_bind_lockp = NULL;
839 }
840 
841 /*
842  * Similar to but different from sctp_conn_match().
843  *
844  * Matches sets of addresses as follows: if the argument addr set is
845  * a complete subset of the corresponding addr set in the sctp_t, it
846  * is a match.
847  *
848  * Caller must hold tf->tf_lock.
849  *
850  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
851  */
852 sctp_t *
853 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
854     int min_state)
855 {
856 	sctp_t *sctp;
857 	sctp_faddr_t *fp;
858 
859 	ASSERT(MUTEX_HELD(&tf->tf_lock));
860 
861 	for (sctp = tf->tf_sctp; sctp != NULL;
862 	    sctp = sctp->sctp_conn_hash_next) {
863 		if (*ports != sctp->sctp_connp->conn_ports ||
864 		    sctp->sctp_state < min_state) {
865 			continue;
866 		}
867 
868 		/* check for faddr match */
869 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
870 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
871 				break;
872 			}
873 		}
874 
875 		if (fp == NULL) {
876 			/* no faddr match; keep looking */
877 			continue;
878 		}
879 
880 		/*
881 		 * There is an existing association with the same peer
882 		 * address.  So now we need to check if our local address
883 		 * set overlaps with the one of the existing association.
884 		 * If they overlap, we should return it.
885 		 */
886 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_OVERLAP) {
887 			goto done;
888 		}
889 
890 		/* no match; continue searching */
891 	}
892 
893 done:
894 	if (sctp != NULL) {
895 		SCTP_REFHOLD(sctp);
896 	}
897 	return (sctp);
898 }
899