xref: /titanic_41/usr/src/uts/common/inet/sctp/sctp_hash.c (revision fe7cd8aa86d5ba3fb4aa1d5fee460467fd4413c9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/socket.h>
27 #include <sys/ddi.h>
28 #include <sys/sunddi.h>
29 #include <sys/tsol/tndb.h>
30 #include <sys/tsol/tnet.h>
31 
32 #include <netinet/in.h>
33 #include <netinet/ip6.h>
34 
35 #include <inet/common.h>
36 #include <inet/ip.h>
37 #include <inet/ip6.h>
38 #include <inet/ipclassifier.h>
39 #include <inet/ipsec_impl.h>
40 #include <inet/ipp_common.h>
41 #include <inet/sctp_ip.h>
42 
43 #include "sctp_impl.h"
44 #include "sctp_addr.h"
45 
46 /* Default (and minimum) association hash size.  Must be a power of 2. */
47 #define	SCTP_CONN_HASH_SIZE	8192
48 
49 uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* set in /etc/system */
50 
51 /*
52  * Cluster networking hook for traversing the current association list.
53  * This routine is used to extract the current list of live associations
54  * which must continue to be dispatched to this node.
55  */
56 int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
57     boolean_t);
58 static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
59     void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
60 
61 void
62 sctp_hash_init(sctp_stack_t *sctps)
63 {
64 	int i;
65 
66 	/* Start with /etc/system value */
67 	sctps->sctps_conn_hash_size = sctp_conn_hash_size;
68 
69 	if (sctps->sctps_conn_hash_size & (sctps->sctps_conn_hash_size - 1)) {
70 		/* Not a power of two. Round up to nearest power of two */
71 		for (i = 0; i < 31; i++) {
72 			if (sctps->sctps_conn_hash_size < (1 << i))
73 				break;
74 		}
75 		sctps->sctps_conn_hash_size = 1 << i;
76 	}
77 	if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
78 		sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
79 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
80 		    sctps->sctps_conn_hash_size);
81 	}
82 	sctps->sctps_conn_fanout =
83 	    (sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
84 	    sizeof (sctp_tf_t), KM_SLEEP);
85 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
86 		mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
87 		    MUTEX_DEFAULT, NULL);
88 	}
89 	sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
90 	    sizeof (sctp_tf_t),	KM_SLEEP);
91 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
92 		mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
93 		    MUTEX_DEFAULT, NULL);
94 	}
95 	sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
96 	    sizeof (sctp_tf_t),	KM_SLEEP);
97 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
98 		mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
99 		    MUTEX_DEFAULT, NULL);
100 	}
101 }
102 
103 void
104 sctp_hash_destroy(sctp_stack_t *sctps)
105 {
106 	int i;
107 
108 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
109 		mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
110 	}
111 	kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
112 	    sizeof (sctp_tf_t));
113 	sctps->sctps_conn_fanout = NULL;
114 
115 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
116 		mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
117 	}
118 	kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
119 	    sizeof (sctp_tf_t));
120 	sctps->sctps_listen_fanout = NULL;
121 
122 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
123 		mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
124 	}
125 	kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
126 	    sizeof (sctp_tf_t));
127 	sctps->sctps_bind_fanout = NULL;
128 }
129 
130 /*
131  * Exported routine for extracting active SCTP associations.
132  * Like TCP, we terminate the walk if the callback returns non-zero.
133  *
134  * Need to walk all sctp_stack_t instances since this clustering
135  * interface is assumed global for all instances
136  */
137 int
138 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
139     void *arg, boolean_t cansleep)
140 {
141 	netstack_handle_t nh;
142 	netstack_t *ns;
143 	int ret = 0;
144 
145 	netstack_next_init(&nh);
146 	while ((ns = netstack_next(&nh)) != NULL) {
147 		ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
148 		    ns->netstack_sctp);
149 		netstack_rele(ns);
150 	}
151 	netstack_next_fini(&nh);
152 	return (ret);
153 }
154 
155 static int
156 cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
157     void *arg, boolean_t cansleep, sctp_stack_t *sctps)
158 {
159 	sctp_t		*sctp;
160 	sctp_t		*sctp_prev;
161 	cl_sctp_info_t	cl_sctpi;
162 	uchar_t		*slist;
163 	uchar_t		*flist;
164 
165 	sctp_prev = NULL;
166 	mutex_enter(&sctps->sctps_g_lock);
167 	sctp = list_head(&sctps->sctps_g_list);
168 	while (sctp != NULL) {
169 		size_t	ssize;
170 		size_t	fsize;
171 
172 		mutex_enter(&sctp->sctp_reflock);
173 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
174 			mutex_exit(&sctp->sctp_reflock);
175 			sctp = list_next(&sctps->sctps_g_list, sctp);
176 			continue;
177 		}
178 		sctp->sctp_refcnt++;
179 		mutex_exit(&sctp->sctp_reflock);
180 		mutex_exit(&sctps->sctps_g_lock);
181 		if (sctp_prev != NULL)
182 			SCTP_REFRELE(sctp_prev);
183 		RUN_SCTP(sctp);
184 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
185 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
186 
187 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
188 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
189 		if (slist == NULL || flist == NULL) {
190 			WAKE_SCTP(sctp);
191 			if (slist != NULL)
192 				kmem_free(slist, ssize);
193 			if (flist != NULL)
194 				kmem_free(flist, fsize);
195 			SCTP_REFRELE(sctp);
196 			return (1);
197 		}
198 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
199 		sctp_get_saddr_list(sctp, slist, ssize);
200 		sctp_get_faddr_list(sctp, flist, fsize);
201 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
202 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
203 		cl_sctpi.cl_sctpi_family = sctp->sctp_connp->conn_family;
204 		if (cl_sctpi.cl_sctpi_family == AF_INET)
205 			cl_sctpi.cl_sctpi_ipversion = IPV4_VERSION;
206 		else
207 			cl_sctpi.cl_sctpi_ipversion = IPV6_VERSION;
208 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
209 		cl_sctpi.cl_sctpi_lport = sctp->sctp_connp->conn_lport;
210 		cl_sctpi.cl_sctpi_fport = sctp->sctp_connp->conn_fport;
211 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
212 		WAKE_SCTP(sctp);
213 		cl_sctpi.cl_sctpi_laddrp = slist;
214 		cl_sctpi.cl_sctpi_faddrp = flist;
215 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
216 			kmem_free(slist, ssize);
217 			kmem_free(flist, fsize);
218 			SCTP_REFRELE(sctp);
219 			return (1);
220 		}
221 		/* list will be freed by cl_callback */
222 		sctp_prev = sctp;
223 		mutex_enter(&sctps->sctps_g_lock);
224 		sctp = list_next(&sctps->sctps_g_list, sctp);
225 	}
226 	mutex_exit(&sctps->sctps_g_lock);
227 	if (sctp_prev != NULL)
228 		SCTP_REFRELE(sctp_prev);
229 	return (0);
230 }
231 
232 sctp_t *
233 sctp_conn_match(in6_addr_t **faddrpp, uint32_t nfaddr, in6_addr_t *laddr,
234     uint32_t ports, zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
235 {
236 	sctp_tf_t		*tf;
237 	sctp_t			*sctp;
238 	sctp_faddr_t		*fp;
239 	conn_t			*connp;
240 	in6_addr_t		**faddrs, **endaddrs = &faddrpp[nfaddr];
241 
242 	tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
243 	mutex_enter(&tf->tf_lock);
244 
245 	for (sctp = tf->tf_sctp; sctp != NULL; sctp =
246 	    sctp->sctp_conn_hash_next) {
247 		connp = sctp->sctp_connp;
248 		if (ports != connp->conn_ports)
249 			continue;
250 		if (!(connp->conn_zoneid == zoneid ||
251 		    connp->conn_allzones ||
252 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
253 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
254 		    (iraflags & IRAF_TX_SHARED_ADDR))))
255 			continue;
256 
257 		/* check for faddr match */
258 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
259 			for (faddrs = faddrpp; faddrs < endaddrs; faddrs++) {
260 				if (IN6_ARE_ADDR_EQUAL(*faddrs,
261 				    &fp->sf_faddr)) {
262 					/* check for laddr match */
263 					if (sctp_saddr_lookup(sctp, laddr, 0)
264 					    != NULL) {
265 						SCTP_REFHOLD(sctp);
266 						mutex_exit(&tf->tf_lock);
267 						return (sctp);
268 					}
269 				}
270 			}
271 		}
272 
273 		/* no match; continue to the next in the chain */
274 	}
275 
276 	mutex_exit(&tf->tf_lock);
277 	return (sctp);
278 }
279 
280 static sctp_t *
281 listen_match(in6_addr_t *laddr, uint32_t ports, zoneid_t zoneid,
282     iaflags_t iraflags, sctp_stack_t *sctps)
283 {
284 	sctp_t			*sctp;
285 	sctp_tf_t		*tf;
286 	uint16_t		lport;
287 	conn_t			*connp;
288 
289 	lport = ((uint16_t *)&ports)[1];
290 
291 	tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
292 	mutex_enter(&tf->tf_lock);
293 
294 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
295 		connp = sctp->sctp_connp;
296 		if (lport != connp->conn_lport)
297 			continue;
298 
299 		if (!(connp->conn_zoneid == zoneid ||
300 		    connp->conn_allzones ||
301 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
302 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
303 		    (iraflags & IRAF_TX_SHARED_ADDR))))
304 			continue;
305 
306 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
307 			SCTP_REFHOLD(sctp);
308 			goto done;
309 		}
310 		/* no match; continue to the next in the chain */
311 	}
312 
313 done:
314 	mutex_exit(&tf->tf_lock);
315 	return (sctp);
316 }
317 
318 /* called by ipsec_sctp_pol */
319 conn_t *
320 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
321     zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
322 {
323 	sctp_t *sctp;
324 
325 	sctp = sctp_conn_match(&src, 1, dst, ports, zoneid, iraflags, sctps);
326 	if (sctp == NULL) {
327 		/* Not in conn fanout; check listen fanout */
328 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
329 		if (sctp == NULL)
330 			return (NULL);
331 	}
332 	return (sctp->sctp_connp);
333 }
334 
335 /*
336  * This is called from sctp_fanout() with IP header src & dst addresses.
337  * First call sctp_conn_match() to get a match by passing in src & dst
338  * addresses from IP header.
339  * However sctp_conn_match() can return no match under condition such as :
340  * A host can send an INIT ACK from a different address than the INIT was sent
341  * to (in a multi-homed env).
342  * According to RFC4960, a host can send additional addresses in an INIT
343  * ACK chunk.
344  * Therefore extract all addresses from the INIT ACK chunk, pass to
345  * sctp_conn_match() to get a match.
346  */
347 static sctp_t *
348 sctp_lookup_by_faddrs(mblk_t *mp, sctp_hdr_t *sctph, in6_addr_t *srcp,
349     in6_addr_t *dstp, uint32_t ports, zoneid_t zoneid, sctp_stack_t *sctps,
350     iaflags_t iraflags)
351 {
352 	sctp_t			*sctp;
353 	sctp_chunk_hdr_t	*ich;
354 	sctp_init_chunk_t	*iack;
355 	sctp_parm_hdr_t		*ph;
356 	ssize_t			mlen, remaining;
357 	uint16_t		param_type, addr_len = PARM_ADDR4_LEN;
358 	in6_addr_t		src;
359 	in6_addr_t		**addrbuf = NULL, **faddrpp = NULL;
360 	boolean_t		isv4;
361 	uint32_t		totaddr, nfaddr = 0;
362 
363 	/*
364 	 * If we get a match with the passed-in IP header src & dst addresses,
365 	 * quickly return the matched sctp.
366 	 */
367 	if ((sctp = sctp_conn_match(&srcp, 1, dstp, ports, zoneid, iraflags,
368 	    sctps)) != NULL) {
369 		return (sctp);
370 	}
371 
372 	/*
373 	 * Currently sctph is set to NULL in icmp error fanout case
374 	 * (ip_fanout_sctp()).
375 	 * The above sctp_conn_match() should handle that, otherwise
376 	 * return no match found.
377 	 */
378 	if (sctph == NULL)
379 		return (NULL);
380 
381 	/*
382 	 * Do a pullup again in case the previous one was partially successful,
383 	 * so try to complete the pullup here and have a single contiguous
384 	 * chunk for processing of entire INIT ACK chunk below.
385 	 */
386 	if (mp->b_cont != NULL) {
387 		if (pullupmsg(mp, -1) == 0) {
388 			return (NULL);
389 		}
390 	}
391 
392 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
393 	if ((ich = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
394 		return (NULL);
395 	}
396 
397 	if (ich->sch_id == CHUNK_INIT_ACK) {
398 		remaining = ntohs(ich->sch_len) - sizeof (*ich) -
399 		    sizeof (*iack);
400 		if (remaining < sizeof (*ph)) {
401 			return (NULL);
402 		}
403 
404 		isv4 = (iraflags & IRAF_IS_IPV4) ? B_TRUE : B_FALSE;
405 		if (!isv4)
406 			addr_len = PARM_ADDR6_LEN;
407 		totaddr = remaining/addr_len;
408 
409 		iack = (sctp_init_chunk_t *)(ich + 1);
410 		ph = (sctp_parm_hdr_t *)(iack + 1);
411 
412 		addrbuf = (in6_addr_t **)
413 		    kmem_zalloc(totaddr * sizeof (in6_addr_t *), KM_NOSLEEP);
414 		if (addrbuf == NULL)
415 			return (NULL);
416 		faddrpp = addrbuf;
417 
418 		while (ph != NULL) {
419 			/*
420 			 * According to RFC4960 :
421 			 * All integer fields in an SCTP packet MUST be
422 			 * transmitted in network byte order,
423 			 * unless otherwise stated.
424 			 * Therefore convert the param type to host byte order.
425 			 * Also do not add src address present in IP header
426 			 * as it has already been thru sctp_conn_match() above.
427 			 */
428 			param_type = ntohs(ph->sph_type);
429 			switch (param_type) {
430 			case PARM_ADDR4:
431 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
432 				    (ph + 1), &src);
433 				if (IN6_ARE_ADDR_EQUAL(&src, srcp))
434 					break;
435 				*faddrpp = (in6_addr_t *)
436 				    kmem_zalloc(sizeof (in6_addr_t),
437 				    KM_NOSLEEP);
438 				if (*faddrpp == NULL)
439 					break;
440 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
441 				    (ph + 1), *faddrpp);
442 				nfaddr++;
443 				faddrpp++;
444 				break;
445 			case PARM_ADDR6:
446 				*faddrpp = (in6_addr_t *)(ph + 1);
447 				if (IN6_ARE_ADDR_EQUAL(*faddrpp, srcp))
448 					break;
449 				nfaddr++;
450 				faddrpp++;
451 				break;
452 			default:
453 				break;
454 			}
455 			ph = sctp_next_parm(ph, &remaining);
456 		}
457 
458 		ASSERT(nfaddr < totaddr);
459 
460 		if (nfaddr > 0) {
461 			sctp = sctp_conn_match(addrbuf, nfaddr, dstp, ports,
462 			    zoneid, iraflags, sctps);
463 
464 			if (isv4) {
465 				for (faddrpp = addrbuf; nfaddr > 0;
466 				    faddrpp++, nfaddr--) {
467 					if (IN6_IS_ADDR_V4MAPPED(*faddrpp)) {
468 						kmem_free(*faddrpp,
469 						    sizeof (in6_addr_t));
470 					}
471 				}
472 			}
473 		}
474 		kmem_free(addrbuf, totaddr * sizeof (in6_addr_t *));
475 	}
476 	return (sctp);
477 }
478 
479 /*
480  * Fanout to a sctp instance.
481  */
482 conn_t *
483 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
484     ip_recv_attr_t *ira, mblk_t *mp, sctp_stack_t *sctps, sctp_hdr_t *sctph)
485 {
486 	zoneid_t zoneid = ira->ira_zoneid;
487 	iaflags_t iraflags = ira->ira_flags;
488 	sctp_t *sctp;
489 
490 	sctp = sctp_lookup_by_faddrs(mp, sctph, src, dst, ports, zoneid,
491 	    sctps, iraflags);
492 	if (sctp == NULL) {
493 		/* Not in conn fanout; check listen fanout */
494 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
495 		if (sctp == NULL)
496 			return (NULL);
497 		/*
498 		 * On systems running trusted extensions, check if dst
499 		 * should accept the packet. "IPV6_VERSION" indicates
500 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
501 		 * IPv6 addresses are supported.
502 		 */
503 		if ((iraflags & IRAF_SYSTEM_LABELED) &&
504 		    !tsol_receive_local(mp, dst, IPV6_VERSION, ira,
505 		    sctp->sctp_connp)) {
506 			DTRACE_PROBE3(
507 			    tx__ip__log__info__classify__sctp,
508 			    char *,
509 			    "connp(1) could not receive mp(2)",
510 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
511 			SCTP_REFRELE(sctp);
512 			return (NULL);
513 		}
514 	}
515 	/*
516 	 * For labeled systems, there's no need to check the
517 	 * label here.  It's known to be good as we checked
518 	 * before allowing the connection to become bound.
519 	 */
520 	return (sctp->sctp_connp);
521 }
522 
523 /*
524  * Fanout for ICMP errors for SCTP
525  * The caller puts <fport, lport> in the ports parameter.
526  */
527 void
528 ip_fanout_sctp(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, uint32_t ports,
529     ip_recv_attr_t *ira)
530 {
531 	sctp_t		*sctp;
532 	conn_t		*connp;
533 	in6_addr_t	map_src, map_dst;
534 	in6_addr_t	*src, *dst;
535 	boolean_t	secure;
536 	ill_t		*ill = ira->ira_ill;
537 	ip_stack_t	*ipst = ill->ill_ipst;
538 	netstack_t	*ns = ipst->ips_netstack;
539 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
540 	sctp_stack_t	*sctps = ns->netstack_sctp;
541 	iaflags_t	iraflags = ira->ira_flags;
542 	ill_t		*rill = ira->ira_rill;
543 
544 	ASSERT(iraflags & IRAF_ICMP_ERROR);
545 
546 	secure = iraflags & IRAF_IPSEC_SECURE;
547 
548 	/* Assume IP provides aligned packets - otherwise toss */
549 	if (!OK_32PTR(mp->b_rptr)) {
550 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
551 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
552 		freemsg(mp);
553 		return;
554 	}
555 
556 	if (!(iraflags & IRAF_IS_IPV4)) {
557 		src = &ip6h->ip6_src;
558 		dst = &ip6h->ip6_dst;
559 	} else {
560 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
561 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
562 		src = &map_src;
563 		dst = &map_dst;
564 	}
565 	connp = sctp_fanout(src, dst, ports, ira, mp, sctps, NULL);
566 	if (connp == NULL) {
567 		ip_fanout_sctp_raw(mp, ipha, ip6h, ports, ira);
568 		return;
569 	}
570 	sctp = CONN2SCTP(connp);
571 
572 	/*
573 	 * We check some fields in conn_t without holding a lock.
574 	 * This should be fine.
575 	 */
576 	if (((iraflags & IRAF_IS_IPV4) ?
577 	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
578 	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
579 	    secure) {
580 		mp = ipsec_check_inbound_policy(mp, connp, ipha,
581 		    ip6h, ira);
582 		if (mp == NULL) {
583 			SCTP_REFRELE(sctp);
584 			return;
585 		}
586 	}
587 
588 	ira->ira_ill = ira->ira_rill = NULL;
589 
590 	mutex_enter(&sctp->sctp_lock);
591 	if (sctp->sctp_running) {
592 		sctp_add_recvq(sctp, mp, B_FALSE, ira);
593 		mutex_exit(&sctp->sctp_lock);
594 	} else {
595 		sctp->sctp_running = B_TRUE;
596 		mutex_exit(&sctp->sctp_lock);
597 
598 		mutex_enter(&sctp->sctp_recvq_lock);
599 		if (sctp->sctp_recvq != NULL) {
600 			sctp_add_recvq(sctp, mp, B_TRUE, ira);
601 			mutex_exit(&sctp->sctp_recvq_lock);
602 			WAKE_SCTP(sctp);
603 		} else {
604 			mutex_exit(&sctp->sctp_recvq_lock);
605 			if (ira->ira_flags & IRAF_ICMP_ERROR) {
606 				sctp_icmp_error(sctp, mp);
607 			} else {
608 				sctp_input_data(sctp, mp, ira);
609 			}
610 			WAKE_SCTP(sctp);
611 		}
612 	}
613 	SCTP_REFRELE(sctp);
614 	ira->ira_ill = ill;
615 	ira->ira_rill = rill;
616 }
617 
618 void
619 sctp_conn_hash_remove(sctp_t *sctp)
620 {
621 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
622 
623 	if (!tf) {
624 		return;
625 	}
626 	/*
627 	 * On a clustered note send this notification to the clustering
628 	 * subsystem.
629 	 */
630 	if (cl_sctp_disconnect != NULL) {
631 		(*cl_sctp_disconnect)(sctp->sctp_connp->conn_family,
632 		    (cl_sctp_handle_t)sctp);
633 	}
634 
635 	mutex_enter(&tf->tf_lock);
636 	ASSERT(tf->tf_sctp);
637 	if (tf->tf_sctp == sctp) {
638 		tf->tf_sctp = sctp->sctp_conn_hash_next;
639 		if (sctp->sctp_conn_hash_next) {
640 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
641 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
642 		}
643 	} else {
644 		ASSERT(sctp->sctp_conn_hash_prev);
645 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
646 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
647 		    sctp->sctp_conn_hash_next;
648 
649 		if (sctp->sctp_conn_hash_next) {
650 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
651 			    == sctp);
652 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
653 			    sctp->sctp_conn_hash_prev;
654 		}
655 	}
656 	sctp->sctp_conn_hash_next = NULL;
657 	sctp->sctp_conn_hash_prev = NULL;
658 	sctp->sctp_conn_tfp = NULL;
659 	mutex_exit(&tf->tf_lock);
660 }
661 
662 void
663 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
664 {
665 	if (sctp->sctp_conn_tfp) {
666 		sctp_conn_hash_remove(sctp);
667 	}
668 
669 	if (!caller_holds_lock) {
670 		mutex_enter(&tf->tf_lock);
671 	} else {
672 		ASSERT(MUTEX_HELD(&tf->tf_lock));
673 	}
674 
675 	sctp->sctp_conn_hash_next = tf->tf_sctp;
676 	if (tf->tf_sctp) {
677 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
678 	}
679 	sctp->sctp_conn_hash_prev = NULL;
680 	tf->tf_sctp = sctp;
681 	sctp->sctp_conn_tfp = tf;
682 	if (!caller_holds_lock) {
683 		mutex_exit(&tf->tf_lock);
684 	}
685 }
686 
687 void
688 sctp_listen_hash_remove(sctp_t *sctp)
689 {
690 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
691 	conn_t	*connp = sctp->sctp_connp;
692 
693 	if (!tf) {
694 		return;
695 	}
696 	/*
697 	 * On a clustered note send this notification to the clustering
698 	 * subsystem.
699 	 */
700 	if (cl_sctp_unlisten != NULL) {
701 		uchar_t	*slist;
702 		ssize_t	ssize;
703 
704 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
705 		slist = kmem_alloc(ssize, KM_SLEEP);
706 		sctp_get_saddr_list(sctp, slist, ssize);
707 		(*cl_sctp_unlisten)(connp->conn_family, slist,
708 		    sctp->sctp_nsaddrs, connp->conn_lport);
709 		/* list will be freed by the clustering module */
710 	}
711 
712 	mutex_enter(&tf->tf_lock);
713 	ASSERT(tf->tf_sctp);
714 	if (tf->tf_sctp == sctp) {
715 		tf->tf_sctp = sctp->sctp_listen_hash_next;
716 		if (sctp->sctp_listen_hash_next != NULL) {
717 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
718 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
719 		}
720 	} else {
721 		ASSERT(sctp->sctp_listen_hash_prev);
722 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
723 		    sctp);
724 		ASSERT(sctp->sctp_listen_hash_next == NULL ||
725 		    sctp->sctp_listen_hash_next->sctp_listen_hash_prev == sctp);
726 
727 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
728 		    sctp->sctp_listen_hash_next;
729 
730 		if (sctp->sctp_listen_hash_next != NULL) {
731 			sctp_t *next = sctp->sctp_listen_hash_next;
732 
733 			ASSERT(next->sctp_listen_hash_prev == sctp);
734 			next->sctp_listen_hash_prev =
735 			    sctp->sctp_listen_hash_prev;
736 		}
737 	}
738 	sctp->sctp_listen_hash_next = NULL;
739 	sctp->sctp_listen_hash_prev = NULL;
740 	sctp->sctp_listen_tfp = NULL;
741 	mutex_exit(&tf->tf_lock);
742 }
743 
744 void
745 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
746 {
747 	conn_t	*connp = sctp->sctp_connp;
748 
749 	if (sctp->sctp_listen_tfp) {
750 		sctp_listen_hash_remove(sctp);
751 	}
752 
753 	mutex_enter(&tf->tf_lock);
754 	sctp->sctp_listen_hash_next = tf->tf_sctp;
755 	if (tf->tf_sctp) {
756 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
757 	}
758 	sctp->sctp_listen_hash_prev = NULL;
759 	tf->tf_sctp = sctp;
760 	sctp->sctp_listen_tfp = tf;
761 	mutex_exit(&tf->tf_lock);
762 	/*
763 	 * On a clustered note send this notification to the clustering
764 	 * subsystem.
765 	 */
766 	if (cl_sctp_listen != NULL) {
767 		uchar_t	*slist;
768 		ssize_t	ssize;
769 
770 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
771 		slist = kmem_alloc(ssize, KM_SLEEP);
772 		sctp_get_saddr_list(sctp, slist, ssize);
773 		(*cl_sctp_listen)(connp->conn_family, slist,
774 		    sctp->sctp_nsaddrs, connp->conn_lport);
775 		/* list will be freed by the clustering module */
776 	}
777 }
778 
779 /*
780  * Hash list insertion routine for sctp_t structures.
781  * Inserts entries with the ones bound to a specific IP address first
782  * followed by those bound to INADDR_ANY.
783  */
784 void
785 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
786 {
787 	sctp_t	**sctpp;
788 	sctp_t	*sctpnext;
789 
790 	if (sctp->sctp_ptpbhn != NULL) {
791 		ASSERT(!caller_holds_lock);
792 		sctp_bind_hash_remove(sctp);
793 	}
794 	sctpp = &tbf->tf_sctp;
795 	if (!caller_holds_lock) {
796 		mutex_enter(&tbf->tf_lock);
797 	} else {
798 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
799 	}
800 	sctpnext = sctpp[0];
801 	if (sctpnext) {
802 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
803 	}
804 	sctp->sctp_bind_hash = sctpnext;
805 	sctp->sctp_ptpbhn = sctpp;
806 	sctpp[0] = sctp;
807 	/* For sctp_*_hash_remove */
808 	sctp->sctp_bind_lockp = &tbf->tf_lock;
809 	if (!caller_holds_lock)
810 		mutex_exit(&tbf->tf_lock);
811 }
812 
813 /*
814  * Hash list removal routine for sctp_t structures.
815  */
816 void
817 sctp_bind_hash_remove(sctp_t *sctp)
818 {
819 	sctp_t	*sctpnext;
820 	kmutex_t *lockp;
821 
822 	lockp = sctp->sctp_bind_lockp;
823 
824 	if (sctp->sctp_ptpbhn == NULL)
825 		return;
826 
827 	ASSERT(lockp != NULL);
828 	mutex_enter(lockp);
829 	if (sctp->sctp_ptpbhn) {
830 		sctpnext = sctp->sctp_bind_hash;
831 		if (sctpnext) {
832 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
833 			sctp->sctp_bind_hash = NULL;
834 		}
835 		*sctp->sctp_ptpbhn = sctpnext;
836 		sctp->sctp_ptpbhn = NULL;
837 	}
838 	mutex_exit(lockp);
839 	sctp->sctp_bind_lockp = NULL;
840 }
841 
842 /*
843  * Similar to but different from sctp_conn_match().
844  *
845  * Matches sets of addresses as follows: if the argument addr set is
846  * a complete subset of the corresponding addr set in the sctp_t, it
847  * is a match.
848  *
849  * Caller must hold tf->tf_lock.
850  *
851  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
852  */
853 sctp_t *
854 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
855     int min_state)
856 {
857 	sctp_t *sctp;
858 	sctp_faddr_t *fp;
859 
860 	ASSERT(MUTEX_HELD(&tf->tf_lock));
861 
862 	for (sctp = tf->tf_sctp; sctp != NULL;
863 	    sctp = sctp->sctp_conn_hash_next) {
864 		if (*ports != sctp->sctp_connp->conn_ports ||
865 		    sctp->sctp_state < min_state) {
866 			continue;
867 		}
868 
869 		/* check for faddr match */
870 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
871 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->sf_faddr)) {
872 				break;
873 			}
874 		}
875 
876 		if (fp == NULL) {
877 			/* no faddr match; keep looking */
878 			continue;
879 		}
880 
881 		/*
882 		 * There is an existing association with the same peer
883 		 * address.  So now we need to check if our local address
884 		 * set overlaps with the one of the existing association.
885 		 * If they overlap, we should return it.
886 		 */
887 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_OVERLAP) {
888 			goto done;
889 		}
890 
891 		/* no match; continue searching */
892 	}
893 
894 done:
895 	if (sctp != NULL) {
896 		SCTP_REFHOLD(sctp);
897 	}
898 	return (sctp);
899 }
900