xref: /titanic_50/usr/src/uts/common/inet/sctp/sctp_hash.c (revision 6aa4fc89ec1cf2cdf7d7c3b9ec059802ac9abe65)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/sysmacros.h>
27 #include <sys/socket.h>
28 #include <sys/ddi.h>
29 #include <sys/sunddi.h>
30 #include <sys/tsol/tndb.h>
31 #include <sys/tsol/tnet.h>
32 
33 #include <netinet/in.h>
34 #include <netinet/ip6.h>
35 
36 #include <inet/common.h>
37 #include <inet/ip.h>
38 #include <inet/ip6.h>
39 #include <inet/ipclassifier.h>
40 #include <inet/ipsec_impl.h>
41 #include <inet/ipp_common.h>
42 #include <inet/sctp_ip.h>
43 
44 #include "sctp_impl.h"
45 #include "sctp_addr.h"
46 
47 /* Default association hash size.  The size must be a power of 2. */
48 #define	SCTP_CONN_HASH_SIZE	8192
49 
50 uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */
51 
/*
 * Cluster networking hook for traversing current assoc list.
 * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
 */
57 int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
58     boolean_t);
59 static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
60     void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
61 
62 void
63 sctp_hash_init(sctp_stack_t *sctps)
64 {
65 	int i;
66 
67 	/* Start with /etc/system value */
68 	sctps->sctps_conn_hash_size = sctp_conn_hash_size;
69 
70 	if (!ISP2(sctps->sctps_conn_hash_size)) {
71 		/* Not a power of two. Round up to nearest power of two */
72 		for (i = 0; i < 31; i++) {
73 			if (sctps->sctps_conn_hash_size < (1 << i))
74 				break;
75 		}
76 		sctps->sctps_conn_hash_size = 1 << i;
77 	}
78 	if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
79 		sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
80 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
81 		    sctps->sctps_conn_hash_size);
82 	}
83 	sctps->sctps_conn_fanout =
84 	    (sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
85 	    sizeof (sctp_tf_t), KM_SLEEP);
86 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
87 		mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
88 		    MUTEX_DEFAULT, NULL);
89 	}
90 	sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
91 	    sizeof (sctp_tf_t),	KM_SLEEP);
92 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
93 		mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
94 		    MUTEX_DEFAULT, NULL);
95 	}
96 	sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
97 	    sizeof (sctp_tf_t),	KM_SLEEP);
98 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
99 		mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
100 		    MUTEX_DEFAULT, NULL);
101 	}
102 }
103 
104 void
105 sctp_hash_destroy(sctp_stack_t *sctps)
106 {
107 	int i;
108 
109 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
110 		mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
111 	}
112 	kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
113 	    sizeof (sctp_tf_t));
114 	sctps->sctps_conn_fanout = NULL;
115 
116 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
117 		mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
118 	}
119 	kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
120 	    sizeof (sctp_tf_t));
121 	sctps->sctps_listen_fanout = NULL;
122 
123 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
124 		mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
125 	}
126 	kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
127 	    sizeof (sctp_tf_t));
128 	sctps->sctps_bind_fanout = NULL;
129 }
130 
131 /*
132  * Exported routine for extracting active SCTP associations.
133  * Like TCP, we terminate the walk if the callback returns non-zero.
134  *
135  * Need to walk all sctp_stack_t instances since this clustering
136  * interface is assumed global for all instances
137  */
138 int
139 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
140     void *arg, boolean_t cansleep)
141 {
142 	netstack_handle_t nh;
143 	netstack_t *ns;
144 	int ret = 0;
145 
146 	netstack_next_init(&nh);
147 	while ((ns = netstack_next(&nh)) != NULL) {
148 		ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
149 		    ns->netstack_sctp);
150 		netstack_rele(ns);
151 	}
152 	netstack_next_fini(&nh);
153 	return (ret);
154 }
155 
156 static int
157 cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
158     void *arg, boolean_t cansleep, sctp_stack_t *sctps)
159 {
160 	sctp_t		*sctp;
161 	sctp_t		*sctp_prev;
162 	cl_sctp_info_t	cl_sctpi;
163 	uchar_t		*slist;
164 	uchar_t		*flist;
165 
166 	sctp_prev = NULL;
167 	mutex_enter(&sctps->sctps_g_lock);
168 	sctp = list_head(&sctps->sctps_g_list);
169 	while (sctp != NULL) {
170 		size_t	ssize;
171 		size_t	fsize;
172 
173 		mutex_enter(&sctp->sctp_reflock);
174 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
175 			mutex_exit(&sctp->sctp_reflock);
176 			sctp = list_next(&sctps->sctps_g_list, sctp);
177 			continue;
178 		}
179 		sctp->sctp_refcnt++;
180 		mutex_exit(&sctp->sctp_reflock);
181 		mutex_exit(&sctps->sctps_g_lock);
182 		if (sctp_prev != NULL)
183 			SCTP_REFRELE(sctp_prev);
184 		RUN_SCTP(sctp);
185 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
186 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
187 
188 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
189 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
190 		if (slist == NULL || flist == NULL) {
191 			WAKE_SCTP(sctp);
192 			if (slist != NULL)
193 				kmem_free(slist, ssize);
194 			if (flist != NULL)
195 				kmem_free(flist, fsize);
196 			SCTP_REFRELE(sctp);
197 			return (1);
198 		}
199 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
200 		sctp_get_saddr_list(sctp, slist, ssize);
201 		sctp_get_faddr_list(sctp, flist, fsize);
202 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
203 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
204 		cl_sctpi.cl_sctpi_family = sctp->sctp_connp->conn_family;
205 		if (cl_sctpi.cl_sctpi_family == AF_INET)
206 			cl_sctpi.cl_sctpi_ipversion = IPV4_VERSION;
207 		else
208 			cl_sctpi.cl_sctpi_ipversion = IPV6_VERSION;
209 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
210 		cl_sctpi.cl_sctpi_lport = sctp->sctp_connp->conn_lport;
211 		cl_sctpi.cl_sctpi_fport = sctp->sctp_connp->conn_fport;
212 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
213 		WAKE_SCTP(sctp);
214 		cl_sctpi.cl_sctpi_laddrp = slist;
215 		cl_sctpi.cl_sctpi_faddrp = flist;
216 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
217 			kmem_free(slist, ssize);
218 			kmem_free(flist, fsize);
219 			SCTP_REFRELE(sctp);
220 			return (1);
221 		}
222 		/* list will be freed by cl_callback */
223 		sctp_prev = sctp;
224 		mutex_enter(&sctps->sctps_g_lock);
225 		sctp = list_next(&sctps->sctps_g_list, sctp);
226 	}
227 	mutex_exit(&sctps->sctps_g_lock);
228 	if (sctp_prev != NULL)
229 		SCTP_REFRELE(sctp_prev);
230 	return (0);
231 }
232 
233 sctp_t *
234 sctp_conn_match(in6_addr_t **faddrpp, uint32_t nfaddr, in6_addr_t *laddr,
235     uint32_t ports, zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
236 {
237 	sctp_tf_t		*tf;
238 	sctp_t			*sctp;
239 	sctp_faddr_t		*fp;
240 	conn_t			*connp;
241 	in6_addr_t		**faddrs, **endaddrs = &faddrpp[nfaddr];
242 
243 	tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
244 	mutex_enter(&tf->tf_lock);
245 
246 	for (sctp = tf->tf_sctp; sctp != NULL; sctp =
247 	    sctp->sctp_conn_hash_next) {
248 		connp = sctp->sctp_connp;
249 		if (ports != connp->conn_ports)
250 			continue;
251 		if (!(connp->conn_zoneid == zoneid ||
252 		    connp->conn_allzones ||
253 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
254 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
255 		    (iraflags & IRAF_TX_SHARED_ADDR))))
256 			continue;
257 
258 		/* check for faddr match */
259 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
260 			for (faddrs = faddrpp; faddrs < endaddrs; faddrs++) {
261 				if (IN6_ARE_ADDR_EQUAL(*faddrs,
262 				    &fp->sf_faddr)) {
263 					/* check for laddr match */
264 					if (sctp_saddr_lookup(sctp, laddr, 0)
265 					    != NULL) {
266 						SCTP_REFHOLD(sctp);
267 						mutex_exit(&tf->tf_lock);
268 						return (sctp);
269 					}
270 				}
271 			}
272 		}
273 
274 		/* no match; continue to the next in the chain */
275 	}
276 
277 	mutex_exit(&tf->tf_lock);
278 	return (sctp);
279 }
280 
281 static sctp_t *
282 listen_match(in6_addr_t *laddr, uint32_t ports, zoneid_t zoneid,
283     iaflags_t iraflags, sctp_stack_t *sctps)
284 {
285 	sctp_t			*sctp;
286 	sctp_tf_t		*tf;
287 	uint16_t		lport;
288 	conn_t			*connp;
289 
290 	lport = ((uint16_t *)&ports)[1];
291 
292 	tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
293 	mutex_enter(&tf->tf_lock);
294 
295 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
296 		connp = sctp->sctp_connp;
297 		if (lport != connp->conn_lport)
298 			continue;
299 
300 		if (!(connp->conn_zoneid == zoneid ||
301 		    connp->conn_allzones ||
302 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
303 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
304 		    (iraflags & IRAF_TX_SHARED_ADDR))))
305 			continue;
306 
307 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
308 			SCTP_REFHOLD(sctp);
309 			goto done;
310 		}
311 		/* no match; continue to the next in the chain */
312 	}
313 
314 done:
315 	mutex_exit(&tf->tf_lock);
316 	return (sctp);
317 }
318 
319 /* called by ipsec_sctp_pol */
320 conn_t *
321 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
322     zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
323 {
324 	sctp_t *sctp;
325 
326 	sctp = sctp_conn_match(&src, 1, dst, ports, zoneid, iraflags, sctps);
327 	if (sctp == NULL) {
328 		/* Not in conn fanout; check listen fanout */
329 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
330 		if (sctp == NULL)
331 			return (NULL);
332 	}
333 	return (sctp->sctp_connp);
334 }
335 
336 /*
337  * This is called from sctp_fanout() with IP header src & dst addresses.
338  * First call sctp_conn_match() to get a match by passing in src & dst
339  * addresses from IP header.
340  * However sctp_conn_match() can return no match under condition such as :
341  * A host can send an INIT ACK from a different address than the INIT was sent
342  * to (in a multi-homed env).
343  * According to RFC4960, a host can send additional addresses in an INIT
344  * ACK chunk.
345  * Therefore extract all addresses from the INIT ACK chunk, pass to
346  * sctp_conn_match() to get a match.
347  */
348 static sctp_t *
349 sctp_lookup_by_faddrs(mblk_t *mp, sctp_hdr_t *sctph, in6_addr_t *srcp,
350     in6_addr_t *dstp, uint32_t ports, zoneid_t zoneid, sctp_stack_t *sctps,
351     iaflags_t iraflags)
352 {
353 	sctp_t			*sctp;
354 	sctp_chunk_hdr_t	*ich;
355 	sctp_init_chunk_t	*iack;
356 	sctp_parm_hdr_t		*ph;
357 	ssize_t			mlen, remaining;
358 	uint16_t		param_type, addr_len = PARM_ADDR4_LEN;
359 	in6_addr_t		src;
360 	in6_addr_t		**addrbuf = NULL, **faddrpp = NULL;
361 	boolean_t		isv4;
362 	uint32_t		totaddr, nfaddr = 0;
363 
364 	/*
365 	 * If we get a match with the passed-in IP header src & dst addresses,
366 	 * quickly return the matched sctp.
367 	 */
368 	if ((sctp = sctp_conn_match(&srcp, 1, dstp, ports, zoneid, iraflags,
369 	    sctps)) != NULL) {
370 		return (sctp);
371 	}
372 
373 	/*
374 	 * Currently sctph is set to NULL in icmp error fanout case
375 	 * (ip_fanout_sctp()).
376 	 * The above sctp_conn_match() should handle that, otherwise
377 	 * return no match found.
378 	 */
379 	if (sctph == NULL)
380 		return (NULL);
381 
382 	/*
383 	 * Do a pullup again in case the previous one was partially successful,
384 	 * so try to complete the pullup here and have a single contiguous
385 	 * chunk for processing of entire INIT ACK chunk below.
386 	 */
387 	if (mp->b_cont != NULL) {
388 		if (pullupmsg(mp, -1) == 0) {
389 			return (NULL);
390 		}
391 	}
392 
393 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
394 	if ((ich = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
395 		return (NULL);
396 	}
397 
398 	if (ich->sch_id == CHUNK_INIT_ACK) {
399 		remaining = ntohs(ich->sch_len) - sizeof (*ich) -
400 		    sizeof (*iack);
401 		if (remaining < sizeof (*ph)) {
402 			return (NULL);
403 		}
404 
405 		isv4 = (iraflags & IRAF_IS_IPV4) ? B_TRUE : B_FALSE;
406 		if (!isv4)
407 			addr_len = PARM_ADDR6_LEN;
408 		totaddr = remaining/addr_len;
409 
410 		iack = (sctp_init_chunk_t *)(ich + 1);
411 		ph = (sctp_parm_hdr_t *)(iack + 1);
412 
413 		addrbuf = (in6_addr_t **)
414 		    kmem_zalloc(totaddr * sizeof (in6_addr_t *), KM_NOSLEEP);
415 		if (addrbuf == NULL)
416 			return (NULL);
417 		faddrpp = addrbuf;
418 
419 		while (ph != NULL) {
420 			/*
421 			 * According to RFC4960 :
422 			 * All integer fields in an SCTP packet MUST be
423 			 * transmitted in network byte order,
424 			 * unless otherwise stated.
425 			 * Therefore convert the param type to host byte order.
426 			 * Also do not add src address present in IP header
427 			 * as it has already been thru sctp_conn_match() above.
428 			 */
429 			param_type = ntohs(ph->sph_type);
430 			switch (param_type) {
431 			case PARM_ADDR4:
432 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
433 				    (ph + 1), &src);
434 				if (IN6_ARE_ADDR_EQUAL(&src, srcp))
435 					break;
436 				*faddrpp = (in6_addr_t *)
437 				    kmem_zalloc(sizeof (in6_addr_t),
438 				    KM_NOSLEEP);
439 				if (*faddrpp == NULL)
440 					break;
441 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
442 				    (ph + 1), *faddrpp);
443 				nfaddr++;
444 				faddrpp++;
445 				break;
446 			case PARM_ADDR6:
447 				*faddrpp = (in6_addr_t *)(ph + 1);
448 				if (IN6_ARE_ADDR_EQUAL(*faddrpp, srcp))
449 					break;
450 				nfaddr++;
451 				faddrpp++;
452 				break;
453 			default:
454 				break;
455 			}
456 			ph = sctp_next_parm(ph, &remaining);
457 		}
458 
459 		ASSERT(nfaddr < totaddr);
460 
461 		if (nfaddr > 0) {
462 			sctp = sctp_conn_match(addrbuf, nfaddr, dstp, ports,
463 			    zoneid, iraflags, sctps);
464 
465 			if (isv4) {
466 				for (faddrpp = addrbuf; nfaddr > 0;
467 				    faddrpp++, nfaddr--) {
468 					if (IN6_IS_ADDR_V4MAPPED(*faddrpp)) {
469 						kmem_free(*faddrpp,
470 						    sizeof (in6_addr_t));
471 					}
472 				}
473 			}
474 		}
475 		kmem_free(addrbuf, totaddr * sizeof (in6_addr_t *));
476 	}
477 	return (sctp);
478 }
479 
480 /*
481  * Fanout to a sctp instance.
482  */
483 conn_t *
484 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
485     ip_recv_attr_t *ira, mblk_t *mp, sctp_stack_t *sctps, sctp_hdr_t *sctph)
486 {
487 	zoneid_t zoneid = ira->ira_zoneid;
488 	iaflags_t iraflags = ira->ira_flags;
489 	sctp_t *sctp;
490 
491 	sctp = sctp_lookup_by_faddrs(mp, sctph, src, dst, ports, zoneid,
492 	    sctps, iraflags);
493 	if (sctp == NULL) {
494 		/* Not in conn fanout; check listen fanout */
495 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
496 		if (sctp == NULL)
497 			return (NULL);
498 		/*
499 		 * On systems running trusted extensions, check if dst
500 		 * should accept the packet. "IPV6_VERSION" indicates
501 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
502 		 * IPv6 addresses are supported.
503 		 */
504 		if ((iraflags & IRAF_SYSTEM_LABELED) &&
505 		    !tsol_receive_local(mp, dst, IPV6_VERSION, ira,
506 		    sctp->sctp_connp)) {
507 			DTRACE_PROBE3(
508 			    tx__ip__log__info__classify__sctp,
509 			    char *,
510 			    "connp(1) could not receive mp(2)",
511 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
512 			SCTP_REFRELE(sctp);
513 			return (NULL);
514 		}
515 	}
516 	/*
517 	 * For labeled systems, there's no need to check the
518 	 * label here.  It's known to be good as we checked
519 	 * before allowing the connection to become bound.
520 	 */
521 	return (sctp->sctp_connp);
522 }
523 
524 /*
525  * Fanout for ICMP errors for SCTP
526  * The caller puts <fport, lport> in the ports parameter.
527  */
528 void
529 ip_fanout_sctp(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, uint32_t ports,
530     ip_recv_attr_t *ira)
531 {
532 	sctp_t		*sctp;
533 	conn_t		*connp;
534 	in6_addr_t	map_src, map_dst;
535 	in6_addr_t	*src, *dst;
536 	boolean_t	secure;
537 	ill_t		*ill = ira->ira_ill;
538 	ip_stack_t	*ipst = ill->ill_ipst;
539 	netstack_t	*ns = ipst->ips_netstack;
540 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
541 	sctp_stack_t	*sctps = ns->netstack_sctp;
542 	iaflags_t	iraflags = ira->ira_flags;
543 	ill_t		*rill = ira->ira_rill;
544 
545 	ASSERT(iraflags & IRAF_ICMP_ERROR);
546 
547 	secure = iraflags & IRAF_IPSEC_SECURE;
548 
549 	/* Assume IP provides aligned packets - otherwise toss */
550 	if (!OK_32PTR(mp->b_rptr)) {
551 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
552 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
553 		freemsg(mp);
554 		return;
555 	}
556 
557 	if (!(iraflags & IRAF_IS_IPV4)) {
558 		src = &ip6h->ip6_src;
559 		dst = &ip6h->ip6_dst;
560 	} else {
561 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
562 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
563 		src = &map_src;
564 		dst = &map_dst;
565 	}
566 	connp = sctp_fanout(src, dst, ports, ira, mp, sctps, NULL);
567 	if (connp == NULL) {
568 		ip_fanout_sctp_raw(mp, ipha, ip6h, ports, ira);
569 		return;
570 	}
571 	sctp = CONN2SCTP(connp);
572 
573 	/*
574 	 * We check some fields in conn_t without holding a lock.
575 	 * This should be fine.
576 	 */
577 	if (((iraflags & IRAF_IS_IPV4) ?
578 	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
579 	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
580 	    secure) {
581 		mp = ipsec_check_inbound_policy(mp, connp, ipha,
582 		    ip6h, ira);
583 		if (mp == NULL) {
584 			SCTP_REFRELE(sctp);
585 			return;
586 		}
587 	}
588 
589 	ira->ira_ill = ira->ira_rill = NULL;
590 
591 	mutex_enter(&sctp->sctp_lock);
592 	if (sctp->sctp_running) {
593 		sctp_add_recvq(sctp, mp, B_FALSE, ira);
594 		mutex_exit(&sctp->sctp_lock);
595 	} else {
596 		sctp->sctp_running = B_TRUE;
597 		mutex_exit(&sctp->sctp_lock);
598 
599 		mutex_enter(&sctp->sctp_recvq_lock);
600 		if (sctp->sctp_recvq != NULL) {
601 			sctp_add_recvq(sctp, mp, B_TRUE, ira);
602 			mutex_exit(&sctp->sctp_recvq_lock);
603 			WAKE_SCTP(sctp);
604 		} else {
605 			mutex_exit(&sctp->sctp_recvq_lock);
606 			if (ira->ira_flags & IRAF_ICMP_ERROR) {
607 				sctp_icmp_error(sctp, mp);
608 			} else {
609 				sctp_input_data(sctp, mp, ira);
610 			}
611 			WAKE_SCTP(sctp);
612 		}
613 	}
614 	SCTP_REFRELE(sctp);
615 	ira->ira_ill = ill;
616 	ira->ira_rill = rill;
617 }
618 
619 void
620 sctp_conn_hash_remove(sctp_t *sctp)
621 {
622 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
623 
624 	if (!tf) {
625 		return;
626 	}
627 	/*
628 	 * On a clustered note send this notification to the clustering
629 	 * subsystem.
630 	 */
631 	if (cl_sctp_disconnect != NULL) {
632 		(*cl_sctp_disconnect)(sctp->sctp_connp->conn_family,
633 		    (cl_sctp_handle_t)sctp);
634 	}
635 
636 	mutex_enter(&tf->tf_lock);
637 	ASSERT(tf->tf_sctp);
638 	if (tf->tf_sctp == sctp) {
639 		tf->tf_sctp = sctp->sctp_conn_hash_next;
640 		if (sctp->sctp_conn_hash_next) {
641 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
642 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
643 		}
644 	} else {
645 		ASSERT(sctp->sctp_conn_hash_prev);
646 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
647 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
648 		    sctp->sctp_conn_hash_next;
649 
650 		if (sctp->sctp_conn_hash_next) {
651 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
652 			    == sctp);
653 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
654 			    sctp->sctp_conn_hash_prev;
655 		}
656 	}
657 	sctp->sctp_conn_hash_next = NULL;
658 	sctp->sctp_conn_hash_prev = NULL;
659 	sctp->sctp_conn_tfp = NULL;
660 	mutex_exit(&tf->tf_lock);
661 }
662 
663 void
664 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
665 {
666 	if (sctp->sctp_conn_tfp) {
667 		sctp_conn_hash_remove(sctp);
668 	}
669 
670 	if (!caller_holds_lock) {
671 		mutex_enter(&tf->tf_lock);
672 	} else {
673 		ASSERT(MUTEX_HELD(&tf->tf_lock));
674 	}
675 
676 	sctp->sctp_conn_hash_next = tf->tf_sctp;
677 	if (tf->tf_sctp) {
678 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
679 	}
680 	sctp->sctp_conn_hash_prev = NULL;
681 	tf->tf_sctp = sctp;
682 	sctp->sctp_conn_tfp = tf;
683 	if (!caller_holds_lock) {
684 		mutex_exit(&tf->tf_lock);
685 	}
686 }
687 
688 void
689 sctp_listen_hash_remove(sctp_t *sctp)
690 {
691 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
692 	conn_t	*connp = sctp->sctp_connp;
693 
694 	if (!tf) {
695 		return;
696 	}
697 	/*
698 	 * On a clustered note send this notification to the clustering
699 	 * subsystem.
700 	 */
701 	if (cl_sctp_unlisten != NULL) {
702 		uchar_t	*slist;
703 		ssize_t	ssize;
704 
705 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
706 		slist = kmem_alloc(ssize, KM_SLEEP);
707 		sctp_get_saddr_list(sctp, slist, ssize);
708 		(*cl_sctp_unlisten)(connp->conn_family, slist,
709 		    sctp->sctp_nsaddrs, connp->conn_lport);
710 		/* list will be freed by the clustering module */
711 	}
712 
713 	mutex_enter(&tf->tf_lock);
714 	ASSERT(tf->tf_sctp);
715 	if (tf->tf_sctp == sctp) {
716 		tf->tf_sctp = sctp->sctp_listen_hash_next;
717 		if (sctp->sctp_listen_hash_next != NULL) {
718 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
719 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
720 		}
721 	} else {
722 		ASSERT(sctp->sctp_listen_hash_prev);
723 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
724 		    sctp);
725 		ASSERT(sctp->sctp_listen_hash_next == NULL ||
726 		    sctp->sctp_listen_hash_next->sctp_listen_hash_prev == sctp);
727 
728 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
729 		    sctp->sctp_listen_hash_next;
730 
731 		if (sctp->sctp_listen_hash_next != NULL) {
732 			sctp_t *next = sctp->sctp_listen_hash_next;
733 
734 			ASSERT(next->sctp_listen_hash_prev == sctp);
735 			next->sctp_listen_hash_prev =
736 			    sctp->sctp_listen_hash_prev;
737 		}
738 	}
739 	sctp->sctp_listen_hash_next = NULL;
740 	sctp->sctp_listen_hash_prev = NULL;
741 	sctp->sctp_listen_tfp = NULL;
742 	mutex_exit(&tf->tf_lock);
743 }
744 
745 void
746 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
747 {
748 	conn_t	*connp = sctp->sctp_connp;
749 
750 	if (sctp->sctp_listen_tfp) {
751 		sctp_listen_hash_remove(sctp);
752 	}
753 
754 	mutex_enter(&tf->tf_lock);
755 	sctp->sctp_listen_hash_next = tf->tf_sctp;
756 	if (tf->tf_sctp) {
757 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
758 	}
759 	sctp->sctp_listen_hash_prev = NULL;
760 	tf->tf_sctp = sctp;
761 	sctp->sctp_listen_tfp = tf;
762 	mutex_exit(&tf->tf_lock);
763 	/*
764 	 * On a clustered note send this notification to the clustering
765 	 * subsystem.
766 	 */
767 	if (cl_sctp_listen != NULL) {
768 		uchar_t	*slist;
769 		ssize_t	ssize;
770 
771 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
772 		slist = kmem_alloc(ssize, KM_SLEEP);
773 		sctp_get_saddr_list(sctp, slist, ssize);
774 		(*cl_sctp_listen)(connp->conn_family, slist,
775 		    sctp->sctp_nsaddrs, connp->conn_lport);
776 		/* list will be freed by the clustering module */
777 	}
778 }
779 
780 /*
781  * Hash list insertion routine for sctp_t structures.
782  * Inserts entries with the ones bound to a specific IP address first
783  * followed by those bound to INADDR_ANY.
784  */
785 void
786 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
787 {
788 	sctp_t	**sctpp;
789 	sctp_t	*sctpnext;
790 
791 	if (sctp->sctp_ptpbhn != NULL) {
792 		ASSERT(!caller_holds_lock);
793 		sctp_bind_hash_remove(sctp);
794 	}
795 	sctpp = &tbf->tf_sctp;
796 	if (!caller_holds_lock) {
797 		mutex_enter(&tbf->tf_lock);
798 	} else {
799 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
800 	}
801 	sctpnext = sctpp[0];
802 	if (sctpnext) {
803 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
804 	}
805 	sctp->sctp_bind_hash = sctpnext;
806 	sctp->sctp_ptpbhn = sctpp;
807 	sctpp[0] = sctp;
808 	/* For sctp_*_hash_remove */
809 	sctp->sctp_bind_lockp = &tbf->tf_lock;
810 	if (!caller_holds_lock)
811 		mutex_exit(&tbf->tf_lock);
812 }
813 
814 /*
815  * Hash list removal routine for sctp_t structures.
816  */
817 void
818 sctp_bind_hash_remove(sctp_t *sctp)
819 {
820 	sctp_t	*sctpnext;
821 	kmutex_t *lockp;
822 
823 	lockp = sctp->sctp_bind_lockp;
824 
825 	if (sctp->sctp_ptpbhn == NULL)
826 		return;
827 
828 	ASSERT(lockp != NULL);
829 	mutex_enter(lockp);
830 	if (sctp->sctp_ptpbhn) {
831 		sctpnext = sctp->sctp_bind_hash;
832 		if (sctpnext) {
833 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
834 			sctp->sctp_bind_hash = NULL;
835 		}
836 		*sctp->sctp_ptpbhn = sctpnext;
837 		sctp->sctp_ptpbhn = NULL;
838 	}
839 	mutex_exit(lockp);
840 	sctp->sctp_bind_lockp = NULL;
841 }
842 
843 /*
844  * Similar to but different from sctp_conn_match().
845  *
846  * Matches sets of addresses as follows: if the argument addr set is
847  * a complete subset of the corresponding addr set in the sctp_t, it
848  * is a match.
849  *
850  * Caller must hold tf->tf_lock.
851  *
852  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
853  */
854 sctp_t *
855 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
856     int min_state)
857 {
858 	sctp_t *sctp;
859 	sctp_faddr_t *fp;
860 
861 	ASSERT(MUTEX_HELD(&tf->tf_lock));
862 
863 	for (sctp = tf->tf_sctp; sctp != NULL;
864 	    sctp = sctp->sctp_conn_hash_next) {
865 		if (*ports != sctp->sctp_connp->conn_ports ||
866 		    sctp->sctp_state < min_state) {
867 			continue;
868 		}
869 
870 		/* check for faddr match */
871 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
872 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->sf_faddr)) {
873 				break;
874 			}
875 		}
876 
877 		if (fp == NULL) {
878 			/* no faddr match; keep looking */
879 			continue;
880 		}
881 
882 		/*
883 		 * There is an existing association with the same peer
884 		 * address.  So now we need to check if our local address
885 		 * set overlaps with the one of the existing association.
886 		 * If they overlap, we should return it.
887 		 */
888 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_OVERLAP) {
889 			goto done;
890 		}
891 
892 		/* no match; continue searching */
893 	}
894 
895 done:
896 	if (sctp != NULL) {
897 		SCTP_REFHOLD(sctp);
898 	}
899 	return (sctp);
900 }
901