xref: /illumos-gate/usr/src/uts/common/inet/sctp/sctp_hash.c (revision 65a89a64c60f3061bbe2381edaacc81660af9a95)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <sys/socket.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/strsun.h>
34 
35 #include <netinet/in.h>
36 #include <netinet/ip6.h>
37 
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ipclassifier.h>
42 #include <inet/ipsec_impl.h>
43 #include <inet/ipp_common.h>
44 #include <inet/sctp_ip.h>
45 
46 #include "sctp_impl.h"
47 #include "sctp_addr.h"
48 
/* SCTP bind hash list - all sctp_t with state >= BOUND. */
sctp_tf_t	sctp_bind_fanout[SCTP_BIND_FANOUT_SIZE];
/* SCTP listen hash list - all sctp_t with state == LISTEN. */
sctp_tf_t	sctp_listen_fanout[SCTP_LISTEN_FANOUT_SIZE];

/* Default association hash size.  The size must be a power of 2. */
#define	SCTP_CONN_HASH_SIZE	8192

/*
 * Association (connection) hash table.  It is allocated by sctp_hash_init()
 * with sctp_conn_hash_size buckets and released by sctp_hash_destroy().
 * sctp_hash_init() rounds sctp_conn_hash_size up to a power of two and
 * never lets it drop below SCTP_CONN_HASH_SIZE.
 */
sctp_tf_t	*sctp_conn_fanout;
uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;

/*
 * Cluster networking hook for traversing the current association list.
 * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
 */
int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
    boolean_t);
67 
68 void
69 sctp_hash_init()
70 {
71 	int i;
72 
73 	if (sctp_conn_hash_size & (sctp_conn_hash_size - 1)) {
74 		/* Not a power of two. Round up to nearest power of two */
75 		for (i = 0; i < 31; i++) {
76 			if (sctp_conn_hash_size < (1 << i))
77 				break;
78 		}
79 		sctp_conn_hash_size = 1 << i;
80 	}
81 	if (sctp_conn_hash_size < SCTP_CONN_HASH_SIZE) {
82 		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
83 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
84 		    sctp_conn_hash_size);
85 	}
86 	sctp_conn_fanout =
87 		(sctp_tf_t *)kmem_zalloc(sctp_conn_hash_size *
88 		    sizeof (sctp_tf_t),	KM_SLEEP);
89 	for (i = 0; i < sctp_conn_hash_size; i++) {
90 		mutex_init(&sctp_conn_fanout[i].tf_lock, NULL,
91 			    MUTEX_DEFAULT, NULL);
92 	}
93 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
94 		mutex_init(&sctp_listen_fanout[i].tf_lock, NULL,
95 		    MUTEX_DEFAULT, NULL);
96 	}
97 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
98 		mutex_init(&sctp_bind_fanout[i].tf_lock, NULL,
99 		    MUTEX_DEFAULT, NULL);
100 	}
101 }
102 
103 void
104 sctp_hash_destroy()
105 {
106 	int i;
107 
108 	for (i = 0; i < sctp_conn_hash_size; i++) {
109 		mutex_destroy(&sctp_conn_fanout[i].tf_lock);
110 	}
111 	kmem_free(sctp_conn_fanout, sctp_conn_hash_size * sizeof (sctp_tf_t));
112 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
113 		mutex_destroy(&sctp_listen_fanout[i].tf_lock);
114 	}
115 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
116 		mutex_destroy(&sctp_bind_fanout[i].tf_lock);
117 	}
118 }
119 
120 /* Walk the SCTP global list and refrele the ire for this ipif */
121 void
122 sctp_ire_cache_flush(ipif_t *ipif)
123 {
124 	sctp_t			*sctp;
125 	sctp_t			*sctp_prev = NULL;
126 	sctp_faddr_t		*fp;
127 	conn_t			*connp;
128 	ire_t			*ire;
129 
130 	sctp = gsctp;
131 	mutex_enter(&sctp_g_lock);
132 	while (sctp != NULL) {
133 		mutex_enter(&sctp->sctp_reflock);
134 		if (sctp->sctp_condemned) {
135 			mutex_exit(&sctp->sctp_reflock);
136 			sctp = list_next(&sctp_g_list, sctp);
137 			continue;
138 		}
139 		sctp->sctp_refcnt++;
140 		mutex_exit(&sctp->sctp_reflock);
141 		mutex_exit(&sctp_g_lock);
142 		if (sctp_prev != NULL)
143 			SCTP_REFRELE(sctp_prev);
144 
145 		RUN_SCTP(sctp);
146 		connp = sctp->sctp_connp;
147 		mutex_enter(&connp->conn_lock);
148 		ire = connp->conn_ire_cache;
149 		if (ire != NULL &&
150 		    (ipif == NULL || ire->ire_ipif == ipif)) {
151 			connp->conn_ire_cache = NULL;
152 			mutex_exit(&connp->conn_lock);
153 			IRE_REFRELE_NOTR(ire);
154 		} else {
155 			mutex_exit(&connp->conn_lock);
156 		}
157 		/* check for ires cached in faddr */
158 		for (fp = sctp->sctp_faddrs; fp != NULL;
159 		    fp = fp->next) {
160 			ire = fp->ire;
161 			if (ire != NULL && (ipif == NULL ||
162 			    ire->ire_ipif == ipif)) {
163 				fp->ire = NULL;
164 				IRE_REFRELE_NOTR(ire);
165 			}
166 		}
167 		WAKE_SCTP(sctp);
168 		sctp_prev = sctp;
169 		mutex_enter(&sctp_g_lock);
170 		sctp = list_next(&sctp_g_list, sctp);
171 	}
172 	mutex_exit(&sctp_g_lock);
173 	if (sctp_prev != NULL)
174 		SCTP_REFRELE(sctp_prev);
175 }
176 
177 /*
178  * Exported routine for extracting active SCTP associations.
179  * Like TCP, we terminate the walk if the callback returns non-zero.
180  */
181 int
182 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg,
183     boolean_t cansleep)
184 {
185 	sctp_t		*sctp;
186 	sctp_t		*sctp_prev;
187 	cl_sctp_info_t	cl_sctpi;
188 	uchar_t		*slist;
189 	uchar_t		*flist;
190 
191 	sctp = gsctp;
192 	sctp_prev = NULL;
193 	mutex_enter(&sctp_g_lock);
194 	while (sctp != NULL) {
195 		size_t	ssize;
196 		size_t	fsize;
197 
198 		mutex_enter(&sctp->sctp_reflock);
199 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
200 			mutex_exit(&sctp->sctp_reflock);
201 			sctp = list_next(&sctp_g_list, sctp);
202 			continue;
203 		}
204 		sctp->sctp_refcnt++;
205 		mutex_exit(&sctp->sctp_reflock);
206 		mutex_exit(&sctp_g_lock);
207 		if (sctp_prev != NULL)
208 			SCTP_REFRELE(sctp_prev);
209 		RUN_SCTP(sctp);
210 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
211 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
212 
213 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
214 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
215 		if (slist == NULL || flist == NULL) {
216 			WAKE_SCTP(sctp);
217 			if (slist != NULL)
218 				kmem_free(slist, ssize);
219 			if (flist != NULL)
220 				kmem_free(flist, fsize);
221 			SCTP_REFRELE(sctp);
222 			return (1);
223 		}
224 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
225 		sctp_get_saddr_list(sctp, slist, ssize);
226 		sctp_get_faddr_list(sctp, flist, fsize);
227 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
228 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
229 		cl_sctpi.cl_sctpi_family = sctp->sctp_family;
230 		cl_sctpi.cl_sctpi_ipversion = sctp->sctp_ipversion;
231 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
232 		cl_sctpi.cl_sctpi_lport = sctp->sctp_lport;
233 		cl_sctpi.cl_sctpi_fport = sctp->sctp_fport;
234 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
235 		WAKE_SCTP(sctp);
236 		cl_sctpi.cl_sctpi_laddrp = slist;
237 		cl_sctpi.cl_sctpi_faddrp = flist;
238 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
239 			kmem_free(slist, ssize);
240 			kmem_free(flist, fsize);
241 			SCTP_REFRELE(sctp);
242 			return (1);
243 		}
244 		/* list will be freed by cl_callback */
245 		sctp_prev = sctp;
246 		mutex_enter(&sctp_g_lock);
247 		sctp = list_next(&sctp_g_list, sctp);
248 	}
249 	mutex_exit(&sctp_g_lock);
250 	if (sctp_prev != NULL)
251 		SCTP_REFRELE(sctp_prev);
252 	return (0);
253 }
254 
255 sctp_t *
256 sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
257     uint_t ipif_seqid, zoneid_t zoneid)
258 {
259 	sctp_tf_t		*tf;
260 	sctp_t			*sctp;
261 	sctp_faddr_t		*fp;
262 
263 	tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]);
264 	mutex_enter(&tf->tf_lock);
265 
266 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
267 		if (ports != sctp->sctp_ports || (zoneid != ALL_ZONES &&
268 		    zoneid != sctp->sctp_zoneid)) {
269 			continue;
270 		}
271 
272 		/* check for faddr match */
273 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
274 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
275 				break;
276 			}
277 		}
278 
279 		if (!fp) {
280 			/* no faddr match; keep looking */
281 			continue;
282 		}
283 
284 		/* check for laddr match */
285 		if (ipif_seqid == 0) {
286 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
287 				SCTP_REFHOLD(sctp);
288 				goto done;
289 			}
290 		} else {
291 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
292 				SCTP_REFHOLD(sctp);
293 				goto done;
294 			}
295 		/* no match; continue to the next in the chain */
296 		}
297 	}
298 
299 done:
300 	mutex_exit(&tf->tf_lock);
301 	return (sctp);
302 }
303 
304 static sctp_t *
305 listen_match(in6_addr_t *laddr, uint32_t ports, uint_t ipif_seqid,
306     zoneid_t zoneid)
307 {
308 	sctp_t			*sctp;
309 	sctp_tf_t		*tf;
310 	uint16_t		lport;
311 
312 	lport = ((uint16_t *)&ports)[1];
313 
314 	tf = &(sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
315 	mutex_enter(&tf->tf_lock);
316 
317 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
318 		if (lport != sctp->sctp_lport || (zoneid != ALL_ZONES &&
319 		    zoneid != sctp->sctp_zoneid)) {
320 			continue;
321 		}
322 
323 		if (ipif_seqid == 0) {
324 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
325 				SCTP_REFHOLD(sctp);
326 				goto done;
327 			}
328 		} else {
329 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
330 				SCTP_REFHOLD(sctp);
331 				goto done;
332 			}
333 		}
334 		/* no match; continue to the next in the chain */
335 	}
336 
337 done:
338 	mutex_exit(&tf->tf_lock);
339 	return (sctp);
340 }
341 
342 conn_t *
343 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
344     uint_t ipif_seqid, zoneid_t zoneid)
345 {
346 	sctp_t *sctp;
347 
348 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
349 	    zoneid)) == NULL) {
350 		/* Not in conn fanout; check listen fanout */
351 		if ((sctp = listen_match(dst, ports, ipif_seqid,
352 		    zoneid)) == NULL) {
353 			return (NULL);
354 		}
355 	}
356 	return (sctp->sctp_connp);
357 }
358 
359 /*
360  * Fanout for SCTP packets
361  * The caller puts <fport, lport> in the ports parameter.
362  */
363 /* ARGSUSED */
364 void
365 ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha,
366     uint32_t ports, uint_t flags, boolean_t mctl_present, boolean_t ip_policy,
367     uint_t ipif_seqid, zoneid_t zoneid)
368 {
369 	sctp_t *sctp;
370 	boolean_t isv4;
371 	conn_t *connp;
372 	mblk_t *first_mp;
373 	ip6_t *ip6h;
374 	in6_addr_t map_src, map_dst;
375 	in6_addr_t *src, *dst;
376 
377 	first_mp = mp;
378 	if (mctl_present) {
379 		mp = first_mp->b_cont;
380 		ASSERT(mp != NULL);
381 	}
382 
383 	/* Assume IP provides aligned packets - otherwise toss */
384 	if (!OK_32PTR(mp->b_rptr)) {
385 		BUMP_MIB(&ip_mib, ipInDiscards);
386 		freemsg(first_mp);
387 		return;
388 	}
389 
390 	if (IPH_HDR_VERSION(ipha) == IPV6_VERSION) {
391 		ip6h = (ip6_t *)ipha;
392 		src = &ip6h->ip6_src;
393 		dst = &ip6h->ip6_dst;
394 		isv4 = B_FALSE;
395 	} else {
396 		ip6h = NULL;
397 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
398 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
399 		src = &map_src;
400 		dst = &map_dst;
401 		isv4 = B_TRUE;
402 	}
403 	if ((connp = sctp_find_conn(src, dst, ports, ipif_seqid, zoneid)) ==
404 	    NULL) {
405 		ip_fanout_sctp_raw(mp, recv_ill, ipha, isv4,
406 		    ports, mctl_present, flags, ip_policy,
407 		    ipif_seqid, zoneid);
408 		return;
409 	}
410 	sctp = CONN2SCTP(connp);
411 
412 	/* Found a client; up it goes */
413 	BUMP_MIB(&ip_mib, ipInDelivers);
414 
415 	/*
416 	 * We check some fields in conn_t without holding a lock.
417 	 * This should be fine.
418 	 */
419 	if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) {
420 		first_mp = ipsec_check_inbound_policy(first_mp, connp,
421 		    ipha, NULL, mctl_present);
422 		if (first_mp == NULL) {
423 			SCTP_REFRELE(sctp);
424 			return;
425 		}
426 	}
427 
428 	/* Initiate IPPF processing for fastpath */
429 	if (IPP_ENABLED(IPP_LOCAL_IN)) {
430 		ip_process(IPP_LOCAL_IN, &mp,
431 		    recv_ill->ill_phyint->phyint_ifindex);
432 		if (mp == NULL) {
433 			SCTP_REFRELE(sctp);
434 			if (mctl_present)
435 				freeb(first_mp);
436 			return;
437 		} else if (mctl_present) {
438 			/*
439 			 * ip_process might return a new mp.
440 			 */
441 			ASSERT(first_mp != mp);
442 			first_mp->b_cont = mp;
443 		} else {
444 			first_mp = mp;
445 		}
446 	}
447 
448 	if (connp->conn_recvif || connp->conn_recvslla ||
449 	    connp->conn_ipv6_recvpktinfo) {
450 		int in_flags = 0;
451 
452 		if (connp->conn_recvif || connp->conn_ipv6_recvpktinfo) {
453 			in_flags = IPF_RECVIF;
454 		}
455 		if (connp->conn_recvslla) {
456 			in_flags |= IPF_RECVSLLA;
457 		}
458 		if (isv4) {
459 			mp = ip_add_info(mp, recv_ill, in_flags);
460 		} else {
461 			mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst);
462 		}
463 		if (mp == NULL) {
464 			SCTP_REFRELE(sctp);
465 			if (mctl_present)
466 				freeb(first_mp);
467 			return;
468 		} else if (mctl_present) {
469 			/*
470 			 * ip_add_info might return a new mp.
471 			 */
472 			ASSERT(first_mp != mp);
473 			first_mp->b_cont = mp;
474 		} else {
475 			first_mp = mp;
476 		}
477 	}
478 
479 	mutex_enter(&sctp->sctp_lock);
480 	if (sctp->sctp_running) {
481 		if (mctl_present)
482 			mp->b_prev = first_mp;
483 		if (!sctp_add_recvq(sctp, mp, B_FALSE)) {
484 			BUMP_MIB(&ip_mib, ipInDiscards);
485 			freemsg(first_mp);
486 		}
487 		mutex_exit(&sctp->sctp_lock);
488 	} else {
489 		sctp->sctp_running = B_TRUE;
490 		mutex_exit(&sctp->sctp_lock);
491 
492 		mutex_enter(&sctp->sctp_recvq_lock);
493 		if (sctp->sctp_recvq != NULL) {
494 			if (mctl_present)
495 				mp->b_prev = first_mp;
496 			if (!sctp_add_recvq(sctp, mp, B_TRUE)) {
497 				BUMP_MIB(&ip_mib, ipInDiscards);
498 				freemsg(first_mp);
499 			}
500 			mutex_exit(&sctp->sctp_recvq_lock);
501 			WAKE_SCTP(sctp);
502 		} else {
503 			mutex_exit(&sctp->sctp_recvq_lock);
504 			sctp_input_data(sctp, mp, (mctl_present ? first_mp :
505 			    NULL));
506 			WAKE_SCTP(sctp);
507 			sctp_process_sendq(sctp);
508 		}
509 	}
510 	SCTP_REFRELE(sctp);
511 }
512 
513 void
514 sctp_conn_hash_remove(sctp_t *sctp)
515 {
516 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
517 
518 	if (!tf) {
519 		return;
520 	}
521 	/*
522 	 * On a clustered note send this notification to the clustering
523 	 * subsystem.
524 	 */
525 	if (cl_sctp_disconnect != NULL) {
526 		(*cl_sctp_disconnect)(sctp->sctp_family,
527 		    (cl_sctp_handle_t)sctp);
528 	}
529 
530 	mutex_enter(&tf->tf_lock);
531 	ASSERT(tf->tf_sctp);
532 	if (tf->tf_sctp == sctp) {
533 		tf->tf_sctp = sctp->sctp_conn_hash_next;
534 		if (sctp->sctp_conn_hash_next) {
535 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
536 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
537 		}
538 	} else {
539 		ASSERT(sctp->sctp_conn_hash_prev);
540 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
541 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
542 		    sctp->sctp_conn_hash_next;
543 
544 		if (sctp->sctp_conn_hash_next) {
545 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
546 			    == sctp);
547 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
548 			    sctp->sctp_conn_hash_prev;
549 		}
550 	}
551 	sctp->sctp_conn_hash_next = NULL;
552 	sctp->sctp_conn_hash_prev = NULL;
553 	sctp->sctp_conn_tfp = NULL;
554 	mutex_exit(&tf->tf_lock);
555 }
556 
557 void
558 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
559 {
560 	if (sctp->sctp_conn_tfp) {
561 		sctp_conn_hash_remove(sctp);
562 	}
563 
564 	if (!caller_holds_lock) {
565 		mutex_enter(&tf->tf_lock);
566 	} else {
567 		ASSERT(MUTEX_HELD(&tf->tf_lock));
568 	}
569 
570 	sctp->sctp_conn_hash_next = tf->tf_sctp;
571 	if (tf->tf_sctp) {
572 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
573 	}
574 	sctp->sctp_conn_hash_prev = NULL;
575 	tf->tf_sctp = sctp;
576 	sctp->sctp_conn_tfp = tf;
577 	if (!caller_holds_lock) {
578 		mutex_exit(&tf->tf_lock);
579 	}
580 }
581 
582 void
583 sctp_listen_hash_remove(sctp_t *sctp)
584 {
585 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
586 
587 	if (!tf) {
588 		return;
589 	}
590 	/*
591 	 * On a clustered note send this notification to the clustering
592 	 * subsystem.
593 	 */
594 	if (cl_sctp_unlisten != NULL) {
595 		uchar_t	*slist;
596 		ssize_t	ssize;
597 
598 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
599 		slist = kmem_alloc(ssize, KM_SLEEP);
600 		sctp_get_saddr_list(sctp, slist, ssize);
601 		(*cl_sctp_unlisten)(sctp->sctp_family, slist,
602 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
603 		/* list will be freed by the clustering module */
604 	}
605 
606 	mutex_enter(&tf->tf_lock);
607 	ASSERT(tf->tf_sctp);
608 	if (tf->tf_sctp == sctp) {
609 		tf->tf_sctp = sctp->sctp_listen_hash_next;
610 		if (sctp->sctp_listen_hash_next) {
611 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
612 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
613 		}
614 	} else {
615 		ASSERT(sctp->sctp_listen_hash_prev);
616 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
617 		    sctp);
618 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
619 		    sctp->sctp_listen_hash_next;
620 
621 		if (sctp->sctp_listen_hash_next) {
622 			ASSERT(
623 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev ==
624 			    sctp);
625 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev =
626 			    sctp->sctp_listen_hash_prev;
627 		}
628 	}
629 	sctp->sctp_listen_hash_next = NULL;
630 	sctp->sctp_listen_hash_prev = NULL;
631 	sctp->sctp_listen_tfp = NULL;
632 	mutex_exit(&tf->tf_lock);
633 }
634 
635 void
636 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
637 {
638 	if (sctp->sctp_listen_tfp) {
639 		sctp_listen_hash_remove(sctp);
640 	}
641 
642 	mutex_enter(&tf->tf_lock);
643 	sctp->sctp_listen_hash_next = tf->tf_sctp;
644 	if (tf->tf_sctp) {
645 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
646 	}
647 	sctp->sctp_listen_hash_prev = NULL;
648 	tf->tf_sctp = sctp;
649 	sctp->sctp_listen_tfp = tf;
650 	mutex_exit(&tf->tf_lock);
651 	/*
652 	 * On a clustered note send this notification to the clustering
653 	 * subsystem.
654 	 */
655 	if (cl_sctp_listen != NULL) {
656 		uchar_t	*slist;
657 		ssize_t	ssize;
658 
659 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
660 		slist = kmem_alloc(ssize, KM_SLEEP);
661 		sctp_get_saddr_list(sctp, slist, ssize);
662 		(*cl_sctp_listen)(sctp->sctp_family, slist,
663 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
664 		/* list will be freed by the clustering module */
665 	}
666 }
667 
668 /*
669  * Hash list insertion routine for sctp_t structures.
670  * Inserts entries with the ones bound to a specific IP address first
671  * followed by those bound to INADDR_ANY.
672  */
673 void
674 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
675 {
676 	sctp_t	**sctpp;
677 	sctp_t	*sctpnext;
678 
679 	if (sctp->sctp_ptpbhn != NULL) {
680 		ASSERT(!caller_holds_lock);
681 		sctp_bind_hash_remove(sctp);
682 	}
683 	sctpp = &tbf->tf_sctp;
684 	if (!caller_holds_lock) {
685 		mutex_enter(&tbf->tf_lock);
686 	} else {
687 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
688 	}
689 	sctpnext = sctpp[0];
690 	if (sctpnext) {
691 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
692 	}
693 	sctp->sctp_bind_hash = sctpnext;
694 	sctp->sctp_ptpbhn = sctpp;
695 	sctpp[0] = sctp;
696 	/* For sctp_*_hash_remove */
697 	sctp->sctp_bind_lockp = &tbf->tf_lock;
698 	if (!caller_holds_lock)
699 		mutex_exit(&tbf->tf_lock);
700 }
701 
702 /*
703  * Hash list removal routine for sctp_t structures.
704  */
705 void
706 sctp_bind_hash_remove(sctp_t *sctp)
707 {
708 	sctp_t	*sctpnext;
709 	kmutex_t *lockp;
710 
711 	lockp = sctp->sctp_bind_lockp;
712 
713 	if (sctp->sctp_ptpbhn == NULL)
714 		return;
715 
716 	ASSERT(lockp != NULL);
717 	mutex_enter(lockp);
718 	if (sctp->sctp_ptpbhn) {
719 		sctpnext = sctp->sctp_bind_hash;
720 		if (sctpnext) {
721 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
722 			sctp->sctp_bind_hash = NULL;
723 		}
724 		*sctp->sctp_ptpbhn = sctpnext;
725 		sctp->sctp_ptpbhn = NULL;
726 	}
727 	mutex_exit(lockp);
728 	sctp->sctp_bind_lockp = NULL;
729 }
730 
731 /*
732  * Similar to but more general than ip_sctp's conn_match().
733  *
734  * Matches sets of addresses as follows: if the argument addr set is
735  * a complete subset of the corresponding addr set in the sctp_t, it
736  * is a match.
737  *
738  * Caller must hold tf->tf_lock.
739  *
740  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
741  */
742 sctp_t *
743 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
744     int min_state)
745 {
746 
747 	sctp_t *sctp;
748 	sctp_faddr_t *fp;
749 
750 	ASSERT(MUTEX_HELD(&tf->tf_lock));
751 
752 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
753 		if (*ports != sctp->sctp_ports || sctp->sctp_state <
754 		    min_state) {
755 			continue;
756 		}
757 
758 		/* check for faddr match */
759 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
760 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
761 				break;
762 			}
763 		}
764 
765 		if (!fp) {
766 			/* no faddr match; keep looking */
767 			continue;
768 		}
769 
770 		/* check for laddr subset match */
771 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_SUBSET) {
772 			goto done;
773 		}
774 
775 		/* no match; continue searching */
776 	}
777 
778 done:
779 	if (sctp) {
780 		SCTP_REFHOLD(sctp);
781 	}
782 	return (sctp);
783 }
784 
785 boolean_t
786 ip_fanout_sctp_raw_match(conn_t *connp, uint32_t ports, ipha_t *ipha)
787 {
788 	uint16_t lport;
789 
790 	if (connp->conn_fully_bound) {
791 		return (IPCL_CONN_MATCH(connp, IPPROTO_SCTP, ipha->ipha_src,
792 		    ipha->ipha_dst, ports));
793 	} else {
794 		lport = htons(ntohl(ports) & 0xFFFF);
795 		return (IPCL_BIND_MATCH(connp, IPPROTO_SCTP, ipha->ipha_dst,
796 		    lport));
797 	}
798 }
799 
800 boolean_t
801 ip_fanout_sctp_raw_match_v6(conn_t *connp, uint32_t ports, ip6_t *ip6h,
802     boolean_t for_v4)
803 {
804 	uint16_t lport;
805 	in6_addr_t	v6dst;
806 
807 	if (!for_v4 && connp->conn_fully_bound) {
808 		return (IPCL_CONN_MATCH_V6(connp, IPPROTO_SCTP, ip6h->ip6_src,
809 		    ip6h->ip6_dst, ports));
810 	} else {
811 		lport = htons(ntohl(ports) & 0xFFFF);
812 		if (for_v4)
813 			v6dst = ipv6_all_zeros;
814 		else
815 			v6dst = ip6h->ip6_dst;
816 		return (IPCL_BIND_MATCH_V6(connp, IPPROTO_SCTP, v6dst, lport));
817 	}
818 }
819