xref: /titanic_41/usr/src/uts/common/inet/sctp/sctp_hash.c (revision d89fccd8788afe1e920f842edd883fe192a1b8fe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/socket.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/tsol/tndb.h>
33 #include <sys/tsol/tnet.h>
34 
35 #include <netinet/in.h>
36 #include <netinet/ip6.h>
37 
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ipclassifier.h>
42 #include <inet/ipsec_impl.h>
43 #include <inet/ipp_common.h>
44 #include <inet/sctp_ip.h>
45 
46 #include "sctp_impl.h"
47 #include "sctp_addr.h"
48 
49 /* SCTP bind hash list - all sctp_t with state >= BOUND. */
50 sctp_tf_t	sctp_bind_fanout[SCTP_BIND_FANOUT_SIZE];
51 /* SCTP listen hash list - all sctp_t with state == LISTEN. */
52 sctp_tf_t	sctp_listen_fanout[SCTP_LISTEN_FANOUT_SIZE];
53 
54 /* Default association hash size.  The size must be a power of 2. */
55 #define	SCTP_CONN_HASH_SIZE	8192
56 
57 sctp_tf_t	*sctp_conn_fanout;
58 uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
59 
60 /*
61  * Cluster networking hook for traversing current assoc list.
62  * This routine is used to extract the current list of live associations
63  * which must continue to to be dispatched to this node.
64  */
65 int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
66     boolean_t);
67 
68 void
69 sctp_hash_init()
70 {
71 	int i;
72 
73 	if (sctp_conn_hash_size & (sctp_conn_hash_size - 1)) {
74 		/* Not a power of two. Round up to nearest power of two */
75 		for (i = 0; i < 31; i++) {
76 			if (sctp_conn_hash_size < (1 << i))
77 				break;
78 		}
79 		sctp_conn_hash_size = 1 << i;
80 	}
81 	if (sctp_conn_hash_size < SCTP_CONN_HASH_SIZE) {
82 		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
83 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
84 		    sctp_conn_hash_size);
85 	}
86 	sctp_conn_fanout =
87 		(sctp_tf_t *)kmem_zalloc(sctp_conn_hash_size *
88 		    sizeof (sctp_tf_t),	KM_SLEEP);
89 	for (i = 0; i < sctp_conn_hash_size; i++) {
90 		mutex_init(&sctp_conn_fanout[i].tf_lock, NULL,
91 			    MUTEX_DEFAULT, NULL);
92 	}
93 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
94 		mutex_init(&sctp_listen_fanout[i].tf_lock, NULL,
95 		    MUTEX_DEFAULT, NULL);
96 	}
97 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
98 		mutex_init(&sctp_bind_fanout[i].tf_lock, NULL,
99 		    MUTEX_DEFAULT, NULL);
100 	}
101 }
102 
103 void
104 sctp_hash_destroy()
105 {
106 	int i;
107 
108 	for (i = 0; i < sctp_conn_hash_size; i++) {
109 		mutex_destroy(&sctp_conn_fanout[i].tf_lock);
110 	}
111 	kmem_free(sctp_conn_fanout, sctp_conn_hash_size * sizeof (sctp_tf_t));
112 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
113 		mutex_destroy(&sctp_listen_fanout[i].tf_lock);
114 	}
115 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
116 		mutex_destroy(&sctp_bind_fanout[i].tf_lock);
117 	}
118 }
119 
120 /* Walk the SCTP global list and refrele the ire for this ipif */
121 void
122 sctp_ire_cache_flush(ipif_t *ipif)
123 {
124 	sctp_t			*sctp;
125 	sctp_t			*sctp_prev = NULL;
126 	sctp_faddr_t		*fp;
127 	conn_t			*connp;
128 	ire_t			*ire;
129 
130 	sctp = gsctp;
131 	mutex_enter(&sctp_g_lock);
132 	while (sctp != NULL) {
133 		mutex_enter(&sctp->sctp_reflock);
134 		if (sctp->sctp_condemned) {
135 			mutex_exit(&sctp->sctp_reflock);
136 			sctp = list_next(&sctp_g_list, sctp);
137 			continue;
138 		}
139 		sctp->sctp_refcnt++;
140 		mutex_exit(&sctp->sctp_reflock);
141 		mutex_exit(&sctp_g_lock);
142 		if (sctp_prev != NULL)
143 			SCTP_REFRELE(sctp_prev);
144 
145 		RUN_SCTP(sctp);
146 		connp = sctp->sctp_connp;
147 		mutex_enter(&connp->conn_lock);
148 		ire = connp->conn_ire_cache;
149 		if (ire != NULL &&
150 		    (ipif == NULL || ire->ire_ipif == ipif)) {
151 			connp->conn_ire_cache = NULL;
152 			mutex_exit(&connp->conn_lock);
153 			IRE_REFRELE_NOTR(ire);
154 		} else {
155 			mutex_exit(&connp->conn_lock);
156 		}
157 		/* check for ires cached in faddr */
158 		for (fp = sctp->sctp_faddrs; fp != NULL;
159 		    fp = fp->next) {
160 			ire = fp->ire;
161 			if (ire != NULL && (ipif == NULL ||
162 			    ire->ire_ipif == ipif)) {
163 				fp->ire = NULL;
164 				IRE_REFRELE_NOTR(ire);
165 			}
166 		}
167 		WAKE_SCTP(sctp);
168 		sctp_prev = sctp;
169 		mutex_enter(&sctp_g_lock);
170 		sctp = list_next(&sctp_g_list, sctp);
171 	}
172 	mutex_exit(&sctp_g_lock);
173 	if (sctp_prev != NULL)
174 		SCTP_REFRELE(sctp_prev);
175 }
176 
177 /*
178  * Exported routine for extracting active SCTP associations.
179  * Like TCP, we terminate the walk if the callback returns non-zero.
180  */
181 int
182 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg,
183     boolean_t cansleep)
184 {
185 	sctp_t		*sctp;
186 	sctp_t		*sctp_prev;
187 	cl_sctp_info_t	cl_sctpi;
188 	uchar_t		*slist;
189 	uchar_t		*flist;
190 
191 	sctp = gsctp;
192 	sctp_prev = NULL;
193 	mutex_enter(&sctp_g_lock);
194 	while (sctp != NULL) {
195 		size_t	ssize;
196 		size_t	fsize;
197 
198 		mutex_enter(&sctp->sctp_reflock);
199 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
200 			mutex_exit(&sctp->sctp_reflock);
201 			sctp = list_next(&sctp_g_list, sctp);
202 			continue;
203 		}
204 		sctp->sctp_refcnt++;
205 		mutex_exit(&sctp->sctp_reflock);
206 		mutex_exit(&sctp_g_lock);
207 		if (sctp_prev != NULL)
208 			SCTP_REFRELE(sctp_prev);
209 		RUN_SCTP(sctp);
210 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
211 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
212 
213 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
214 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
215 		if (slist == NULL || flist == NULL) {
216 			WAKE_SCTP(sctp);
217 			if (slist != NULL)
218 				kmem_free(slist, ssize);
219 			if (flist != NULL)
220 				kmem_free(flist, fsize);
221 			SCTP_REFRELE(sctp);
222 			return (1);
223 		}
224 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
225 		sctp_get_saddr_list(sctp, slist, ssize);
226 		sctp_get_faddr_list(sctp, flist, fsize);
227 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
228 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
229 		cl_sctpi.cl_sctpi_family = sctp->sctp_family;
230 		cl_sctpi.cl_sctpi_ipversion = sctp->sctp_ipversion;
231 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
232 		cl_sctpi.cl_sctpi_lport = sctp->sctp_lport;
233 		cl_sctpi.cl_sctpi_fport = sctp->sctp_fport;
234 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
235 		WAKE_SCTP(sctp);
236 		cl_sctpi.cl_sctpi_laddrp = slist;
237 		cl_sctpi.cl_sctpi_faddrp = flist;
238 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
239 			kmem_free(slist, ssize);
240 			kmem_free(flist, fsize);
241 			SCTP_REFRELE(sctp);
242 			return (1);
243 		}
244 		/* list will be freed by cl_callback */
245 		sctp_prev = sctp;
246 		mutex_enter(&sctp_g_lock);
247 		sctp = list_next(&sctp_g_list, sctp);
248 	}
249 	mutex_exit(&sctp_g_lock);
250 	if (sctp_prev != NULL)
251 		SCTP_REFRELE(sctp_prev);
252 	return (0);
253 }
254 
255 sctp_t *
256 sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
257     uint_t ipif_seqid, zoneid_t zoneid)
258 {
259 	sctp_tf_t		*tf;
260 	sctp_t			*sctp;
261 	sctp_faddr_t		*fp;
262 
263 	tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]);
264 	mutex_enter(&tf->tf_lock);
265 
266 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
267 		if (ports != sctp->sctp_ports || (zoneid != ALL_ZONES &&
268 		    zoneid != sctp->sctp_zoneid)) {
269 			continue;
270 		}
271 
272 		/* check for faddr match */
273 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
274 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
275 				break;
276 			}
277 		}
278 
279 		if (!fp) {
280 			/* no faddr match; keep looking */
281 			continue;
282 		}
283 
284 		/* check for laddr match */
285 		if (ipif_seqid == 0) {
286 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
287 				SCTP_REFHOLD(sctp);
288 				goto done;
289 			}
290 		} else {
291 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
292 				SCTP_REFHOLD(sctp);
293 				goto done;
294 			}
295 		/* no match; continue to the next in the chain */
296 		}
297 	}
298 
299 done:
300 	mutex_exit(&tf->tf_lock);
301 	return (sctp);
302 }
303 
304 static sctp_t *
305 listen_match(in6_addr_t *laddr, uint32_t ports, uint_t ipif_seqid,
306     zoneid_t zoneid)
307 {
308 	sctp_t			*sctp;
309 	sctp_tf_t		*tf;
310 	uint16_t		lport;
311 
312 	lport = ((uint16_t *)&ports)[1];
313 
314 	tf = &(sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
315 	mutex_enter(&tf->tf_lock);
316 
317 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
318 		if (lport != sctp->sctp_lport || (zoneid != ALL_ZONES &&
319 		    zoneid != sctp->sctp_zoneid)) {
320 			continue;
321 		}
322 
323 		if (ipif_seqid == 0) {
324 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
325 				SCTP_REFHOLD(sctp);
326 				goto done;
327 			}
328 		} else {
329 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
330 				SCTP_REFHOLD(sctp);
331 				goto done;
332 			}
333 		}
334 		/* no match; continue to the next in the chain */
335 	}
336 
337 done:
338 	mutex_exit(&tf->tf_lock);
339 	return (sctp);
340 }
341 
342 /* called by ipsec_sctp_pol */
343 conn_t *
344 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
345     uint_t ipif_seqid, zoneid_t zoneid)
346 {
347 	sctp_t *sctp;
348 
349 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
350 	    zoneid)) == NULL) {
351 		/* Not in conn fanout; check listen fanout */
352 		if ((sctp = listen_match(dst, ports, ipif_seqid,
353 		    zoneid)) == NULL) {
354 			return (NULL);
355 		}
356 	}
357 	return (sctp->sctp_connp);
358 }
359 
360 conn_t *
361 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
362     uint_t ipif_seqid, zoneid_t zoneid, mblk_t *mp)
363 {
364 	sctp_t *sctp;
365 
366 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
367 	    zoneid)) == NULL) {
368 		if (zoneid == ALL_ZONES) {
369 			zoneid = tsol_mlp_findzone(IPPROTO_SCTP,
370 			    htons(ntohl(ports) & 0xFFFF));
371 			/*
372 			 * If no shared MLP is found, tsol_mlp_findzone returns
373 			 * ALL_ZONES.  In that case, we assume it's SLP, and
374 			 * search for the zone based on the packet label.
375 			 * That will also return ALL_ZONES on failure.
376 			 */
377 			if (zoneid == ALL_ZONES)
378 				zoneid = tsol_packet_to_zoneid(mp);
379 			if (zoneid == ALL_ZONES)
380 				return (NULL);
381 		}
382 		/* Not in conn fanout; check listen fanout */
383 		if ((sctp = listen_match(dst, ports, ipif_seqid,
384 		    zoneid)) == NULL) {
385 			return (NULL);
386 		}
387 	}
388 	return (sctp->sctp_connp);
389 }
390 
391 /*
392  * Fanout for SCTP packets
393  * The caller puts <fport, lport> in the ports parameter.
394  */
395 /* ARGSUSED */
396 void
397 ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha,
398     uint32_t ports, uint_t flags, boolean_t mctl_present, boolean_t ip_policy,
399     uint_t ipif_seqid, zoneid_t zoneid)
400 {
401 	sctp_t *sctp;
402 	boolean_t isv4;
403 	conn_t *connp;
404 	mblk_t *first_mp;
405 	ip6_t *ip6h;
406 	in6_addr_t map_src, map_dst;
407 	in6_addr_t *src, *dst;
408 
409 	first_mp = mp;
410 	if (mctl_present) {
411 		mp = first_mp->b_cont;
412 		ASSERT(mp != NULL);
413 	}
414 
415 	/* Assume IP provides aligned packets - otherwise toss */
416 	if (!OK_32PTR(mp->b_rptr)) {
417 		BUMP_MIB(&ip_mib, ipInDiscards);
418 		freemsg(first_mp);
419 		return;
420 	}
421 
422 	if (IPH_HDR_VERSION(ipha) == IPV6_VERSION) {
423 		ip6h = (ip6_t *)ipha;
424 		src = &ip6h->ip6_src;
425 		dst = &ip6h->ip6_dst;
426 		isv4 = B_FALSE;
427 	} else {
428 		ip6h = NULL;
429 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
430 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
431 		src = &map_src;
432 		dst = &map_dst;
433 		isv4 = B_TRUE;
434 	}
435 	if ((connp = sctp_fanout(src, dst, ports, ipif_seqid, zoneid, mp)) ==
436 	    NULL) {
437 		ip_fanout_sctp_raw(mp, recv_ill, ipha, isv4,
438 		    ports, mctl_present, flags, ip_policy,
439 		    ipif_seqid, zoneid);
440 		return;
441 	}
442 	sctp = CONN2SCTP(connp);
443 
444 	/* Found a client; up it goes */
445 	BUMP_MIB(&ip_mib, ipInDelivers);
446 
447 	/*
448 	 * We check some fields in conn_t without holding a lock.
449 	 * This should be fine.
450 	 */
451 	if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) {
452 		first_mp = ipsec_check_inbound_policy(first_mp, connp,
453 		    ipha, NULL, mctl_present);
454 		if (first_mp == NULL) {
455 			SCTP_REFRELE(sctp);
456 			return;
457 		}
458 	}
459 
460 	/* Initiate IPPF processing for fastpath */
461 	if (IPP_ENABLED(IPP_LOCAL_IN)) {
462 		ip_process(IPP_LOCAL_IN, &mp,
463 		    recv_ill->ill_phyint->phyint_ifindex);
464 		if (mp == NULL) {
465 			SCTP_REFRELE(sctp);
466 			if (mctl_present)
467 				freeb(first_mp);
468 			return;
469 		} else if (mctl_present) {
470 			/*
471 			 * ip_process might return a new mp.
472 			 */
473 			ASSERT(first_mp != mp);
474 			first_mp->b_cont = mp;
475 		} else {
476 			first_mp = mp;
477 		}
478 	}
479 
480 	if (connp->conn_recvif || connp->conn_recvslla ||
481 	    connp->conn_ipv6_recvpktinfo) {
482 		int in_flags = 0;
483 
484 		if (connp->conn_recvif || connp->conn_ipv6_recvpktinfo) {
485 			in_flags = IPF_RECVIF;
486 		}
487 		if (connp->conn_recvslla) {
488 			in_flags |= IPF_RECVSLLA;
489 		}
490 		if (isv4) {
491 			mp = ip_add_info(mp, recv_ill, in_flags);
492 		} else {
493 			mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst);
494 		}
495 		if (mp == NULL) {
496 			SCTP_REFRELE(sctp);
497 			if (mctl_present)
498 				freeb(first_mp);
499 			return;
500 		} else if (mctl_present) {
501 			/*
502 			 * ip_add_info might return a new mp.
503 			 */
504 			ASSERT(first_mp != mp);
505 			first_mp->b_cont = mp;
506 		} else {
507 			first_mp = mp;
508 		}
509 	}
510 
511 	mutex_enter(&sctp->sctp_lock);
512 	if (sctp->sctp_running) {
513 		if (mctl_present)
514 			mp->b_prev = first_mp;
515 		if (!sctp_add_recvq(sctp, mp, B_FALSE)) {
516 			BUMP_MIB(&ip_mib, ipInDiscards);
517 			freemsg(first_mp);
518 		}
519 		mutex_exit(&sctp->sctp_lock);
520 	} else {
521 		sctp->sctp_running = B_TRUE;
522 		mutex_exit(&sctp->sctp_lock);
523 
524 		mutex_enter(&sctp->sctp_recvq_lock);
525 		if (sctp->sctp_recvq != NULL) {
526 			if (mctl_present)
527 				mp->b_prev = first_mp;
528 			if (!sctp_add_recvq(sctp, mp, B_TRUE)) {
529 				BUMP_MIB(&ip_mib, ipInDiscards);
530 				freemsg(first_mp);
531 			}
532 			mutex_exit(&sctp->sctp_recvq_lock);
533 			WAKE_SCTP(sctp);
534 		} else {
535 			mutex_exit(&sctp->sctp_recvq_lock);
536 			sctp_input_data(sctp, mp, (mctl_present ? first_mp :
537 			    NULL));
538 			WAKE_SCTP(sctp);
539 			sctp_process_sendq(sctp);
540 		}
541 	}
542 	SCTP_REFRELE(sctp);
543 }
544 
545 void
546 sctp_conn_hash_remove(sctp_t *sctp)
547 {
548 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
549 
550 	if (!tf) {
551 		return;
552 	}
553 	/*
554 	 * On a clustered note send this notification to the clustering
555 	 * subsystem.
556 	 */
557 	if (cl_sctp_disconnect != NULL) {
558 		(*cl_sctp_disconnect)(sctp->sctp_family,
559 		    (cl_sctp_handle_t)sctp);
560 	}
561 
562 	mutex_enter(&tf->tf_lock);
563 	ASSERT(tf->tf_sctp);
564 	if (tf->tf_sctp == sctp) {
565 		tf->tf_sctp = sctp->sctp_conn_hash_next;
566 		if (sctp->sctp_conn_hash_next) {
567 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
568 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
569 		}
570 	} else {
571 		ASSERT(sctp->sctp_conn_hash_prev);
572 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
573 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
574 		    sctp->sctp_conn_hash_next;
575 
576 		if (sctp->sctp_conn_hash_next) {
577 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
578 			    == sctp);
579 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
580 			    sctp->sctp_conn_hash_prev;
581 		}
582 	}
583 	sctp->sctp_conn_hash_next = NULL;
584 	sctp->sctp_conn_hash_prev = NULL;
585 	sctp->sctp_conn_tfp = NULL;
586 	mutex_exit(&tf->tf_lock);
587 }
588 
589 void
590 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
591 {
592 	if (sctp->sctp_conn_tfp) {
593 		sctp_conn_hash_remove(sctp);
594 	}
595 
596 	if (!caller_holds_lock) {
597 		mutex_enter(&tf->tf_lock);
598 	} else {
599 		ASSERT(MUTEX_HELD(&tf->tf_lock));
600 	}
601 
602 	sctp->sctp_conn_hash_next = tf->tf_sctp;
603 	if (tf->tf_sctp) {
604 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
605 	}
606 	sctp->sctp_conn_hash_prev = NULL;
607 	tf->tf_sctp = sctp;
608 	sctp->sctp_conn_tfp = tf;
609 	if (!caller_holds_lock) {
610 		mutex_exit(&tf->tf_lock);
611 	}
612 }
613 
614 void
615 sctp_listen_hash_remove(sctp_t *sctp)
616 {
617 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
618 
619 	if (!tf) {
620 		return;
621 	}
622 	/*
623 	 * On a clustered note send this notification to the clustering
624 	 * subsystem.
625 	 */
626 	if (cl_sctp_unlisten != NULL) {
627 		uchar_t	*slist;
628 		ssize_t	ssize;
629 
630 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
631 		slist = kmem_alloc(ssize, KM_SLEEP);
632 		sctp_get_saddr_list(sctp, slist, ssize);
633 		(*cl_sctp_unlisten)(sctp->sctp_family, slist,
634 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
635 		/* list will be freed by the clustering module */
636 	}
637 
638 	mutex_enter(&tf->tf_lock);
639 	ASSERT(tf->tf_sctp);
640 	if (tf->tf_sctp == sctp) {
641 		tf->tf_sctp = sctp->sctp_listen_hash_next;
642 		if (sctp->sctp_listen_hash_next) {
643 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
644 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
645 		}
646 	} else {
647 		ASSERT(sctp->sctp_listen_hash_prev);
648 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
649 		    sctp);
650 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
651 		    sctp->sctp_listen_hash_next;
652 
653 		if (sctp->sctp_listen_hash_next) {
654 			ASSERT(
655 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev ==
656 			    sctp);
657 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev =
658 			    sctp->sctp_listen_hash_prev;
659 		}
660 	}
661 	sctp->sctp_listen_hash_next = NULL;
662 	sctp->sctp_listen_hash_prev = NULL;
663 	sctp->sctp_listen_tfp = NULL;
664 	mutex_exit(&tf->tf_lock);
665 }
666 
667 void
668 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
669 {
670 	if (sctp->sctp_listen_tfp) {
671 		sctp_listen_hash_remove(sctp);
672 	}
673 
674 	mutex_enter(&tf->tf_lock);
675 	sctp->sctp_listen_hash_next = tf->tf_sctp;
676 	if (tf->tf_sctp) {
677 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
678 	}
679 	sctp->sctp_listen_hash_prev = NULL;
680 	tf->tf_sctp = sctp;
681 	sctp->sctp_listen_tfp = tf;
682 	mutex_exit(&tf->tf_lock);
683 	/*
684 	 * On a clustered note send this notification to the clustering
685 	 * subsystem.
686 	 */
687 	if (cl_sctp_listen != NULL) {
688 		uchar_t	*slist;
689 		ssize_t	ssize;
690 
691 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
692 		slist = kmem_alloc(ssize, KM_SLEEP);
693 		sctp_get_saddr_list(sctp, slist, ssize);
694 		(*cl_sctp_listen)(sctp->sctp_family, slist,
695 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
696 		/* list will be freed by the clustering module */
697 	}
698 }
699 
700 /*
701  * Hash list insertion routine for sctp_t structures.
702  * Inserts entries with the ones bound to a specific IP address first
703  * followed by those bound to INADDR_ANY.
704  */
705 void
706 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
707 {
708 	sctp_t	**sctpp;
709 	sctp_t	*sctpnext;
710 
711 	if (sctp->sctp_ptpbhn != NULL) {
712 		ASSERT(!caller_holds_lock);
713 		sctp_bind_hash_remove(sctp);
714 	}
715 	sctpp = &tbf->tf_sctp;
716 	if (!caller_holds_lock) {
717 		mutex_enter(&tbf->tf_lock);
718 	} else {
719 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
720 	}
721 	sctpnext = sctpp[0];
722 	if (sctpnext) {
723 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
724 	}
725 	sctp->sctp_bind_hash = sctpnext;
726 	sctp->sctp_ptpbhn = sctpp;
727 	sctpp[0] = sctp;
728 	/* For sctp_*_hash_remove */
729 	sctp->sctp_bind_lockp = &tbf->tf_lock;
730 	if (!caller_holds_lock)
731 		mutex_exit(&tbf->tf_lock);
732 }
733 
734 /*
735  * Hash list removal routine for sctp_t structures.
736  */
737 void
738 sctp_bind_hash_remove(sctp_t *sctp)
739 {
740 	sctp_t	*sctpnext;
741 	kmutex_t *lockp;
742 
743 	lockp = sctp->sctp_bind_lockp;
744 
745 	if (sctp->sctp_ptpbhn == NULL)
746 		return;
747 
748 	ASSERT(lockp != NULL);
749 	mutex_enter(lockp);
750 	if (sctp->sctp_ptpbhn) {
751 		sctpnext = sctp->sctp_bind_hash;
752 		if (sctpnext) {
753 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
754 			sctp->sctp_bind_hash = NULL;
755 		}
756 		*sctp->sctp_ptpbhn = sctpnext;
757 		sctp->sctp_ptpbhn = NULL;
758 	}
759 	mutex_exit(lockp);
760 	sctp->sctp_bind_lockp = NULL;
761 }
762 
763 /*
764  * Similar to but more general than ip_sctp's conn_match().
765  *
766  * Matches sets of addresses as follows: if the argument addr set is
767  * a complete subset of the corresponding addr set in the sctp_t, it
768  * is a match.
769  *
770  * Caller must hold tf->tf_lock.
771  *
772  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
773  */
774 sctp_t *
775 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
776     int min_state)
777 {
778 
779 	sctp_t *sctp;
780 	sctp_faddr_t *fp;
781 
782 	ASSERT(MUTEX_HELD(&tf->tf_lock));
783 
784 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
785 		if (*ports != sctp->sctp_ports || sctp->sctp_state <
786 		    min_state) {
787 			continue;
788 		}
789 
790 		/* check for faddr match */
791 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
792 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
793 				break;
794 			}
795 		}
796 
797 		if (!fp) {
798 			/* no faddr match; keep looking */
799 			continue;
800 		}
801 
802 		/* check for laddr subset match */
803 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_SUBSET) {
804 			goto done;
805 		}
806 
807 		/* no match; continue searching */
808 	}
809 
810 done:
811 	if (sctp) {
812 		SCTP_REFHOLD(sctp);
813 	}
814 	return (sctp);
815 }
816 
817 boolean_t
818 ip_fanout_sctp_raw_match(conn_t *connp, uint32_t ports, ipha_t *ipha)
819 {
820 	uint16_t lport;
821 
822 	if (connp->conn_fully_bound) {
823 		return (IPCL_CONN_MATCH(connp, IPPROTO_SCTP, ipha->ipha_src,
824 		    ipha->ipha_dst, ports));
825 	} else {
826 		lport = htons(ntohl(ports) & 0xFFFF);
827 		return (IPCL_BIND_MATCH(connp, IPPROTO_SCTP, ipha->ipha_dst,
828 		    lport));
829 	}
830 }
831 
832 boolean_t
833 ip_fanout_sctp_raw_match_v6(conn_t *connp, uint32_t ports, ip6_t *ip6h,
834     boolean_t for_v4)
835 {
836 	uint16_t lport;
837 	in6_addr_t	v6dst;
838 
839 	if (!for_v4 && connp->conn_fully_bound) {
840 		return (IPCL_CONN_MATCH_V6(connp, IPPROTO_SCTP, ip6h->ip6_src,
841 		    ip6h->ip6_dst, ports));
842 	} else {
843 		lport = htons(ntohl(ports) & 0xFFFF);
844 		if (for_v4)
845 			v6dst = ipv6_all_zeros;
846 		else
847 			v6dst = ip6h->ip6_dst;
848 		return (IPCL_BIND_MATCH_V6(connp, IPPROTO_SCTP, v6dst, lport));
849 	}
850 }
851