xref: /titanic_44/usr/src/uts/common/inet/sctp/sctp_hash.c (revision b98131cff90a91303826565dacf89c46a422e6c5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/socket.h>
28 #include <sys/ddi.h>
29 #include <sys/sunddi.h>
30 #include <sys/tsol/tndb.h>
31 #include <sys/tsol/tnet.h>
32 
33 #include <netinet/in.h>
34 #include <netinet/ip6.h>
35 
36 #include <inet/common.h>
37 #include <inet/ip.h>
38 #include <inet/ip6.h>
39 #include <inet/ipclassifier.h>
40 #include <inet/ipsec_impl.h>
41 #include <inet/ipp_common.h>
42 #include <inet/sctp_ip.h>
43 
44 #include "sctp_impl.h"
45 #include "sctp_addr.h"
46 
47 /* Default association hash size.  The size must be a power of 2. */
48 #define	SCTP_CONN_HASH_SIZE	8192
49 
50 uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */
51 
52 /*
53  * Cluster networking hook for traversing current assoc list.
54  * This routine is used to extract the current list of live associations
55  * which must continue to to be dispatched to this node.
56  */
57 int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
58     boolean_t);
59 static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
60     void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
61 
62 void
63 sctp_hash_init(sctp_stack_t *sctps)
64 {
65 	int i;
66 
67 	/* Start with /etc/system value */
68 	sctps->sctps_conn_hash_size = sctp_conn_hash_size;
69 
70 	if (sctps->sctps_conn_hash_size & (sctps->sctps_conn_hash_size - 1)) {
71 		/* Not a power of two. Round up to nearest power of two */
72 		for (i = 0; i < 31; i++) {
73 			if (sctps->sctps_conn_hash_size < (1 << i))
74 				break;
75 		}
76 		sctps->sctps_conn_hash_size = 1 << i;
77 	}
78 	if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
79 		sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
80 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
81 		    sctps->sctps_conn_hash_size);
82 	}
83 	sctps->sctps_conn_fanout =
84 	    (sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
85 	    sizeof (sctp_tf_t), KM_SLEEP);
86 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
87 		mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
88 		    MUTEX_DEFAULT, NULL);
89 	}
90 	sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
91 	    sizeof (sctp_tf_t),	KM_SLEEP);
92 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
93 		mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
94 		    MUTEX_DEFAULT, NULL);
95 	}
96 	sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
97 	    sizeof (sctp_tf_t),	KM_SLEEP);
98 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
99 		mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
100 		    MUTEX_DEFAULT, NULL);
101 	}
102 }
103 
104 void
105 sctp_hash_destroy(sctp_stack_t *sctps)
106 {
107 	int i;
108 
109 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
110 		mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
111 	}
112 	kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
113 	    sizeof (sctp_tf_t));
114 	sctps->sctps_conn_fanout = NULL;
115 
116 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
117 		mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
118 	}
119 	kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
120 	    sizeof (sctp_tf_t));
121 	sctps->sctps_listen_fanout = NULL;
122 
123 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
124 		mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
125 	}
126 	kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
127 	    sizeof (sctp_tf_t));
128 	sctps->sctps_bind_fanout = NULL;
129 }
130 
131 /*
132  * Exported routine for extracting active SCTP associations.
133  * Like TCP, we terminate the walk if the callback returns non-zero.
134  *
135  * Need to walk all sctp_stack_t instances since this clustering
136  * interface is assumed global for all instances
137  */
138 int
139 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
140     void *arg, boolean_t cansleep)
141 {
142 	netstack_handle_t nh;
143 	netstack_t *ns;
144 	int ret = 0;
145 
146 	netstack_next_init(&nh);
147 	while ((ns = netstack_next(&nh)) != NULL) {
148 		ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
149 		    ns->netstack_sctp);
150 		netstack_rele(ns);
151 	}
152 	netstack_next_fini(&nh);
153 	return (ret);
154 }
155 
156 static int
157 cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
158     void *arg, boolean_t cansleep, sctp_stack_t *sctps)
159 {
160 	sctp_t		*sctp;
161 	sctp_t		*sctp_prev;
162 	cl_sctp_info_t	cl_sctpi;
163 	uchar_t		*slist;
164 	uchar_t		*flist;
165 
166 	sctp_prev = NULL;
167 	mutex_enter(&sctps->sctps_g_lock);
168 	sctp = list_head(&sctps->sctps_g_list);
169 	while (sctp != NULL) {
170 		size_t	ssize;
171 		size_t	fsize;
172 
173 		mutex_enter(&sctp->sctp_reflock);
174 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
175 			mutex_exit(&sctp->sctp_reflock);
176 			sctp = list_next(&sctps->sctps_g_list, sctp);
177 			continue;
178 		}
179 		sctp->sctp_refcnt++;
180 		mutex_exit(&sctp->sctp_reflock);
181 		mutex_exit(&sctps->sctps_g_lock);
182 		if (sctp_prev != NULL)
183 			SCTP_REFRELE(sctp_prev);
184 		RUN_SCTP(sctp);
185 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
186 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
187 
188 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
189 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
190 		if (slist == NULL || flist == NULL) {
191 			WAKE_SCTP(sctp);
192 			if (slist != NULL)
193 				kmem_free(slist, ssize);
194 			if (flist != NULL)
195 				kmem_free(flist, fsize);
196 			SCTP_REFRELE(sctp);
197 			return (1);
198 		}
199 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
200 		sctp_get_saddr_list(sctp, slist, ssize);
201 		sctp_get_faddr_list(sctp, flist, fsize);
202 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
203 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
204 		cl_sctpi.cl_sctpi_family = sctp->sctp_connp->conn_family;
205 		if (cl_sctpi.cl_sctpi_family == AF_INET)
206 			cl_sctpi.cl_sctpi_ipversion = IPV4_VERSION;
207 		else
208 			cl_sctpi.cl_sctpi_ipversion = IPV6_VERSION;
209 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
210 		cl_sctpi.cl_sctpi_lport = sctp->sctp_connp->conn_lport;
211 		cl_sctpi.cl_sctpi_fport = sctp->sctp_connp->conn_fport;
212 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
213 		WAKE_SCTP(sctp);
214 		cl_sctpi.cl_sctpi_laddrp = slist;
215 		cl_sctpi.cl_sctpi_faddrp = flist;
216 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
217 			kmem_free(slist, ssize);
218 			kmem_free(flist, fsize);
219 			SCTP_REFRELE(sctp);
220 			return (1);
221 		}
222 		/* list will be freed by cl_callback */
223 		sctp_prev = sctp;
224 		mutex_enter(&sctps->sctps_g_lock);
225 		sctp = list_next(&sctps->sctps_g_list, sctp);
226 	}
227 	mutex_exit(&sctps->sctps_g_lock);
228 	if (sctp_prev != NULL)
229 		SCTP_REFRELE(sctp_prev);
230 	return (0);
231 }
232 
233 sctp_t *
234 sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
235     zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
236 {
237 	sctp_tf_t		*tf;
238 	sctp_t			*sctp;
239 	sctp_faddr_t		*fp;
240 	conn_t			*connp;
241 
242 	tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
243 	mutex_enter(&tf->tf_lock);
244 
245 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
246 		connp = sctp->sctp_connp;
247 		if (ports != connp->conn_ports)
248 			continue;
249 		if (!(connp->conn_zoneid == zoneid ||
250 		    connp->conn_allzones ||
251 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
252 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
253 		    (iraflags & IRAF_TX_SHARED_ADDR))))
254 			continue;
255 
256 		/* check for faddr match */
257 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
258 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
259 				break;
260 			}
261 		}
262 
263 		/* no faddr match; keep looking */
264 		if (fp == NULL)
265 			continue;
266 
267 		/* check for laddr match */
268 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
269 			SCTP_REFHOLD(sctp);
270 			goto done;
271 		}
272 		/* no match; continue to the next in the chain */
273 	}
274 
275 done:
276 	mutex_exit(&tf->tf_lock);
277 	return (sctp);
278 }
279 
280 static sctp_t *
281 listen_match(in6_addr_t *laddr, uint32_t ports, zoneid_t zoneid,
282     iaflags_t iraflags, sctp_stack_t *sctps)
283 {
284 	sctp_t			*sctp;
285 	sctp_tf_t		*tf;
286 	uint16_t		lport;
287 	conn_t			*connp;
288 
289 	lport = ((uint16_t *)&ports)[1];
290 
291 	tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
292 	mutex_enter(&tf->tf_lock);
293 
294 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
295 		connp = sctp->sctp_connp;
296 		if (lport != connp->conn_lport)
297 			continue;
298 
299 		if (!(connp->conn_zoneid == zoneid ||
300 		    connp->conn_allzones ||
301 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
302 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
303 		    (iraflags & IRAF_TX_SHARED_ADDR))))
304 			continue;
305 
306 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
307 			SCTP_REFHOLD(sctp);
308 			goto done;
309 		}
310 		/* no match; continue to the next in the chain */
311 	}
312 
313 done:
314 	mutex_exit(&tf->tf_lock);
315 	return (sctp);
316 }
317 
318 /* called by ipsec_sctp_pol */
319 conn_t *
320 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
321     zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
322 {
323 	sctp_t *sctp;
324 
325 	sctp = sctp_conn_match(src, dst, ports, zoneid, iraflags, sctps);
326 	if (sctp == NULL) {
327 		/* Not in conn fanout; check listen fanout */
328 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
329 		if (sctp == NULL)
330 			return (NULL);
331 	}
332 	return (sctp->sctp_connp);
333 }
334 
335 /*
336  * Fanout to a sctp instance.
337  */
338 conn_t *
339 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
340     ip_recv_attr_t *ira, mblk_t *mp, sctp_stack_t *sctps)
341 {
342 	zoneid_t zoneid = ira->ira_zoneid;
343 	iaflags_t iraflags = ira->ira_flags;
344 	sctp_t *sctp;
345 
346 	sctp = sctp_conn_match(src, dst, ports, zoneid, iraflags, sctps);
347 	if (sctp == NULL) {
348 		/* Not in conn fanout; check listen fanout */
349 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
350 		if (sctp == NULL)
351 			return (NULL);
352 		/*
353 		 * On systems running trusted extensions, check if dst
354 		 * should accept the packet. "IPV6_VERSION" indicates
355 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
356 		 * IPv6 addresses are supported.
357 		 */
358 		if ((iraflags & IRAF_SYSTEM_LABELED) &&
359 		    !tsol_receive_local(mp, dst, IPV6_VERSION, ira,
360 		    sctp->sctp_connp)) {
361 			DTRACE_PROBE3(
362 			    tx__ip__log__info__classify__sctp,
363 			    char *,
364 			    "connp(1) could not receive mp(2)",
365 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
366 			SCTP_REFRELE(sctp);
367 			return (NULL);
368 		}
369 	}
370 	/*
371 	 * For labeled systems, there's no need to check the
372 	 * label here.  It's known to be good as we checked
373 	 * before allowing the connection to become bound.
374 	 */
375 	return (sctp->sctp_connp);
376 }
377 
378 /*
379  * Fanout for ICMP errors for SCTP
380  * The caller puts <fport, lport> in the ports parameter.
381  */
382 void
383 ip_fanout_sctp(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, uint32_t ports,
384     ip_recv_attr_t *ira)
385 {
386 	sctp_t		*sctp;
387 	conn_t		*connp;
388 	in6_addr_t	map_src, map_dst;
389 	in6_addr_t	*src, *dst;
390 	boolean_t	secure;
391 	ill_t		*ill = ira->ira_ill;
392 	ip_stack_t	*ipst = ill->ill_ipst;
393 	netstack_t	*ns = ipst->ips_netstack;
394 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
395 	sctp_stack_t	*sctps = ns->netstack_sctp;
396 	iaflags_t	iraflags = ira->ira_flags;
397 	ill_t		*rill = ira->ira_rill;
398 
399 	ASSERT(iraflags & IRAF_ICMP_ERROR);
400 
401 	secure = iraflags & IRAF_IPSEC_SECURE;
402 
403 	/* Assume IP provides aligned packets - otherwise toss */
404 	if (!OK_32PTR(mp->b_rptr)) {
405 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
406 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
407 		freemsg(mp);
408 		return;
409 	}
410 
411 	if (!(iraflags & IRAF_IS_IPV4)) {
412 		src = &ip6h->ip6_src;
413 		dst = &ip6h->ip6_dst;
414 	} else {
415 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
416 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
417 		src = &map_src;
418 		dst = &map_dst;
419 	}
420 	connp = sctp_fanout(src, dst, ports, ira, mp, sctps);
421 	if (connp == NULL) {
422 		ip_fanout_sctp_raw(mp, ipha, ip6h, ports, ira);
423 		return;
424 	}
425 	sctp = CONN2SCTP(connp);
426 
427 	/*
428 	 * We check some fields in conn_t without holding a lock.
429 	 * This should be fine.
430 	 */
431 	if (((iraflags & IRAF_IS_IPV4) ?
432 	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
433 	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
434 	    secure) {
435 		mp = ipsec_check_inbound_policy(mp, connp, ipha,
436 		    ip6h, ira);
437 		if (mp == NULL) {
438 			SCTP_REFRELE(sctp);
439 			return;
440 		}
441 	}
442 
443 	ira->ira_ill = ira->ira_rill = NULL;
444 
445 	mutex_enter(&sctp->sctp_lock);
446 	if (sctp->sctp_running) {
447 		sctp_add_recvq(sctp, mp, B_FALSE, ira);
448 		mutex_exit(&sctp->sctp_lock);
449 	} else {
450 		sctp->sctp_running = B_TRUE;
451 		mutex_exit(&sctp->sctp_lock);
452 
453 		mutex_enter(&sctp->sctp_recvq_lock);
454 		if (sctp->sctp_recvq != NULL) {
455 			sctp_add_recvq(sctp, mp, B_TRUE, ira);
456 			mutex_exit(&sctp->sctp_recvq_lock);
457 			WAKE_SCTP(sctp);
458 		} else {
459 			mutex_exit(&sctp->sctp_recvq_lock);
460 			if (ira->ira_flags & IRAF_ICMP_ERROR) {
461 				sctp_icmp_error(sctp, mp);
462 			} else {
463 				sctp_input_data(sctp, mp, ira);
464 			}
465 			WAKE_SCTP(sctp);
466 		}
467 	}
468 	SCTP_REFRELE(sctp);
469 	ira->ira_ill = ill;
470 	ira->ira_rill = rill;
471 }
472 
473 void
474 sctp_conn_hash_remove(sctp_t *sctp)
475 {
476 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
477 
478 	if (!tf) {
479 		return;
480 	}
481 	/*
482 	 * On a clustered note send this notification to the clustering
483 	 * subsystem.
484 	 */
485 	if (cl_sctp_disconnect != NULL) {
486 		(*cl_sctp_disconnect)(sctp->sctp_connp->conn_family,
487 		    (cl_sctp_handle_t)sctp);
488 	}
489 
490 	mutex_enter(&tf->tf_lock);
491 	ASSERT(tf->tf_sctp);
492 	if (tf->tf_sctp == sctp) {
493 		tf->tf_sctp = sctp->sctp_conn_hash_next;
494 		if (sctp->sctp_conn_hash_next) {
495 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
496 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
497 		}
498 	} else {
499 		ASSERT(sctp->sctp_conn_hash_prev);
500 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
501 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
502 		    sctp->sctp_conn_hash_next;
503 
504 		if (sctp->sctp_conn_hash_next) {
505 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
506 			    == sctp);
507 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
508 			    sctp->sctp_conn_hash_prev;
509 		}
510 	}
511 	sctp->sctp_conn_hash_next = NULL;
512 	sctp->sctp_conn_hash_prev = NULL;
513 	sctp->sctp_conn_tfp = NULL;
514 	mutex_exit(&tf->tf_lock);
515 }
516 
517 void
518 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
519 {
520 	if (sctp->sctp_conn_tfp) {
521 		sctp_conn_hash_remove(sctp);
522 	}
523 
524 	if (!caller_holds_lock) {
525 		mutex_enter(&tf->tf_lock);
526 	} else {
527 		ASSERT(MUTEX_HELD(&tf->tf_lock));
528 	}
529 
530 	sctp->sctp_conn_hash_next = tf->tf_sctp;
531 	if (tf->tf_sctp) {
532 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
533 	}
534 	sctp->sctp_conn_hash_prev = NULL;
535 	tf->tf_sctp = sctp;
536 	sctp->sctp_conn_tfp = tf;
537 	if (!caller_holds_lock) {
538 		mutex_exit(&tf->tf_lock);
539 	}
540 }
541 
542 void
543 sctp_listen_hash_remove(sctp_t *sctp)
544 {
545 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
546 	conn_t	*connp = sctp->sctp_connp;
547 
548 	if (!tf) {
549 		return;
550 	}
551 	/*
552 	 * On a clustered note send this notification to the clustering
553 	 * subsystem.
554 	 */
555 	if (cl_sctp_unlisten != NULL) {
556 		uchar_t	*slist;
557 		ssize_t	ssize;
558 
559 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
560 		slist = kmem_alloc(ssize, KM_SLEEP);
561 		sctp_get_saddr_list(sctp, slist, ssize);
562 		(*cl_sctp_unlisten)(connp->conn_family, slist,
563 		    sctp->sctp_nsaddrs, connp->conn_lport);
564 		/* list will be freed by the clustering module */
565 	}
566 
567 	mutex_enter(&tf->tf_lock);
568 	ASSERT(tf->tf_sctp);
569 	if (tf->tf_sctp == sctp) {
570 		tf->tf_sctp = sctp->sctp_listen_hash_next;
571 		if (sctp->sctp_listen_hash_next != NULL) {
572 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
573 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
574 		}
575 	} else {
576 		ASSERT(sctp->sctp_listen_hash_prev);
577 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
578 		    sctp);
579 		ASSERT(sctp->sctp_listen_hash_next == NULL ||
580 		    sctp->sctp_listen_hash_next->sctp_listen_hash_prev == sctp);
581 
582 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
583 		    sctp->sctp_listen_hash_next;
584 
585 		if (sctp->sctp_listen_hash_next != NULL) {
586 			sctp_t *next = sctp->sctp_listen_hash_next;
587 
588 			ASSERT(next->sctp_listen_hash_prev == sctp);
589 			next->sctp_listen_hash_prev =
590 			    sctp->sctp_listen_hash_prev;
591 		}
592 	}
593 	sctp->sctp_listen_hash_next = NULL;
594 	sctp->sctp_listen_hash_prev = NULL;
595 	sctp->sctp_listen_tfp = NULL;
596 	mutex_exit(&tf->tf_lock);
597 }
598 
599 void
600 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
601 {
602 	conn_t	*connp = sctp->sctp_connp;
603 
604 	if (sctp->sctp_listen_tfp) {
605 		sctp_listen_hash_remove(sctp);
606 	}
607 
608 	mutex_enter(&tf->tf_lock);
609 	sctp->sctp_listen_hash_next = tf->tf_sctp;
610 	if (tf->tf_sctp) {
611 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
612 	}
613 	sctp->sctp_listen_hash_prev = NULL;
614 	tf->tf_sctp = sctp;
615 	sctp->sctp_listen_tfp = tf;
616 	mutex_exit(&tf->tf_lock);
617 	/*
618 	 * On a clustered note send this notification to the clustering
619 	 * subsystem.
620 	 */
621 	if (cl_sctp_listen != NULL) {
622 		uchar_t	*slist;
623 		ssize_t	ssize;
624 
625 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
626 		slist = kmem_alloc(ssize, KM_SLEEP);
627 		sctp_get_saddr_list(sctp, slist, ssize);
628 		(*cl_sctp_listen)(connp->conn_family, slist,
629 		    sctp->sctp_nsaddrs, connp->conn_lport);
630 		/* list will be freed by the clustering module */
631 	}
632 }
633 
634 /*
635  * Hash list insertion routine for sctp_t structures.
636  * Inserts entries with the ones bound to a specific IP address first
637  * followed by those bound to INADDR_ANY.
638  */
639 void
640 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
641 {
642 	sctp_t	**sctpp;
643 	sctp_t	*sctpnext;
644 
645 	if (sctp->sctp_ptpbhn != NULL) {
646 		ASSERT(!caller_holds_lock);
647 		sctp_bind_hash_remove(sctp);
648 	}
649 	sctpp = &tbf->tf_sctp;
650 	if (!caller_holds_lock) {
651 		mutex_enter(&tbf->tf_lock);
652 	} else {
653 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
654 	}
655 	sctpnext = sctpp[0];
656 	if (sctpnext) {
657 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
658 	}
659 	sctp->sctp_bind_hash = sctpnext;
660 	sctp->sctp_ptpbhn = sctpp;
661 	sctpp[0] = sctp;
662 	/* For sctp_*_hash_remove */
663 	sctp->sctp_bind_lockp = &tbf->tf_lock;
664 	if (!caller_holds_lock)
665 		mutex_exit(&tbf->tf_lock);
666 }
667 
668 /*
669  * Hash list removal routine for sctp_t structures.
670  */
671 void
672 sctp_bind_hash_remove(sctp_t *sctp)
673 {
674 	sctp_t	*sctpnext;
675 	kmutex_t *lockp;
676 
677 	lockp = sctp->sctp_bind_lockp;
678 
679 	if (sctp->sctp_ptpbhn == NULL)
680 		return;
681 
682 	ASSERT(lockp != NULL);
683 	mutex_enter(lockp);
684 	if (sctp->sctp_ptpbhn) {
685 		sctpnext = sctp->sctp_bind_hash;
686 		if (sctpnext) {
687 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
688 			sctp->sctp_bind_hash = NULL;
689 		}
690 		*sctp->sctp_ptpbhn = sctpnext;
691 		sctp->sctp_ptpbhn = NULL;
692 	}
693 	mutex_exit(lockp);
694 	sctp->sctp_bind_lockp = NULL;
695 }
696 
697 /*
698  * Similar to but different from sctp_conn_match().
699  *
700  * Matches sets of addresses as follows: if the argument addr set is
701  * a complete subset of the corresponding addr set in the sctp_t, it
702  * is a match.
703  *
704  * Caller must hold tf->tf_lock.
705  *
706  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
707  */
708 sctp_t *
709 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
710     int min_state)
711 {
712 	sctp_t *sctp;
713 	sctp_faddr_t *fp;
714 
715 	ASSERT(MUTEX_HELD(&tf->tf_lock));
716 
717 	for (sctp = tf->tf_sctp; sctp != NULL;
718 	    sctp = sctp->sctp_conn_hash_next) {
719 		if (*ports != sctp->sctp_connp->conn_ports ||
720 		    sctp->sctp_state < min_state) {
721 			continue;
722 		}
723 
724 		/* check for faddr match */
725 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
726 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
727 				break;
728 			}
729 		}
730 
731 		if (fp == NULL) {
732 			/* no faddr match; keep looking */
733 			continue;
734 		}
735 
736 		/*
737 		 * There is an existing association with the same peer
738 		 * address.  So now we need to check if our local address
739 		 * set overlaps with the one of the existing association.
740 		 * If they overlap, we should return it.
741 		 */
742 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_OVERLAP) {
743 			goto done;
744 		}
745 
746 		/* no match; continue searching */
747 	}
748 
749 done:
750 	if (sctp != NULL) {
751 		SCTP_REFHOLD(sctp);
752 	}
753 	return (sctp);
754 }
755