xref: /titanic_50/usr/src/uts/common/inet/sctp/sctp_hash.c (revision facf4a8d7b59fde89a8662b4f4c73a758e6c402c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/socket.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/tsol/tndb.h>
33 #include <sys/tsol/tnet.h>
34 
35 #include <netinet/in.h>
36 #include <netinet/ip6.h>
37 
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ipclassifier.h>
42 #include <inet/ipsec_impl.h>
43 #include <inet/ipp_common.h>
44 #include <inet/sctp_ip.h>
45 
46 #include "sctp_impl.h"
47 #include "sctp_addr.h"
48 
/* SCTP bind hash list - all sctp_t with state >= BOUND. */
sctp_tf_t	sctp_bind_fanout[SCTP_BIND_FANOUT_SIZE];
/* SCTP listen hash list - all sctp_t with state == LISTEN. */
sctp_tf_t	sctp_listen_fanout[SCTP_LISTEN_FANOUT_SIZE];

/*
 * Default association hash size.  The size must be a power of 2.
 * sctp_hash_init() rounds sctp_conn_hash_size up to a power of two when it
 * is not one, and never uses fewer buckets than this default.
 */
#define	SCTP_CONN_HASH_SIZE	8192

/*
 * Association hash table and its number of buckets.  The table itself is
 * allocated by sctp_hash_init() and freed by sctp_hash_destroy().
 */
sctp_tf_t	*sctp_conn_fanout;
uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;

/*
 * Cluster networking hook for traversing current assoc list.
 * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
 */
int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
    boolean_t);
67 
68 void
69 sctp_hash_init()
70 {
71 	int i;
72 
73 	if (sctp_conn_hash_size & (sctp_conn_hash_size - 1)) {
74 		/* Not a power of two. Round up to nearest power of two */
75 		for (i = 0; i < 31; i++) {
76 			if (sctp_conn_hash_size < (1 << i))
77 				break;
78 		}
79 		sctp_conn_hash_size = 1 << i;
80 	}
81 	if (sctp_conn_hash_size < SCTP_CONN_HASH_SIZE) {
82 		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
83 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
84 		    sctp_conn_hash_size);
85 	}
86 	sctp_conn_fanout =
87 		(sctp_tf_t *)kmem_zalloc(sctp_conn_hash_size *
88 		    sizeof (sctp_tf_t),	KM_SLEEP);
89 	for (i = 0; i < sctp_conn_hash_size; i++) {
90 		mutex_init(&sctp_conn_fanout[i].tf_lock, NULL,
91 			    MUTEX_DEFAULT, NULL);
92 	}
93 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
94 		mutex_init(&sctp_listen_fanout[i].tf_lock, NULL,
95 		    MUTEX_DEFAULT, NULL);
96 	}
97 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
98 		mutex_init(&sctp_bind_fanout[i].tf_lock, NULL,
99 		    MUTEX_DEFAULT, NULL);
100 	}
101 }
102 
103 void
104 sctp_hash_destroy()
105 {
106 	int i;
107 
108 	for (i = 0; i < sctp_conn_hash_size; i++) {
109 		mutex_destroy(&sctp_conn_fanout[i].tf_lock);
110 	}
111 	kmem_free(sctp_conn_fanout, sctp_conn_hash_size * sizeof (sctp_tf_t));
112 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
113 		mutex_destroy(&sctp_listen_fanout[i].tf_lock);
114 	}
115 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
116 		mutex_destroy(&sctp_bind_fanout[i].tf_lock);
117 	}
118 }
119 
120 /*
121  * Walk the SCTP global list and refrele the ire for this ipif
122  * This is called when an address goes down, so that we release any reference
123  * to the ire associated with this address. Additionally, for any SCTP if
124  * this was the only/last address in its source list, we don't kill the
125  * assoc., if there is no address added subsequently, or if this does not
126  * come up, then the assoc. will die a natural death (i.e. timeout).
127  */
128 void
129 sctp_ire_cache_flush(ipif_t *ipif)
130 {
131 	sctp_t			*sctp;
132 	sctp_t			*sctp_prev = NULL;
133 	sctp_faddr_t		*fp;
134 	conn_t			*connp;
135 	ire_t			*ire;
136 
137 	sctp = gsctp;
138 	mutex_enter(&sctp_g_lock);
139 	while (sctp != NULL) {
140 		mutex_enter(&sctp->sctp_reflock);
141 		if (sctp->sctp_condemned) {
142 			mutex_exit(&sctp->sctp_reflock);
143 			sctp = list_next(&sctp_g_list, sctp);
144 			continue;
145 		}
146 		sctp->sctp_refcnt++;
147 		mutex_exit(&sctp->sctp_reflock);
148 		mutex_exit(&sctp_g_lock);
149 		if (sctp_prev != NULL)
150 			SCTP_REFRELE(sctp_prev);
151 
152 		RUN_SCTP(sctp);
153 		connp = sctp->sctp_connp;
154 		mutex_enter(&connp->conn_lock);
155 		ire = connp->conn_ire_cache;
156 		if (ire != NULL && ire->ire_ipif == ipif) {
157 			connp->conn_ire_cache = NULL;
158 			mutex_exit(&connp->conn_lock);
159 			IRE_REFRELE_NOTR(ire);
160 		} else {
161 			mutex_exit(&connp->conn_lock);
162 		}
163 		/* check for ires cached in faddr */
164 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
165 			/*
166 			 * If this ipif is being used as the source address
167 			 * we need to update it as well, else we will end
168 			 * up using the dead source address.
169 			 */
170 			ire = fp->ire;
171 			if (ire != NULL && ire->ire_ipif == ipif) {
172 				fp->ire = NULL;
173 				IRE_REFRELE_NOTR(ire);
174 			}
175 			/*
176 			 * This may result in setting the fp as unreachable,
177 			 * i.e. if all the source addresses are down. In
178 			 * that case the assoc. would timeout.
179 			 */
180 			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
181 			    &fp->saddr)) {
182 				sctp_set_saddr(sctp, fp);
183 				if (fp == sctp->sctp_current &&
184 				    fp->state != SCTP_FADDRS_UNREACH) {
185 					sctp_set_faddr_current(sctp, fp);
186 				}
187 			}
188 		}
189 		WAKE_SCTP(sctp);
190 		sctp_prev = sctp;
191 		mutex_enter(&sctp_g_lock);
192 		sctp = list_next(&sctp_g_list, sctp);
193 	}
194 	mutex_exit(&sctp_g_lock);
195 	if (sctp_prev != NULL)
196 		SCTP_REFRELE(sctp_prev);
197 }
198 
199 /*
200  * Exported routine for extracting active SCTP associations.
201  * Like TCP, we terminate the walk if the callback returns non-zero.
202  */
203 int
204 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg,
205     boolean_t cansleep)
206 {
207 	sctp_t		*sctp;
208 	sctp_t		*sctp_prev;
209 	cl_sctp_info_t	cl_sctpi;
210 	uchar_t		*slist;
211 	uchar_t		*flist;
212 
213 	sctp = gsctp;
214 	sctp_prev = NULL;
215 	mutex_enter(&sctp_g_lock);
216 	while (sctp != NULL) {
217 		size_t	ssize;
218 		size_t	fsize;
219 
220 		mutex_enter(&sctp->sctp_reflock);
221 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
222 			mutex_exit(&sctp->sctp_reflock);
223 			sctp = list_next(&sctp_g_list, sctp);
224 			continue;
225 		}
226 		sctp->sctp_refcnt++;
227 		mutex_exit(&sctp->sctp_reflock);
228 		mutex_exit(&sctp_g_lock);
229 		if (sctp_prev != NULL)
230 			SCTP_REFRELE(sctp_prev);
231 		RUN_SCTP(sctp);
232 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
233 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
234 
235 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
236 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
237 		if (slist == NULL || flist == NULL) {
238 			WAKE_SCTP(sctp);
239 			if (slist != NULL)
240 				kmem_free(slist, ssize);
241 			if (flist != NULL)
242 				kmem_free(flist, fsize);
243 			SCTP_REFRELE(sctp);
244 			return (1);
245 		}
246 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
247 		sctp_get_saddr_list(sctp, slist, ssize);
248 		sctp_get_faddr_list(sctp, flist, fsize);
249 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
250 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
251 		cl_sctpi.cl_sctpi_family = sctp->sctp_family;
252 		cl_sctpi.cl_sctpi_ipversion = sctp->sctp_ipversion;
253 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
254 		cl_sctpi.cl_sctpi_lport = sctp->sctp_lport;
255 		cl_sctpi.cl_sctpi_fport = sctp->sctp_fport;
256 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
257 		WAKE_SCTP(sctp);
258 		cl_sctpi.cl_sctpi_laddrp = slist;
259 		cl_sctpi.cl_sctpi_faddrp = flist;
260 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
261 			kmem_free(slist, ssize);
262 			kmem_free(flist, fsize);
263 			SCTP_REFRELE(sctp);
264 			return (1);
265 		}
266 		/* list will be freed by cl_callback */
267 		sctp_prev = sctp;
268 		mutex_enter(&sctp_g_lock);
269 		sctp = list_next(&sctp_g_list, sctp);
270 	}
271 	mutex_exit(&sctp_g_lock);
272 	if (sctp_prev != NULL)
273 		SCTP_REFRELE(sctp_prev);
274 	return (0);
275 }
276 
277 sctp_t *
278 sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
279     uint_t ipif_seqid, zoneid_t zoneid)
280 {
281 	sctp_tf_t		*tf;
282 	sctp_t			*sctp;
283 	sctp_faddr_t		*fp;
284 
285 	tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]);
286 	mutex_enter(&tf->tf_lock);
287 
288 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
289 		if (ports != sctp->sctp_ports ||
290 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
291 			continue;
292 		}
293 
294 		/* check for faddr match */
295 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
296 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
297 				break;
298 			}
299 		}
300 
301 		if (!fp) {
302 			/* no faddr match; keep looking */
303 			continue;
304 		}
305 
306 		/* check for laddr match */
307 		if (ipif_seqid == 0) {
308 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
309 				SCTP_REFHOLD(sctp);
310 				goto done;
311 			}
312 		} else {
313 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
314 				SCTP_REFHOLD(sctp);
315 				goto done;
316 			}
317 		/* no match; continue to the next in the chain */
318 		}
319 	}
320 
321 done:
322 	mutex_exit(&tf->tf_lock);
323 	return (sctp);
324 }
325 
326 static sctp_t *
327 listen_match(in6_addr_t *laddr, uint32_t ports, uint_t ipif_seqid,
328     zoneid_t zoneid)
329 {
330 	sctp_t			*sctp;
331 	sctp_tf_t		*tf;
332 	uint16_t		lport;
333 
334 	lport = ((uint16_t *)&ports)[1];
335 
336 	tf = &(sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
337 	mutex_enter(&tf->tf_lock);
338 
339 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
340 		if (lport != sctp->sctp_lport ||
341 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
342 			continue;
343 		}
344 
345 		if (ipif_seqid == 0) {
346 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
347 				SCTP_REFHOLD(sctp);
348 				goto done;
349 			}
350 		} else {
351 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
352 				SCTP_REFHOLD(sctp);
353 				goto done;
354 			}
355 		}
356 		/* no match; continue to the next in the chain */
357 	}
358 
359 done:
360 	mutex_exit(&tf->tf_lock);
361 	return (sctp);
362 }
363 
364 /* called by ipsec_sctp_pol */
365 conn_t *
366 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
367     uint_t ipif_seqid, zoneid_t zoneid)
368 {
369 	sctp_t *sctp;
370 
371 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
372 	    zoneid)) == NULL) {
373 		/* Not in conn fanout; check listen fanout */
374 		if ((sctp = listen_match(dst, ports, ipif_seqid,
375 		    zoneid)) == NULL) {
376 			return (NULL);
377 		}
378 	}
379 	return (sctp->sctp_connp);
380 }
381 
382 conn_t *
383 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
384     uint_t ipif_seqid, zoneid_t zoneid, mblk_t *mp)
385 {
386 	sctp_t *sctp;
387 
388 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
389 	    zoneid)) == NULL) {
390 		if (zoneid == ALL_ZONES) {
391 			zoneid = tsol_mlp_findzone(IPPROTO_SCTP,
392 			    htons(ntohl(ports) & 0xFFFF));
393 			/*
394 			 * If no shared MLP is found, tsol_mlp_findzone returns
395 			 * ALL_ZONES.  In that case, we assume it's SLP, and
396 			 * search for the zone based on the packet label.
397 			 * That will also return ALL_ZONES on failure.
398 			 */
399 			if (zoneid == ALL_ZONES)
400 				zoneid = tsol_packet_to_zoneid(mp);
401 			if (zoneid == ALL_ZONES)
402 				return (NULL);
403 		}
404 		/* Not in conn fanout; check listen fanout */
405 		if ((sctp = listen_match(dst, ports, ipif_seqid,
406 		    zoneid)) == NULL) {
407 			return (NULL);
408 		}
409 	}
410 	return (sctp->sctp_connp);
411 }
412 
413 /*
414  * Fanout for SCTP packets
415  * The caller puts <fport, lport> in the ports parameter.
416  */
417 /* ARGSUSED */
418 void
419 ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha,
420     uint32_t ports, uint_t flags, boolean_t mctl_present, boolean_t ip_policy,
421     uint_t ipif_seqid, zoneid_t zoneid)
422 {
423 	sctp_t *sctp;
424 	boolean_t isv4;
425 	conn_t *connp;
426 	mblk_t *first_mp;
427 	ip6_t *ip6h;
428 	in6_addr_t map_src, map_dst;
429 	in6_addr_t *src, *dst;
430 
431 	first_mp = mp;
432 	if (mctl_present) {
433 		mp = first_mp->b_cont;
434 		ASSERT(mp != NULL);
435 	}
436 
437 	/* Assume IP provides aligned packets - otherwise toss */
438 	if (!OK_32PTR(mp->b_rptr)) {
439 		BUMP_MIB(&ip_mib, ipInDiscards);
440 		freemsg(first_mp);
441 		return;
442 	}
443 
444 	if (IPH_HDR_VERSION(ipha) == IPV6_VERSION) {
445 		ip6h = (ip6_t *)ipha;
446 		src = &ip6h->ip6_src;
447 		dst = &ip6h->ip6_dst;
448 		isv4 = B_FALSE;
449 	} else {
450 		ip6h = NULL;
451 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
452 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
453 		src = &map_src;
454 		dst = &map_dst;
455 		isv4 = B_TRUE;
456 	}
457 	if ((connp = sctp_fanout(src, dst, ports, ipif_seqid, zoneid, mp)) ==
458 	    NULL) {
459 		ip_fanout_sctp_raw(first_mp, recv_ill, ipha, isv4,
460 		    ports, mctl_present, flags, ip_policy,
461 		    ipif_seqid, zoneid);
462 		return;
463 	}
464 	sctp = CONN2SCTP(connp);
465 
466 	/* Found a client; up it goes */
467 	BUMP_MIB(&ip_mib, ipInDelivers);
468 
469 	/*
470 	 * We check some fields in conn_t without holding a lock.
471 	 * This should be fine.
472 	 */
473 	if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) {
474 		first_mp = ipsec_check_inbound_policy(first_mp, connp,
475 		    ipha, NULL, mctl_present);
476 		if (first_mp == NULL) {
477 			SCTP_REFRELE(sctp);
478 			return;
479 		}
480 	}
481 
482 	/* Initiate IPPF processing for fastpath */
483 	if (IPP_ENABLED(IPP_LOCAL_IN)) {
484 		ip_process(IPP_LOCAL_IN, &mp,
485 		    recv_ill->ill_phyint->phyint_ifindex);
486 		if (mp == NULL) {
487 			SCTP_REFRELE(sctp);
488 			if (mctl_present)
489 				freeb(first_mp);
490 			return;
491 		} else if (mctl_present) {
492 			/*
493 			 * ip_process might return a new mp.
494 			 */
495 			ASSERT(first_mp != mp);
496 			first_mp->b_cont = mp;
497 		} else {
498 			first_mp = mp;
499 		}
500 	}
501 
502 	if (connp->conn_recvif || connp->conn_recvslla ||
503 	    connp->conn_ipv6_recvpktinfo) {
504 		int in_flags = 0;
505 
506 		if (connp->conn_recvif || connp->conn_ipv6_recvpktinfo) {
507 			in_flags = IPF_RECVIF;
508 		}
509 		if (connp->conn_recvslla) {
510 			in_flags |= IPF_RECVSLLA;
511 		}
512 		if (isv4) {
513 			mp = ip_add_info(mp, recv_ill, in_flags);
514 		} else {
515 			mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst);
516 		}
517 		if (mp == NULL) {
518 			SCTP_REFRELE(sctp);
519 			if (mctl_present)
520 				freeb(first_mp);
521 			return;
522 		} else if (mctl_present) {
523 			/*
524 			 * ip_add_info might return a new mp.
525 			 */
526 			ASSERT(first_mp != mp);
527 			first_mp->b_cont = mp;
528 		} else {
529 			first_mp = mp;
530 		}
531 	}
532 
533 	mutex_enter(&sctp->sctp_lock);
534 	if (sctp->sctp_running) {
535 		if (mctl_present)
536 			mp->b_prev = first_mp;
537 		if (!sctp_add_recvq(sctp, mp, B_FALSE)) {
538 			BUMP_MIB(&ip_mib, ipInDiscards);
539 			freemsg(first_mp);
540 		}
541 		mutex_exit(&sctp->sctp_lock);
542 	} else {
543 		sctp->sctp_running = B_TRUE;
544 		mutex_exit(&sctp->sctp_lock);
545 
546 		mutex_enter(&sctp->sctp_recvq_lock);
547 		if (sctp->sctp_recvq != NULL) {
548 			if (mctl_present)
549 				mp->b_prev = first_mp;
550 			if (!sctp_add_recvq(sctp, mp, B_TRUE)) {
551 				BUMP_MIB(&ip_mib, ipInDiscards);
552 				freemsg(first_mp);
553 			}
554 			mutex_exit(&sctp->sctp_recvq_lock);
555 			WAKE_SCTP(sctp);
556 		} else {
557 			mutex_exit(&sctp->sctp_recvq_lock);
558 			sctp_input_data(sctp, mp, (mctl_present ? first_mp :
559 			    NULL));
560 			WAKE_SCTP(sctp);
561 			sctp_process_sendq(sctp);
562 		}
563 	}
564 	SCTP_REFRELE(sctp);
565 }
566 
567 void
568 sctp_conn_hash_remove(sctp_t *sctp)
569 {
570 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
571 
572 	if (!tf) {
573 		return;
574 	}
575 	/*
576 	 * On a clustered note send this notification to the clustering
577 	 * subsystem.
578 	 */
579 	if (cl_sctp_disconnect != NULL) {
580 		(*cl_sctp_disconnect)(sctp->sctp_family,
581 		    (cl_sctp_handle_t)sctp);
582 	}
583 
584 	mutex_enter(&tf->tf_lock);
585 	ASSERT(tf->tf_sctp);
586 	if (tf->tf_sctp == sctp) {
587 		tf->tf_sctp = sctp->sctp_conn_hash_next;
588 		if (sctp->sctp_conn_hash_next) {
589 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
590 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
591 		}
592 	} else {
593 		ASSERT(sctp->sctp_conn_hash_prev);
594 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
595 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
596 		    sctp->sctp_conn_hash_next;
597 
598 		if (sctp->sctp_conn_hash_next) {
599 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
600 			    == sctp);
601 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
602 			    sctp->sctp_conn_hash_prev;
603 		}
604 	}
605 	sctp->sctp_conn_hash_next = NULL;
606 	sctp->sctp_conn_hash_prev = NULL;
607 	sctp->sctp_conn_tfp = NULL;
608 	mutex_exit(&tf->tf_lock);
609 }
610 
611 void
612 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
613 {
614 	if (sctp->sctp_conn_tfp) {
615 		sctp_conn_hash_remove(sctp);
616 	}
617 
618 	if (!caller_holds_lock) {
619 		mutex_enter(&tf->tf_lock);
620 	} else {
621 		ASSERT(MUTEX_HELD(&tf->tf_lock));
622 	}
623 
624 	sctp->sctp_conn_hash_next = tf->tf_sctp;
625 	if (tf->tf_sctp) {
626 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
627 	}
628 	sctp->sctp_conn_hash_prev = NULL;
629 	tf->tf_sctp = sctp;
630 	sctp->sctp_conn_tfp = tf;
631 	if (!caller_holds_lock) {
632 		mutex_exit(&tf->tf_lock);
633 	}
634 }
635 
636 void
637 sctp_listen_hash_remove(sctp_t *sctp)
638 {
639 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
640 
641 	if (!tf) {
642 		return;
643 	}
644 	/*
645 	 * On a clustered note send this notification to the clustering
646 	 * subsystem.
647 	 */
648 	if (cl_sctp_unlisten != NULL) {
649 		uchar_t	*slist;
650 		ssize_t	ssize;
651 
652 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
653 		slist = kmem_alloc(ssize, KM_SLEEP);
654 		sctp_get_saddr_list(sctp, slist, ssize);
655 		(*cl_sctp_unlisten)(sctp->sctp_family, slist,
656 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
657 		/* list will be freed by the clustering module */
658 	}
659 
660 	mutex_enter(&tf->tf_lock);
661 	ASSERT(tf->tf_sctp);
662 	if (tf->tf_sctp == sctp) {
663 		tf->tf_sctp = sctp->sctp_listen_hash_next;
664 		if (sctp->sctp_listen_hash_next) {
665 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
666 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
667 		}
668 	} else {
669 		ASSERT(sctp->sctp_listen_hash_prev);
670 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
671 		    sctp);
672 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
673 		    sctp->sctp_listen_hash_next;
674 
675 		if (sctp->sctp_listen_hash_next) {
676 			ASSERT(
677 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev ==
678 			    sctp);
679 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev =
680 			    sctp->sctp_listen_hash_prev;
681 		}
682 	}
683 	sctp->sctp_listen_hash_next = NULL;
684 	sctp->sctp_listen_hash_prev = NULL;
685 	sctp->sctp_listen_tfp = NULL;
686 	mutex_exit(&tf->tf_lock);
687 }
688 
689 void
690 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
691 {
692 	if (sctp->sctp_listen_tfp) {
693 		sctp_listen_hash_remove(sctp);
694 	}
695 
696 	mutex_enter(&tf->tf_lock);
697 	sctp->sctp_listen_hash_next = tf->tf_sctp;
698 	if (tf->tf_sctp) {
699 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
700 	}
701 	sctp->sctp_listen_hash_prev = NULL;
702 	tf->tf_sctp = sctp;
703 	sctp->sctp_listen_tfp = tf;
704 	mutex_exit(&tf->tf_lock);
705 	/*
706 	 * On a clustered note send this notification to the clustering
707 	 * subsystem.
708 	 */
709 	if (cl_sctp_listen != NULL) {
710 		uchar_t	*slist;
711 		ssize_t	ssize;
712 
713 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
714 		slist = kmem_alloc(ssize, KM_SLEEP);
715 		sctp_get_saddr_list(sctp, slist, ssize);
716 		(*cl_sctp_listen)(sctp->sctp_family, slist,
717 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
718 		/* list will be freed by the clustering module */
719 	}
720 }
721 
722 /*
723  * Hash list insertion routine for sctp_t structures.
724  * Inserts entries with the ones bound to a specific IP address first
725  * followed by those bound to INADDR_ANY.
726  */
727 void
728 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
729 {
730 	sctp_t	**sctpp;
731 	sctp_t	*sctpnext;
732 
733 	if (sctp->sctp_ptpbhn != NULL) {
734 		ASSERT(!caller_holds_lock);
735 		sctp_bind_hash_remove(sctp);
736 	}
737 	sctpp = &tbf->tf_sctp;
738 	if (!caller_holds_lock) {
739 		mutex_enter(&tbf->tf_lock);
740 	} else {
741 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
742 	}
743 	sctpnext = sctpp[0];
744 	if (sctpnext) {
745 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
746 	}
747 	sctp->sctp_bind_hash = sctpnext;
748 	sctp->sctp_ptpbhn = sctpp;
749 	sctpp[0] = sctp;
750 	/* For sctp_*_hash_remove */
751 	sctp->sctp_bind_lockp = &tbf->tf_lock;
752 	if (!caller_holds_lock)
753 		mutex_exit(&tbf->tf_lock);
754 }
755 
756 /*
757  * Hash list removal routine for sctp_t structures.
758  */
759 void
760 sctp_bind_hash_remove(sctp_t *sctp)
761 {
762 	sctp_t	*sctpnext;
763 	kmutex_t *lockp;
764 
765 	lockp = sctp->sctp_bind_lockp;
766 
767 	if (sctp->sctp_ptpbhn == NULL)
768 		return;
769 
770 	ASSERT(lockp != NULL);
771 	mutex_enter(lockp);
772 	if (sctp->sctp_ptpbhn) {
773 		sctpnext = sctp->sctp_bind_hash;
774 		if (sctpnext) {
775 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
776 			sctp->sctp_bind_hash = NULL;
777 		}
778 		*sctp->sctp_ptpbhn = sctpnext;
779 		sctp->sctp_ptpbhn = NULL;
780 	}
781 	mutex_exit(lockp);
782 	sctp->sctp_bind_lockp = NULL;
783 }
784 
785 /*
786  * Similar to but more general than ip_sctp's conn_match().
787  *
788  * Matches sets of addresses as follows: if the argument addr set is
789  * a complete subset of the corresponding addr set in the sctp_t, it
790  * is a match.
791  *
792  * Caller must hold tf->tf_lock.
793  *
794  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
795  */
796 sctp_t *
797 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
798     int min_state)
799 {
800 
801 	sctp_t *sctp;
802 	sctp_faddr_t *fp;
803 
804 	ASSERT(MUTEX_HELD(&tf->tf_lock));
805 
806 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
807 		if (*ports != sctp->sctp_ports || sctp->sctp_state <
808 		    min_state) {
809 			continue;
810 		}
811 
812 		/* check for faddr match */
813 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
814 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
815 				break;
816 			}
817 		}
818 
819 		if (!fp) {
820 			/* no faddr match; keep looking */
821 			continue;
822 		}
823 
824 		/* check for laddr subset match */
825 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_SUBSET) {
826 			goto done;
827 		}
828 
829 		/* no match; continue searching */
830 	}
831 
832 done:
833 	if (sctp) {
834 		SCTP_REFHOLD(sctp);
835 	}
836 	return (sctp);
837 }
838 
839 boolean_t
840 ip_fanout_sctp_raw_match(conn_t *connp, uint32_t ports, ipha_t *ipha)
841 {
842 	uint16_t lport;
843 
844 	if (connp->conn_fully_bound) {
845 		return (IPCL_CONN_MATCH(connp, IPPROTO_SCTP, ipha->ipha_src,
846 		    ipha->ipha_dst, ports));
847 	} else {
848 		lport = htons(ntohl(ports) & 0xFFFF);
849 		return (IPCL_BIND_MATCH(connp, IPPROTO_SCTP, ipha->ipha_dst,
850 		    lport));
851 	}
852 }
853 
854 boolean_t
855 ip_fanout_sctp_raw_match_v6(conn_t *connp, uint32_t ports, ip6_t *ip6h,
856     boolean_t for_v4)
857 {
858 	uint16_t lport;
859 	in6_addr_t	v6dst;
860 
861 	if (!for_v4 && connp->conn_fully_bound) {
862 		return (IPCL_CONN_MATCH_V6(connp, IPPROTO_SCTP, ip6h->ip6_src,
863 		    ip6h->ip6_dst, ports));
864 	} else {
865 		lport = htons(ntohl(ports) & 0xFFFF);
866 		if (for_v4)
867 			v6dst = ipv6_all_zeros;
868 		else
869 			v6dst = ip6h->ip6_dst;
870 		return (IPCL_BIND_MATCH_V6(connp, IPPROTO_SCTP, v6dst, lport));
871 	}
872 }
873