xref: /titanic_52/usr/src/uts/common/inet/sctp/sctp_hash.c (revision ae115bc77f6fcde83175c75b4206dc2e50747966)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/socket.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/tsol/tndb.h>
33 #include <sys/tsol/tnet.h>
34 
35 #include <netinet/in.h>
36 #include <netinet/ip6.h>
37 
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ipclassifier.h>
42 #include <inet/ipsec_impl.h>
43 #include <inet/ipp_common.h>
44 #include <inet/sctp_ip.h>
45 
46 #include "sctp_impl.h"
47 #include "sctp_addr.h"
48 
49 /* SCTP bind hash list - all sctp_t with state >= BOUND. */
50 sctp_tf_t	sctp_bind_fanout[SCTP_BIND_FANOUT_SIZE];
51 /* SCTP listen hash list - all sctp_t with state == LISTEN. */
52 sctp_tf_t	sctp_listen_fanout[SCTP_LISTEN_FANOUT_SIZE];
53 
54 /* Default association hash size.  The size must be a power of 2. */
55 #define	SCTP_CONN_HASH_SIZE	8192
56 
57 sctp_tf_t	*sctp_conn_fanout;
58 uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
59 
60 /*
61  * Cluster networking hook for traversing current assoc list.
62  * This routine is used to extract the current list of live associations
 63  * which must continue to be dispatched to this node.
64  */
65 int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
66     boolean_t);
67 
68 void
69 sctp_hash_init()
70 {
71 	int i;
72 
73 	if (sctp_conn_hash_size & (sctp_conn_hash_size - 1)) {
74 		/* Not a power of two. Round up to nearest power of two */
75 		for (i = 0; i < 31; i++) {
76 			if (sctp_conn_hash_size < (1 << i))
77 				break;
78 		}
79 		sctp_conn_hash_size = 1 << i;
80 	}
81 	if (sctp_conn_hash_size < SCTP_CONN_HASH_SIZE) {
82 		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
83 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
84 		    sctp_conn_hash_size);
85 	}
86 	sctp_conn_fanout =
87 		(sctp_tf_t *)kmem_zalloc(sctp_conn_hash_size *
88 		    sizeof (sctp_tf_t),	KM_SLEEP);
89 	for (i = 0; i < sctp_conn_hash_size; i++) {
90 		mutex_init(&sctp_conn_fanout[i].tf_lock, NULL,
91 			    MUTEX_DEFAULT, NULL);
92 	}
93 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
94 		mutex_init(&sctp_listen_fanout[i].tf_lock, NULL,
95 		    MUTEX_DEFAULT, NULL);
96 	}
97 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
98 		mutex_init(&sctp_bind_fanout[i].tf_lock, NULL,
99 		    MUTEX_DEFAULT, NULL);
100 	}
101 }
102 
103 void
104 sctp_hash_destroy()
105 {
106 	int i;
107 
108 	for (i = 0; i < sctp_conn_hash_size; i++) {
109 		mutex_destroy(&sctp_conn_fanout[i].tf_lock);
110 	}
111 	kmem_free(sctp_conn_fanout, sctp_conn_hash_size * sizeof (sctp_tf_t));
112 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
113 		mutex_destroy(&sctp_listen_fanout[i].tf_lock);
114 	}
115 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
116 		mutex_destroy(&sctp_bind_fanout[i].tf_lock);
117 	}
118 }
119 
/*
 * Walk the SCTP global list and refrele the ire for this ipif
 * This is called when an address goes down, so that we release any reference
 * to the ire associated with this address. Additionally, for any SCTP if
 * this was the only/last address in its source list, we don't kill the
 * assoc., if there is no address added subsequently, or if this does not
 * come up, then the assoc. will die a natural death (i.e. timeout).
 *
 * Walk protocol: each sctp_t is pinned with a refcnt bump (taken under
 * sctp_reflock) before sctp_g_lock is dropped; the previous entry is
 * released only after the list is re-entered, so the walk position can
 * never be freed out from under us.
 */
void
sctp_ire_cache_flush(ipif_t *ipif)
{
	sctp_t			*sctp;
	sctp_t			*sctp_prev = NULL;
	sctp_faddr_t		*fp;
	conn_t			*connp;
	ire_t			*ire;

	sctp = gsctp;
	mutex_enter(&sctp_g_lock);
	while (sctp != NULL) {
		/* Skip endpoints that are already being torn down. */
		mutex_enter(&sctp->sctp_reflock);
		if (sctp->sctp_condemned) {
			mutex_exit(&sctp->sctp_reflock);
			sctp = list_next(&sctp_g_list, sctp);
			continue;
		}
		sctp->sctp_refcnt++;
		mutex_exit(&sctp->sctp_reflock);
		mutex_exit(&sctp_g_lock);
		if (sctp_prev != NULL)
			SCTP_REFRELE(sctp_prev);

		RUN_SCTP(sctp);
		connp = sctp->sctp_connp;
		/* Drop the conn-level cached ire if it points at this ipif. */
		mutex_enter(&connp->conn_lock);
		ire = connp->conn_ire_cache;
		if (ire != NULL && ire->ire_ipif == ipif) {
			connp->conn_ire_cache = NULL;
			mutex_exit(&connp->conn_lock);
			IRE_REFRELE_NOTR(ire);
		} else {
			mutex_exit(&connp->conn_lock);
		}
		/* check for ires cached in faddr */
		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
			/*
			 * If this ipif is being used as the source address
			 * we need to update it as well, else we will end
			 * up using the dead source address.
			 */
			ire = fp->ire;
			if (ire != NULL && ire->ire_ipif == ipif) {
				fp->ire = NULL;
				IRE_REFRELE_NOTR(ire);
			}
			/*
			 * This may result in setting the fp as unreachable,
			 * i.e. if all the source addresses are down. In
			 * that case the assoc. would timeout.
			 */
			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
			    &fp->saddr)) {
				sctp_set_saddr(sctp, fp);
				if (fp == sctp->sctp_current &&
				    fp->state != SCTP_FADDRS_UNREACH) {
					/* Re-select for the current path. */
					sctp_set_faddr_current(sctp, fp);
				}
			}
		}
		WAKE_SCTP(sctp);
		sctp_prev = sctp;
		mutex_enter(&sctp_g_lock);
		sctp = list_next(&sctp_g_list, sctp);
	}
	mutex_exit(&sctp_g_lock);
	if (sctp_prev != NULL)
		SCTP_REFRELE(sctp_prev);
}
198 
/*
 * Exported routine for extracting active SCTP associations.
 * Like TCP, we terminate the walk if the callback returns non-zero.
 *
 * For each association past the LISTEN state, a snapshot of its local
 * and peer address lists plus identifying fields is packed into a
 * cl_sctp_info_t and handed to `cl_callback'.  The address buffers are
 * freed by the callback on success, or here when the walk aborts.
 * `cansleep' selects KM_SLEEP vs KM_NOSLEEP for those allocations.
 *
 * Returns 0 when the whole list was walked, 1 if the walk stopped
 * early (allocation failure or non-zero callback return).
 */
int
cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg,
    boolean_t cansleep)
{
	sctp_t		*sctp;
	sctp_t		*sctp_prev;
	cl_sctp_info_t	cl_sctpi;
	uchar_t		*slist;
	uchar_t		*flist;

	sctp = gsctp;
	sctp_prev = NULL;
	mutex_enter(&sctp_g_lock);
	while (sctp != NULL) {
		size_t	ssize;
		size_t	fsize;

		/* Skip condemned endpoints and those not yet associated. */
		mutex_enter(&sctp->sctp_reflock);
		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
			mutex_exit(&sctp->sctp_reflock);
			sctp = list_next(&sctp_g_list, sctp);
			continue;
		}
		/*
		 * Pin this sctp_t before dropping the global lock so it
		 * cannot be freed while we copy it out; the previous
		 * entry is released only afterwards so the walk position
		 * stays valid.
		 */
		sctp->sctp_refcnt++;
		mutex_exit(&sctp->sctp_reflock);
		mutex_exit(&sctp_g_lock);
		if (sctp_prev != NULL)
			SCTP_REFRELE(sctp_prev);
		RUN_SCTP(sctp);
		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;

		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
		if (slist == NULL || flist == NULL) {
			/* Allocation failed; abort the walk. */
			WAKE_SCTP(sctp);
			if (slist != NULL)
				kmem_free(slist, ssize);
			if (flist != NULL)
				kmem_free(flist, fsize);
			SCTP_REFRELE(sctp);
			return (1);
		}
		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
		sctp_get_saddr_list(sctp, slist, ssize);
		sctp_get_faddr_list(sctp, flist, fsize);
		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
		cl_sctpi.cl_sctpi_family = sctp->sctp_family;
		cl_sctpi.cl_sctpi_ipversion = sctp->sctp_ipversion;
		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
		cl_sctpi.cl_sctpi_lport = sctp->sctp_lport;
		cl_sctpi.cl_sctpi_fport = sctp->sctp_fport;
		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
		WAKE_SCTP(sctp);
		cl_sctpi.cl_sctpi_laddrp = slist;
		cl_sctpi.cl_sctpi_faddrp = flist;
		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
			/* Non-zero callback return terminates the walk. */
			kmem_free(slist, ssize);
			kmem_free(flist, fsize);
			SCTP_REFRELE(sctp);
			return (1);
		}
		/* list will be freed by cl_callback */
		sctp_prev = sctp;
		mutex_enter(&sctp_g_lock);
		sctp = list_next(&sctp_g_list, sctp);
	}
	mutex_exit(&sctp_g_lock);
	if (sctp_prev != NULL)
		SCTP_REFRELE(sctp_prev);
	return (0);
}
276 
/*
 * Look up an established association in the connection fanout.
 * `ports' holds the packet's <fport, lport> pair; `faddr' must be one
 * of the association's peer addresses.  The local side matches either
 * by address (`laddr', when ipif_seqid is 0) or by ipif sequence id.
 *
 * On success the sctp_t is returned with a SCTP_REFHOLD which the
 * caller must release with SCTP_REFRELE; NULL when nothing matches.
 */
sctp_t *
sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
    uint_t ipif_seqid, zoneid_t zoneid)
{
	sctp_tf_t		*tf;
	sctp_t			*sctp;
	sctp_faddr_t		*fp;

	tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]);
	mutex_enter(&tf->tf_lock);

	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
		if (ports != sctp->sctp_ports ||
		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
			continue;
		}

		/* check for faddr match */
		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
				break;
			}
		}

		if (!fp) {
			/* no faddr match; keep looking */
			continue;
		}

		/* check for laddr match */
		if (ipif_seqid == 0) {
			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
				SCTP_REFHOLD(sctp);
				goto done;
			}
		} else {
			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
				SCTP_REFHOLD(sctp);
				goto done;
			}
		}
		/* no match; continue to the next in the chain */
	}

done:
	mutex_exit(&tf->tf_lock);
	return (sctp);
}
325 
326 static sctp_t *
327 listen_match(in6_addr_t *laddr, uint32_t ports, uint_t ipif_seqid,
328     zoneid_t zoneid)
329 {
330 	sctp_t			*sctp;
331 	sctp_tf_t		*tf;
332 	uint16_t		lport;
333 
334 	lport = ((uint16_t *)&ports)[1];
335 
336 	tf = &(sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
337 	mutex_enter(&tf->tf_lock);
338 
339 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
340 		if (lport != sctp->sctp_lport ||
341 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
342 			continue;
343 		}
344 
345 		if (ipif_seqid == 0) {
346 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
347 				SCTP_REFHOLD(sctp);
348 				goto done;
349 			}
350 		} else {
351 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
352 				SCTP_REFHOLD(sctp);
353 				goto done;
354 			}
355 		}
356 		/* no match; continue to the next in the chain */
357 	}
358 
359 done:
360 	mutex_exit(&tf->tf_lock);
361 	return (sctp);
362 }
363 
364 /* called by ipsec_sctp_pol */
365 conn_t *
366 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
367     uint_t ipif_seqid, zoneid_t zoneid)
368 {
369 	sctp_t *sctp;
370 
371 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
372 	    zoneid)) == NULL) {
373 		/* Not in conn fanout; check listen fanout */
374 		if ((sctp = listen_match(dst, ports, ipif_seqid,
375 		    zoneid)) == NULL) {
376 			return (NULL);
377 		}
378 	}
379 	return (sctp->sctp_connp);
380 }
381 
/*
 * Classify an inbound SCTP packet to a conn_t: first by established
 * association, then by listener.  On Trusted Extensions systems an
 * ALL_ZONES (shared-address) lookup is first resolved to a specific
 * zone -- via the MLP table, falling back to the packet's label --
 * before the listen fanout is consulted, and the selected listener
 * must pass the tsol_receive_local() accreditation check.
 *
 * Returns the conn_t with its sctp_t held (match routines take the
 * SCTP_REFHOLD; caller must SCTP_REFRELE), or NULL if no endpoint
 * may receive the packet.
 */
conn_t *
sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
    uint_t ipif_seqid, zoneid_t zoneid, mblk_t *mp)
{
	sctp_t *sctp;
	boolean_t shared_addr;

	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
	    zoneid)) == NULL) {
		shared_addr = (zoneid == ALL_ZONES);
		if (shared_addr) {
			/* ports is <fport, lport>; extract the local port. */
			zoneid = tsol_mlp_findzone(IPPROTO_SCTP,
			    htons(ntohl(ports) & 0xFFFF));
			/*
			 * If no shared MLP is found, tsol_mlp_findzone returns
			 * ALL_ZONES.  In that case, we assume it's SLP, and
			 * search for the zone based on the packet label.
			 * That will also return ALL_ZONES on failure.
			 */
			if (zoneid == ALL_ZONES)
				zoneid = tsol_packet_to_zoneid(mp);
			if (zoneid == ALL_ZONES)
				return (NULL);
		}
		/* Not in conn fanout; check listen fanout */
		if ((sctp = listen_match(dst, ports, ipif_seqid,
		    zoneid)) == NULL) {
			return (NULL);
		}
		/*
		 * On systems running trusted extensions, check if dst
		 * should accept the packet. "IPV6_VERSION" indicates
		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
		 * IPv6 addresses are supported.
		 */
		if (is_system_labeled() &&
		    !tsol_receive_local(mp, dst, IPV6_VERSION,
		    shared_addr, sctp->sctp_connp)) {
			DTRACE_PROBE3(
			    tx__ip__log__info__classify__sctp,
			    char *,
			    "connp(1) could not receive mp(2)",
			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
			SCTP_REFRELE(sctp);
			return (NULL);
		}
	}
	return (sctp->sctp_connp);
}
431 
/*
 * Fanout for SCTP packets
 * The caller puts <fport, lport> in the ports parameter.
 *
 * When a conn_t is found, inbound IPsec policy, IPP local-in
 * processing and any requested ancillary data are applied and the
 * packet is delivered to the endpoint -- directly via
 * sctp_input_data() when the endpoint is idle, otherwise queued on
 * its receive queue.  When no SCTP endpoint matches, the packet is
 * handed to ip_fanout_sctp_raw() for raw/default listeners.
 */
/* ARGSUSED */
void
ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha,
    uint32_t ports, uint_t flags, boolean_t mctl_present, boolean_t ip_policy,
    uint_t ipif_seqid, zoneid_t zoneid)
{
	sctp_t *sctp;
	boolean_t isv4;
	conn_t *connp;
	mblk_t *first_mp;
	ip6_t *ip6h;
	in6_addr_t map_src, map_dst;
	in6_addr_t *src, *dst;

	/*
	 * With mctl_present, first_mp is a control block (IPsec info)
	 * whose b_cont is the actual data message.
	 */
	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		ASSERT(mp != NULL);
	}

	/* Assume IP provides aligned packets - otherwise toss */
	if (!OK_32PTR(mp->b_rptr)) {
		BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards);
		freemsg(first_mp);
		return;
	}

	/* Normalize addresses to IPv6 form; v4 addresses get mapped. */
	if (IPH_HDR_VERSION(ipha) == IPV6_VERSION) {
		ip6h = (ip6_t *)ipha;
		src = &ip6h->ip6_src;
		dst = &ip6h->ip6_dst;
		isv4 = B_FALSE;
	} else {
		ip6h = NULL;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
		src = &map_src;
		dst = &map_dst;
		isv4 = B_TRUE;
	}
	if ((connp = sctp_fanout(src, dst, ports, ipif_seqid, zoneid, mp)) ==
	    NULL) {
		/* No SCTP endpoint; try the raw-socket fanout. */
		ip_fanout_sctp_raw(first_mp, recv_ill, ipha, isv4,
		    ports, mctl_present, flags, ip_policy,
		    ipif_seqid, zoneid);
		return;
	}
	sctp = CONN2SCTP(connp);

	/* Found a client; up it goes */
	BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsHCInDelivers);

	/*
	 * We check some fields in conn_t without holding a lock.
	 * This should be fine.
	 */
	if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) {
		first_mp = ipsec_check_inbound_policy(first_mp, connp,
		    ipha, NULL, mctl_present);
		if (first_mp == NULL) {
			/* Packet rejected by IPsec policy. */
			SCTP_REFRELE(sctp);
			return;
		}
	}

	/* Initiate IPPF processing for fastpath */
	if (IPP_ENABLED(IPP_LOCAL_IN)) {
		ip_process(IPP_LOCAL_IN, &mp,
		    recv_ill->ill_phyint->phyint_ifindex);
		if (mp == NULL) {
			SCTP_REFRELE(sctp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			/*
			 * ip_process might return a new mp.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	/* Attach requested ancillary data (recvif/slla/pktinfo). */
	if (connp->conn_recvif || connp->conn_recvslla ||
	    connp->conn_ip_recvpktinfo) {
		int in_flags = 0;

		if (connp->conn_recvif || connp->conn_ip_recvpktinfo) {
			in_flags = IPF_RECVIF;
		}
		if (connp->conn_recvslla) {
			in_flags |= IPF_RECVSLLA;
		}
		if (isv4) {
			mp = ip_add_info(mp, recv_ill, in_flags,
			    IPCL_ZONEID(connp));
		} else {
			mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst);
		}
		if (mp == NULL) {
			SCTP_REFRELE(sctp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			/*
			 * ip_add_info might return a new mp.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	/*
	 * Deliver: if another thread is running this endpoint, queue the
	 * packet; otherwise claim the endpoint (sctp_running) and, if the
	 * receive queue is empty, process inline.
	 */
	mutex_enter(&sctp->sctp_lock);
	if (sctp->sctp_running) {
		if (mctl_present)
			mp->b_prev = first_mp;
		if (!sctp_add_recvq(sctp, mp, B_FALSE)) {
			BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards);
			freemsg(first_mp);
		}
		mutex_exit(&sctp->sctp_lock);
	} else {
		sctp->sctp_running = B_TRUE;
		mutex_exit(&sctp->sctp_lock);

		mutex_enter(&sctp->sctp_recvq_lock);
		if (sctp->sctp_recvq != NULL) {
			/* Preserve ordering behind already-queued packets. */
			if (mctl_present)
				mp->b_prev = first_mp;
			if (!sctp_add_recvq(sctp, mp, B_TRUE)) {
				BUMP_MIB(recv_ill->ill_ip_mib,
				    ipIfStatsInDiscards);
				freemsg(first_mp);
			}
			mutex_exit(&sctp->sctp_recvq_lock);
			WAKE_SCTP(sctp);
		} else {
			mutex_exit(&sctp->sctp_recvq_lock);
			sctp_input_data(sctp, mp, (mctl_present ? first_mp :
			    NULL));
			WAKE_SCTP(sctp);
			sctp_process_sendq(sctp);
		}
	}
	SCTP_REFRELE(sctp);
}
587 
/*
 * Unlink `sctp' from its connection-fanout bucket (if any) and clear
 * its hash linkage.  No-op when the endpoint is not currently hashed.
 */
void
sctp_conn_hash_remove(sctp_t *sctp)
{
	sctp_tf_t *tf = sctp->sctp_conn_tfp;

	if (!tf) {
		return;
	}
	/*
	 * On a clustered node send this notification to the clustering
	 * subsystem.
	 */
	if (cl_sctp_disconnect != NULL) {
		(*cl_sctp_disconnect)(sctp->sctp_family,
		    (cl_sctp_handle_t)sctp);
	}

	mutex_enter(&tf->tf_lock);
	ASSERT(tf->tf_sctp);
	if (tf->tf_sctp == sctp) {
		/* sctp is the head of the bucket chain. */
		tf->tf_sctp = sctp->sctp_conn_hash_next;
		if (sctp->sctp_conn_hash_next) {
			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
			tf->tf_sctp->sctp_conn_hash_prev = NULL;
		}
	} else {
		/* Unlink from the middle/end of the doubly-linked chain. */
		ASSERT(sctp->sctp_conn_hash_prev);
		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
		    sctp->sctp_conn_hash_next;

		if (sctp->sctp_conn_hash_next) {
			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
			    == sctp);
			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
			    sctp->sctp_conn_hash_prev;
		}
	}
	sctp->sctp_conn_hash_next = NULL;
	sctp->sctp_conn_hash_prev = NULL;
	sctp->sctp_conn_tfp = NULL;
	mutex_exit(&tf->tf_lock);
}
631 
632 void
633 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
634 {
635 	if (sctp->sctp_conn_tfp) {
636 		sctp_conn_hash_remove(sctp);
637 	}
638 
639 	if (!caller_holds_lock) {
640 		mutex_enter(&tf->tf_lock);
641 	} else {
642 		ASSERT(MUTEX_HELD(&tf->tf_lock));
643 	}
644 
645 	sctp->sctp_conn_hash_next = tf->tf_sctp;
646 	if (tf->tf_sctp) {
647 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
648 	}
649 	sctp->sctp_conn_hash_prev = NULL;
650 	tf->tf_sctp = sctp;
651 	sctp->sctp_conn_tfp = tf;
652 	if (!caller_holds_lock) {
653 		mutex_exit(&tf->tf_lock);
654 	}
655 }
656 
/*
 * Unlink `sctp' from its listen-fanout bucket (if any) and clear its
 * hash linkage.  No-op when the endpoint is not currently listed.
 */
void
sctp_listen_hash_remove(sctp_t *sctp)
{
	sctp_tf_t *tf = sctp->sctp_listen_tfp;

	if (!tf) {
		return;
	}
	/*
	 * On a clustered node send this notification to the clustering
	 * subsystem, along with a snapshot of the listener's bound
	 * address list.
	 */
	if (cl_sctp_unlisten != NULL) {
		uchar_t	*slist;
		ssize_t	ssize;

		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
		slist = kmem_alloc(ssize, KM_SLEEP);
		sctp_get_saddr_list(sctp, slist, ssize);
		(*cl_sctp_unlisten)(sctp->sctp_family, slist,
		    sctp->sctp_nsaddrs, sctp->sctp_lport);
		/* list will be freed by the clustering module */
	}

	mutex_enter(&tf->tf_lock);
	ASSERT(tf->tf_sctp);
	if (tf->tf_sctp == sctp) {
		/* sctp is the head of the bucket chain. */
		tf->tf_sctp = sctp->sctp_listen_hash_next;
		if (sctp->sctp_listen_hash_next) {
			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
			tf->tf_sctp->sctp_listen_hash_prev = NULL;
		}
	} else {
		/* Unlink from the middle/end of the doubly-linked chain. */
		ASSERT(sctp->sctp_listen_hash_prev);
		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
		    sctp);
		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
		    sctp->sctp_listen_hash_next;

		if (sctp->sctp_listen_hash_next) {
			ASSERT(
			sctp->sctp_listen_hash_next->sctp_listen_hash_prev ==
			    sctp);
			sctp->sctp_listen_hash_next->sctp_listen_hash_prev =
			    sctp->sctp_listen_hash_prev;
		}
	}
	sctp->sctp_listen_hash_next = NULL;
	sctp->sctp_listen_hash_prev = NULL;
	sctp->sctp_listen_tfp = NULL;
	mutex_exit(&tf->tf_lock);
}
709 
710 void
711 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
712 {
713 	if (sctp->sctp_listen_tfp) {
714 		sctp_listen_hash_remove(sctp);
715 	}
716 
717 	mutex_enter(&tf->tf_lock);
718 	sctp->sctp_listen_hash_next = tf->tf_sctp;
719 	if (tf->tf_sctp) {
720 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
721 	}
722 	sctp->sctp_listen_hash_prev = NULL;
723 	tf->tf_sctp = sctp;
724 	sctp->sctp_listen_tfp = tf;
725 	mutex_exit(&tf->tf_lock);
726 	/*
727 	 * On a clustered note send this notification to the clustering
728 	 * subsystem.
729 	 */
730 	if (cl_sctp_listen != NULL) {
731 		uchar_t	*slist;
732 		ssize_t	ssize;
733 
734 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
735 		slist = kmem_alloc(ssize, KM_SLEEP);
736 		sctp_get_saddr_list(sctp, slist, ssize);
737 		(*cl_sctp_listen)(sctp->sctp_family, slist,
738 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
739 		/* list will be freed by the clustering module */
740 	}
741 }
742 
743 /*
744  * Hash list insertion routine for sctp_t structures.
745  * Inserts entries with the ones bound to a specific IP address first
746  * followed by those bound to INADDR_ANY.
747  */
748 void
749 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
750 {
751 	sctp_t	**sctpp;
752 	sctp_t	*sctpnext;
753 
754 	if (sctp->sctp_ptpbhn != NULL) {
755 		ASSERT(!caller_holds_lock);
756 		sctp_bind_hash_remove(sctp);
757 	}
758 	sctpp = &tbf->tf_sctp;
759 	if (!caller_holds_lock) {
760 		mutex_enter(&tbf->tf_lock);
761 	} else {
762 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
763 	}
764 	sctpnext = sctpp[0];
765 	if (sctpnext) {
766 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
767 	}
768 	sctp->sctp_bind_hash = sctpnext;
769 	sctp->sctp_ptpbhn = sctpp;
770 	sctpp[0] = sctp;
771 	/* For sctp_*_hash_remove */
772 	sctp->sctp_bind_lockp = &tbf->tf_lock;
773 	if (!caller_holds_lock)
774 		mutex_exit(&tbf->tf_lock);
775 }
776 
777 /*
778  * Hash list removal routine for sctp_t structures.
779  */
780 void
781 sctp_bind_hash_remove(sctp_t *sctp)
782 {
783 	sctp_t	*sctpnext;
784 	kmutex_t *lockp;
785 
786 	lockp = sctp->sctp_bind_lockp;
787 
788 	if (sctp->sctp_ptpbhn == NULL)
789 		return;
790 
791 	ASSERT(lockp != NULL);
792 	mutex_enter(lockp);
793 	if (sctp->sctp_ptpbhn) {
794 		sctpnext = sctp->sctp_bind_hash;
795 		if (sctpnext) {
796 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
797 			sctp->sctp_bind_hash = NULL;
798 		}
799 		*sctp->sctp_ptpbhn = sctpnext;
800 		sctp->sctp_ptpbhn = NULL;
801 	}
802 	mutex_exit(lockp);
803 	sctp->sctp_bind_lockp = NULL;
804 }
805 
806 /*
807  * Similar to but more general than ip_sctp's conn_match().
808  *
809  * Matches sets of addresses as follows: if the argument addr set is
810  * a complete subset of the corresponding addr set in the sctp_t, it
811  * is a match.
812  *
813  * Caller must hold tf->tf_lock.
814  *
815  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
816  */
817 sctp_t *
818 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
819     int min_state)
820 {
821 
822 	sctp_t *sctp;
823 	sctp_faddr_t *fp;
824 
825 	ASSERT(MUTEX_HELD(&tf->tf_lock));
826 
827 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
828 		if (*ports != sctp->sctp_ports || sctp->sctp_state <
829 		    min_state) {
830 			continue;
831 		}
832 
833 		/* check for faddr match */
834 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
835 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
836 				break;
837 			}
838 		}
839 
840 		if (!fp) {
841 			/* no faddr match; keep looking */
842 			continue;
843 		}
844 
845 		/* check for laddr subset match */
846 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_SUBSET) {
847 			goto done;
848 		}
849 
850 		/* no match; continue searching */
851 	}
852 
853 done:
854 	if (sctp) {
855 		SCTP_REFHOLD(sctp);
856 	}
857 	return (sctp);
858 }
859 
860 boolean_t
861 ip_fanout_sctp_raw_match(conn_t *connp, uint32_t ports, ipha_t *ipha)
862 {
863 	uint16_t lport;
864 
865 	if (connp->conn_fully_bound) {
866 		return (IPCL_CONN_MATCH(connp, IPPROTO_SCTP, ipha->ipha_src,
867 		    ipha->ipha_dst, ports));
868 	} else {
869 		lport = htons(ntohl(ports) & 0xFFFF);
870 		return (IPCL_BIND_MATCH(connp, IPPROTO_SCTP, ipha->ipha_dst,
871 		    lport));
872 	}
873 }
874 
875 boolean_t
876 ip_fanout_sctp_raw_match_v6(conn_t *connp, uint32_t ports, ip6_t *ip6h,
877     boolean_t for_v4)
878 {
879 	uint16_t lport;
880 	in6_addr_t	v6dst;
881 
882 	if (!for_v4 && connp->conn_fully_bound) {
883 		return (IPCL_CONN_MATCH_V6(connp, IPPROTO_SCTP, ip6h->ip6_src,
884 		    ip6h->ip6_dst, ports));
885 	} else {
886 		lport = htons(ntohl(ports) & 0xFFFF);
887 		if (for_v4)
888 			v6dst = ipv6_all_zeros;
889 		else
890 			v6dst = ip6h->ip6_dst;
891 		return (IPCL_BIND_MATCH_V6(connp, IPPROTO_SCTP, v6dst, lport));
892 	}
893 }
894