xref: /titanic_44/usr/src/uts/common/inet/sctp/sctp_hash.c (revision 051d39bbeea3e1b0fd8395dc97be34acb3241891)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/socket.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/tsol/tndb.h>
33 #include <sys/tsol/tnet.h>
34 
35 #include <netinet/in.h>
36 #include <netinet/ip6.h>
37 
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ipclassifier.h>
42 #include <inet/ipsec_impl.h>
43 #include <inet/ipp_common.h>
44 #include <inet/sctp_ip.h>
45 
46 #include "sctp_impl.h"
47 #include "sctp_addr.h"
48 
/*
 * SCTP bind hash list - all sctp_t with state >= BOUND.
 * The per-bucket locks are initialized in sctp_hash_init().
 */
sctp_tf_t	sctp_bind_fanout[SCTP_BIND_FANOUT_SIZE];
/*
 * SCTP listen hash list - all sctp_t with state == LISTEN.
 * The per-bucket locks are initialized in sctp_hash_init().
 */
sctp_tf_t	sctp_listen_fanout[SCTP_LISTEN_FANOUT_SIZE];

/*
 * Default association hash size.  The size must be a power of 2.
 * sctp_hash_init() also treats this value as the minimum table size.
 */
#define	SCTP_CONN_HASH_SIZE	8192

/*
 * Association (connection) hash table.  It is allocated by sctp_hash_init()
 * with sctp_conn_hash_size buckets and released by sctp_hash_destroy().
 */
sctp_tf_t	*sctp_conn_fanout;
/*
 * Number of buckets in sctp_conn_fanout.  sctp_hash_init() rounds a value
 * that is not a power of two up to one and raises anything smaller than
 * SCTP_CONN_HASH_SIZE to SCTP_CONN_HASH_SIZE.
 */
uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
59 
/*
 * Cluster networking hook for traversing the current association list.
 * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
 *
 * This is only the forward declaration; the definition appears later in
 * this file.
 */
int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
    boolean_t);
67 
68 void
69 sctp_hash_init()
70 {
71 	int i;
72 
73 	if (sctp_conn_hash_size & (sctp_conn_hash_size - 1)) {
74 		/* Not a power of two. Round up to nearest power of two */
75 		for (i = 0; i < 31; i++) {
76 			if (sctp_conn_hash_size < (1 << i))
77 				break;
78 		}
79 		sctp_conn_hash_size = 1 << i;
80 	}
81 	if (sctp_conn_hash_size < SCTP_CONN_HASH_SIZE) {
82 		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
83 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
84 		    sctp_conn_hash_size);
85 	}
86 	sctp_conn_fanout =
87 		(sctp_tf_t *)kmem_zalloc(sctp_conn_hash_size *
88 		    sizeof (sctp_tf_t),	KM_SLEEP);
89 	for (i = 0; i < sctp_conn_hash_size; i++) {
90 		mutex_init(&sctp_conn_fanout[i].tf_lock, NULL,
91 			    MUTEX_DEFAULT, NULL);
92 	}
93 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
94 		mutex_init(&sctp_listen_fanout[i].tf_lock, NULL,
95 		    MUTEX_DEFAULT, NULL);
96 	}
97 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
98 		mutex_init(&sctp_bind_fanout[i].tf_lock, NULL,
99 		    MUTEX_DEFAULT, NULL);
100 	}
101 }
102 
103 void
104 sctp_hash_destroy()
105 {
106 	int i;
107 
108 	for (i = 0; i < sctp_conn_hash_size; i++) {
109 		mutex_destroy(&sctp_conn_fanout[i].tf_lock);
110 	}
111 	kmem_free(sctp_conn_fanout, sctp_conn_hash_size * sizeof (sctp_tf_t));
112 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
113 		mutex_destroy(&sctp_listen_fanout[i].tf_lock);
114 	}
115 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
116 		mutex_destroy(&sctp_bind_fanout[i].tf_lock);
117 	}
118 }
119 
120 /*
121  * Walk the SCTP global list and refrele the ire for this ipif
122  * This is called when an address goes down, so that we release any reference
123  * to the ire associated with this address. Additionally, for any SCTP if
124  * this was the only/last address in its source list, we don't kill the
125  * assoc., if there is no address added subsequently, or if this does not
126  * come up, then the assoc. will die a natural death (i.e. timeout).
127  */
128 void
129 sctp_ire_cache_flush(ipif_t *ipif)
130 {
131 	sctp_t			*sctp;
132 	sctp_t			*sctp_prev = NULL;
133 	sctp_faddr_t		*fp;
134 	conn_t			*connp;
135 	ire_t			*ire;
136 
137 	sctp = gsctp;
138 	mutex_enter(&sctp_g_lock);
139 	while (sctp != NULL) {
140 		mutex_enter(&sctp->sctp_reflock);
141 		if (sctp->sctp_condemned) {
142 			mutex_exit(&sctp->sctp_reflock);
143 			sctp = list_next(&sctp_g_list, sctp);
144 			continue;
145 		}
146 		sctp->sctp_refcnt++;
147 		mutex_exit(&sctp->sctp_reflock);
148 		mutex_exit(&sctp_g_lock);
149 		if (sctp_prev != NULL)
150 			SCTP_REFRELE(sctp_prev);
151 
152 		RUN_SCTP(sctp);
153 		connp = sctp->sctp_connp;
154 		mutex_enter(&connp->conn_lock);
155 		ire = connp->conn_ire_cache;
156 		if (ire != NULL && ire->ire_ipif == ipif) {
157 			connp->conn_ire_cache = NULL;
158 			mutex_exit(&connp->conn_lock);
159 			IRE_REFRELE_NOTR(ire);
160 		} else {
161 			mutex_exit(&connp->conn_lock);
162 		}
163 		/* check for ires cached in faddr */
164 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
165 			/*
166 			 * If this ipif is being used as the source address
167 			 * we need to update it as well, else we will end
168 			 * up using the dead source address.
169 			 */
170 			ire = fp->ire;
171 			if (ire != NULL && ire->ire_ipif == ipif) {
172 				fp->ire = NULL;
173 				IRE_REFRELE_NOTR(ire);
174 			}
175 			/*
176 			 * This may result in setting the fp as unreachable,
177 			 * i.e. if all the source addresses are down. In
178 			 * that case the assoc. would timeout.
179 			 */
180 			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
181 			    &fp->saddr)) {
182 				sctp_set_saddr(sctp, fp);
183 				if (fp == sctp->sctp_current &&
184 				    fp->state != SCTP_FADDRS_UNREACH) {
185 					sctp_set_faddr_current(sctp, fp);
186 				}
187 			}
188 		}
189 		WAKE_SCTP(sctp);
190 		sctp_prev = sctp;
191 		mutex_enter(&sctp_g_lock);
192 		sctp = list_next(&sctp_g_list, sctp);
193 	}
194 	mutex_exit(&sctp_g_lock);
195 	if (sctp_prev != NULL)
196 		SCTP_REFRELE(sctp_prev);
197 }
198 
199 /*
200  * Exported routine for extracting active SCTP associations.
201  * Like TCP, we terminate the walk if the callback returns non-zero.
202  */
203 int
204 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg,
205     boolean_t cansleep)
206 {
207 	sctp_t		*sctp;
208 	sctp_t		*sctp_prev;
209 	cl_sctp_info_t	cl_sctpi;
210 	uchar_t		*slist;
211 	uchar_t		*flist;
212 
213 	sctp = gsctp;
214 	sctp_prev = NULL;
215 	mutex_enter(&sctp_g_lock);
216 	while (sctp != NULL) {
217 		size_t	ssize;
218 		size_t	fsize;
219 
220 		mutex_enter(&sctp->sctp_reflock);
221 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
222 			mutex_exit(&sctp->sctp_reflock);
223 			sctp = list_next(&sctp_g_list, sctp);
224 			continue;
225 		}
226 		sctp->sctp_refcnt++;
227 		mutex_exit(&sctp->sctp_reflock);
228 		mutex_exit(&sctp_g_lock);
229 		if (sctp_prev != NULL)
230 			SCTP_REFRELE(sctp_prev);
231 		RUN_SCTP(sctp);
232 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
233 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
234 
235 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
236 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
237 		if (slist == NULL || flist == NULL) {
238 			WAKE_SCTP(sctp);
239 			if (slist != NULL)
240 				kmem_free(slist, ssize);
241 			if (flist != NULL)
242 				kmem_free(flist, fsize);
243 			SCTP_REFRELE(sctp);
244 			return (1);
245 		}
246 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
247 		sctp_get_saddr_list(sctp, slist, ssize);
248 		sctp_get_faddr_list(sctp, flist, fsize);
249 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
250 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
251 		cl_sctpi.cl_sctpi_family = sctp->sctp_family;
252 		cl_sctpi.cl_sctpi_ipversion = sctp->sctp_ipversion;
253 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
254 		cl_sctpi.cl_sctpi_lport = sctp->sctp_lport;
255 		cl_sctpi.cl_sctpi_fport = sctp->sctp_fport;
256 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
257 		WAKE_SCTP(sctp);
258 		cl_sctpi.cl_sctpi_laddrp = slist;
259 		cl_sctpi.cl_sctpi_faddrp = flist;
260 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
261 			kmem_free(slist, ssize);
262 			kmem_free(flist, fsize);
263 			SCTP_REFRELE(sctp);
264 			return (1);
265 		}
266 		/* list will be freed by cl_callback */
267 		sctp_prev = sctp;
268 		mutex_enter(&sctp_g_lock);
269 		sctp = list_next(&sctp_g_list, sctp);
270 	}
271 	mutex_exit(&sctp_g_lock);
272 	if (sctp_prev != NULL)
273 		SCTP_REFRELE(sctp_prev);
274 	return (0);
275 }
276 
277 sctp_t *
278 sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
279     uint_t ipif_seqid, zoneid_t zoneid)
280 {
281 	sctp_tf_t		*tf;
282 	sctp_t			*sctp;
283 	sctp_faddr_t		*fp;
284 
285 	tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]);
286 	mutex_enter(&tf->tf_lock);
287 
288 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
289 		if (ports != sctp->sctp_ports ||
290 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
291 			continue;
292 		}
293 
294 		/* check for faddr match */
295 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
296 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
297 				break;
298 			}
299 		}
300 
301 		if (!fp) {
302 			/* no faddr match; keep looking */
303 			continue;
304 		}
305 
306 		/* check for laddr match */
307 		if (ipif_seqid == 0) {
308 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
309 				SCTP_REFHOLD(sctp);
310 				goto done;
311 			}
312 		} else {
313 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
314 				SCTP_REFHOLD(sctp);
315 				goto done;
316 			}
317 		/* no match; continue to the next in the chain */
318 		}
319 	}
320 
321 done:
322 	mutex_exit(&tf->tf_lock);
323 	return (sctp);
324 }
325 
326 static sctp_t *
327 listen_match(in6_addr_t *laddr, uint32_t ports, uint_t ipif_seqid,
328     zoneid_t zoneid)
329 {
330 	sctp_t			*sctp;
331 	sctp_tf_t		*tf;
332 	uint16_t		lport;
333 
334 	lport = ((uint16_t *)&ports)[1];
335 
336 	tf = &(sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
337 	mutex_enter(&tf->tf_lock);
338 
339 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
340 		if (lport != sctp->sctp_lport ||
341 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
342 			continue;
343 		}
344 
345 		if (ipif_seqid == 0) {
346 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
347 				SCTP_REFHOLD(sctp);
348 				goto done;
349 			}
350 		} else {
351 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
352 				SCTP_REFHOLD(sctp);
353 				goto done;
354 			}
355 		}
356 		/* no match; continue to the next in the chain */
357 	}
358 
359 done:
360 	mutex_exit(&tf->tf_lock);
361 	return (sctp);
362 }
363 
364 /* called by ipsec_sctp_pol */
365 conn_t *
366 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
367     uint_t ipif_seqid, zoneid_t zoneid)
368 {
369 	sctp_t *sctp;
370 
371 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
372 	    zoneid)) == NULL) {
373 		/* Not in conn fanout; check listen fanout */
374 		if ((sctp = listen_match(dst, ports, ipif_seqid,
375 		    zoneid)) == NULL) {
376 			return (NULL);
377 		}
378 	}
379 	return (sctp->sctp_connp);
380 }
381 
382 conn_t *
383 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
384     uint_t ipif_seqid, zoneid_t zoneid, mblk_t *mp)
385 {
386 	sctp_t *sctp;
387 	boolean_t shared_addr;
388 
389 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
390 	    zoneid)) == NULL) {
391 		shared_addr = (zoneid == ALL_ZONES);
392 		if (shared_addr) {
393 			zoneid = tsol_mlp_findzone(IPPROTO_SCTP,
394 			    htons(ntohl(ports) & 0xFFFF));
395 			/*
396 			 * If no shared MLP is found, tsol_mlp_findzone returns
397 			 * ALL_ZONES.  In that case, we assume it's SLP, and
398 			 * search for the zone based on the packet label.
399 			 * That will also return ALL_ZONES on failure.
400 			 */
401 			if (zoneid == ALL_ZONES)
402 				zoneid = tsol_packet_to_zoneid(mp);
403 			if (zoneid == ALL_ZONES)
404 				return (NULL);
405 		}
406 		/* Not in conn fanout; check listen fanout */
407 		if ((sctp = listen_match(dst, ports, ipif_seqid,
408 		    zoneid)) == NULL) {
409 			return (NULL);
410 		}
411 		/*
412 		 * On systems running trusted extensions, check if dst
413 		 * should accept the packet. "IPV6_VERSION" indicates
414 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
415 		 * IPv6 addresses are supported.
416 		 */
417 		if (is_system_labeled() &&
418 		    !tsol_receive_local(mp, dst, IPV6_VERSION,
419 		    shared_addr, sctp->sctp_connp)) {
420 			DTRACE_PROBE3(
421 			    tx__ip__log__info__classify__sctp,
422 			    char *,
423 			    "connp(1) could not receive mp(2)",
424 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
425 			SCTP_REFRELE(sctp);
426 			return (NULL);
427 		}
428 	}
429 	return (sctp->sctp_connp);
430 }
431 
/*
 * Fanout for SCTP packets
 * The caller puts <fport, lport> in the ports parameter.
 *
 * mp is the inbound message; when mctl_present is set it is a leading
 * control block and the packet itself is mp->b_cont.  ipha points at the
 * IP header, which may be IPv4 or IPv6 (decided by IPH_HDR_VERSION).
 * flags and ip_policy are only passed through to ip_fanout_sctp_raw().
 *
 * The packet is classified with sctp_fanout().  Without a match it is
 * handed to ip_fanout_sctp_raw().  With a match it goes through the inbound
 * IPsec policy check, optional IPP processing and optional ancillary info
 * insertion, and is then either processed directly by sctp_input_data() or
 * queued with sctp_add_recvq().  The reference on the matched sctp_t is
 * released with SCTP_REFRELE on every exit path after the match.
 */
/* ARGSUSED */
void
ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha,
    uint32_t ports, uint_t flags, boolean_t mctl_present, boolean_t ip_policy,
    uint_t ipif_seqid, zoneid_t zoneid)
{
	sctp_t *sctp;
	boolean_t isv4;
	conn_t *connp;
	mblk_t *first_mp;
	ip6_t *ip6h;
	in6_addr_t map_src, map_dst;
	in6_addr_t *src, *dst;

	/* first_mp is the head of the message; mp is the packet itself. */
	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		ASSERT(mp != NULL);
	}

	/* Assume IP provides aligned packets - otherwise toss */
	if (!OK_32PTR(mp->b_rptr)) {
		BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards);
		freemsg(first_mp);
		return;
	}

	/*
	 * The lookup works on in6_addr_t; IPv4 addresses are converted to
	 * their IPv4-mapped form first.
	 */
	if (IPH_HDR_VERSION(ipha) == IPV6_VERSION) {
		ip6h = (ip6_t *)ipha;
		src = &ip6h->ip6_src;
		dst = &ip6h->ip6_dst;
		isv4 = B_FALSE;
	} else {
		ip6h = NULL;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
		src = &map_src;
		dst = &map_dst;
		isv4 = B_TRUE;
	}
	/* No SCTP endpoint wants it: let the raw fanout have the message. */
	if ((connp = sctp_fanout(src, dst, ports, ipif_seqid, zoneid, mp)) ==
	    NULL) {
		ip_fanout_sctp_raw(first_mp, recv_ill, ipha, isv4,
		    ports, mctl_present, flags, ip_policy,
		    ipif_seqid, zoneid);
		return;
	}
	sctp = CONN2SCTP(connp);

	/* Found a client; up it goes */
	BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsHCInDelivers);

	/*
	 * We check some fields in conn_t without holding a lock.
	 * This should be fine.
	 */
	if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) {
		first_mp = ipsec_check_inbound_policy(first_mp, connp,
		    ipha, NULL, mctl_present);
		/*
		 * A NULL return means the packet does not go any further;
		 * presumably the policy code has disposed of the message,
		 * since nothing is freed here.
		 */
		if (first_mp == NULL) {
			SCTP_REFRELE(sctp);
			return;
		}
	}

	/* Initiate IPPF processing for fastpath */
	if (IPP_ENABLED(IPP_LOCAL_IN)) {
		ip_process(IPP_LOCAL_IN, &mp,
		    recv_ill->ill_phyint->phyint_ifindex);
		if (mp == NULL) {
			SCTP_REFRELE(sctp);
			/* Only the control block is left for us to free. */
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			/*
			 * ip_process might return a new mp.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	if (connp->conn_recvif || connp->conn_recvslla ||
	    connp->conn_ipv6_recvpktinfo) {
		int in_flags = 0;

		if (connp->conn_recvif || connp->conn_ipv6_recvpktinfo) {
			in_flags = IPF_RECVIF;
		}
		if (connp->conn_recvslla) {
			in_flags |= IPF_RECVSLLA;
		}
		/*
		 * NOTE(review): ip6h was derived from ipha before
		 * ip_process() ran; if ip_process() can replace the block
		 * holding the header, this pointer may be stale - confirm.
		 */
		if (isv4) {
			mp = ip_add_info(mp, recv_ill, in_flags);
		} else {
			mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst);
		}
		if (mp == NULL) {
			SCTP_REFRELE(sctp);
			/* Only the control block is left for us to free. */
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			/*
			 * ip_add_info might return a new mp.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	mutex_enter(&sctp->sctp_lock);
	if (sctp->sctp_running) {
		/*
		 * The sctp_t is already marked running, so the packet is
		 * queued on its recvq instead of being processed here.  The
		 * control block, if any, travels along in b_prev.
		 */
		if (mctl_present)
			mp->b_prev = first_mp;
		if (!sctp_add_recvq(sctp, mp, B_FALSE)) {
			BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards);
			freemsg(first_mp);
		}
		mutex_exit(&sctp->sctp_lock);
	} else {
		/* Mark the sctp_t running on behalf of this thread. */
		sctp->sctp_running = B_TRUE;
		mutex_exit(&sctp->sctp_lock);

		mutex_enter(&sctp->sctp_recvq_lock);
		if (sctp->sctp_recvq != NULL) {
			/*
			 * Packets are already queued; this one is queued
			 * too rather than being processed ahead of them.
			 */
			if (mctl_present)
				mp->b_prev = first_mp;
			if (!sctp_add_recvq(sctp, mp, B_TRUE)) {
				BUMP_MIB(recv_ill->ill_ip_mib,
				    ipIfStatsInDiscards);
				freemsg(first_mp);
			}
			mutex_exit(&sctp->sctp_recvq_lock);
			WAKE_SCTP(sctp);
		} else {
			/* Nothing queued: process the packet right here. */
			mutex_exit(&sctp->sctp_recvq_lock);
			sctp_input_data(sctp, mp, (mctl_present ? first_mp :
			    NULL));
			WAKE_SCTP(sctp);
			sctp_process_sendq(sctp);
		}
	}
	SCTP_REFRELE(sctp);
}
586 
587 void
588 sctp_conn_hash_remove(sctp_t *sctp)
589 {
590 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
591 
592 	if (!tf) {
593 		return;
594 	}
595 	/*
596 	 * On a clustered note send this notification to the clustering
597 	 * subsystem.
598 	 */
599 	if (cl_sctp_disconnect != NULL) {
600 		(*cl_sctp_disconnect)(sctp->sctp_family,
601 		    (cl_sctp_handle_t)sctp);
602 	}
603 
604 	mutex_enter(&tf->tf_lock);
605 	ASSERT(tf->tf_sctp);
606 	if (tf->tf_sctp == sctp) {
607 		tf->tf_sctp = sctp->sctp_conn_hash_next;
608 		if (sctp->sctp_conn_hash_next) {
609 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
610 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
611 		}
612 	} else {
613 		ASSERT(sctp->sctp_conn_hash_prev);
614 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
615 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
616 		    sctp->sctp_conn_hash_next;
617 
618 		if (sctp->sctp_conn_hash_next) {
619 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
620 			    == sctp);
621 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
622 			    sctp->sctp_conn_hash_prev;
623 		}
624 	}
625 	sctp->sctp_conn_hash_next = NULL;
626 	sctp->sctp_conn_hash_prev = NULL;
627 	sctp->sctp_conn_tfp = NULL;
628 	mutex_exit(&tf->tf_lock);
629 }
630 
631 void
632 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
633 {
634 	if (sctp->sctp_conn_tfp) {
635 		sctp_conn_hash_remove(sctp);
636 	}
637 
638 	if (!caller_holds_lock) {
639 		mutex_enter(&tf->tf_lock);
640 	} else {
641 		ASSERT(MUTEX_HELD(&tf->tf_lock));
642 	}
643 
644 	sctp->sctp_conn_hash_next = tf->tf_sctp;
645 	if (tf->tf_sctp) {
646 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
647 	}
648 	sctp->sctp_conn_hash_prev = NULL;
649 	tf->tf_sctp = sctp;
650 	sctp->sctp_conn_tfp = tf;
651 	if (!caller_holds_lock) {
652 		mutex_exit(&tf->tf_lock);
653 	}
654 }
655 
656 void
657 sctp_listen_hash_remove(sctp_t *sctp)
658 {
659 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
660 
661 	if (!tf) {
662 		return;
663 	}
664 	/*
665 	 * On a clustered note send this notification to the clustering
666 	 * subsystem.
667 	 */
668 	if (cl_sctp_unlisten != NULL) {
669 		uchar_t	*slist;
670 		ssize_t	ssize;
671 
672 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
673 		slist = kmem_alloc(ssize, KM_SLEEP);
674 		sctp_get_saddr_list(sctp, slist, ssize);
675 		(*cl_sctp_unlisten)(sctp->sctp_family, slist,
676 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
677 		/* list will be freed by the clustering module */
678 	}
679 
680 	mutex_enter(&tf->tf_lock);
681 	ASSERT(tf->tf_sctp);
682 	if (tf->tf_sctp == sctp) {
683 		tf->tf_sctp = sctp->sctp_listen_hash_next;
684 		if (sctp->sctp_listen_hash_next) {
685 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
686 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
687 		}
688 	} else {
689 		ASSERT(sctp->sctp_listen_hash_prev);
690 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
691 		    sctp);
692 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
693 		    sctp->sctp_listen_hash_next;
694 
695 		if (sctp->sctp_listen_hash_next) {
696 			ASSERT(
697 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev ==
698 			    sctp);
699 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev =
700 			    sctp->sctp_listen_hash_prev;
701 		}
702 	}
703 	sctp->sctp_listen_hash_next = NULL;
704 	sctp->sctp_listen_hash_prev = NULL;
705 	sctp->sctp_listen_tfp = NULL;
706 	mutex_exit(&tf->tf_lock);
707 }
708 
709 void
710 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
711 {
712 	if (sctp->sctp_listen_tfp) {
713 		sctp_listen_hash_remove(sctp);
714 	}
715 
716 	mutex_enter(&tf->tf_lock);
717 	sctp->sctp_listen_hash_next = tf->tf_sctp;
718 	if (tf->tf_sctp) {
719 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
720 	}
721 	sctp->sctp_listen_hash_prev = NULL;
722 	tf->tf_sctp = sctp;
723 	sctp->sctp_listen_tfp = tf;
724 	mutex_exit(&tf->tf_lock);
725 	/*
726 	 * On a clustered note send this notification to the clustering
727 	 * subsystem.
728 	 */
729 	if (cl_sctp_listen != NULL) {
730 		uchar_t	*slist;
731 		ssize_t	ssize;
732 
733 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
734 		slist = kmem_alloc(ssize, KM_SLEEP);
735 		sctp_get_saddr_list(sctp, slist, ssize);
736 		(*cl_sctp_listen)(sctp->sctp_family, slist,
737 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
738 		/* list will be freed by the clustering module */
739 	}
740 }
741 
742 /*
743  * Hash list insertion routine for sctp_t structures.
744  * Inserts entries with the ones bound to a specific IP address first
745  * followed by those bound to INADDR_ANY.
746  */
747 void
748 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
749 {
750 	sctp_t	**sctpp;
751 	sctp_t	*sctpnext;
752 
753 	if (sctp->sctp_ptpbhn != NULL) {
754 		ASSERT(!caller_holds_lock);
755 		sctp_bind_hash_remove(sctp);
756 	}
757 	sctpp = &tbf->tf_sctp;
758 	if (!caller_holds_lock) {
759 		mutex_enter(&tbf->tf_lock);
760 	} else {
761 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
762 	}
763 	sctpnext = sctpp[0];
764 	if (sctpnext) {
765 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
766 	}
767 	sctp->sctp_bind_hash = sctpnext;
768 	sctp->sctp_ptpbhn = sctpp;
769 	sctpp[0] = sctp;
770 	/* For sctp_*_hash_remove */
771 	sctp->sctp_bind_lockp = &tbf->tf_lock;
772 	if (!caller_holds_lock)
773 		mutex_exit(&tbf->tf_lock);
774 }
775 
776 /*
777  * Hash list removal routine for sctp_t structures.
778  */
779 void
780 sctp_bind_hash_remove(sctp_t *sctp)
781 {
782 	sctp_t	*sctpnext;
783 	kmutex_t *lockp;
784 
785 	lockp = sctp->sctp_bind_lockp;
786 
787 	if (sctp->sctp_ptpbhn == NULL)
788 		return;
789 
790 	ASSERT(lockp != NULL);
791 	mutex_enter(lockp);
792 	if (sctp->sctp_ptpbhn) {
793 		sctpnext = sctp->sctp_bind_hash;
794 		if (sctpnext) {
795 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
796 			sctp->sctp_bind_hash = NULL;
797 		}
798 		*sctp->sctp_ptpbhn = sctpnext;
799 		sctp->sctp_ptpbhn = NULL;
800 	}
801 	mutex_exit(lockp);
802 	sctp->sctp_bind_lockp = NULL;
803 }
804 
805 /*
806  * Similar to but more general than ip_sctp's conn_match().
807  *
808  * Matches sets of addresses as follows: if the argument addr set is
809  * a complete subset of the corresponding addr set in the sctp_t, it
810  * is a match.
811  *
812  * Caller must hold tf->tf_lock.
813  *
814  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
815  */
816 sctp_t *
817 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
818     int min_state)
819 {
820 
821 	sctp_t *sctp;
822 	sctp_faddr_t *fp;
823 
824 	ASSERT(MUTEX_HELD(&tf->tf_lock));
825 
826 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
827 		if (*ports != sctp->sctp_ports || sctp->sctp_state <
828 		    min_state) {
829 			continue;
830 		}
831 
832 		/* check for faddr match */
833 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
834 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
835 				break;
836 			}
837 		}
838 
839 		if (!fp) {
840 			/* no faddr match; keep looking */
841 			continue;
842 		}
843 
844 		/* check for laddr subset match */
845 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_SUBSET) {
846 			goto done;
847 		}
848 
849 		/* no match; continue searching */
850 	}
851 
852 done:
853 	if (sctp) {
854 		SCTP_REFHOLD(sctp);
855 	}
856 	return (sctp);
857 }
858 
859 boolean_t
860 ip_fanout_sctp_raw_match(conn_t *connp, uint32_t ports, ipha_t *ipha)
861 {
862 	uint16_t lport;
863 
864 	if (connp->conn_fully_bound) {
865 		return (IPCL_CONN_MATCH(connp, IPPROTO_SCTP, ipha->ipha_src,
866 		    ipha->ipha_dst, ports));
867 	} else {
868 		lport = htons(ntohl(ports) & 0xFFFF);
869 		return (IPCL_BIND_MATCH(connp, IPPROTO_SCTP, ipha->ipha_dst,
870 		    lport));
871 	}
872 }
873 
874 boolean_t
875 ip_fanout_sctp_raw_match_v6(conn_t *connp, uint32_t ports, ip6_t *ip6h,
876     boolean_t for_v4)
877 {
878 	uint16_t lport;
879 	in6_addr_t	v6dst;
880 
881 	if (!for_v4 && connp->conn_fully_bound) {
882 		return (IPCL_CONN_MATCH_V6(connp, IPPROTO_SCTP, ip6h->ip6_src,
883 		    ip6h->ip6_dst, ports));
884 	} else {
885 		lport = htons(ntohl(ports) & 0xFFFF);
886 		if (for_v4)
887 			v6dst = ipv6_all_zeros;
888 		else
889 			v6dst = ip6h->ip6_dst;
890 		return (IPCL_BIND_MATCH_V6(connp, IPPROTO_SCTP, v6dst, lport));
891 	}
892 }
893