xref: /illumos-gate/usr/src/uts/common/inet/sctp/sctp_hash.c (revision 60a3f738d56f92ae8b80e4b62a2331c6e1f2311f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/socket.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/tsol/tndb.h>
33 #include <sys/tsol/tnet.h>
34 
35 #include <netinet/in.h>
36 #include <netinet/ip6.h>
37 
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ipclassifier.h>
42 #include <inet/ipsec_impl.h>
43 #include <inet/ipp_common.h>
44 #include <inet/sctp_ip.h>
45 
46 #include "sctp_impl.h"
47 #include "sctp_addr.h"
48 
/* SCTP bind hash list - all sctp_t with state >= BOUND. */
sctp_tf_t	sctp_bind_fanout[SCTP_BIND_FANOUT_SIZE];
/* SCTP listen hash list - all sctp_t with state == LISTEN. */
sctp_tf_t	sctp_listen_fanout[SCTP_LISTEN_FANOUT_SIZE];

/* Default association hash size.  The size must be a power of 2. */
#define	SCTP_CONN_HASH_SIZE	8192

/*
 * Association hash table and its bucket count.  The table itself is
 * allocated by sctp_hash_init(), which rounds sctp_conn_hash_size up to a
 * power of two and raises it to SCTP_CONN_HASH_SIZE if it is smaller.
 */
sctp_tf_t	*sctp_conn_fanout;
uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
59 
/*
 * Cluster networking hook for traversing current assoc list.
 * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
 */
int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
    boolean_t);
67 
68 void
69 sctp_hash_init()
70 {
71 	int i;
72 
73 	if (sctp_conn_hash_size & (sctp_conn_hash_size - 1)) {
74 		/* Not a power of two. Round up to nearest power of two */
75 		for (i = 0; i < 31; i++) {
76 			if (sctp_conn_hash_size < (1 << i))
77 				break;
78 		}
79 		sctp_conn_hash_size = 1 << i;
80 	}
81 	if (sctp_conn_hash_size < SCTP_CONN_HASH_SIZE) {
82 		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
83 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
84 		    sctp_conn_hash_size);
85 	}
86 	sctp_conn_fanout =
87 		(sctp_tf_t *)kmem_zalloc(sctp_conn_hash_size *
88 		    sizeof (sctp_tf_t),	KM_SLEEP);
89 	for (i = 0; i < sctp_conn_hash_size; i++) {
90 		mutex_init(&sctp_conn_fanout[i].tf_lock, NULL,
91 			    MUTEX_DEFAULT, NULL);
92 	}
93 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
94 		mutex_init(&sctp_listen_fanout[i].tf_lock, NULL,
95 		    MUTEX_DEFAULT, NULL);
96 	}
97 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
98 		mutex_init(&sctp_bind_fanout[i].tf_lock, NULL,
99 		    MUTEX_DEFAULT, NULL);
100 	}
101 }
102 
103 void
104 sctp_hash_destroy()
105 {
106 	int i;
107 
108 	for (i = 0; i < sctp_conn_hash_size; i++) {
109 		mutex_destroy(&sctp_conn_fanout[i].tf_lock);
110 	}
111 	kmem_free(sctp_conn_fanout, sctp_conn_hash_size * sizeof (sctp_tf_t));
112 	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
113 		mutex_destroy(&sctp_listen_fanout[i].tf_lock);
114 	}
115 	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
116 		mutex_destroy(&sctp_bind_fanout[i].tf_lock);
117 	}
118 }
119 
120 /*
121  * Walk the SCTP global list and refrele the ire for this ipif
122  * This is called when an address goes down, so that we release any reference
123  * to the ire associated with this address. Additionally, for any SCTP if
124  * this was the only/last address in its source list, we don't kill the
125  * assoc., if there is no address added subsequently, or if this does not
126  * come up, then the assoc. will die a natural death (i.e. timeout).
127  */
128 void
129 sctp_ire_cache_flush(ipif_t *ipif)
130 {
131 	sctp_t			*sctp;
132 	sctp_t			*sctp_prev = NULL;
133 	sctp_faddr_t		*fp;
134 	conn_t			*connp;
135 	ire_t			*ire;
136 
137 	sctp = gsctp;
138 	mutex_enter(&sctp_g_lock);
139 	while (sctp != NULL) {
140 		mutex_enter(&sctp->sctp_reflock);
141 		if (sctp->sctp_condemned) {
142 			mutex_exit(&sctp->sctp_reflock);
143 			sctp = list_next(&sctp_g_list, sctp);
144 			continue;
145 		}
146 		sctp->sctp_refcnt++;
147 		mutex_exit(&sctp->sctp_reflock);
148 		mutex_exit(&sctp_g_lock);
149 		if (sctp_prev != NULL)
150 			SCTP_REFRELE(sctp_prev);
151 
152 		RUN_SCTP(sctp);
153 		connp = sctp->sctp_connp;
154 		mutex_enter(&connp->conn_lock);
155 		ire = connp->conn_ire_cache;
156 		if (ire != NULL && ire->ire_ipif == ipif) {
157 			connp->conn_ire_cache = NULL;
158 			mutex_exit(&connp->conn_lock);
159 			IRE_REFRELE_NOTR(ire);
160 		} else {
161 			mutex_exit(&connp->conn_lock);
162 		}
163 		/* check for ires cached in faddr */
164 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
165 			/*
166 			 * If this ipif is being used as the source address
167 			 * we need to update it as well, else we will end
168 			 * up using the dead source address.
169 			 */
170 			ire = fp->ire;
171 			if (ire != NULL && ire->ire_ipif == ipif) {
172 				fp->ire = NULL;
173 				IRE_REFRELE_NOTR(ire);
174 			}
175 			/*
176 			 * This may result in setting the fp as unreachable,
177 			 * i.e. if all the source addresses are down. In
178 			 * that case the assoc. would timeout.
179 			 */
180 			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
181 			    &fp->saddr)) {
182 				sctp_set_saddr(sctp, fp);
183 				if (fp == sctp->sctp_current &&
184 				    fp->state != SCTP_FADDRS_UNREACH) {
185 					sctp_set_faddr_current(sctp, fp);
186 				}
187 			}
188 		}
189 		WAKE_SCTP(sctp);
190 		sctp_prev = sctp;
191 		mutex_enter(&sctp_g_lock);
192 		sctp = list_next(&sctp_g_list, sctp);
193 	}
194 	mutex_exit(&sctp_g_lock);
195 	if (sctp_prev != NULL)
196 		SCTP_REFRELE(sctp_prev);
197 }
198 
199 /*
200  * Exported routine for extracting active SCTP associations.
201  * Like TCP, we terminate the walk if the callback returns non-zero.
202  */
203 int
204 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg,
205     boolean_t cansleep)
206 {
207 	sctp_t		*sctp;
208 	sctp_t		*sctp_prev;
209 	cl_sctp_info_t	cl_sctpi;
210 	uchar_t		*slist;
211 	uchar_t		*flist;
212 
213 	sctp = gsctp;
214 	sctp_prev = NULL;
215 	mutex_enter(&sctp_g_lock);
216 	while (sctp != NULL) {
217 		size_t	ssize;
218 		size_t	fsize;
219 
220 		mutex_enter(&sctp->sctp_reflock);
221 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
222 			mutex_exit(&sctp->sctp_reflock);
223 			sctp = list_next(&sctp_g_list, sctp);
224 			continue;
225 		}
226 		sctp->sctp_refcnt++;
227 		mutex_exit(&sctp->sctp_reflock);
228 		mutex_exit(&sctp_g_lock);
229 		if (sctp_prev != NULL)
230 			SCTP_REFRELE(sctp_prev);
231 		RUN_SCTP(sctp);
232 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
233 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
234 
235 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
236 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
237 		if (slist == NULL || flist == NULL) {
238 			WAKE_SCTP(sctp);
239 			if (slist != NULL)
240 				kmem_free(slist, ssize);
241 			if (flist != NULL)
242 				kmem_free(flist, fsize);
243 			SCTP_REFRELE(sctp);
244 			return (1);
245 		}
246 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
247 		sctp_get_saddr_list(sctp, slist, ssize);
248 		sctp_get_faddr_list(sctp, flist, fsize);
249 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
250 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
251 		cl_sctpi.cl_sctpi_family = sctp->sctp_family;
252 		cl_sctpi.cl_sctpi_ipversion = sctp->sctp_ipversion;
253 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
254 		cl_sctpi.cl_sctpi_lport = sctp->sctp_lport;
255 		cl_sctpi.cl_sctpi_fport = sctp->sctp_fport;
256 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
257 		WAKE_SCTP(sctp);
258 		cl_sctpi.cl_sctpi_laddrp = slist;
259 		cl_sctpi.cl_sctpi_faddrp = flist;
260 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
261 			kmem_free(slist, ssize);
262 			kmem_free(flist, fsize);
263 			SCTP_REFRELE(sctp);
264 			return (1);
265 		}
266 		/* list will be freed by cl_callback */
267 		sctp_prev = sctp;
268 		mutex_enter(&sctp_g_lock);
269 		sctp = list_next(&sctp_g_list, sctp);
270 	}
271 	mutex_exit(&sctp_g_lock);
272 	if (sctp_prev != NULL)
273 		SCTP_REFRELE(sctp_prev);
274 	return (0);
275 }
276 
277 sctp_t *
278 sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
279     uint_t ipif_seqid, zoneid_t zoneid)
280 {
281 	sctp_tf_t		*tf;
282 	sctp_t			*sctp;
283 	sctp_faddr_t		*fp;
284 
285 	tf = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]);
286 	mutex_enter(&tf->tf_lock);
287 
288 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
289 		if (ports != sctp->sctp_ports ||
290 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
291 			continue;
292 		}
293 
294 		/* check for faddr match */
295 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
296 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
297 				break;
298 			}
299 		}
300 
301 		if (!fp) {
302 			/* no faddr match; keep looking */
303 			continue;
304 		}
305 
306 		/* check for laddr match */
307 		if (ipif_seqid == 0) {
308 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
309 				SCTP_REFHOLD(sctp);
310 				goto done;
311 			}
312 		} else {
313 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
314 				SCTP_REFHOLD(sctp);
315 				goto done;
316 			}
317 		/* no match; continue to the next in the chain */
318 		}
319 	}
320 
321 done:
322 	mutex_exit(&tf->tf_lock);
323 	return (sctp);
324 }
325 
326 static sctp_t *
327 listen_match(in6_addr_t *laddr, uint32_t ports, uint_t ipif_seqid,
328     zoneid_t zoneid)
329 {
330 	sctp_t			*sctp;
331 	sctp_tf_t		*tf;
332 	uint16_t		lport;
333 
334 	lport = ((uint16_t *)&ports)[1];
335 
336 	tf = &(sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
337 	mutex_enter(&tf->tf_lock);
338 
339 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
340 		if (lport != sctp->sctp_lport ||
341 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
342 			continue;
343 		}
344 
345 		if (ipif_seqid == 0) {
346 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
347 				SCTP_REFHOLD(sctp);
348 				goto done;
349 			}
350 		} else {
351 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
352 				SCTP_REFHOLD(sctp);
353 				goto done;
354 			}
355 		}
356 		/* no match; continue to the next in the chain */
357 	}
358 
359 done:
360 	mutex_exit(&tf->tf_lock);
361 	return (sctp);
362 }
363 
364 /* called by ipsec_sctp_pol */
365 conn_t *
366 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
367     uint_t ipif_seqid, zoneid_t zoneid)
368 {
369 	sctp_t *sctp;
370 
371 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
372 	    zoneid)) == NULL) {
373 		/* Not in conn fanout; check listen fanout */
374 		if ((sctp = listen_match(dst, ports, ipif_seqid,
375 		    zoneid)) == NULL) {
376 			return (NULL);
377 		}
378 	}
379 	return (sctp->sctp_connp);
380 }
381 
382 conn_t *
383 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
384     uint_t ipif_seqid, zoneid_t zoneid, mblk_t *mp)
385 {
386 	sctp_t *sctp;
387 	boolean_t shared_addr;
388 
389 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
390 	    zoneid)) == NULL) {
391 		shared_addr = (zoneid == ALL_ZONES);
392 		if (shared_addr) {
393 			zoneid = tsol_mlp_findzone(IPPROTO_SCTP,
394 			    htons(ntohl(ports) & 0xFFFF));
395 			/*
396 			 * If no shared MLP is found, tsol_mlp_findzone returns
397 			 * ALL_ZONES.  In that case, we assume it's SLP, and
398 			 * search for the zone based on the packet label.
399 			 * That will also return ALL_ZONES on failure.
400 			 */
401 			if (zoneid == ALL_ZONES)
402 				zoneid = tsol_packet_to_zoneid(mp);
403 			if (zoneid == ALL_ZONES)
404 				return (NULL);
405 		}
406 		/* Not in conn fanout; check listen fanout */
407 		if ((sctp = listen_match(dst, ports, ipif_seqid,
408 		    zoneid)) == NULL) {
409 			return (NULL);
410 		}
411 		/*
412 		 * On systems running trusted extensions, check if dst
413 		 * should accept the packet. "IPV6_VERSION" indicates
414 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
415 		 * IPv6 addresses are supported.
416 		 */
417 		if (is_system_labeled() &&
418 		    !tsol_receive_local(mp, dst, IPV6_VERSION,
419 		    shared_addr, sctp->sctp_connp)) {
420 			DTRACE_PROBE3(
421 			    tx__ip__log__info__classify__sctp,
422 			    char *,
423 			    "connp(1) could not receive mp(2)",
424 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
425 			SCTP_REFRELE(sctp);
426 			return (NULL);
427 		}
428 	}
429 	return (sctp->sctp_connp);
430 }
431 
/*
 * Fanout for SCTP packets
 * The caller puts <fport, lport> in the ports parameter.
 *
 * The packet is classified with sctp_fanout().  If no endpoint is found it
 * is passed to ip_fanout_sctp_raw().  Otherwise it goes through the inbound
 * IPsec policy check, IPPF processing and optional receive-info tagging,
 * and is then either processed directly by sctp_input_data() or queued
 * with sctp_add_recvq().  The reference sctp_fanout() left on the sctp_t
 * is released on every path that found an endpoint.
 *
 * When mctl_present is set, the mblk passed in is a leading control block
 * and the packet itself hangs off its b_cont.
 */
/* ARGSUSED */
void
ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha,
    uint32_t ports, uint_t flags, boolean_t mctl_present, boolean_t ip_policy,
    uint_t ipif_seqid, zoneid_t zoneid)
{
	sctp_t *sctp;
	boolean_t isv4;
	conn_t *connp;
	mblk_t *first_mp;
	ip6_t *ip6h;
	in6_addr_t map_src, map_dst;
	in6_addr_t *src, *dst;

	/* first_mp is the head of the message; mp is the packet itself. */
	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		ASSERT(mp != NULL);
	}

	/* Assume IP provides aligned packets - otherwise toss */
	if (!OK_32PTR(mp->b_rptr)) {
		BUMP_MIB(&ip_mib, ipInDiscards);
		freemsg(first_mp);
		return;
	}

	/*
	 * The lookup works on 16 byte addresses; IPv4 addresses are
	 * converted to their v4-mapped form in local storage.
	 */
	if (IPH_HDR_VERSION(ipha) == IPV6_VERSION) {
		ip6h = (ip6_t *)ipha;
		src = &ip6h->ip6_src;
		dst = &ip6h->ip6_dst;
		isv4 = B_FALSE;
	} else {
		ip6h = NULL;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
		src = &map_src;
		dst = &map_dst;
		isv4 = B_TRUE;
	}
	/* No SCTP endpoint for this packet: hand it to the raw fanout. */
	if ((connp = sctp_fanout(src, dst, ports, ipif_seqid, zoneid, mp)) ==
	    NULL) {
		ip_fanout_sctp_raw(first_mp, recv_ill, ipha, isv4,
		    ports, mctl_present, flags, ip_policy,
		    ipif_seqid, zoneid);
		return;
	}
	sctp = CONN2SCTP(connp);

	/* Found a client; up it goes */
	BUMP_MIB(&ip_mib, ipInDelivers);

	/*
	 * We check some fields in conn_t without holding a lock.
	 * This should be fine.
	 */
	if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) {
		first_mp = ipsec_check_inbound_policy(first_mp, connp,
		    ipha, NULL, mctl_present);
		/* A NULL return means there is nothing left to deliver. */
		if (first_mp == NULL) {
			SCTP_REFRELE(sctp);
			return;
		}
	}

	/* Initiate IPPF processing for fastpath */
	if (IPP_ENABLED(IPP_LOCAL_IN)) {
		ip_process(IPP_LOCAL_IN, &mp,
		    recv_ill->ill_phyint->phyint_ifindex);
		if (mp == NULL) {
			/* Packet is gone; only the control block remains. */
			SCTP_REFRELE(sctp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			/*
			 * ip_process might return a new mp.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	/* Attach receive info if the conn asked for any of it. */
	if (connp->conn_recvif || connp->conn_recvslla ||
	    connp->conn_ipv6_recvpktinfo) {
		int in_flags = 0;

		if (connp->conn_recvif || connp->conn_ipv6_recvpktinfo) {
			in_flags = IPF_RECVIF;
		}
		if (connp->conn_recvslla) {
			in_flags |= IPF_RECVSLLA;
		}
		/* Note that in_flags is only consumed on the IPv4 path. */
		if (isv4) {
			mp = ip_add_info(mp, recv_ill, in_flags);
		} else {
			mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst);
		}
		if (mp == NULL) {
			/* Packet is gone; only the control block remains. */
			SCTP_REFRELE(sctp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			/*
			 * ip_add_info might return a new mp.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	mutex_enter(&sctp->sctp_lock);
	if (sctp->sctp_running) {
		/*
		 * The sctp_t is already marked running, so queue the packet
		 * instead of processing it here.  The control block, if
		 * any, is recorded in b_prev of the packet (presumably so
		 * whoever drains the queue can recover it).
		 */
		if (mctl_present)
			mp->b_prev = first_mp;
		if (!sctp_add_recvq(sctp, mp, B_FALSE)) {
			BUMP_MIB(&ip_mib, ipInDiscards);
			freemsg(first_mp);
		}
		mutex_exit(&sctp->sctp_lock);
	} else {
		/* Mark the sctp_t running; WAKE_SCTP() below ends that. */
		sctp->sctp_running = B_TRUE;
		mutex_exit(&sctp->sctp_lock);

		mutex_enter(&sctp->sctp_recvq_lock);
		if (sctp->sctp_recvq != NULL) {
			/*
			 * Packets are already queued: append this one
			 * rather than processing it directly.
			 */
			if (mctl_present)
				mp->b_prev = first_mp;
			if (!sctp_add_recvq(sctp, mp, B_TRUE)) {
				BUMP_MIB(&ip_mib, ipInDiscards);
				freemsg(first_mp);
			}
			mutex_exit(&sctp->sctp_recvq_lock);
			WAKE_SCTP(sctp);
		} else {
			/* Nothing queued: process the packet right here. */
			mutex_exit(&sctp->sctp_recvq_lock);
			sctp_input_data(sctp, mp, (mctl_present ? first_mp :
			    NULL));
			WAKE_SCTP(sctp);
			sctp_process_sendq(sctp);
		}
	}
	/* Drop the reference taken by the lookup in sctp_fanout(). */
	SCTP_REFRELE(sctp);
}
585 
586 void
587 sctp_conn_hash_remove(sctp_t *sctp)
588 {
589 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
590 
591 	if (!tf) {
592 		return;
593 	}
594 	/*
595 	 * On a clustered note send this notification to the clustering
596 	 * subsystem.
597 	 */
598 	if (cl_sctp_disconnect != NULL) {
599 		(*cl_sctp_disconnect)(sctp->sctp_family,
600 		    (cl_sctp_handle_t)sctp);
601 	}
602 
603 	mutex_enter(&tf->tf_lock);
604 	ASSERT(tf->tf_sctp);
605 	if (tf->tf_sctp == sctp) {
606 		tf->tf_sctp = sctp->sctp_conn_hash_next;
607 		if (sctp->sctp_conn_hash_next) {
608 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
609 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
610 		}
611 	} else {
612 		ASSERT(sctp->sctp_conn_hash_prev);
613 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
614 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
615 		    sctp->sctp_conn_hash_next;
616 
617 		if (sctp->sctp_conn_hash_next) {
618 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
619 			    == sctp);
620 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
621 			    sctp->sctp_conn_hash_prev;
622 		}
623 	}
624 	sctp->sctp_conn_hash_next = NULL;
625 	sctp->sctp_conn_hash_prev = NULL;
626 	sctp->sctp_conn_tfp = NULL;
627 	mutex_exit(&tf->tf_lock);
628 }
629 
630 void
631 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
632 {
633 	if (sctp->sctp_conn_tfp) {
634 		sctp_conn_hash_remove(sctp);
635 	}
636 
637 	if (!caller_holds_lock) {
638 		mutex_enter(&tf->tf_lock);
639 	} else {
640 		ASSERT(MUTEX_HELD(&tf->tf_lock));
641 	}
642 
643 	sctp->sctp_conn_hash_next = tf->tf_sctp;
644 	if (tf->tf_sctp) {
645 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
646 	}
647 	sctp->sctp_conn_hash_prev = NULL;
648 	tf->tf_sctp = sctp;
649 	sctp->sctp_conn_tfp = tf;
650 	if (!caller_holds_lock) {
651 		mutex_exit(&tf->tf_lock);
652 	}
653 }
654 
655 void
656 sctp_listen_hash_remove(sctp_t *sctp)
657 {
658 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
659 
660 	if (!tf) {
661 		return;
662 	}
663 	/*
664 	 * On a clustered note send this notification to the clustering
665 	 * subsystem.
666 	 */
667 	if (cl_sctp_unlisten != NULL) {
668 		uchar_t	*slist;
669 		ssize_t	ssize;
670 
671 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
672 		slist = kmem_alloc(ssize, KM_SLEEP);
673 		sctp_get_saddr_list(sctp, slist, ssize);
674 		(*cl_sctp_unlisten)(sctp->sctp_family, slist,
675 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
676 		/* list will be freed by the clustering module */
677 	}
678 
679 	mutex_enter(&tf->tf_lock);
680 	ASSERT(tf->tf_sctp);
681 	if (tf->tf_sctp == sctp) {
682 		tf->tf_sctp = sctp->sctp_listen_hash_next;
683 		if (sctp->sctp_listen_hash_next) {
684 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
685 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
686 		}
687 	} else {
688 		ASSERT(sctp->sctp_listen_hash_prev);
689 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
690 		    sctp);
691 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
692 		    sctp->sctp_listen_hash_next;
693 
694 		if (sctp->sctp_listen_hash_next) {
695 			ASSERT(
696 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev ==
697 			    sctp);
698 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev =
699 			    sctp->sctp_listen_hash_prev;
700 		}
701 	}
702 	sctp->sctp_listen_hash_next = NULL;
703 	sctp->sctp_listen_hash_prev = NULL;
704 	sctp->sctp_listen_tfp = NULL;
705 	mutex_exit(&tf->tf_lock);
706 }
707 
708 void
709 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
710 {
711 	if (sctp->sctp_listen_tfp) {
712 		sctp_listen_hash_remove(sctp);
713 	}
714 
715 	mutex_enter(&tf->tf_lock);
716 	sctp->sctp_listen_hash_next = tf->tf_sctp;
717 	if (tf->tf_sctp) {
718 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
719 	}
720 	sctp->sctp_listen_hash_prev = NULL;
721 	tf->tf_sctp = sctp;
722 	sctp->sctp_listen_tfp = tf;
723 	mutex_exit(&tf->tf_lock);
724 	/*
725 	 * On a clustered note send this notification to the clustering
726 	 * subsystem.
727 	 */
728 	if (cl_sctp_listen != NULL) {
729 		uchar_t	*slist;
730 		ssize_t	ssize;
731 
732 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
733 		slist = kmem_alloc(ssize, KM_SLEEP);
734 		sctp_get_saddr_list(sctp, slist, ssize);
735 		(*cl_sctp_listen)(sctp->sctp_family, slist,
736 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
737 		/* list will be freed by the clustering module */
738 	}
739 }
740 
741 /*
742  * Hash list insertion routine for sctp_t structures.
743  * Inserts entries with the ones bound to a specific IP address first
744  * followed by those bound to INADDR_ANY.
745  */
746 void
747 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
748 {
749 	sctp_t	**sctpp;
750 	sctp_t	*sctpnext;
751 
752 	if (sctp->sctp_ptpbhn != NULL) {
753 		ASSERT(!caller_holds_lock);
754 		sctp_bind_hash_remove(sctp);
755 	}
756 	sctpp = &tbf->tf_sctp;
757 	if (!caller_holds_lock) {
758 		mutex_enter(&tbf->tf_lock);
759 	} else {
760 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
761 	}
762 	sctpnext = sctpp[0];
763 	if (sctpnext) {
764 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
765 	}
766 	sctp->sctp_bind_hash = sctpnext;
767 	sctp->sctp_ptpbhn = sctpp;
768 	sctpp[0] = sctp;
769 	/* For sctp_*_hash_remove */
770 	sctp->sctp_bind_lockp = &tbf->tf_lock;
771 	if (!caller_holds_lock)
772 		mutex_exit(&tbf->tf_lock);
773 }
774 
775 /*
776  * Hash list removal routine for sctp_t structures.
777  */
778 void
779 sctp_bind_hash_remove(sctp_t *sctp)
780 {
781 	sctp_t	*sctpnext;
782 	kmutex_t *lockp;
783 
784 	lockp = sctp->sctp_bind_lockp;
785 
786 	if (sctp->sctp_ptpbhn == NULL)
787 		return;
788 
789 	ASSERT(lockp != NULL);
790 	mutex_enter(lockp);
791 	if (sctp->sctp_ptpbhn) {
792 		sctpnext = sctp->sctp_bind_hash;
793 		if (sctpnext) {
794 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
795 			sctp->sctp_bind_hash = NULL;
796 		}
797 		*sctp->sctp_ptpbhn = sctpnext;
798 		sctp->sctp_ptpbhn = NULL;
799 	}
800 	mutex_exit(lockp);
801 	sctp->sctp_bind_lockp = NULL;
802 }
803 
804 /*
805  * Similar to but more general than ip_sctp's conn_match().
806  *
807  * Matches sets of addresses as follows: if the argument addr set is
808  * a complete subset of the corresponding addr set in the sctp_t, it
809  * is a match.
810  *
811  * Caller must hold tf->tf_lock.
812  *
813  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
814  */
815 sctp_t *
816 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
817     int min_state)
818 {
819 
820 	sctp_t *sctp;
821 	sctp_faddr_t *fp;
822 
823 	ASSERT(MUTEX_HELD(&tf->tf_lock));
824 
825 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
826 		if (*ports != sctp->sctp_ports || sctp->sctp_state <
827 		    min_state) {
828 			continue;
829 		}
830 
831 		/* check for faddr match */
832 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
833 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
834 				break;
835 			}
836 		}
837 
838 		if (!fp) {
839 			/* no faddr match; keep looking */
840 			continue;
841 		}
842 
843 		/* check for laddr subset match */
844 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_SUBSET) {
845 			goto done;
846 		}
847 
848 		/* no match; continue searching */
849 	}
850 
851 done:
852 	if (sctp) {
853 		SCTP_REFHOLD(sctp);
854 	}
855 	return (sctp);
856 }
857 
858 boolean_t
859 ip_fanout_sctp_raw_match(conn_t *connp, uint32_t ports, ipha_t *ipha)
860 {
861 	uint16_t lport;
862 
863 	if (connp->conn_fully_bound) {
864 		return (IPCL_CONN_MATCH(connp, IPPROTO_SCTP, ipha->ipha_src,
865 		    ipha->ipha_dst, ports));
866 	} else {
867 		lport = htons(ntohl(ports) & 0xFFFF);
868 		return (IPCL_BIND_MATCH(connp, IPPROTO_SCTP, ipha->ipha_dst,
869 		    lport));
870 	}
871 }
872 
873 boolean_t
874 ip_fanout_sctp_raw_match_v6(conn_t *connp, uint32_t ports, ip6_t *ip6h,
875     boolean_t for_v4)
876 {
877 	uint16_t lport;
878 	in6_addr_t	v6dst;
879 
880 	if (!for_v4 && connp->conn_fully_bound) {
881 		return (IPCL_CONN_MATCH_V6(connp, IPPROTO_SCTP, ip6h->ip6_src,
882 		    ip6h->ip6_dst, ports));
883 	} else {
884 		lport = htons(ntohl(ports) & 0xFFFF);
885 		if (for_v4)
886 			v6dst = ipv6_all_zeros;
887 		else
888 			v6dst = ip6h->ip6_dst;
889 		return (IPCL_BIND_MATCH_V6(connp, IPPROTO_SCTP, v6dst, lport));
890 	}
891 }
892