xref: /titanic_44/usr/src/uts/common/inet/sctp/sctp_hash.c (revision 2b4a78020b9c38d1b95e2f3fefa6d6e4be382d1f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/socket.h>
28 #include <sys/ddi.h>
29 #include <sys/sunddi.h>
30 #include <sys/tsol/tndb.h>
31 #include <sys/tsol/tnet.h>
32 
33 #include <netinet/in.h>
34 #include <netinet/ip6.h>
35 
36 #include <inet/common.h>
37 #include <inet/ip.h>
38 #include <inet/ip6.h>
39 #include <inet/ipclassifier.h>
40 #include <inet/ipsec_impl.h>
41 #include <inet/ipp_common.h>
42 #include <inet/sctp_ip.h>
43 
44 #include "sctp_impl.h"
45 #include "sctp_addr.h"
46 
47 /* Default association hash size.  The size must be a power of 2. */
48 #define	SCTP_CONN_HASH_SIZE	8192
49 
50 uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */
51 
52 /*
53  * Cluster networking hook for traversing current assoc list.
54  * This routine is used to extract the current list of live associations
55  * which must continue to to be dispatched to this node.
56  */
57 int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
58     boolean_t);
59 static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
60     void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
61 
62 void
63 sctp_hash_init(sctp_stack_t *sctps)
64 {
65 	int i;
66 
67 	/* Start with /etc/system value */
68 	sctps->sctps_conn_hash_size = sctp_conn_hash_size;
69 
70 	if (sctps->sctps_conn_hash_size & (sctps->sctps_conn_hash_size - 1)) {
71 		/* Not a power of two. Round up to nearest power of two */
72 		for (i = 0; i < 31; i++) {
73 			if (sctps->sctps_conn_hash_size < (1 << i))
74 				break;
75 		}
76 		sctps->sctps_conn_hash_size = 1 << i;
77 	}
78 	if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
79 		sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
80 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
81 		    sctps->sctps_conn_hash_size);
82 	}
83 	sctps->sctps_conn_fanout =
84 	    (sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
85 	    sizeof (sctp_tf_t),	KM_SLEEP);
86 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
87 		mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
88 		    MUTEX_DEFAULT, NULL);
89 	}
90 	sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
91 	    sizeof (sctp_tf_t),	KM_SLEEP);
92 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
93 		mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
94 		    MUTEX_DEFAULT, NULL);
95 	}
96 	sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
97 	    sizeof (sctp_tf_t),	KM_SLEEP);
98 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
99 		mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
100 		    MUTEX_DEFAULT, NULL);
101 	}
102 }
103 
104 void
105 sctp_hash_destroy(sctp_stack_t *sctps)
106 {
107 	int i;
108 
109 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
110 		mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
111 	}
112 	kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
113 	    sizeof (sctp_tf_t));
114 	sctps->sctps_conn_fanout = NULL;
115 
116 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
117 		mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
118 	}
119 	kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
120 	    sizeof (sctp_tf_t));
121 	sctps->sctps_listen_fanout = NULL;
122 
123 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
124 		mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
125 	}
126 	kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
127 	    sizeof (sctp_tf_t));
128 	sctps->sctps_bind_fanout = NULL;
129 }
130 
131 /*
132  * Walk the SCTP global list and refrele the ire for this ipif
133  * This is called when an address goes down, so that we release any reference
134  * to the ire associated with this address. Additionally, for any SCTP if
135  * this was the only/last address in its source list, we don't kill the
136  * assoc., if there is no address added subsequently, or if this does not
137  * come up, then the assoc. will die a natural death (i.e. timeout).
138  */
139 void
140 sctp_ire_cache_flush(ipif_t *ipif)
141 {
142 	sctp_t			*sctp;
143 	sctp_t			*sctp_prev = NULL;
144 	sctp_faddr_t		*fp;
145 	conn_t			*connp;
146 	ire_t			*ire;
147 	sctp_stack_t		*sctps = ipif->ipif_ill->ill_ipst->
148 	    ips_netstack->netstack_sctp;
149 
150 	sctp = sctps->sctps_gsctp;
151 	mutex_enter(&sctps->sctps_g_lock);
152 	while (sctp != NULL) {
153 		mutex_enter(&sctp->sctp_reflock);
154 		if (sctp->sctp_condemned) {
155 			mutex_exit(&sctp->sctp_reflock);
156 			sctp = list_next(&sctps->sctps_g_list, sctp);
157 			continue;
158 		}
159 		sctp->sctp_refcnt++;
160 		mutex_exit(&sctp->sctp_reflock);
161 		mutex_exit(&sctps->sctps_g_lock);
162 		if (sctp_prev != NULL)
163 			SCTP_REFRELE(sctp_prev);
164 
165 		RUN_SCTP(sctp);
166 		connp = sctp->sctp_connp;
167 		mutex_enter(&connp->conn_lock);
168 		ire = connp->conn_ire_cache;
169 		if (ire != NULL && ire->ire_ipif == ipif) {
170 			connp->conn_ire_cache = NULL;
171 			mutex_exit(&connp->conn_lock);
172 			IRE_REFRELE_NOTR(ire);
173 		} else {
174 			mutex_exit(&connp->conn_lock);
175 		}
176 		/* check for ires cached in faddr */
177 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
178 			/*
179 			 * If this ipif is being used as the source address
180 			 * we need to update it as well, else we will end
181 			 * up using the dead source address.
182 			 */
183 			ire = fp->ire;
184 			if (ire != NULL && ire->ire_ipif == ipif) {
185 				fp->ire = NULL;
186 				IRE_REFRELE_NOTR(ire);
187 			}
188 			/*
189 			 * This may result in setting the fp as unreachable,
190 			 * i.e. if all the source addresses are down. In
191 			 * that case the assoc. would timeout.
192 			 */
193 			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
194 			    &fp->saddr)) {
195 				sctp_set_saddr(sctp, fp);
196 				if (fp == sctp->sctp_current &&
197 				    fp->state != SCTP_FADDRS_UNREACH) {
198 					sctp_set_faddr_current(sctp, fp);
199 				}
200 			}
201 		}
202 		WAKE_SCTP(sctp);
203 		sctp_prev = sctp;
204 		mutex_enter(&sctps->sctps_g_lock);
205 		sctp = list_next(&sctps->sctps_g_list, sctp);
206 	}
207 	mutex_exit(&sctps->sctps_g_lock);
208 	if (sctp_prev != NULL)
209 		SCTP_REFRELE(sctp_prev);
210 }
211 
212 /*
213  * Exported routine for extracting active SCTP associations.
214  * Like TCP, we terminate the walk if the callback returns non-zero.
215  *
216  * Need to walk all sctp_stack_t instances since this clustering
217  * interface is assumed global for all instances
218  */
219 int
220 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
221     void *arg, boolean_t cansleep)
222 {
223 	netstack_handle_t nh;
224 	netstack_t *ns;
225 	int ret = 0;
226 
227 	netstack_next_init(&nh);
228 	while ((ns = netstack_next(&nh)) != NULL) {
229 		ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
230 		    ns->netstack_sctp);
231 		netstack_rele(ns);
232 	}
233 	netstack_next_fini(&nh);
234 	return (ret);
235 }
236 
237 static int
238 cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
239     void *arg, boolean_t cansleep, sctp_stack_t *sctps)
240 {
241 	sctp_t		*sctp;
242 	sctp_t		*sctp_prev;
243 	cl_sctp_info_t	cl_sctpi;
244 	uchar_t		*slist;
245 	uchar_t		*flist;
246 
247 	sctp = sctps->sctps_gsctp;
248 	sctp_prev = NULL;
249 	mutex_enter(&sctps->sctps_g_lock);
250 	while (sctp != NULL) {
251 		size_t	ssize;
252 		size_t	fsize;
253 
254 		mutex_enter(&sctp->sctp_reflock);
255 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
256 			mutex_exit(&sctp->sctp_reflock);
257 			sctp = list_next(&sctps->sctps_g_list, sctp);
258 			continue;
259 		}
260 		sctp->sctp_refcnt++;
261 		mutex_exit(&sctp->sctp_reflock);
262 		mutex_exit(&sctps->sctps_g_lock);
263 		if (sctp_prev != NULL)
264 			SCTP_REFRELE(sctp_prev);
265 		RUN_SCTP(sctp);
266 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
267 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
268 
269 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
270 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
271 		if (slist == NULL || flist == NULL) {
272 			WAKE_SCTP(sctp);
273 			if (slist != NULL)
274 				kmem_free(slist, ssize);
275 			if (flist != NULL)
276 				kmem_free(flist, fsize);
277 			SCTP_REFRELE(sctp);
278 			return (1);
279 		}
280 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
281 		sctp_get_saddr_list(sctp, slist, ssize);
282 		sctp_get_faddr_list(sctp, flist, fsize);
283 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
284 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
285 		cl_sctpi.cl_sctpi_family = sctp->sctp_family;
286 		cl_sctpi.cl_sctpi_ipversion = sctp->sctp_ipversion;
287 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
288 		cl_sctpi.cl_sctpi_lport = sctp->sctp_lport;
289 		cl_sctpi.cl_sctpi_fport = sctp->sctp_fport;
290 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
291 		WAKE_SCTP(sctp);
292 		cl_sctpi.cl_sctpi_laddrp = slist;
293 		cl_sctpi.cl_sctpi_faddrp = flist;
294 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
295 			kmem_free(slist, ssize);
296 			kmem_free(flist, fsize);
297 			SCTP_REFRELE(sctp);
298 			return (1);
299 		}
300 		/* list will be freed by cl_callback */
301 		sctp_prev = sctp;
302 		mutex_enter(&sctps->sctps_g_lock);
303 		sctp = list_next(&sctps->sctps_g_list, sctp);
304 	}
305 	mutex_exit(&sctps->sctps_g_lock);
306 	if (sctp_prev != NULL)
307 		SCTP_REFRELE(sctp_prev);
308 	return (0);
309 }
310 
311 sctp_t *
312 sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
313     zoneid_t zoneid, sctp_stack_t *sctps)
314 {
315 	sctp_tf_t		*tf;
316 	sctp_t			*sctp;
317 	sctp_faddr_t		*fp;
318 
319 	tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
320 	mutex_enter(&tf->tf_lock);
321 
322 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
323 		if (ports != sctp->sctp_ports ||
324 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
325 			continue;
326 		}
327 
328 		/* check for faddr match */
329 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
330 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
331 				break;
332 			}
333 		}
334 
335 		/* no faddr match; keep looking */
336 		if (fp == NULL)
337 			continue;
338 
339 		/* check for laddr match */
340 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
341 			SCTP_REFHOLD(sctp);
342 			goto done;
343 		}
344 		/* no match; continue to the next in the chain */
345 	}
346 
347 done:
348 	mutex_exit(&tf->tf_lock);
349 	return (sctp);
350 }
351 
352 static sctp_t *
353 listen_match(in6_addr_t *laddr, uint32_t ports, zoneid_t zoneid,
354     sctp_stack_t *sctps)
355 {
356 	sctp_t			*sctp;
357 	sctp_tf_t		*tf;
358 	uint16_t		lport;
359 
360 	lport = ((uint16_t *)&ports)[1];
361 
362 	tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
363 	mutex_enter(&tf->tf_lock);
364 
365 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
366 		if (lport != sctp->sctp_lport ||
367 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
368 			continue;
369 		}
370 
371 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
372 			SCTP_REFHOLD(sctp);
373 			goto done;
374 		}
375 		/* no match; continue to the next in the chain */
376 	}
377 
378 done:
379 	mutex_exit(&tf->tf_lock);
380 	return (sctp);
381 }
382 
383 /* called by ipsec_sctp_pol */
384 conn_t *
385 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
386     zoneid_t zoneid, sctp_stack_t *sctps)
387 {
388 	sctp_t *sctp;
389 
390 	if ((sctp = sctp_conn_match(src, dst, ports, zoneid, sctps)) == NULL) {
391 		/* Not in conn fanout; check listen fanout */
392 		if ((sctp = listen_match(dst, ports, zoneid, sctps)) == NULL)
393 			return (NULL);
394 	}
395 	return (sctp->sctp_connp);
396 }
397 
398 conn_t *
399 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
400     zoneid_t zoneid, mblk_t *mp, sctp_stack_t *sctps)
401 
402 {
403 	sctp_t *sctp;
404 	boolean_t shared_addr;
405 
406 	if ((sctp = sctp_conn_match(src, dst, ports, zoneid, sctps)) == NULL) {
407 		shared_addr = (zoneid == ALL_ZONES);
408 		if (shared_addr) {
409 			/*
410 			 * No need to handle exclusive-stack zones since
411 			 * ALL_ZONES only applies to the shared stack.
412 			 */
413 			zoneid = tsol_mlp_findzone(IPPROTO_SCTP,
414 			    htons(ntohl(ports) & 0xFFFF));
415 			/*
416 			 * If no shared MLP is found, tsol_mlp_findzone returns
417 			 * ALL_ZONES.  In that case, we assume it's SLP, and
418 			 * search for the zone based on the packet label.
419 			 * That will also return ALL_ZONES on failure.
420 			 */
421 			if (zoneid == ALL_ZONES)
422 				zoneid = tsol_packet_to_zoneid(mp);
423 			if (zoneid == ALL_ZONES)
424 				return (NULL);
425 		}
426 		/* Not in conn fanout; check listen fanout */
427 		if ((sctp = listen_match(dst, ports, zoneid, sctps)) == NULL)
428 			return (NULL);
429 		/*
430 		 * On systems running trusted extensions, check if dst
431 		 * should accept the packet. "IPV6_VERSION" indicates
432 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
433 		 * IPv6 addresses are supported.
434 		 */
435 		if (is_system_labeled() &&
436 		    !tsol_receive_local(mp, dst, IPV6_VERSION,
437 		    shared_addr, sctp->sctp_connp)) {
438 			DTRACE_PROBE3(
439 			    tx__ip__log__info__classify__sctp,
440 			    char *,
441 			    "connp(1) could not receive mp(2)",
442 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
443 			SCTP_REFRELE(sctp);
444 			return (NULL);
445 		}
446 	}
447 	return (sctp->sctp_connp);
448 }
449 
450 /*
451  * Fanout for SCTP packets
452  * The caller puts <fport, lport> in the ports parameter.
453  */
454 /* ARGSUSED */
455 void
456 ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha,
457     uint32_t ports, uint_t flags, boolean_t mctl_present, boolean_t ip_policy,
458     zoneid_t zoneid)
459 {
460 	sctp_t *sctp;
461 	boolean_t isv4;
462 	conn_t *connp;
463 	mblk_t *first_mp;
464 	ip6_t *ip6h;
465 	in6_addr_t map_src, map_dst;
466 	in6_addr_t *src, *dst;
467 	ip_stack_t	*ipst;
468 	ipsec_stack_t	*ipss;
469 	sctp_stack_t	*sctps;
470 
471 	ASSERT(recv_ill != NULL);
472 	ipst = recv_ill->ill_ipst;
473 	sctps = ipst->ips_netstack->netstack_sctp;
474 	ipss = ipst->ips_netstack->netstack_ipsec;
475 
476 	first_mp = mp;
477 	if (mctl_present) {
478 		mp = first_mp->b_cont;
479 		ASSERT(mp != NULL);
480 	}
481 
482 	/* Assume IP provides aligned packets - otherwise toss */
483 	if (!OK_32PTR(mp->b_rptr)) {
484 		BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards);
485 		freemsg(first_mp);
486 		return;
487 	}
488 
489 	if (IPH_HDR_VERSION(ipha) == IPV6_VERSION) {
490 		ip6h = (ip6_t *)ipha;
491 		src = &ip6h->ip6_src;
492 		dst = &ip6h->ip6_dst;
493 		isv4 = B_FALSE;
494 	} else {
495 		ip6h = NULL;
496 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
497 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
498 		src = &map_src;
499 		dst = &map_dst;
500 		isv4 = B_TRUE;
501 	}
502 	connp = sctp_fanout(src, dst, ports, zoneid, mp, sctps);
503 	if (connp == NULL) {
504 		ip_fanout_sctp_raw(first_mp, recv_ill, ipha, isv4,
505 		    ports, mctl_present, flags, ip_policy, zoneid);
506 		return;
507 	}
508 	sctp = CONN2SCTP(connp);
509 
510 	/* Found a client; up it goes */
511 	BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsHCInDelivers);
512 
513 	/*
514 	 * We check some fields in conn_t without holding a lock.
515 	 * This should be fine.
516 	 */
517 	if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || mctl_present) {
518 		first_mp = ipsec_check_inbound_policy(first_mp, connp,
519 		    ipha, NULL, mctl_present);
520 		if (first_mp == NULL) {
521 			SCTP_REFRELE(sctp);
522 			return;
523 		}
524 	}
525 
526 	/* Initiate IPPF processing for fastpath */
527 	if (IPP_ENABLED(IPP_LOCAL_IN, ipst)) {
528 		ip_process(IPP_LOCAL_IN, &mp,
529 		    recv_ill->ill_phyint->phyint_ifindex);
530 		if (mp == NULL) {
531 			SCTP_REFRELE(sctp);
532 			if (mctl_present)
533 				freeb(first_mp);
534 			return;
535 		} else if (mctl_present) {
536 			/*
537 			 * ip_process might return a new mp.
538 			 */
539 			ASSERT(first_mp != mp);
540 			first_mp->b_cont = mp;
541 		} else {
542 			first_mp = mp;
543 		}
544 	}
545 
546 	if (connp->conn_recvif || connp->conn_recvslla ||
547 	    connp->conn_ip_recvpktinfo) {
548 		int in_flags = 0;
549 
550 		if (connp->conn_recvif || connp->conn_ip_recvpktinfo) {
551 			in_flags = IPF_RECVIF;
552 		}
553 		if (connp->conn_recvslla) {
554 			in_flags |= IPF_RECVSLLA;
555 		}
556 		if (isv4) {
557 			mp = ip_add_info(mp, recv_ill, in_flags,
558 			    IPCL_ZONEID(connp), ipst);
559 		} else {
560 			mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst);
561 		}
562 		if (mp == NULL) {
563 			SCTP_REFRELE(sctp);
564 			if (mctl_present)
565 				freeb(first_mp);
566 			return;
567 		} else if (mctl_present) {
568 			/*
569 			 * ip_add_info might return a new mp.
570 			 */
571 			ASSERT(first_mp != mp);
572 			first_mp->b_cont = mp;
573 		} else {
574 			first_mp = mp;
575 		}
576 	}
577 
578 	mutex_enter(&sctp->sctp_lock);
579 	if (sctp->sctp_running) {
580 		if (mctl_present)
581 			mp->b_prev = first_mp;
582 		if (!sctp_add_recvq(sctp, mp, B_FALSE)) {
583 			BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards);
584 			freemsg(first_mp);
585 		}
586 		mutex_exit(&sctp->sctp_lock);
587 	} else {
588 		sctp->sctp_running = B_TRUE;
589 		mutex_exit(&sctp->sctp_lock);
590 
591 		mutex_enter(&sctp->sctp_recvq_lock);
592 		if (sctp->sctp_recvq != NULL) {
593 			if (mctl_present)
594 				mp->b_prev = first_mp;
595 			if (!sctp_add_recvq(sctp, mp, B_TRUE)) {
596 				BUMP_MIB(recv_ill->ill_ip_mib,
597 				    ipIfStatsInDiscards);
598 				freemsg(first_mp);
599 			}
600 			mutex_exit(&sctp->sctp_recvq_lock);
601 			WAKE_SCTP(sctp);
602 		} else {
603 			mutex_exit(&sctp->sctp_recvq_lock);
604 			sctp_input_data(sctp, mp, (mctl_present ? first_mp :
605 			    NULL));
606 			WAKE_SCTP(sctp);
607 			sctp_process_sendq(sctp);
608 		}
609 	}
610 	SCTP_REFRELE(sctp);
611 }
612 
613 void
614 sctp_conn_hash_remove(sctp_t *sctp)
615 {
616 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
617 
618 	if (!tf) {
619 		return;
620 	}
621 	/*
622 	 * On a clustered note send this notification to the clustering
623 	 * subsystem.
624 	 */
625 	if (cl_sctp_disconnect != NULL) {
626 		(*cl_sctp_disconnect)(sctp->sctp_family,
627 		    (cl_sctp_handle_t)sctp);
628 	}
629 
630 	mutex_enter(&tf->tf_lock);
631 	ASSERT(tf->tf_sctp);
632 	if (tf->tf_sctp == sctp) {
633 		tf->tf_sctp = sctp->sctp_conn_hash_next;
634 		if (sctp->sctp_conn_hash_next) {
635 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
636 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
637 		}
638 	} else {
639 		ASSERT(sctp->sctp_conn_hash_prev);
640 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
641 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
642 		    sctp->sctp_conn_hash_next;
643 
644 		if (sctp->sctp_conn_hash_next) {
645 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
646 			    == sctp);
647 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
648 			    sctp->sctp_conn_hash_prev;
649 		}
650 	}
651 	sctp->sctp_conn_hash_next = NULL;
652 	sctp->sctp_conn_hash_prev = NULL;
653 	sctp->sctp_conn_tfp = NULL;
654 	mutex_exit(&tf->tf_lock);
655 }
656 
657 void
658 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
659 {
660 	if (sctp->sctp_conn_tfp) {
661 		sctp_conn_hash_remove(sctp);
662 	}
663 
664 	if (!caller_holds_lock) {
665 		mutex_enter(&tf->tf_lock);
666 	} else {
667 		ASSERT(MUTEX_HELD(&tf->tf_lock));
668 	}
669 
670 	sctp->sctp_conn_hash_next = tf->tf_sctp;
671 	if (tf->tf_sctp) {
672 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
673 	}
674 	sctp->sctp_conn_hash_prev = NULL;
675 	tf->tf_sctp = sctp;
676 	sctp->sctp_conn_tfp = tf;
677 	if (!caller_holds_lock) {
678 		mutex_exit(&tf->tf_lock);
679 	}
680 }
681 
682 void
683 sctp_listen_hash_remove(sctp_t *sctp)
684 {
685 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
686 
687 	if (!tf) {
688 		return;
689 	}
690 	/*
691 	 * On a clustered note send this notification to the clustering
692 	 * subsystem.
693 	 */
694 	if (cl_sctp_unlisten != NULL) {
695 		uchar_t	*slist;
696 		ssize_t	ssize;
697 
698 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
699 		slist = kmem_alloc(ssize, KM_SLEEP);
700 		sctp_get_saddr_list(sctp, slist, ssize);
701 		(*cl_sctp_unlisten)(sctp->sctp_family, slist,
702 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
703 		/* list will be freed by the clustering module */
704 	}
705 
706 	mutex_enter(&tf->tf_lock);
707 	ASSERT(tf->tf_sctp);
708 	if (tf->tf_sctp == sctp) {
709 		tf->tf_sctp = sctp->sctp_listen_hash_next;
710 		if (sctp->sctp_listen_hash_next != NULL) {
711 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
712 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
713 		}
714 	} else {
715 		ASSERT(sctp->sctp_listen_hash_prev);
716 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
717 		    sctp);
718 		ASSERT(sctp->sctp_listen_hash_next == NULL ||
719 		    sctp->sctp_listen_hash_next->sctp_listen_hash_prev == sctp);
720 
721 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
722 		    sctp->sctp_listen_hash_next;
723 
724 		if (sctp->sctp_listen_hash_next != NULL) {
725 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev =
726 			    sctp->sctp_listen_hash_prev;
727 		}
728 	}
729 	sctp->sctp_listen_hash_next = NULL;
730 	sctp->sctp_listen_hash_prev = NULL;
731 	sctp->sctp_listen_tfp = NULL;
732 	mutex_exit(&tf->tf_lock);
733 }
734 
735 void
736 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
737 {
738 	if (sctp->sctp_listen_tfp) {
739 		sctp_listen_hash_remove(sctp);
740 	}
741 
742 	mutex_enter(&tf->tf_lock);
743 	sctp->sctp_listen_hash_next = tf->tf_sctp;
744 	if (tf->tf_sctp) {
745 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
746 	}
747 	sctp->sctp_listen_hash_prev = NULL;
748 	tf->tf_sctp = sctp;
749 	sctp->sctp_listen_tfp = tf;
750 	mutex_exit(&tf->tf_lock);
751 	/*
752 	 * On a clustered note send this notification to the clustering
753 	 * subsystem.
754 	 */
755 	if (cl_sctp_listen != NULL) {
756 		uchar_t	*slist;
757 		ssize_t	ssize;
758 
759 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
760 		slist = kmem_alloc(ssize, KM_SLEEP);
761 		sctp_get_saddr_list(sctp, slist, ssize);
762 		(*cl_sctp_listen)(sctp->sctp_family, slist,
763 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
764 		/* list will be freed by the clustering module */
765 	}
766 }
767 
768 /*
769  * Hash list insertion routine for sctp_t structures.
770  * Inserts entries with the ones bound to a specific IP address first
771  * followed by those bound to INADDR_ANY.
772  */
773 void
774 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
775 {
776 	sctp_t	**sctpp;
777 	sctp_t	*sctpnext;
778 
779 	if (sctp->sctp_ptpbhn != NULL) {
780 		ASSERT(!caller_holds_lock);
781 		sctp_bind_hash_remove(sctp);
782 	}
783 	sctpp = &tbf->tf_sctp;
784 	if (!caller_holds_lock) {
785 		mutex_enter(&tbf->tf_lock);
786 	} else {
787 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
788 	}
789 	sctpnext = sctpp[0];
790 	if (sctpnext) {
791 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
792 	}
793 	sctp->sctp_bind_hash = sctpnext;
794 	sctp->sctp_ptpbhn = sctpp;
795 	sctpp[0] = sctp;
796 	/* For sctp_*_hash_remove */
797 	sctp->sctp_bind_lockp = &tbf->tf_lock;
798 	if (!caller_holds_lock)
799 		mutex_exit(&tbf->tf_lock);
800 }
801 
802 /*
803  * Hash list removal routine for sctp_t structures.
804  */
805 void
806 sctp_bind_hash_remove(sctp_t *sctp)
807 {
808 	sctp_t	*sctpnext;
809 	kmutex_t *lockp;
810 
811 	lockp = sctp->sctp_bind_lockp;
812 
813 	if (sctp->sctp_ptpbhn == NULL)
814 		return;
815 
816 	ASSERT(lockp != NULL);
817 	mutex_enter(lockp);
818 	if (sctp->sctp_ptpbhn) {
819 		sctpnext = sctp->sctp_bind_hash;
820 		if (sctpnext) {
821 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
822 			sctp->sctp_bind_hash = NULL;
823 		}
824 		*sctp->sctp_ptpbhn = sctpnext;
825 		sctp->sctp_ptpbhn = NULL;
826 	}
827 	mutex_exit(lockp);
828 	sctp->sctp_bind_lockp = NULL;
829 }
830 
831 /*
832  * Similar to but different from sctp_conn_match().
833  *
834  * Matches sets of addresses as follows: if the argument addr set is
835  * a complete subset of the corresponding addr set in the sctp_t, it
836  * is a match.
837  *
838  * Caller must hold tf->tf_lock.
839  *
840  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
841  */
842 sctp_t *
843 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
844     int min_state)
845 {
846 	sctp_t *sctp;
847 	sctp_faddr_t *fp;
848 
849 	ASSERT(MUTEX_HELD(&tf->tf_lock));
850 
851 	for (sctp = tf->tf_sctp; sctp != NULL;
852 	    sctp = sctp->sctp_conn_hash_next) {
853 		if (*ports != sctp->sctp_ports || sctp->sctp_state <
854 		    min_state) {
855 			continue;
856 		}
857 
858 		/* check for faddr match */
859 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
860 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
861 				break;
862 			}
863 		}
864 
865 		if (fp == NULL) {
866 			/* no faddr match; keep looking */
867 			continue;
868 		}
869 
870 		/*
871 		 * There is an existing association with the same peer
872 		 * address.  So now we need to check if our local address
873 		 * set overlaps with the one of the existing association.
874 		 * If they overlap, we should return it.
875 		 */
876 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_OVERLAP) {
877 			goto done;
878 		}
879 
880 		/* no match; continue searching */
881 	}
882 
883 done:
884 	if (sctp != NULL) {
885 		SCTP_REFHOLD(sctp);
886 	}
887 	return (sctp);
888 }
889 
890 boolean_t
891 ip_fanout_sctp_raw_match(conn_t *connp, uint32_t ports, ipha_t *ipha)
892 {
893 	uint16_t lport;
894 
895 	if (connp->conn_fully_bound) {
896 		return (IPCL_CONN_MATCH(connp, IPPROTO_SCTP, ipha->ipha_src,
897 		    ipha->ipha_dst, ports));
898 	} else {
899 		lport = htons(ntohl(ports) & 0xFFFF);
900 		return (IPCL_BIND_MATCH(connp, IPPROTO_SCTP, ipha->ipha_dst,
901 		    lport));
902 	}
903 }
904 
905 boolean_t
906 ip_fanout_sctp_raw_match_v6(conn_t *connp, uint32_t ports, ip6_t *ip6h,
907     boolean_t for_v4)
908 {
909 	uint16_t lport;
910 	in6_addr_t	v6dst;
911 
912 	if (!for_v4 && connp->conn_fully_bound) {
913 		return (IPCL_CONN_MATCH_V6(connp, IPPROTO_SCTP, ip6h->ip6_src,
914 		    ip6h->ip6_dst, ports));
915 	} else {
916 		lport = htons(ntohl(ports) & 0xFFFF);
917 		if (for_v4)
918 			v6dst = ipv6_all_zeros;
919 		else
920 			v6dst = ip6h->ip6_dst;
921 		return (IPCL_BIND_MATCH_V6(connp, IPPROTO_SCTP, v6dst, lport));
922 	}
923 }
924