xref: /titanic_50/usr/src/uts/common/inet/sctp/sctp_hash.c (revision 60d0a5907c4864f769e937ae18e629d2f4104c89)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/socket.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/tsol/tndb.h>
33 #include <sys/tsol/tnet.h>
34 
35 #include <netinet/in.h>
36 #include <netinet/ip6.h>
37 
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ipclassifier.h>
42 #include <inet/ipsec_impl.h>
43 #include <inet/ipp_common.h>
44 #include <inet/sctp_ip.h>
45 
46 #include "sctp_impl.h"
47 #include "sctp_addr.h"
48 
/*
 * Default association hash size.  The size must be a power of 2.
 * sctp_hash_init() also uses this value as the smallest table size it
 * will accept.
 */
#define	SCTP_CONN_HASH_SIZE	8192

/*
 * Requested size of the association (conn) hash table.  sctp_hash_init()
 * copies it into each SCTP stack and rounds it up to a power of two when
 * it is not one already.
 */
uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */
53 
54 /*
55  * Cluster networking hook for traversing current assoc list.
56  * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
58  */
int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
    boolean_t);
/* Per-stack worker: cl_sctp_walk_list() calls this once for each netstack. */
static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
    void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
63 
64 void
65 sctp_hash_init(sctp_stack_t *sctps)
66 {
67 	int i;
68 
69 	/* Start with /etc/system value */
70 	sctps->sctps_conn_hash_size = sctp_conn_hash_size;
71 
72 	if (sctps->sctps_conn_hash_size & (sctps->sctps_conn_hash_size - 1)) {
73 		/* Not a power of two. Round up to nearest power of two */
74 		for (i = 0; i < 31; i++) {
75 			if (sctps->sctps_conn_hash_size < (1 << i))
76 				break;
77 		}
78 		sctps->sctps_conn_hash_size = 1 << i;
79 	}
80 	if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
81 		sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
82 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
83 		    sctps->sctps_conn_hash_size);
84 	}
85 	sctps->sctps_conn_fanout =
86 		(sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
87 		    sizeof (sctp_tf_t),	KM_SLEEP);
88 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
89 		mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
90 			    MUTEX_DEFAULT, NULL);
91 	}
92 	sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
93 	    sizeof (sctp_tf_t),	KM_SLEEP);
94 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
95 		mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
96 		    MUTEX_DEFAULT, NULL);
97 	}
98 	sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
99 	    sizeof (sctp_tf_t),	KM_SLEEP);
100 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
101 		mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
102 		    MUTEX_DEFAULT, NULL);
103 	}
104 }
105 
106 void
107 sctp_hash_destroy(sctp_stack_t *sctps)
108 {
109 	int i;
110 
111 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
112 		mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
113 	}
114 	kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
115 	    sizeof (sctp_tf_t));
116 	sctps->sctps_conn_fanout = NULL;
117 
118 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
119 		mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
120 	}
121 	kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
122 	    sizeof (sctp_tf_t));
123 	sctps->sctps_listen_fanout = NULL;
124 
125 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
126 		mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
127 	}
128 	kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
129 	    sizeof (sctp_tf_t));
130 	sctps->sctps_bind_fanout = NULL;
131 }
132 
133 /*
134  * Walk the SCTP global list and refrele the ire for this ipif
135  * This is called when an address goes down, so that we release any reference
136  * to the ire associated with this address. Additionally, for any SCTP if
137  * this was the only/last address in its source list, we don't kill the
138  * assoc., if there is no address added subsequently, or if this does not
139  * come up, then the assoc. will die a natural death (i.e. timeout).
140  */
141 void
142 sctp_ire_cache_flush(ipif_t *ipif)
143 {
144 	sctp_t			*sctp;
145 	sctp_t			*sctp_prev = NULL;
146 	sctp_faddr_t		*fp;
147 	conn_t			*connp;
148 	ire_t			*ire;
149 	sctp_stack_t		*sctps = ipif->ipif_ill->ill_ipst->
150 	    ips_netstack->netstack_sctp;
151 
152 	sctp = sctps->sctps_gsctp;
153 	mutex_enter(&sctps->sctps_g_lock);
154 	while (sctp != NULL) {
155 		mutex_enter(&sctp->sctp_reflock);
156 		if (sctp->sctp_condemned) {
157 			mutex_exit(&sctp->sctp_reflock);
158 			sctp = list_next(&sctps->sctps_g_list, sctp);
159 			continue;
160 		}
161 		sctp->sctp_refcnt++;
162 		mutex_exit(&sctp->sctp_reflock);
163 		mutex_exit(&sctps->sctps_g_lock);
164 		if (sctp_prev != NULL)
165 			SCTP_REFRELE(sctp_prev);
166 
167 		RUN_SCTP(sctp);
168 		connp = sctp->sctp_connp;
169 		mutex_enter(&connp->conn_lock);
170 		ire = connp->conn_ire_cache;
171 		if (ire != NULL && ire->ire_ipif == ipif) {
172 			connp->conn_ire_cache = NULL;
173 			mutex_exit(&connp->conn_lock);
174 			IRE_REFRELE_NOTR(ire);
175 		} else {
176 			mutex_exit(&connp->conn_lock);
177 		}
178 		/* check for ires cached in faddr */
179 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
180 			/*
181 			 * If this ipif is being used as the source address
182 			 * we need to update it as well, else we will end
183 			 * up using the dead source address.
184 			 */
185 			ire = fp->ire;
186 			if (ire != NULL && ire->ire_ipif == ipif) {
187 				fp->ire = NULL;
188 				IRE_REFRELE_NOTR(ire);
189 			}
190 			/*
191 			 * This may result in setting the fp as unreachable,
192 			 * i.e. if all the source addresses are down. In
193 			 * that case the assoc. would timeout.
194 			 */
195 			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
196 			    &fp->saddr)) {
197 				sctp_set_saddr(sctp, fp);
198 				if (fp == sctp->sctp_current &&
199 				    fp->state != SCTP_FADDRS_UNREACH) {
200 					sctp_set_faddr_current(sctp, fp);
201 				}
202 			}
203 		}
204 		WAKE_SCTP(sctp);
205 		sctp_prev = sctp;
206 		mutex_enter(&sctps->sctps_g_lock);
207 		sctp = list_next(&sctps->sctps_g_list, sctp);
208 	}
209 	mutex_exit(&sctps->sctps_g_lock);
210 	if (sctp_prev != NULL)
211 		SCTP_REFRELE(sctp_prev);
212 }
213 
214 /*
215  * Exported routine for extracting active SCTP associations.
216  * Like TCP, we terminate the walk if the callback returns non-zero.
217  *
218  * Need to walk all sctp_stack_t instances since this clustering
219  * interface is assumed global for all instances
220  */
221 int
222 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
223     void *arg, boolean_t cansleep)
224 {
225 	netstack_handle_t nh;
226 	netstack_t *ns;
227 	int ret = 0;
228 
229 	netstack_next_init(&nh);
230 	while ((ns = netstack_next(&nh)) != NULL) {
231 		ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
232 		    ns->netstack_sctp);
233 		netstack_rele(ns);
234 	}
235 	netstack_next_fini(&nh);
236 	return (ret);
237 }
238 
239 static int
240 cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
241     void *arg, boolean_t cansleep, sctp_stack_t *sctps)
242 {
243 	sctp_t		*sctp;
244 	sctp_t		*sctp_prev;
245 	cl_sctp_info_t	cl_sctpi;
246 	uchar_t		*slist;
247 	uchar_t		*flist;
248 
249 	sctp = sctps->sctps_gsctp;
250 	sctp_prev = NULL;
251 	mutex_enter(&sctps->sctps_g_lock);
252 	while (sctp != NULL) {
253 		size_t	ssize;
254 		size_t	fsize;
255 
256 		mutex_enter(&sctp->sctp_reflock);
257 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
258 			mutex_exit(&sctp->sctp_reflock);
259 			sctp = list_next(&sctps->sctps_g_list, sctp);
260 			continue;
261 		}
262 		sctp->sctp_refcnt++;
263 		mutex_exit(&sctp->sctp_reflock);
264 		mutex_exit(&sctps->sctps_g_lock);
265 		if (sctp_prev != NULL)
266 			SCTP_REFRELE(sctp_prev);
267 		RUN_SCTP(sctp);
268 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
269 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
270 
271 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
272 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
273 		if (slist == NULL || flist == NULL) {
274 			WAKE_SCTP(sctp);
275 			if (slist != NULL)
276 				kmem_free(slist, ssize);
277 			if (flist != NULL)
278 				kmem_free(flist, fsize);
279 			SCTP_REFRELE(sctp);
280 			return (1);
281 		}
282 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
283 		sctp_get_saddr_list(sctp, slist, ssize);
284 		sctp_get_faddr_list(sctp, flist, fsize);
285 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
286 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
287 		cl_sctpi.cl_sctpi_family = sctp->sctp_family;
288 		cl_sctpi.cl_sctpi_ipversion = sctp->sctp_ipversion;
289 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
290 		cl_sctpi.cl_sctpi_lport = sctp->sctp_lport;
291 		cl_sctpi.cl_sctpi_fport = sctp->sctp_fport;
292 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
293 		WAKE_SCTP(sctp);
294 		cl_sctpi.cl_sctpi_laddrp = slist;
295 		cl_sctpi.cl_sctpi_faddrp = flist;
296 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
297 			kmem_free(slist, ssize);
298 			kmem_free(flist, fsize);
299 			SCTP_REFRELE(sctp);
300 			return (1);
301 		}
302 		/* list will be freed by cl_callback */
303 		sctp_prev = sctp;
304 		mutex_enter(&sctps->sctps_g_lock);
305 		sctp = list_next(&sctps->sctps_g_list, sctp);
306 	}
307 	mutex_exit(&sctps->sctps_g_lock);
308 	if (sctp_prev != NULL)
309 		SCTP_REFRELE(sctp_prev);
310 	return (0);
311 }
312 
313 sctp_t *
314 sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
315     uint_t ipif_seqid, zoneid_t zoneid, sctp_stack_t *sctps)
316 {
317 	sctp_tf_t		*tf;
318 	sctp_t			*sctp;
319 	sctp_faddr_t		*fp;
320 
321 	tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
322 	mutex_enter(&tf->tf_lock);
323 
324 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
325 		if (ports != sctp->sctp_ports ||
326 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
327 			continue;
328 		}
329 
330 		/* check for faddr match */
331 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
332 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
333 				break;
334 			}
335 		}
336 
337 		if (!fp) {
338 			/* no faddr match; keep looking */
339 			continue;
340 		}
341 
342 		/* check for laddr match */
343 		if (ipif_seqid == 0) {
344 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
345 				SCTP_REFHOLD(sctp);
346 				goto done;
347 			}
348 		} else {
349 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
350 				SCTP_REFHOLD(sctp);
351 				goto done;
352 			}
353 		/* no match; continue to the next in the chain */
354 		}
355 	}
356 
357 done:
358 	mutex_exit(&tf->tf_lock);
359 	return (sctp);
360 }
361 
362 static sctp_t *
363 listen_match(in6_addr_t *laddr, uint32_t ports, uint_t ipif_seqid,
364     zoneid_t zoneid, sctp_stack_t *sctps)
365 {
366 	sctp_t			*sctp;
367 	sctp_tf_t		*tf;
368 	uint16_t		lport;
369 
370 	lport = ((uint16_t *)&ports)[1];
371 
372 	tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
373 	mutex_enter(&tf->tf_lock);
374 
375 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
376 		if (lport != sctp->sctp_lport ||
377 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
378 			continue;
379 		}
380 
381 		if (ipif_seqid == 0) {
382 			if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
383 				SCTP_REFHOLD(sctp);
384 				goto done;
385 			}
386 		} else {
387 			if (sctp_ipif_lookup(sctp, ipif_seqid) != NULL) {
388 				SCTP_REFHOLD(sctp);
389 				goto done;
390 			}
391 		}
392 		/* no match; continue to the next in the chain */
393 	}
394 
395 done:
396 	mutex_exit(&tf->tf_lock);
397 	return (sctp);
398 }
399 
400 /* called by ipsec_sctp_pol */
401 conn_t *
402 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
403     uint_t ipif_seqid, zoneid_t zoneid, sctp_stack_t *sctps)
404 {
405 	sctp_t *sctp;
406 
407 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
408 	    zoneid, sctps)) == NULL) {
409 		/* Not in conn fanout; check listen fanout */
410 		if ((sctp = listen_match(dst, ports, ipif_seqid,
411 		    zoneid, sctps)) == NULL) {
412 			return (NULL);
413 		}
414 	}
415 	return (sctp->sctp_connp);
416 }
417 
418 conn_t *
419 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
420     uint_t ipif_seqid, zoneid_t zoneid, mblk_t *mp, sctp_stack_t *sctps)
421 
422 {
423 	sctp_t *sctp;
424 	boolean_t shared_addr;
425 
426 	if ((sctp = sctp_conn_match(src, dst, ports, ipif_seqid,
427 	    zoneid, sctps)) == NULL) {
428 		shared_addr = (zoneid == ALL_ZONES);
429 		if (shared_addr) {
430 			/*
431 			 * No need to handle exclusive-stack zones since
432 			 * ALL_ZONES only applies to the shared stack.
433 			 */
434 			zoneid = tsol_mlp_findzone(IPPROTO_SCTP,
435 			    htons(ntohl(ports) & 0xFFFF));
436 			/*
437 			 * If no shared MLP is found, tsol_mlp_findzone returns
438 			 * ALL_ZONES.  In that case, we assume it's SLP, and
439 			 * search for the zone based on the packet label.
440 			 * That will also return ALL_ZONES on failure.
441 			 */
442 			if (zoneid == ALL_ZONES)
443 				zoneid = tsol_packet_to_zoneid(mp);
444 			if (zoneid == ALL_ZONES)
445 				return (NULL);
446 		}
447 		/* Not in conn fanout; check listen fanout */
448 		if ((sctp = listen_match(dst, ports, ipif_seqid,
449 		    zoneid, sctps)) == NULL) {
450 			return (NULL);
451 		}
452 		/*
453 		 * On systems running trusted extensions, check if dst
454 		 * should accept the packet. "IPV6_VERSION" indicates
455 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
456 		 * IPv6 addresses are supported.
457 		 */
458 		if (is_system_labeled() &&
459 		    !tsol_receive_local(mp, dst, IPV6_VERSION,
460 		    shared_addr, sctp->sctp_connp)) {
461 			DTRACE_PROBE3(
462 			    tx__ip__log__info__classify__sctp,
463 			    char *,
464 			    "connp(1) could not receive mp(2)",
465 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
466 			SCTP_REFRELE(sctp);
467 			return (NULL);
468 		}
469 	}
470 	return (sctp->sctp_connp);
471 }
472 
473 /*
474  * Fanout for SCTP packets
475  * The caller puts <fport, lport> in the ports parameter.
476  */
477 /* ARGSUSED */
478 void
479 ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha,
480     uint32_t ports, uint_t flags, boolean_t mctl_present, boolean_t ip_policy,
481     uint_t ipif_seqid, zoneid_t zoneid)
482 {
483 	sctp_t *sctp;
484 	boolean_t isv4;
485 	conn_t *connp;
486 	mblk_t *first_mp;
487 	ip6_t *ip6h;
488 	in6_addr_t map_src, map_dst;
489 	in6_addr_t *src, *dst;
490 	ip_stack_t	*ipst;
491 	ipsec_stack_t	*ipss;
492 	sctp_stack_t	*sctps;
493 
494 	ASSERT(recv_ill != NULL);
495 	ipst = recv_ill->ill_ipst;
496 	sctps = ipst->ips_netstack->netstack_sctp;
497 	ipss = ipst->ips_netstack->netstack_ipsec;
498 
499 	first_mp = mp;
500 	if (mctl_present) {
501 		mp = first_mp->b_cont;
502 		ASSERT(mp != NULL);
503 	}
504 
505 	/* Assume IP provides aligned packets - otherwise toss */
506 	if (!OK_32PTR(mp->b_rptr)) {
507 		BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards);
508 		freemsg(first_mp);
509 		return;
510 	}
511 
512 	if (IPH_HDR_VERSION(ipha) == IPV6_VERSION) {
513 		ip6h = (ip6_t *)ipha;
514 		src = &ip6h->ip6_src;
515 		dst = &ip6h->ip6_dst;
516 		isv4 = B_FALSE;
517 	} else {
518 		ip6h = NULL;
519 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
520 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
521 		src = &map_src;
522 		dst = &map_dst;
523 		isv4 = B_TRUE;
524 	}
525 	connp = sctp_find_conn(src, dst, ports, ipif_seqid, zoneid, sctps);
526 	if (connp == NULL) {
527 		ip_fanout_sctp_raw(first_mp, recv_ill, ipha, isv4,
528 		    ports, mctl_present, flags, ip_policy,
529 		    ipif_seqid, zoneid);
530 		return;
531 	}
532 	sctp = CONN2SCTP(connp);
533 
534 	/* Found a client; up it goes */
535 	BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsHCInDelivers);
536 
537 	/*
538 	 * We check some fields in conn_t without holding a lock.
539 	 * This should be fine.
540 	 */
541 	if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || mctl_present) {
542 		first_mp = ipsec_check_inbound_policy(first_mp, connp,
543 		    ipha, NULL, mctl_present);
544 		if (first_mp == NULL) {
545 			SCTP_REFRELE(sctp);
546 			return;
547 		}
548 	}
549 
550 	/* Initiate IPPF processing for fastpath */
551 	if (IPP_ENABLED(IPP_LOCAL_IN, ipst)) {
552 		ip_process(IPP_LOCAL_IN, &mp,
553 		    recv_ill->ill_phyint->phyint_ifindex);
554 		if (mp == NULL) {
555 			SCTP_REFRELE(sctp);
556 			if (mctl_present)
557 				freeb(first_mp);
558 			return;
559 		} else if (mctl_present) {
560 			/*
561 			 * ip_process might return a new mp.
562 			 */
563 			ASSERT(first_mp != mp);
564 			first_mp->b_cont = mp;
565 		} else {
566 			first_mp = mp;
567 		}
568 	}
569 
570 	if (connp->conn_recvif || connp->conn_recvslla ||
571 	    connp->conn_ip_recvpktinfo) {
572 		int in_flags = 0;
573 
574 		if (connp->conn_recvif || connp->conn_ip_recvpktinfo) {
575 			in_flags = IPF_RECVIF;
576 		}
577 		if (connp->conn_recvslla) {
578 			in_flags |= IPF_RECVSLLA;
579 		}
580 		if (isv4) {
581 			mp = ip_add_info(mp, recv_ill, in_flags,
582 			    IPCL_ZONEID(connp), ipst);
583 		} else {
584 			mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst);
585 		}
586 		if (mp == NULL) {
587 			SCTP_REFRELE(sctp);
588 			if (mctl_present)
589 				freeb(first_mp);
590 			return;
591 		} else if (mctl_present) {
592 			/*
593 			 * ip_add_info might return a new mp.
594 			 */
595 			ASSERT(first_mp != mp);
596 			first_mp->b_cont = mp;
597 		} else {
598 			first_mp = mp;
599 		}
600 	}
601 
602 	mutex_enter(&sctp->sctp_lock);
603 	if (sctp->sctp_running) {
604 		if (mctl_present)
605 			mp->b_prev = first_mp;
606 		if (!sctp_add_recvq(sctp, mp, B_FALSE)) {
607 			BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards);
608 			freemsg(first_mp);
609 		}
610 		mutex_exit(&sctp->sctp_lock);
611 	} else {
612 		sctp->sctp_running = B_TRUE;
613 		mutex_exit(&sctp->sctp_lock);
614 
615 		mutex_enter(&sctp->sctp_recvq_lock);
616 		if (sctp->sctp_recvq != NULL) {
617 			if (mctl_present)
618 				mp->b_prev = first_mp;
619 			if (!sctp_add_recvq(sctp, mp, B_TRUE)) {
620 				BUMP_MIB(recv_ill->ill_ip_mib,
621 				    ipIfStatsInDiscards);
622 				freemsg(first_mp);
623 			}
624 			mutex_exit(&sctp->sctp_recvq_lock);
625 			WAKE_SCTP(sctp);
626 		} else {
627 			mutex_exit(&sctp->sctp_recvq_lock);
628 			sctp_input_data(sctp, mp, (mctl_present ? first_mp :
629 			    NULL));
630 			WAKE_SCTP(sctp);
631 			sctp_process_sendq(sctp);
632 		}
633 	}
634 	SCTP_REFRELE(sctp);
635 }
636 
637 void
638 sctp_conn_hash_remove(sctp_t *sctp)
639 {
640 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
641 
642 	if (!tf) {
643 		return;
644 	}
645 	/*
646 	 * On a clustered note send this notification to the clustering
647 	 * subsystem.
648 	 */
649 	if (cl_sctp_disconnect != NULL) {
650 		(*cl_sctp_disconnect)(sctp->sctp_family,
651 		    (cl_sctp_handle_t)sctp);
652 	}
653 
654 	mutex_enter(&tf->tf_lock);
655 	ASSERT(tf->tf_sctp);
656 	if (tf->tf_sctp == sctp) {
657 		tf->tf_sctp = sctp->sctp_conn_hash_next;
658 		if (sctp->sctp_conn_hash_next) {
659 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
660 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
661 		}
662 	} else {
663 		ASSERT(sctp->sctp_conn_hash_prev);
664 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
665 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
666 		    sctp->sctp_conn_hash_next;
667 
668 		if (sctp->sctp_conn_hash_next) {
669 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
670 			    == sctp);
671 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
672 			    sctp->sctp_conn_hash_prev;
673 		}
674 	}
675 	sctp->sctp_conn_hash_next = NULL;
676 	sctp->sctp_conn_hash_prev = NULL;
677 	sctp->sctp_conn_tfp = NULL;
678 	mutex_exit(&tf->tf_lock);
679 }
680 
681 void
682 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
683 {
684 	if (sctp->sctp_conn_tfp) {
685 		sctp_conn_hash_remove(sctp);
686 	}
687 
688 	if (!caller_holds_lock) {
689 		mutex_enter(&tf->tf_lock);
690 	} else {
691 		ASSERT(MUTEX_HELD(&tf->tf_lock));
692 	}
693 
694 	sctp->sctp_conn_hash_next = tf->tf_sctp;
695 	if (tf->tf_sctp) {
696 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
697 	}
698 	sctp->sctp_conn_hash_prev = NULL;
699 	tf->tf_sctp = sctp;
700 	sctp->sctp_conn_tfp = tf;
701 	if (!caller_holds_lock) {
702 		mutex_exit(&tf->tf_lock);
703 	}
704 }
705 
706 void
707 sctp_listen_hash_remove(sctp_t *sctp)
708 {
709 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
710 
711 	if (!tf) {
712 		return;
713 	}
714 	/*
715 	 * On a clustered note send this notification to the clustering
716 	 * subsystem.
717 	 */
718 	if (cl_sctp_unlisten != NULL) {
719 		uchar_t	*slist;
720 		ssize_t	ssize;
721 
722 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
723 		slist = kmem_alloc(ssize, KM_SLEEP);
724 		sctp_get_saddr_list(sctp, slist, ssize);
725 		(*cl_sctp_unlisten)(sctp->sctp_family, slist,
726 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
727 		/* list will be freed by the clustering module */
728 	}
729 
730 	mutex_enter(&tf->tf_lock);
731 	ASSERT(tf->tf_sctp);
732 	if (tf->tf_sctp == sctp) {
733 		tf->tf_sctp = sctp->sctp_listen_hash_next;
734 		if (sctp->sctp_listen_hash_next) {
735 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
736 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
737 		}
738 	} else {
739 		ASSERT(sctp->sctp_listen_hash_prev);
740 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
741 		    sctp);
742 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
743 		    sctp->sctp_listen_hash_next;
744 
745 		if (sctp->sctp_listen_hash_next) {
746 			ASSERT(
747 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev ==
748 			    sctp);
749 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev =
750 			    sctp->sctp_listen_hash_prev;
751 		}
752 	}
753 	sctp->sctp_listen_hash_next = NULL;
754 	sctp->sctp_listen_hash_prev = NULL;
755 	sctp->sctp_listen_tfp = NULL;
756 	mutex_exit(&tf->tf_lock);
757 }
758 
759 void
760 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
761 {
762 	if (sctp->sctp_listen_tfp) {
763 		sctp_listen_hash_remove(sctp);
764 	}
765 
766 	mutex_enter(&tf->tf_lock);
767 	sctp->sctp_listen_hash_next = tf->tf_sctp;
768 	if (tf->tf_sctp) {
769 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
770 	}
771 	sctp->sctp_listen_hash_prev = NULL;
772 	tf->tf_sctp = sctp;
773 	sctp->sctp_listen_tfp = tf;
774 	mutex_exit(&tf->tf_lock);
775 	/*
776 	 * On a clustered note send this notification to the clustering
777 	 * subsystem.
778 	 */
779 	if (cl_sctp_listen != NULL) {
780 		uchar_t	*slist;
781 		ssize_t	ssize;
782 
783 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
784 		slist = kmem_alloc(ssize, KM_SLEEP);
785 		sctp_get_saddr_list(sctp, slist, ssize);
786 		(*cl_sctp_listen)(sctp->sctp_family, slist,
787 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
788 		/* list will be freed by the clustering module */
789 	}
790 }
791 
792 /*
793  * Hash list insertion routine for sctp_t structures.
794  * Inserts entries with the ones bound to a specific IP address first
795  * followed by those bound to INADDR_ANY.
796  */
797 void
798 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
799 {
800 	sctp_t	**sctpp;
801 	sctp_t	*sctpnext;
802 
803 	if (sctp->sctp_ptpbhn != NULL) {
804 		ASSERT(!caller_holds_lock);
805 		sctp_bind_hash_remove(sctp);
806 	}
807 	sctpp = &tbf->tf_sctp;
808 	if (!caller_holds_lock) {
809 		mutex_enter(&tbf->tf_lock);
810 	} else {
811 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
812 	}
813 	sctpnext = sctpp[0];
814 	if (sctpnext) {
815 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
816 	}
817 	sctp->sctp_bind_hash = sctpnext;
818 	sctp->sctp_ptpbhn = sctpp;
819 	sctpp[0] = sctp;
820 	/* For sctp_*_hash_remove */
821 	sctp->sctp_bind_lockp = &tbf->tf_lock;
822 	if (!caller_holds_lock)
823 		mutex_exit(&tbf->tf_lock);
824 }
825 
826 /*
827  * Hash list removal routine for sctp_t structures.
828  */
829 void
830 sctp_bind_hash_remove(sctp_t *sctp)
831 {
832 	sctp_t	*sctpnext;
833 	kmutex_t *lockp;
834 
835 	lockp = sctp->sctp_bind_lockp;
836 
837 	if (sctp->sctp_ptpbhn == NULL)
838 		return;
839 
840 	ASSERT(lockp != NULL);
841 	mutex_enter(lockp);
842 	if (sctp->sctp_ptpbhn) {
843 		sctpnext = sctp->sctp_bind_hash;
844 		if (sctpnext) {
845 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
846 			sctp->sctp_bind_hash = NULL;
847 		}
848 		*sctp->sctp_ptpbhn = sctpnext;
849 		sctp->sctp_ptpbhn = NULL;
850 	}
851 	mutex_exit(lockp);
852 	sctp->sctp_bind_lockp = NULL;
853 }
854 
855 /*
856  * Similar to but more general than ip_sctp's conn_match().
857  *
858  * Matches sets of addresses as follows: if the argument addr set is
859  * a complete subset of the corresponding addr set in the sctp_t, it
860  * is a match.
861  *
862  * Caller must hold tf->tf_lock.
863  *
864  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
865  */
866 sctp_t *
867 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
868     int min_state)
869 {
870 
871 	sctp_t *sctp;
872 	sctp_faddr_t *fp;
873 
874 	ASSERT(MUTEX_HELD(&tf->tf_lock));
875 
876 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
877 		if (*ports != sctp->sctp_ports || sctp->sctp_state <
878 		    min_state) {
879 			continue;
880 		}
881 
882 		/* check for faddr match */
883 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
884 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
885 				break;
886 			}
887 		}
888 
889 		if (!fp) {
890 			/* no faddr match; keep looking */
891 			continue;
892 		}
893 
894 		/* check for laddr subset match */
895 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_SUBSET) {
896 			goto done;
897 		}
898 
899 		/* no match; continue searching */
900 	}
901 
902 done:
903 	if (sctp) {
904 		SCTP_REFHOLD(sctp);
905 	}
906 	return (sctp);
907 }
908 
909 boolean_t
910 ip_fanout_sctp_raw_match(conn_t *connp, uint32_t ports, ipha_t *ipha)
911 {
912 	uint16_t lport;
913 
914 	if (connp->conn_fully_bound) {
915 		return (IPCL_CONN_MATCH(connp, IPPROTO_SCTP, ipha->ipha_src,
916 		    ipha->ipha_dst, ports));
917 	} else {
918 		lport = htons(ntohl(ports) & 0xFFFF);
919 		return (IPCL_BIND_MATCH(connp, IPPROTO_SCTP, ipha->ipha_dst,
920 		    lport));
921 	}
922 }
923 
924 boolean_t
925 ip_fanout_sctp_raw_match_v6(conn_t *connp, uint32_t ports, ip6_t *ip6h,
926     boolean_t for_v4)
927 {
928 	uint16_t lport;
929 	in6_addr_t	v6dst;
930 
931 	if (!for_v4 && connp->conn_fully_bound) {
932 		return (IPCL_CONN_MATCH_V6(connp, IPPROTO_SCTP, ip6h->ip6_src,
933 		    ip6h->ip6_dst, ports));
934 	} else {
935 		lport = htons(ntohl(ports) & 0xFFFF);
936 		if (for_v4)
937 			v6dst = ipv6_all_zeros;
938 		else
939 			v6dst = ip6h->ip6_dst;
940 		return (IPCL_BIND_MATCH_V6(connp, IPPROTO_SCTP, v6dst, lport));
941 	}
942 }
943