xref: /titanic_52/usr/src/uts/common/inet/ip/ip_dce.c (revision 1a5e258f5471356ca102c7176637cdce45bac147)
1bd670b35SErik Nordmark /*
2bd670b35SErik Nordmark  * CDDL HEADER START
3bd670b35SErik Nordmark  *
4bd670b35SErik Nordmark  * The contents of this file are subject to the terms of the
5bd670b35SErik Nordmark  * Common Development and Distribution License (the "License").
6bd670b35SErik Nordmark  * You may not use this file except in compliance with the License.
7bd670b35SErik Nordmark  *
8bd670b35SErik Nordmark  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9bd670b35SErik Nordmark  * or http://www.opensolaris.org/os/licensing.
10bd670b35SErik Nordmark  * See the License for the specific language governing permissions
11bd670b35SErik Nordmark  * and limitations under the License.
12bd670b35SErik Nordmark  *
13bd670b35SErik Nordmark  * When distributing Covered Code, include this CDDL HEADER in each
14bd670b35SErik Nordmark  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15bd670b35SErik Nordmark  * If applicable, add the following below this CDDL HEADER, with the
16bd670b35SErik Nordmark  * fields enclosed by brackets "[]" replaced with your own identifying
17bd670b35SErik Nordmark  * information: Portions Copyright [yyyy] [name of copyright owner]
18bd670b35SErik Nordmark  *
19bd670b35SErik Nordmark  * CDDL HEADER END
20bd670b35SErik Nordmark  */
21bd670b35SErik Nordmark 
22bd670b35SErik Nordmark /*
231eee170aSErik Nordmark  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
247c6d7024SJerry Jelinek  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
25bd670b35SErik Nordmark  */
26bd670b35SErik Nordmark 
27bd670b35SErik Nordmark #include <sys/types.h>
28bd670b35SErik Nordmark #include <sys/stream.h>
29bd670b35SErik Nordmark #include <sys/strsun.h>
30bd670b35SErik Nordmark #include <sys/zone.h>
31bd670b35SErik Nordmark #include <sys/ddi.h>
327c6d7024SJerry Jelinek #include <sys/disp.h>
33bd670b35SErik Nordmark #include <sys/sunddi.h>
34bd670b35SErik Nordmark #include <sys/cmn_err.h>
35bd670b35SErik Nordmark #include <sys/debug.h>
36bd670b35SErik Nordmark #include <sys/atomic.h>
377c6d7024SJerry Jelinek #include <sys/callb.h>
38bd670b35SErik Nordmark #define	_SUN_TPI_VERSION 2
39bd670b35SErik Nordmark #include <sys/tihdr.h>
40bd670b35SErik Nordmark 
41bd670b35SErik Nordmark #include <inet/common.h>
42bd670b35SErik Nordmark #include <inet/mi.h>
43bd670b35SErik Nordmark #include <inet/mib2.h>
44bd670b35SErik Nordmark #include <inet/snmpcom.h>
45bd670b35SErik Nordmark 
46bd670b35SErik Nordmark #include <netinet/ip6.h>
47bd670b35SErik Nordmark #include <netinet/icmp6.h>
48bd670b35SErik Nordmark 
49bd670b35SErik Nordmark #include <inet/ip.h>
50bd670b35SErik Nordmark #include <inet/ip_impl.h>
51bd670b35SErik Nordmark #include <inet/ip6.h>
52bd670b35SErik Nordmark #include <inet/ip6_asp.h>
53bd670b35SErik Nordmark #include <inet/ip_multi.h>
54bd670b35SErik Nordmark #include <inet/ip_if.h>
55bd670b35SErik Nordmark #include <inet/ip_ire.h>
56bd670b35SErik Nordmark #include <inet/ip_ftable.h>
57bd670b35SErik Nordmark #include <inet/ip_rts.h>
58bd670b35SErik Nordmark #include <inet/ip_ndp.h>
59bd670b35SErik Nordmark #include <inet/ipclassifier.h>
60bd670b35SErik Nordmark #include <inet/ip_listutils.h>
61bd670b35SErik Nordmark 
62bd670b35SErik Nordmark #include <sys/sunddi.h>
63bd670b35SErik Nordmark 
64bd670b35SErik Nordmark /*
65bd670b35SErik Nordmark  * Routines for handling destination cache entries.
66bd670b35SErik Nordmark  * There is always one DCEF_DEFAULT for each ip_stack_t created at init time.
67bd670b35SErik Nordmark  * That entry holds both the IP ident value and the dce generation number.
68bd670b35SErik Nordmark  *
69bd670b35SErik Nordmark  * Any time a DCE is changed significantly (different path MTU, but NOT
70bd670b35SErik Nordmark  * different ULP info!), the dce_generation number is increased.
71bd670b35SErik Nordmark  * Also, when a new DCE is created, the dce_generation number in the default
72bd670b35SErik Nordmark  * DCE is bumped. That allows the dce_t information to be cached efficiently
73bd670b35SErik Nordmark  * as long as the entity caching the dce_t also caches the dce_generation,
74bd670b35SErik Nordmark  * and compares the cached generation to detect any changes.
75bd670b35SErik Nordmark  * Furthermore, when a DCE is deleted, if there are any outstanding references
76bd670b35SErik Nordmark  * to the DCE it will be marked as condemned. The condemned mark is
77bd670b35SErik Nordmark  * a designated generation number which is never otherwise used, hence
78bd670b35SErik Nordmark  * the single comparison with the generation number captures that as well.
79bd670b35SErik Nordmark  *
80bd670b35SErik Nordmark  * An example of code which caches is as follows:
81bd670b35SErik Nordmark  *
82bd670b35SErik Nordmark  *	if (mystruct->my_dce_generation != mystruct->my_dce->dce_generation) {
83bd670b35SErik Nordmark  *		The DCE has changed
84bd670b35SErik Nordmark  *		mystruct->my_dce = dce_lookup_pkt(mp, ixa,
85bd670b35SErik Nordmark  *		    &mystruct->my_dce_generation);
86bd670b35SErik Nordmark  *		Not needed in practice, since we have the default DCE:
87bd670b35SErik Nordmark  *		if (DCE_IS_CONDEMNED(mystruct->my_dce))
88bd670b35SErik Nordmark  *			return failure;
89bd670b35SErik Nordmark  *	}
90bd670b35SErik Nordmark  *
91bd670b35SErik Nordmark  * Note that for IPv6 link-local addresses we record the ifindex since the
92bd670b35SErik Nordmark  * link-locals are not globally unique.
93bd670b35SErik Nordmark  */
94bd670b35SErik Nordmark 
95bd670b35SErik Nordmark /*
96bd670b35SErik Nordmark  * Hash bucket structure for DCEs
97bd670b35SErik Nordmark  */
98bd670b35SErik Nordmark typedef struct dcb_s {
99bd670b35SErik Nordmark 	krwlock_t	dcb_lock;
100bd670b35SErik Nordmark 	uint32_t	dcb_cnt;
101bd670b35SErik Nordmark 	dce_t		*dcb_dce;
102bd670b35SErik Nordmark } dcb_t;
103bd670b35SErik Nordmark 
104bd670b35SErik Nordmark static void	dce_delete_locked(dcb_t *, dce_t *);
105bd670b35SErik Nordmark static void	dce_make_condemned(dce_t *);
106bd670b35SErik Nordmark 
107bd670b35SErik Nordmark static kmem_cache_t *dce_cache;
1087c6d7024SJerry Jelinek static kthread_t *dce_reclaim_thread;
1097c6d7024SJerry Jelinek static kmutex_t dce_reclaim_lock;
1107c6d7024SJerry Jelinek static kcondvar_t dce_reclaim_cv;
1117c6d7024SJerry Jelinek static int dce_reclaim_shutdown;
112bd670b35SErik Nordmark 
1137c6d7024SJerry Jelinek /* Global so it can be tuned in /etc/system. This must be a power of two. */
1147c6d7024SJerry Jelinek uint_t ip_dce_hash_size = 1024;
1157c6d7024SJerry Jelinek 
1167c6d7024SJerry Jelinek /* The time in seconds between executions of the IP DCE reclaim worker. */
1177c6d7024SJerry Jelinek uint_t ip_dce_reclaim_interval = 60;
1187c6d7024SJerry Jelinek 
1197c6d7024SJerry Jelinek /* The factor of the DCE threshold at which to start hard reclaims */
1207c6d7024SJerry Jelinek uint_t ip_dce_reclaim_threshold_hard = 2;
121bd670b35SErik Nordmark 
122bd670b35SErik Nordmark /* Operates on a uint64_t */
123bd670b35SErik Nordmark #define	RANDOM_HASH(p) ((p) ^ ((p)>>16) ^ ((p)>>32) ^ ((p)>>48))
124bd670b35SErik Nordmark 
125bd670b35SErik Nordmark /*
126bd670b35SErik Nordmark  * Reclaim a fraction of dce's in the dcb.
127bd670b35SErik Nordmark  * For now we have a higher probability to delete DCEs without DCE_PMTU.
128bd670b35SErik Nordmark  */
129bd670b35SErik Nordmark static void
130bd670b35SErik Nordmark dcb_reclaim(dcb_t *dcb, ip_stack_t *ipst, uint_t fraction)
131bd670b35SErik Nordmark {
132bd670b35SErik Nordmark 	uint_t	fraction_pmtu = fraction*4;
133bd670b35SErik Nordmark 	uint_t	hash;
134bd670b35SErik Nordmark 	dce_t	*dce, *nextdce;
1357c6d7024SJerry Jelinek 	hrtime_t seed = gethrtime();
1367c6d7024SJerry Jelinek 	uint_t	retained = 0;
1377c6d7024SJerry Jelinek 	uint_t	max = ipst->ips_ip_dce_reclaim_threshold;
1387c6d7024SJerry Jelinek 
1397c6d7024SJerry Jelinek 	max *= ip_dce_reclaim_threshold_hard;
140bd670b35SErik Nordmark 
141bd670b35SErik Nordmark 	rw_enter(&dcb->dcb_lock, RW_WRITER);
142bd670b35SErik Nordmark 	for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) {
143bd670b35SErik Nordmark 		nextdce = dce->dce_next;
144bd670b35SErik Nordmark 		/* Clear DCEF_PMTU if the pmtu is too old */
145bd670b35SErik Nordmark 		mutex_enter(&dce->dce_lock);
146bd670b35SErik Nordmark 		if ((dce->dce_flags & DCEF_PMTU) &&
147d3d50737SRafael Vanoni 		    TICK_TO_SEC(ddi_get_lbolt64()) - dce->dce_last_change_time >
148bd670b35SErik Nordmark 		    ipst->ips_ip_pathmtu_interval) {
149bd670b35SErik Nordmark 			dce->dce_flags &= ~DCEF_PMTU;
150bd670b35SErik Nordmark 			mutex_exit(&dce->dce_lock);
151bd670b35SErik Nordmark 			dce_increment_generation(dce);
152bd670b35SErik Nordmark 		} else {
153bd670b35SErik Nordmark 			mutex_exit(&dce->dce_lock);
154bd670b35SErik Nordmark 		}
1557c6d7024SJerry Jelinek 
1567c6d7024SJerry Jelinek 		if (max == 0 || retained < max) {
1577c6d7024SJerry Jelinek 			hash = RANDOM_HASH((uint64_t)((uintptr_t)dce | seed));
1587c6d7024SJerry Jelinek 
159bd670b35SErik Nordmark 			if (dce->dce_flags & DCEF_PMTU) {
1607c6d7024SJerry Jelinek 				if (hash % fraction_pmtu != 0) {
1617c6d7024SJerry Jelinek 					retained++;
162bd670b35SErik Nordmark 					continue;
1637c6d7024SJerry Jelinek 				}
164bd670b35SErik Nordmark 			} else {
1657c6d7024SJerry Jelinek 				if (hash % fraction != 0) {
1667c6d7024SJerry Jelinek 					retained++;
167bd670b35SErik Nordmark 					continue;
168bd670b35SErik Nordmark 				}
1697c6d7024SJerry Jelinek 			}
1707c6d7024SJerry Jelinek 		}
171bd670b35SErik Nordmark 
172bd670b35SErik Nordmark 		IP_STAT(ipst, ip_dce_reclaim_deleted);
173bd670b35SErik Nordmark 		dce_delete_locked(dcb, dce);
174bd670b35SErik Nordmark 		dce_refrele(dce);
175bd670b35SErik Nordmark 	}
176bd670b35SErik Nordmark 	rw_exit(&dcb->dcb_lock);
177bd670b35SErik Nordmark }
178bd670b35SErik Nordmark 
179bd670b35SErik Nordmark /*
180bd670b35SErik Nordmark  * kmem_cache callback to free up memory.
181bd670b35SErik Nordmark  *
182bd670b35SErik Nordmark  */
183bd670b35SErik Nordmark static void
184bd670b35SErik Nordmark ip_dce_reclaim_stack(ip_stack_t *ipst)
185bd670b35SErik Nordmark {
186bd670b35SErik Nordmark 	int	i;
187bd670b35SErik Nordmark 
188bd670b35SErik Nordmark 	IP_STAT(ipst, ip_dce_reclaim_calls);
189bd670b35SErik Nordmark 	for (i = 0; i < ipst->ips_dce_hashsize; i++) {
190bd670b35SErik Nordmark 		dcb_reclaim(&ipst->ips_dce_hash_v4[i], ipst,
191bd670b35SErik Nordmark 		    ipst->ips_ip_dce_reclaim_fraction);
192bd670b35SErik Nordmark 
193bd670b35SErik Nordmark 		dcb_reclaim(&ipst->ips_dce_hash_v6[i], ipst,
194bd670b35SErik Nordmark 		    ipst->ips_ip_dce_reclaim_fraction);
195bd670b35SErik Nordmark 	}
196bd670b35SErik Nordmark 
197bd670b35SErik Nordmark 	/*
198bd670b35SErik Nordmark 	 * Walk all CONNs that can have a reference on an ire, nce or dce.
199bd670b35SErik Nordmark 	 * Get them to update any stale references to drop any refholds they
200bd670b35SErik Nordmark 	 * have.
201bd670b35SErik Nordmark 	 */
202bd670b35SErik Nordmark 	ipcl_walk(conn_ixa_cleanup, (void *)B_FALSE, ipst);
203bd670b35SErik Nordmark }
204bd670b35SErik Nordmark 
205bd670b35SErik Nordmark /*
2067c6d7024SJerry Jelinek  * Called by dce_reclaim_worker() below, and no one else.  Typically this will
2077c6d7024SJerry Jelinek  * mean that the number of entries in the hash buckets has exceeded a tunable
2087c6d7024SJerry Jelinek  * threshold.
209bd670b35SErik Nordmark  */
2107c6d7024SJerry Jelinek static void
2117c6d7024SJerry Jelinek ip_dce_reclaim(void)
212bd670b35SErik Nordmark {
213bd670b35SErik Nordmark 	netstack_handle_t nh;
214bd670b35SErik Nordmark 	netstack_t *ns;
2154ba231ceSKacheong Poon 	ip_stack_t *ipst;
216bd670b35SErik Nordmark 
2177c6d7024SJerry Jelinek 	ASSERT(curthread == dce_reclaim_thread);
2187c6d7024SJerry Jelinek 
219bd670b35SErik Nordmark 	netstack_next_init(&nh);
220bd670b35SErik Nordmark 	while ((ns = netstack_next(&nh)) != NULL) {
2214ba231ceSKacheong Poon 		/*
2224ba231ceSKacheong Poon 		 * netstack_next() can return a netstack_t with a NULL
2234ba231ceSKacheong Poon 		 * netstack_ip at boot time.
2244ba231ceSKacheong Poon 		 */
2254ba231ceSKacheong Poon 		if ((ipst = ns->netstack_ip) == NULL) {
2264ba231ceSKacheong Poon 			netstack_rele(ns);
2274ba231ceSKacheong Poon 			continue;
2284ba231ceSKacheong Poon 		}
2297c6d7024SJerry Jelinek 		if (atomic_swap_uint(&ipst->ips_dce_reclaim_needed, 0) != 0)
2304ba231ceSKacheong Poon 			ip_dce_reclaim_stack(ipst);
231bd670b35SErik Nordmark 		netstack_rele(ns);
232bd670b35SErik Nordmark 	}
233bd670b35SErik Nordmark 	netstack_next_fini(&nh);
234bd670b35SErik Nordmark }
235bd670b35SErik Nordmark 
2367c6d7024SJerry Jelinek /* ARGSUSED */
2377c6d7024SJerry Jelinek static void
2387c6d7024SJerry Jelinek dce_reclaim_worker(void *arg)
2397c6d7024SJerry Jelinek {
2407c6d7024SJerry Jelinek 	callb_cpr_t	cprinfo;
2417c6d7024SJerry Jelinek 
2427c6d7024SJerry Jelinek 	CALLB_CPR_INIT(&cprinfo, &dce_reclaim_lock, callb_generic_cpr,
2437c6d7024SJerry Jelinek 	    "dce_reclaim_worker");
2447c6d7024SJerry Jelinek 
2457c6d7024SJerry Jelinek 	mutex_enter(&dce_reclaim_lock);
2467c6d7024SJerry Jelinek 	while (!dce_reclaim_shutdown) {
2477c6d7024SJerry Jelinek 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
2487c6d7024SJerry Jelinek 		(void) cv_timedwait(&dce_reclaim_cv, &dce_reclaim_lock,
2497c6d7024SJerry Jelinek 		    ddi_get_lbolt() + ip_dce_reclaim_interval * hz);
2507c6d7024SJerry Jelinek 		CALLB_CPR_SAFE_END(&cprinfo, &dce_reclaim_lock);
2517c6d7024SJerry Jelinek 
2527c6d7024SJerry Jelinek 		if (dce_reclaim_shutdown)
2537c6d7024SJerry Jelinek 			break;
2547c6d7024SJerry Jelinek 
2557c6d7024SJerry Jelinek 		mutex_exit(&dce_reclaim_lock);
2567c6d7024SJerry Jelinek 		ip_dce_reclaim();
2577c6d7024SJerry Jelinek 		mutex_enter(&dce_reclaim_lock);
2587c6d7024SJerry Jelinek 	}
2597c6d7024SJerry Jelinek 
2607c6d7024SJerry Jelinek 	ASSERT(MUTEX_HELD(&dce_reclaim_lock));
2617c6d7024SJerry Jelinek 	dce_reclaim_thread = NULL;
2627c6d7024SJerry Jelinek 	dce_reclaim_shutdown = 0;
2637c6d7024SJerry Jelinek 	cv_broadcast(&dce_reclaim_cv);
2647c6d7024SJerry Jelinek 	CALLB_CPR_EXIT(&cprinfo);	/* drops the lock */
2657c6d7024SJerry Jelinek 
2667c6d7024SJerry Jelinek 	thread_exit();
2677c6d7024SJerry Jelinek }
2687c6d7024SJerry Jelinek 
269bd670b35SErik Nordmark void
270bd670b35SErik Nordmark dce_g_init(void)
271bd670b35SErik Nordmark {
272bd670b35SErik Nordmark 	dce_cache = kmem_cache_create("dce_cache",
2737c6d7024SJerry Jelinek 	    sizeof (dce_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
2747c6d7024SJerry Jelinek 
2757c6d7024SJerry Jelinek 	mutex_init(&dce_reclaim_lock, NULL, MUTEX_DEFAULT, NULL);
2767c6d7024SJerry Jelinek 	cv_init(&dce_reclaim_cv, NULL, CV_DEFAULT, NULL);
2777c6d7024SJerry Jelinek 
2787c6d7024SJerry Jelinek 	dce_reclaim_thread = thread_create(NULL, 0, dce_reclaim_worker,
2797c6d7024SJerry Jelinek 	    NULL, 0, &p0, TS_RUN, minclsyspri);
280bd670b35SErik Nordmark }
281bd670b35SErik Nordmark 
282bd670b35SErik Nordmark void
283bd670b35SErik Nordmark dce_g_destroy(void)
284bd670b35SErik Nordmark {
2857c6d7024SJerry Jelinek 	mutex_enter(&dce_reclaim_lock);
2867c6d7024SJerry Jelinek 	dce_reclaim_shutdown = 1;
2877c6d7024SJerry Jelinek 	cv_signal(&dce_reclaim_cv);
2887c6d7024SJerry Jelinek 	while (dce_reclaim_thread != NULL)
2897c6d7024SJerry Jelinek 		cv_wait(&dce_reclaim_cv, &dce_reclaim_lock);
2907c6d7024SJerry Jelinek 	mutex_exit(&dce_reclaim_lock);
2917c6d7024SJerry Jelinek 
2927c6d7024SJerry Jelinek 	cv_destroy(&dce_reclaim_cv);
2937c6d7024SJerry Jelinek 	mutex_destroy(&dce_reclaim_lock);
2947c6d7024SJerry Jelinek 
295bd670b35SErik Nordmark 	kmem_cache_destroy(dce_cache);
296bd670b35SErik Nordmark }
297bd670b35SErik Nordmark 
298bd670b35SErik Nordmark /*
299bd670b35SErik Nordmark  * Allocate a default DCE and a hash table for per-IP address DCEs
300bd670b35SErik Nordmark  */
301bd670b35SErik Nordmark void
302bd670b35SErik Nordmark dce_stack_init(ip_stack_t *ipst)
303bd670b35SErik Nordmark {
304bd670b35SErik Nordmark 	int	i;
305bd670b35SErik Nordmark 
306bd670b35SErik Nordmark 	ipst->ips_dce_default = kmem_cache_alloc(dce_cache, KM_SLEEP);
307bd670b35SErik Nordmark 	bzero(ipst->ips_dce_default, sizeof (dce_t));
308bd670b35SErik Nordmark 	ipst->ips_dce_default->dce_flags = DCEF_DEFAULT;
309bd670b35SErik Nordmark 	ipst->ips_dce_default->dce_generation = DCE_GENERATION_INITIAL;
310d3d50737SRafael Vanoni 	ipst->ips_dce_default->dce_last_change_time =
311d3d50737SRafael Vanoni 	    TICK_TO_SEC(ddi_get_lbolt64());
312bd670b35SErik Nordmark 	ipst->ips_dce_default->dce_refcnt = 1;	/* Should never go away */
313bd670b35SErik Nordmark 	ipst->ips_dce_default->dce_ipst = ipst;
314bd670b35SErik Nordmark 
315bd670b35SErik Nordmark 	/* This must be a power of two since we are using IRE_ADDR_HASH macro */
3167c6d7024SJerry Jelinek 	ipst->ips_dce_hashsize = ip_dce_hash_size;
317bd670b35SErik Nordmark 	ipst->ips_dce_hash_v4 = kmem_zalloc(ipst->ips_dce_hashsize *
318bd670b35SErik Nordmark 	    sizeof (dcb_t), KM_SLEEP);
319bd670b35SErik Nordmark 	ipst->ips_dce_hash_v6 = kmem_zalloc(ipst->ips_dce_hashsize *
320bd670b35SErik Nordmark 	    sizeof (dcb_t), KM_SLEEP);
321bd670b35SErik Nordmark 	for (i = 0; i < ipst->ips_dce_hashsize; i++) {
322bd670b35SErik Nordmark 		rw_init(&ipst->ips_dce_hash_v4[i].dcb_lock, NULL, RW_DEFAULT,
323bd670b35SErik Nordmark 		    NULL);
324bd670b35SErik Nordmark 		rw_init(&ipst->ips_dce_hash_v6[i].dcb_lock, NULL, RW_DEFAULT,
325bd670b35SErik Nordmark 		    NULL);
326bd670b35SErik Nordmark 	}
327bd670b35SErik Nordmark }
328bd670b35SErik Nordmark 
329bd670b35SErik Nordmark void
330bd670b35SErik Nordmark dce_stack_destroy(ip_stack_t *ipst)
331bd670b35SErik Nordmark {
332bd670b35SErik Nordmark 	int i;
333bd670b35SErik Nordmark 	for (i = 0; i < ipst->ips_dce_hashsize; i++) {
334bd670b35SErik Nordmark 		rw_destroy(&ipst->ips_dce_hash_v4[i].dcb_lock);
335bd670b35SErik Nordmark 		rw_destroy(&ipst->ips_dce_hash_v6[i].dcb_lock);
336bd670b35SErik Nordmark 	}
337bd670b35SErik Nordmark 	kmem_free(ipst->ips_dce_hash_v4,
338bd670b35SErik Nordmark 	    ipst->ips_dce_hashsize * sizeof (dcb_t));
339bd670b35SErik Nordmark 	ipst->ips_dce_hash_v4 = NULL;
340bd670b35SErik Nordmark 	kmem_free(ipst->ips_dce_hash_v6,
341bd670b35SErik Nordmark 	    ipst->ips_dce_hashsize * sizeof (dcb_t));
342bd670b35SErik Nordmark 	ipst->ips_dce_hash_v6 = NULL;
343bd670b35SErik Nordmark 	ipst->ips_dce_hashsize = 0;
344bd670b35SErik Nordmark 
345bd670b35SErik Nordmark 	ASSERT(ipst->ips_dce_default->dce_refcnt == 1);
346bd670b35SErik Nordmark 	kmem_cache_free(dce_cache, ipst->ips_dce_default);
347bd670b35SErik Nordmark 	ipst->ips_dce_default = NULL;
348bd670b35SErik Nordmark }
349bd670b35SErik Nordmark 
350bd670b35SErik Nordmark /* When any DCE is good enough */
351bd670b35SErik Nordmark dce_t *
352bd670b35SErik Nordmark dce_get_default(ip_stack_t *ipst)
353bd670b35SErik Nordmark {
354bd670b35SErik Nordmark 	dce_t		*dce;
355bd670b35SErik Nordmark 
356bd670b35SErik Nordmark 	dce = ipst->ips_dce_default;
357bd670b35SErik Nordmark 	dce_refhold(dce);
358bd670b35SErik Nordmark 	return (dce);
359bd670b35SErik Nordmark }
360bd670b35SErik Nordmark 
361bd670b35SErik Nordmark /*
362bd670b35SErik Nordmark  * Generic for IPv4 and IPv6.
363bd670b35SErik Nordmark  *
364bd670b35SErik Nordmark  * Used by callers that need to cache e.g., the datapath
365bd670b35SErik Nordmark  * Returns the generation number in the last argument.
366bd670b35SErik Nordmark  */
367bd670b35SErik Nordmark dce_t *
368bd670b35SErik Nordmark dce_lookup_pkt(mblk_t *mp, ip_xmit_attr_t *ixa, uint_t *generationp)
369bd670b35SErik Nordmark {
370bd670b35SErik Nordmark 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
371bd670b35SErik Nordmark 		/*
372bd670b35SErik Nordmark 		 * If we have a source route we need to look for the final
373bd670b35SErik Nordmark 		 * destination in the source route option.
374bd670b35SErik Nordmark 		 */
375bd670b35SErik Nordmark 		ipaddr_t final_dst;
376bd670b35SErik Nordmark 		ipha_t *ipha = (ipha_t *)mp->b_rptr;
377bd670b35SErik Nordmark 
378bd670b35SErik Nordmark 		final_dst = ip_get_dst(ipha);
379bd670b35SErik Nordmark 		return (dce_lookup_v4(final_dst, ixa->ixa_ipst, generationp));
380bd670b35SErik Nordmark 	} else {
381bd670b35SErik Nordmark 		uint_t ifindex;
382bd670b35SErik Nordmark 		/*
383bd670b35SErik Nordmark 		 * If we have a routing header we need to look for the final
384bd670b35SErik Nordmark 		 * destination in the routing extension header.
385bd670b35SErik Nordmark 		 */
386bd670b35SErik Nordmark 		in6_addr_t final_dst;
387bd670b35SErik Nordmark 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
388bd670b35SErik Nordmark 
389bd670b35SErik Nordmark 		final_dst = ip_get_dst_v6(ip6h, mp, NULL);
390bd670b35SErik Nordmark 		ifindex = 0;
391bd670b35SErik Nordmark 		if (IN6_IS_ADDR_LINKSCOPE(&final_dst) && ixa->ixa_nce != NULL) {
392bd670b35SErik Nordmark 			ifindex = ixa->ixa_nce->nce_common->ncec_ill->
393bd670b35SErik Nordmark 			    ill_phyint->phyint_ifindex;
394bd670b35SErik Nordmark 		}
395bd670b35SErik Nordmark 		return (dce_lookup_v6(&final_dst, ifindex, ixa->ixa_ipst,
396bd670b35SErik Nordmark 		    generationp));
397bd670b35SErik Nordmark 	}
398bd670b35SErik Nordmark }
399bd670b35SErik Nordmark 
400bd670b35SErik Nordmark /*
401bd670b35SErik Nordmark  * Used by callers that need to cache e.g., the datapath
402bd670b35SErik Nordmark  * Returns the generation number in the last argument.
403bd670b35SErik Nordmark  */
404bd670b35SErik Nordmark dce_t *
405bd670b35SErik Nordmark dce_lookup_v4(ipaddr_t dst, ip_stack_t *ipst, uint_t *generationp)
406bd670b35SErik Nordmark {
407bd670b35SErik Nordmark 	uint_t		hash;
408bd670b35SErik Nordmark 	dcb_t		*dcb;
409bd670b35SErik Nordmark 	dce_t		*dce;
410bd670b35SErik Nordmark 
411bd670b35SErik Nordmark 	/* Set *generationp before dropping the lock(s) that allow additions */
412bd670b35SErik Nordmark 	if (generationp != NULL)
413bd670b35SErik Nordmark 		*generationp = ipst->ips_dce_default->dce_generation;
414bd670b35SErik Nordmark 
415bd670b35SErik Nordmark 	hash = IRE_ADDR_HASH(dst, ipst->ips_dce_hashsize);
416bd670b35SErik Nordmark 	dcb = &ipst->ips_dce_hash_v4[hash];
417bd670b35SErik Nordmark 	rw_enter(&dcb->dcb_lock, RW_READER);
418bd670b35SErik Nordmark 	for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
419bd670b35SErik Nordmark 		if (dce->dce_v4addr == dst) {
420bd670b35SErik Nordmark 			mutex_enter(&dce->dce_lock);
421bd670b35SErik Nordmark 			if (!DCE_IS_CONDEMNED(dce)) {
422bd670b35SErik Nordmark 				dce_refhold(dce);
423bd670b35SErik Nordmark 				if (generationp != NULL)
424bd670b35SErik Nordmark 					*generationp = dce->dce_generation;
425bd670b35SErik Nordmark 				mutex_exit(&dce->dce_lock);
426bd670b35SErik Nordmark 				rw_exit(&dcb->dcb_lock);
427bd670b35SErik Nordmark 				return (dce);
428bd670b35SErik Nordmark 			}
429bd670b35SErik Nordmark 			mutex_exit(&dce->dce_lock);
430bd670b35SErik Nordmark 		}
431bd670b35SErik Nordmark 	}
432bd670b35SErik Nordmark 	rw_exit(&dcb->dcb_lock);
433bd670b35SErik Nordmark 	/* Not found */
434bd670b35SErik Nordmark 	dce = ipst->ips_dce_default;
435bd670b35SErik Nordmark 	dce_refhold(dce);
436bd670b35SErik Nordmark 	return (dce);
437bd670b35SErik Nordmark }
438bd670b35SErik Nordmark 
439bd670b35SErik Nordmark /*
440bd670b35SErik Nordmark  * Used by callers that need to cache e.g., the datapath
441bd670b35SErik Nordmark  * Returns the generation number in the last argument.
442bd670b35SErik Nordmark  * ifindex should only be set for link-locals
443bd670b35SErik Nordmark  */
444bd670b35SErik Nordmark dce_t *
445bd670b35SErik Nordmark dce_lookup_v6(const in6_addr_t *dst, uint_t ifindex, ip_stack_t *ipst,
446bd670b35SErik Nordmark     uint_t *generationp)
447bd670b35SErik Nordmark {
448bd670b35SErik Nordmark 	uint_t		hash;
449bd670b35SErik Nordmark 	dcb_t		*dcb;
450bd670b35SErik Nordmark 	dce_t		*dce;
451bd670b35SErik Nordmark 
452bd670b35SErik Nordmark 	/* Set *generationp before dropping the lock(s) that allow additions */
453bd670b35SErik Nordmark 	if (generationp != NULL)
454bd670b35SErik Nordmark 		*generationp = ipst->ips_dce_default->dce_generation;
455bd670b35SErik Nordmark 
456bd670b35SErik Nordmark 	hash = IRE_ADDR_HASH_V6(*dst, ipst->ips_dce_hashsize);
457bd670b35SErik Nordmark 	dcb = &ipst->ips_dce_hash_v6[hash];
458bd670b35SErik Nordmark 	rw_enter(&dcb->dcb_lock, RW_READER);
459bd670b35SErik Nordmark 	for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
460bd670b35SErik Nordmark 		if (IN6_ARE_ADDR_EQUAL(&dce->dce_v6addr, dst) &&
461bd670b35SErik Nordmark 		    dce->dce_ifindex == ifindex) {
462bd670b35SErik Nordmark 			mutex_enter(&dce->dce_lock);
463bd670b35SErik Nordmark 			if (!DCE_IS_CONDEMNED(dce)) {
464bd670b35SErik Nordmark 				dce_refhold(dce);
465bd670b35SErik Nordmark 				if (generationp != NULL)
466bd670b35SErik Nordmark 					*generationp = dce->dce_generation;
467bd670b35SErik Nordmark 				mutex_exit(&dce->dce_lock);
468bd670b35SErik Nordmark 				rw_exit(&dcb->dcb_lock);
469bd670b35SErik Nordmark 				return (dce);
470bd670b35SErik Nordmark 			}
471bd670b35SErik Nordmark 			mutex_exit(&dce->dce_lock);
472bd670b35SErik Nordmark 		}
473bd670b35SErik Nordmark 	}
474bd670b35SErik Nordmark 	rw_exit(&dcb->dcb_lock);
475bd670b35SErik Nordmark 	/* Not found */
476bd670b35SErik Nordmark 	dce = ipst->ips_dce_default;
477bd670b35SErik Nordmark 	dce_refhold(dce);
478bd670b35SErik Nordmark 	return (dce);
479bd670b35SErik Nordmark }
480bd670b35SErik Nordmark 
481bd670b35SErik Nordmark /*
482bd670b35SErik Nordmark  * Atomically looks for a non-default DCE, and if not found tries to create one.
483bd670b35SErik Nordmark  * If there is no memory it returns NULL.
484bd670b35SErik Nordmark  * When an entry is created we increase the generation number on
485bd670b35SErik Nordmark  * the default DCE so that conn_ip_output will detect there is a new DCE.
486bd670b35SErik Nordmark  */
487bd670b35SErik Nordmark dce_t *
488bd670b35SErik Nordmark dce_lookup_and_add_v4(ipaddr_t dst, ip_stack_t *ipst)
489bd670b35SErik Nordmark {
490bd670b35SErik Nordmark 	uint_t		hash;
491bd670b35SErik Nordmark 	dcb_t		*dcb;
492bd670b35SErik Nordmark 	dce_t		*dce;
493bd670b35SErik Nordmark 
494bd670b35SErik Nordmark 	hash = IRE_ADDR_HASH(dst, ipst->ips_dce_hashsize);
495bd670b35SErik Nordmark 	dcb = &ipst->ips_dce_hash_v4[hash];
4967c6d7024SJerry Jelinek 	/*
4977c6d7024SJerry Jelinek 	 * Assuming that we get fairly even distribution across all of the
4987c6d7024SJerry Jelinek 	 * buckets, once one bucket is overly full, prune the whole cache.
4997c6d7024SJerry Jelinek 	 */
5007c6d7024SJerry Jelinek 	if (dcb->dcb_cnt > ipst->ips_ip_dce_reclaim_threshold)
5017c6d7024SJerry Jelinek 		atomic_or_uint(&ipst->ips_dce_reclaim_needed, 1);
502bd670b35SErik Nordmark 	rw_enter(&dcb->dcb_lock, RW_WRITER);
503bd670b35SErik Nordmark 	for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
504bd670b35SErik Nordmark 		if (dce->dce_v4addr == dst) {
505bd670b35SErik Nordmark 			mutex_enter(&dce->dce_lock);
506bd670b35SErik Nordmark 			if (!DCE_IS_CONDEMNED(dce)) {
507bd670b35SErik Nordmark 				dce_refhold(dce);
508bd670b35SErik Nordmark 				mutex_exit(&dce->dce_lock);
509bd670b35SErik Nordmark 				rw_exit(&dcb->dcb_lock);
510bd670b35SErik Nordmark 				return (dce);
511bd670b35SErik Nordmark 			}
512bd670b35SErik Nordmark 			mutex_exit(&dce->dce_lock);
513bd670b35SErik Nordmark 		}
514bd670b35SErik Nordmark 	}
515bd670b35SErik Nordmark 	dce = kmem_cache_alloc(dce_cache, KM_NOSLEEP);
516bd670b35SErik Nordmark 	if (dce == NULL) {
517bd670b35SErik Nordmark 		rw_exit(&dcb->dcb_lock);
518bd670b35SErik Nordmark 		return (NULL);
519bd670b35SErik Nordmark 	}
520bd670b35SErik Nordmark 	bzero(dce, sizeof (dce_t));
521bd670b35SErik Nordmark 	dce->dce_ipst = ipst;	/* No netstack_hold */
522bd670b35SErik Nordmark 	dce->dce_v4addr = dst;
523bd670b35SErik Nordmark 	dce->dce_generation = DCE_GENERATION_INITIAL;
524bd670b35SErik Nordmark 	dce->dce_ipversion = IPV4_VERSION;
525d3d50737SRafael Vanoni 	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
526bd670b35SErik Nordmark 	dce_refhold(dce);	/* For the hash list */
527bd670b35SErik Nordmark 
528bd670b35SErik Nordmark 	/* Link into list */
529bd670b35SErik Nordmark 	if (dcb->dcb_dce != NULL)
530bd670b35SErik Nordmark 		dcb->dcb_dce->dce_ptpn = &dce->dce_next;
531bd670b35SErik Nordmark 	dce->dce_next = dcb->dcb_dce;
532bd670b35SErik Nordmark 	dce->dce_ptpn = &dcb->dcb_dce;
533bd670b35SErik Nordmark 	dcb->dcb_dce = dce;
534bd670b35SErik Nordmark 	dce->dce_bucket = dcb;
535*1a5e258fSJosef 'Jeff' Sipek 	atomic_inc_32(&dcb->dcb_cnt);
536bd670b35SErik Nordmark 	dce_refhold(dce);	/* For the caller */
537bd670b35SErik Nordmark 	rw_exit(&dcb->dcb_lock);
538bd670b35SErik Nordmark 
539bd670b35SErik Nordmark 	/* Initialize dce_ident to be different than for the last packet */
540bd670b35SErik Nordmark 	dce->dce_ident = ipst->ips_dce_default->dce_ident + 1;
541bd670b35SErik Nordmark 
542bd670b35SErik Nordmark 	dce_increment_generation(ipst->ips_dce_default);
543bd670b35SErik Nordmark 	return (dce);
544bd670b35SErik Nordmark }
545bd670b35SErik Nordmark 
546bd670b35SErik Nordmark /*
547bd670b35SErik Nordmark  * Atomically looks for a non-default DCE, and if not found tries to create one.
548bd670b35SErik Nordmark  * If there is no memory it returns NULL.
549bd670b35SErik Nordmark  * When an entry is created we increase the generation number on
550bd670b35SErik Nordmark  * the default DCE so that conn_ip_output will detect there is a new DCE.
551bd670b35SErik Nordmark  * ifindex should only be used with link-local addresses.
552bd670b35SErik Nordmark  */
553bd670b35SErik Nordmark dce_t *
554bd670b35SErik Nordmark dce_lookup_and_add_v6(const in6_addr_t *dst, uint_t ifindex, ip_stack_t *ipst)
555bd670b35SErik Nordmark {
556bd670b35SErik Nordmark 	uint_t		hash;
557bd670b35SErik Nordmark 	dcb_t		*dcb;
558bd670b35SErik Nordmark 	dce_t		*dce;
559bd670b35SErik Nordmark 
560bd670b35SErik Nordmark 	/* We should not create entries for link-locals w/o an ifindex */
561bd670b35SErik Nordmark 	ASSERT(!(IN6_IS_ADDR_LINKSCOPE(dst)) || ifindex != 0);
562bd670b35SErik Nordmark 
563bd670b35SErik Nordmark 	hash = IRE_ADDR_HASH_V6(*dst, ipst->ips_dce_hashsize);
564bd670b35SErik Nordmark 	dcb = &ipst->ips_dce_hash_v6[hash];
5657c6d7024SJerry Jelinek 	/*
5667c6d7024SJerry Jelinek 	 * Assuming that we get fairly even distribution across all of the
5677c6d7024SJerry Jelinek 	 * buckets, once one bucket is overly full, prune the whole cache.
5687c6d7024SJerry Jelinek 	 */
5697c6d7024SJerry Jelinek 	if (dcb->dcb_cnt > ipst->ips_ip_dce_reclaim_threshold)
5707c6d7024SJerry Jelinek 		atomic_or_uint(&ipst->ips_dce_reclaim_needed, 1);
571bd670b35SErik Nordmark 	rw_enter(&dcb->dcb_lock, RW_WRITER);
572bd670b35SErik Nordmark 	for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
573bd670b35SErik Nordmark 		if (IN6_ARE_ADDR_EQUAL(&dce->dce_v6addr, dst) &&
574bd670b35SErik Nordmark 		    dce->dce_ifindex == ifindex) {
575bd670b35SErik Nordmark 			mutex_enter(&dce->dce_lock);
576bd670b35SErik Nordmark 			if (!DCE_IS_CONDEMNED(dce)) {
577bd670b35SErik Nordmark 				dce_refhold(dce);
578bd670b35SErik Nordmark 				mutex_exit(&dce->dce_lock);
579bd670b35SErik Nordmark 				rw_exit(&dcb->dcb_lock);
580bd670b35SErik Nordmark 				return (dce);
581bd670b35SErik Nordmark 			}
582bd670b35SErik Nordmark 			mutex_exit(&dce->dce_lock);
583bd670b35SErik Nordmark 		}
584bd670b35SErik Nordmark 	}
585bd670b35SErik Nordmark 
586bd670b35SErik Nordmark 	dce = kmem_cache_alloc(dce_cache, KM_NOSLEEP);
587bd670b35SErik Nordmark 	if (dce == NULL) {
588bd670b35SErik Nordmark 		rw_exit(&dcb->dcb_lock);
589bd670b35SErik Nordmark 		return (NULL);
590bd670b35SErik Nordmark 	}
591bd670b35SErik Nordmark 	bzero(dce, sizeof (dce_t));
592bd670b35SErik Nordmark 	dce->dce_ipst = ipst;	/* No netstack_hold */
593bd670b35SErik Nordmark 	dce->dce_v6addr = *dst;
594bd670b35SErik Nordmark 	dce->dce_ifindex = ifindex;
595bd670b35SErik Nordmark 	dce->dce_generation = DCE_GENERATION_INITIAL;
596bd670b35SErik Nordmark 	dce->dce_ipversion = IPV6_VERSION;
597d3d50737SRafael Vanoni 	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
598bd670b35SErik Nordmark 	dce_refhold(dce);	/* For the hash list */
599bd670b35SErik Nordmark 
600bd670b35SErik Nordmark 	/* Link into list */
601bd670b35SErik Nordmark 	if (dcb->dcb_dce != NULL)
602bd670b35SErik Nordmark 		dcb->dcb_dce->dce_ptpn = &dce->dce_next;
603bd670b35SErik Nordmark 	dce->dce_next = dcb->dcb_dce;
604bd670b35SErik Nordmark 	dce->dce_ptpn = &dcb->dcb_dce;
605bd670b35SErik Nordmark 	dcb->dcb_dce = dce;
606bd670b35SErik Nordmark 	dce->dce_bucket = dcb;
607*1a5e258fSJosef 'Jeff' Sipek 	atomic_inc_32(&dcb->dcb_cnt);
608bd670b35SErik Nordmark 	dce_refhold(dce);	/* For the caller */
609bd670b35SErik Nordmark 	rw_exit(&dcb->dcb_lock);
610bd670b35SErik Nordmark 
611bd670b35SErik Nordmark 	/* Initialize dce_ident to be different than for the last packet */
612bd670b35SErik Nordmark 	dce->dce_ident = ipst->ips_dce_default->dce_ident + 1;
613bd670b35SErik Nordmark 	dce_increment_generation(ipst->ips_dce_default);
614bd670b35SErik Nordmark 	return (dce);
615bd670b35SErik Nordmark }
616bd670b35SErik Nordmark 
617bd670b35SErik Nordmark /*
618bd670b35SErik Nordmark  * Set/update uinfo. Creates a per-destination dce if none exists.
619bd670b35SErik Nordmark  *
620bd670b35SErik Nordmark  * Note that we do not bump the generation number here.
621bd670b35SErik Nordmark  * New connections will find the new uinfo.
622bd670b35SErik Nordmark  *
623bd670b35SErik Nordmark  * The only use of this (tcp, sctp using iulp_t) is to set rtt+rtt_sd.
624bd670b35SErik Nordmark  */
625bd670b35SErik Nordmark static void
626bd670b35SErik Nordmark dce_setuinfo(dce_t *dce, iulp_t *uinfo)
627bd670b35SErik Nordmark {
628bd670b35SErik Nordmark 	/*
629bd670b35SErik Nordmark 	 * Update the round trip time estimate and/or the max frag size
630bd670b35SErik Nordmark 	 * and/or the slow start threshold.
631bd670b35SErik Nordmark 	 *
632bd670b35SErik Nordmark 	 * We serialize multiple advises using dce_lock.
633bd670b35SErik Nordmark 	 */
634bd670b35SErik Nordmark 	mutex_enter(&dce->dce_lock);
635bd670b35SErik Nordmark 	/* Gard against setting to zero */
636bd670b35SErik Nordmark 	if (uinfo->iulp_rtt != 0) {
637bd670b35SErik Nordmark 		/*
638bd670b35SErik Nordmark 		 * If there is no old cached values, initialize them
639bd670b35SErik Nordmark 		 * conservatively.  Set them to be (1.5 * new value).
640bd670b35SErik Nordmark 		 */
641bd670b35SErik Nordmark 		if (dce->dce_uinfo.iulp_rtt != 0) {
642bd670b35SErik Nordmark 			dce->dce_uinfo.iulp_rtt = (dce->dce_uinfo.iulp_rtt +
643bd670b35SErik Nordmark 			    uinfo->iulp_rtt) >> 1;
644bd670b35SErik Nordmark 		} else {
645bd670b35SErik Nordmark 			dce->dce_uinfo.iulp_rtt = uinfo->iulp_rtt +
646bd670b35SErik Nordmark 			    (uinfo->iulp_rtt >> 1);
647bd670b35SErik Nordmark 		}
648bd670b35SErik Nordmark 		if (dce->dce_uinfo.iulp_rtt_sd != 0) {
649bd670b35SErik Nordmark 			dce->dce_uinfo.iulp_rtt_sd =
650bd670b35SErik Nordmark 			    (dce->dce_uinfo.iulp_rtt_sd +
651bd670b35SErik Nordmark 			    uinfo->iulp_rtt_sd) >> 1;
652bd670b35SErik Nordmark 		} else {
653bd670b35SErik Nordmark 			dce->dce_uinfo.iulp_rtt_sd = uinfo->iulp_rtt_sd +
654bd670b35SErik Nordmark 			    (uinfo->iulp_rtt_sd >> 1);
655bd670b35SErik Nordmark 		}
656bd670b35SErik Nordmark 	}
657bd670b35SErik Nordmark 	if (uinfo->iulp_mtu != 0) {
658bd670b35SErik Nordmark 		if (dce->dce_flags & DCEF_PMTU) {
659bd670b35SErik Nordmark 			dce->dce_pmtu = MIN(uinfo->iulp_mtu, dce->dce_pmtu);
660bd670b35SErik Nordmark 		} else {
661bd670b35SErik Nordmark 			dce->dce_pmtu = MIN(uinfo->iulp_mtu, IP_MAXPACKET);
662bd670b35SErik Nordmark 			dce->dce_flags |= DCEF_PMTU;
663bd670b35SErik Nordmark 		}
664d3d50737SRafael Vanoni 		dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
665bd670b35SErik Nordmark 	}
666bd670b35SErik Nordmark 	if (uinfo->iulp_ssthresh != 0) {
667bd670b35SErik Nordmark 		if (dce->dce_uinfo.iulp_ssthresh != 0)
668bd670b35SErik Nordmark 			dce->dce_uinfo.iulp_ssthresh =
669bd670b35SErik Nordmark 			    (uinfo->iulp_ssthresh +
670bd670b35SErik Nordmark 			    dce->dce_uinfo.iulp_ssthresh) >> 1;
671bd670b35SErik Nordmark 		else
672bd670b35SErik Nordmark 			dce->dce_uinfo.iulp_ssthresh = uinfo->iulp_ssthresh;
673bd670b35SErik Nordmark 	}
674bd670b35SErik Nordmark 	/* We have uinfo for sure */
675bd670b35SErik Nordmark 	dce->dce_flags |= DCEF_UINFO;
676bd670b35SErik Nordmark 	mutex_exit(&dce->dce_lock);
677bd670b35SErik Nordmark }
678bd670b35SErik Nordmark 
679bd670b35SErik Nordmark 
680bd670b35SErik Nordmark int
681bd670b35SErik Nordmark dce_update_uinfo_v4(ipaddr_t dst, iulp_t *uinfo, ip_stack_t *ipst)
682bd670b35SErik Nordmark {
683bd670b35SErik Nordmark 	dce_t *dce;
684bd670b35SErik Nordmark 
685bd670b35SErik Nordmark 	dce = dce_lookup_and_add_v4(dst, ipst);
686bd670b35SErik Nordmark 	if (dce == NULL)
687bd670b35SErik Nordmark 		return (ENOMEM);
688bd670b35SErik Nordmark 
689bd670b35SErik Nordmark 	dce_setuinfo(dce, uinfo);
690bd670b35SErik Nordmark 	dce_refrele(dce);
691bd670b35SErik Nordmark 	return (0);
692bd670b35SErik Nordmark }
693bd670b35SErik Nordmark 
694bd670b35SErik Nordmark int
695bd670b35SErik Nordmark dce_update_uinfo_v6(const in6_addr_t *dst, uint_t ifindex, iulp_t *uinfo,
696bd670b35SErik Nordmark     ip_stack_t *ipst)
697bd670b35SErik Nordmark {
698bd670b35SErik Nordmark 	dce_t *dce;
699bd670b35SErik Nordmark 
700bd670b35SErik Nordmark 	dce = dce_lookup_and_add_v6(dst, ifindex, ipst);
701bd670b35SErik Nordmark 	if (dce == NULL)
702bd670b35SErik Nordmark 		return (ENOMEM);
703bd670b35SErik Nordmark 
704bd670b35SErik Nordmark 	dce_setuinfo(dce, uinfo);
705bd670b35SErik Nordmark 	dce_refrele(dce);
706bd670b35SErik Nordmark 	return (0);
707bd670b35SErik Nordmark }
708bd670b35SErik Nordmark 
709bd670b35SErik Nordmark /* Common routine for IPv4 and IPv6 */
710bd670b35SErik Nordmark int
711bd670b35SErik Nordmark dce_update_uinfo(const in6_addr_t *dst, uint_t ifindex, iulp_t *uinfo,
712bd670b35SErik Nordmark     ip_stack_t *ipst)
713bd670b35SErik Nordmark {
714bd670b35SErik Nordmark 	ipaddr_t dst4;
715bd670b35SErik Nordmark 
716bd670b35SErik Nordmark 	if (IN6_IS_ADDR_V4MAPPED_ANY(dst)) {
717bd670b35SErik Nordmark 		IN6_V4MAPPED_TO_IPADDR(dst, dst4);
718bd670b35SErik Nordmark 		return (dce_update_uinfo_v4(dst4, uinfo, ipst));
719bd670b35SErik Nordmark 	} else {
720bd670b35SErik Nordmark 		return (dce_update_uinfo_v6(dst, ifindex, uinfo, ipst));
721bd670b35SErik Nordmark 	}
722bd670b35SErik Nordmark }
723bd670b35SErik Nordmark 
724bd670b35SErik Nordmark static void
725bd670b35SErik Nordmark dce_make_condemned(dce_t *dce)
726bd670b35SErik Nordmark {
727bd670b35SErik Nordmark 	ip_stack_t	*ipst = dce->dce_ipst;
728bd670b35SErik Nordmark 
729bd670b35SErik Nordmark 	mutex_enter(&dce->dce_lock);
730bd670b35SErik Nordmark 	ASSERT(!DCE_IS_CONDEMNED(dce));
731bd670b35SErik Nordmark 	dce->dce_generation = DCE_GENERATION_CONDEMNED;
732bd670b35SErik Nordmark 	mutex_exit(&dce->dce_lock);
733bd670b35SErik Nordmark 	/* Count how many condemned dces for kmem_cache callback */
734*1a5e258fSJosef 'Jeff' Sipek 	atomic_inc_32(&ipst->ips_num_dce_condemned);
735bd670b35SErik Nordmark }
736bd670b35SErik Nordmark 
737bd670b35SErik Nordmark /*
738bd670b35SErik Nordmark  * Increment the generation avoiding the special condemned value
739bd670b35SErik Nordmark  */
740bd670b35SErik Nordmark void
741bd670b35SErik Nordmark dce_increment_generation(dce_t *dce)
742bd670b35SErik Nordmark {
743bd670b35SErik Nordmark 	uint_t generation;
744bd670b35SErik Nordmark 
745bd670b35SErik Nordmark 	mutex_enter(&dce->dce_lock);
746bd670b35SErik Nordmark 	if (!DCE_IS_CONDEMNED(dce)) {
747bd670b35SErik Nordmark 		generation = dce->dce_generation + 1;
748bd670b35SErik Nordmark 		if (generation == DCE_GENERATION_CONDEMNED)
749bd670b35SErik Nordmark 			generation = DCE_GENERATION_INITIAL;
750bd670b35SErik Nordmark 		ASSERT(generation != DCE_GENERATION_VERIFY);
751bd670b35SErik Nordmark 		dce->dce_generation = generation;
752bd670b35SErik Nordmark 	}
753bd670b35SErik Nordmark 	mutex_exit(&dce->dce_lock);
754bd670b35SErik Nordmark }
755bd670b35SErik Nordmark 
756bd670b35SErik Nordmark /*
757bd670b35SErik Nordmark  * Increment the generation number on all dces that have a path MTU and
7581eee170aSErik Nordmark  * the default DCE. Used when ill_mtu or ill_mc_mtu changes.
759bd670b35SErik Nordmark  */
760bd670b35SErik Nordmark void
761bd670b35SErik Nordmark dce_increment_all_generations(boolean_t isv6, ip_stack_t *ipst)
762bd670b35SErik Nordmark {
763bd670b35SErik Nordmark 	int		i;
764bd670b35SErik Nordmark 	dcb_t		*dcb;
765bd670b35SErik Nordmark 	dce_t		*dce;
766bd670b35SErik Nordmark 
767bd670b35SErik Nordmark 	for (i = 0; i < ipst->ips_dce_hashsize; i++) {
768bd670b35SErik Nordmark 		if (isv6)
769bd670b35SErik Nordmark 			dcb = &ipst->ips_dce_hash_v6[i];
770bd670b35SErik Nordmark 		else
771bd670b35SErik Nordmark 			dcb = &ipst->ips_dce_hash_v4[i];
772bd670b35SErik Nordmark 		rw_enter(&dcb->dcb_lock, RW_WRITER);
773bd670b35SErik Nordmark 		for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
774bd670b35SErik Nordmark 			if (DCE_IS_CONDEMNED(dce))
775bd670b35SErik Nordmark 				continue;
776bd670b35SErik Nordmark 			dce_increment_generation(dce);
777bd670b35SErik Nordmark 		}
778bd670b35SErik Nordmark 		rw_exit(&dcb->dcb_lock);
779bd670b35SErik Nordmark 	}
780bd670b35SErik Nordmark 	dce_increment_generation(ipst->ips_dce_default);
781bd670b35SErik Nordmark }
782bd670b35SErik Nordmark 
783bd670b35SErik Nordmark /*
784bd670b35SErik Nordmark  * Caller needs to do a dce_refrele since we can't do the
785bd670b35SErik Nordmark  * dce_refrele under dcb_lock.
786bd670b35SErik Nordmark  */
787bd670b35SErik Nordmark static void
788bd670b35SErik Nordmark dce_delete_locked(dcb_t *dcb, dce_t *dce)
789bd670b35SErik Nordmark {
790bd670b35SErik Nordmark 	dce->dce_bucket = NULL;
791bd670b35SErik Nordmark 	*dce->dce_ptpn = dce->dce_next;
792bd670b35SErik Nordmark 	if (dce->dce_next != NULL)
793bd670b35SErik Nordmark 		dce->dce_next->dce_ptpn = dce->dce_ptpn;
794bd670b35SErik Nordmark 	dce->dce_ptpn = NULL;
795bd670b35SErik Nordmark 	dce->dce_next = NULL;
796*1a5e258fSJosef 'Jeff' Sipek 	atomic_dec_32(&dcb->dcb_cnt);
797bd670b35SErik Nordmark 	dce_make_condemned(dce);
798bd670b35SErik Nordmark }
799bd670b35SErik Nordmark 
800bd670b35SErik Nordmark static void
801bd670b35SErik Nordmark dce_inactive(dce_t *dce)
802bd670b35SErik Nordmark {
803bd670b35SErik Nordmark 	ip_stack_t	*ipst = dce->dce_ipst;
804bd670b35SErik Nordmark 
805bd670b35SErik Nordmark 	ASSERT(!(dce->dce_flags & DCEF_DEFAULT));
806bd670b35SErik Nordmark 	ASSERT(dce->dce_ptpn == NULL);
807bd670b35SErik Nordmark 	ASSERT(dce->dce_bucket == NULL);
808bd670b35SErik Nordmark 
809bd670b35SErik Nordmark 	/* Count how many condemned dces for kmem_cache callback */
810bd670b35SErik Nordmark 	if (DCE_IS_CONDEMNED(dce))
811*1a5e258fSJosef 'Jeff' Sipek 		atomic_dec_32(&ipst->ips_num_dce_condemned);
812bd670b35SErik Nordmark 
813bd670b35SErik Nordmark 	kmem_cache_free(dce_cache, dce);
814bd670b35SErik Nordmark }
815bd670b35SErik Nordmark 
816bd670b35SErik Nordmark void
817bd670b35SErik Nordmark dce_refrele(dce_t *dce)
818bd670b35SErik Nordmark {
819bd670b35SErik Nordmark 	ASSERT(dce->dce_refcnt != 0);
820*1a5e258fSJosef 'Jeff' Sipek 	if (atomic_dec_32_nv(&dce->dce_refcnt) == 0)
821bd670b35SErik Nordmark 		dce_inactive(dce);
822bd670b35SErik Nordmark }
823bd670b35SErik Nordmark 
824bd670b35SErik Nordmark void
825bd670b35SErik Nordmark dce_refhold(dce_t *dce)
826bd670b35SErik Nordmark {
827*1a5e258fSJosef 'Jeff' Sipek 	atomic_inc_32(&dce->dce_refcnt);
828bd670b35SErik Nordmark 	ASSERT(dce->dce_refcnt != 0);
829bd670b35SErik Nordmark }
830bd670b35SErik Nordmark 
831bd670b35SErik Nordmark /* No tracing support yet hence the same as the above functions */
832bd670b35SErik Nordmark void
833bd670b35SErik Nordmark dce_refrele_notr(dce_t *dce)
834bd670b35SErik Nordmark {
835bd670b35SErik Nordmark 	ASSERT(dce->dce_refcnt != 0);
836*1a5e258fSJosef 'Jeff' Sipek 	if (atomic_dec_32_nv(&dce->dce_refcnt) == 0)
837bd670b35SErik Nordmark 		dce_inactive(dce);
838bd670b35SErik Nordmark }
839bd670b35SErik Nordmark 
840bd670b35SErik Nordmark void
841bd670b35SErik Nordmark dce_refhold_notr(dce_t *dce)
842bd670b35SErik Nordmark {
843*1a5e258fSJosef 'Jeff' Sipek 	atomic_inc_32(&dce->dce_refcnt);
844bd670b35SErik Nordmark 	ASSERT(dce->dce_refcnt != 0);
845bd670b35SErik Nordmark }
846bd670b35SErik Nordmark 
847bd670b35SErik Nordmark /* Report both the IPv4 and IPv6 DCEs. */
848bd670b35SErik Nordmark mblk_t *
849bd670b35SErik Nordmark ip_snmp_get_mib2_ip_dce(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst)
850bd670b35SErik Nordmark {
851bd670b35SErik Nordmark 	struct opthdr		*optp;
852bd670b35SErik Nordmark 	mblk_t			*mp2ctl;
853bd670b35SErik Nordmark 	dest_cache_entry_t	dest_cache;
854bd670b35SErik Nordmark 	mblk_t			*mp_tail = NULL;
855bd670b35SErik Nordmark 	dce_t			*dce;
856bd670b35SErik Nordmark 	dcb_t			*dcb;
857bd670b35SErik Nordmark 	int			i;
858bd670b35SErik Nordmark 	uint64_t		current_time;
859bd670b35SErik Nordmark 
860d3d50737SRafael Vanoni 	current_time = TICK_TO_SEC(ddi_get_lbolt64());
861bd670b35SErik Nordmark 
862bd670b35SErik Nordmark 	/*
863bd670b35SErik Nordmark 	 * make a copy of the original message
864bd670b35SErik Nordmark 	 */
865bd670b35SErik Nordmark 	mp2ctl = copymsg(mpctl);
866bd670b35SErik Nordmark 
867bd670b35SErik Nordmark 	/* First we do IPv4 entries */
868bd670b35SErik Nordmark 	optp = (struct opthdr *)&mpctl->b_rptr[
869bd670b35SErik Nordmark 	    sizeof (struct T_optmgmt_ack)];
870bd670b35SErik Nordmark 	optp->level = MIB2_IP;
871bd670b35SErik Nordmark 	optp->name = EXPER_IP_DCE;
872bd670b35SErik Nordmark 
873bd670b35SErik Nordmark 	for (i = 0; i < ipst->ips_dce_hashsize; i++) {
874bd670b35SErik Nordmark 		dcb = &ipst->ips_dce_hash_v4[i];
875bd670b35SErik Nordmark 		rw_enter(&dcb->dcb_lock, RW_READER);
876bd670b35SErik Nordmark 		for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
877bd670b35SErik Nordmark 			dest_cache.DestIpv4Address = dce->dce_v4addr;
878bd670b35SErik Nordmark 			dest_cache.DestFlags = dce->dce_flags;
879bd670b35SErik Nordmark 			if (dce->dce_flags & DCEF_PMTU)
880bd670b35SErik Nordmark 				dest_cache.DestPmtu = dce->dce_pmtu;
881bd670b35SErik Nordmark 			else
882bd670b35SErik Nordmark 				dest_cache.DestPmtu = 0;
883bd670b35SErik Nordmark 			dest_cache.DestIdent = dce->dce_ident;
884bd670b35SErik Nordmark 			dest_cache.DestIfindex = 0;
885bd670b35SErik Nordmark 			dest_cache.DestAge = current_time -
886bd670b35SErik Nordmark 			    dce->dce_last_change_time;
887bd670b35SErik Nordmark 			if (!snmp_append_data2(mpctl->b_cont, &mp_tail,
888bd670b35SErik Nordmark 			    (char *)&dest_cache, (int)sizeof (dest_cache))) {
889bd670b35SErik Nordmark 				ip1dbg(("ip_snmp_get_mib2_ip_dce: "
890bd670b35SErik Nordmark 				    "failed to allocate %u bytes\n",
891bd670b35SErik Nordmark 				    (uint_t)sizeof (dest_cache)));
892bd670b35SErik Nordmark 			}
893bd670b35SErik Nordmark 		}
894bd670b35SErik Nordmark 		rw_exit(&dcb->dcb_lock);
895bd670b35SErik Nordmark 	}
896bd670b35SErik Nordmark 	optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont);
897bd670b35SErik Nordmark 	ip3dbg(("ip_snmp_get: level %d, name %d, len %d\n",
898bd670b35SErik Nordmark 	    (int)optp->level, (int)optp->name, (int)optp->len));
899bd670b35SErik Nordmark 	qreply(q, mpctl);
900bd670b35SErik Nordmark 
901bd670b35SErik Nordmark 	if (mp2ctl == NULL) {
902bd670b35SErik Nordmark 		/* Copymsg failed above */
903bd670b35SErik Nordmark 		return (NULL);
904bd670b35SErik Nordmark 	}
905bd670b35SErik Nordmark 
906bd670b35SErik Nordmark 	/* Now for IPv6 */
907bd670b35SErik Nordmark 	mpctl = mp2ctl;
908bd670b35SErik Nordmark 	mp_tail = NULL;
909bd670b35SErik Nordmark 	mp2ctl = copymsg(mpctl);
910bd670b35SErik Nordmark 	optp = (struct opthdr *)&mpctl->b_rptr[
911bd670b35SErik Nordmark 	    sizeof (struct T_optmgmt_ack)];
912bd670b35SErik Nordmark 	optp->level = MIB2_IP6;
913bd670b35SErik Nordmark 	optp->name = EXPER_IP_DCE;
914bd670b35SErik Nordmark 
915bd670b35SErik Nordmark 	for (i = 0; i < ipst->ips_dce_hashsize; i++) {
916bd670b35SErik Nordmark 		dcb = &ipst->ips_dce_hash_v6[i];
917bd670b35SErik Nordmark 		rw_enter(&dcb->dcb_lock, RW_READER);
918bd670b35SErik Nordmark 		for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
919bd670b35SErik Nordmark 			dest_cache.DestIpv6Address = dce->dce_v6addr;
920bd670b35SErik Nordmark 			dest_cache.DestFlags = dce->dce_flags;
921bd670b35SErik Nordmark 			if (dce->dce_flags & DCEF_PMTU)
922bd670b35SErik Nordmark 				dest_cache.DestPmtu = dce->dce_pmtu;
923bd670b35SErik Nordmark 			else
924bd670b35SErik Nordmark 				dest_cache.DestPmtu = 0;
925bd670b35SErik Nordmark 			dest_cache.DestIdent = dce->dce_ident;
926bd670b35SErik Nordmark 			if (IN6_IS_ADDR_LINKSCOPE(&dce->dce_v6addr))
927bd670b35SErik Nordmark 				dest_cache.DestIfindex = dce->dce_ifindex;
928bd670b35SErik Nordmark 			else
929bd670b35SErik Nordmark 				dest_cache.DestIfindex = 0;
930bd670b35SErik Nordmark 			dest_cache.DestAge = current_time -
931bd670b35SErik Nordmark 			    dce->dce_last_change_time;
932bd670b35SErik Nordmark 			if (!snmp_append_data2(mpctl->b_cont, &mp_tail,
933bd670b35SErik Nordmark 			    (char *)&dest_cache, (int)sizeof (dest_cache))) {
934bd670b35SErik Nordmark 				ip1dbg(("ip_snmp_get_mib2_ip_dce: "
935bd670b35SErik Nordmark 				    "failed to allocate %u bytes\n",
936bd670b35SErik Nordmark 				    (uint_t)sizeof (dest_cache)));
937bd670b35SErik Nordmark 			}
938bd670b35SErik Nordmark 		}
939bd670b35SErik Nordmark 		rw_exit(&dcb->dcb_lock);
940bd670b35SErik Nordmark 	}
941bd670b35SErik Nordmark 	optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont);
942bd670b35SErik Nordmark 	ip3dbg(("ip_snmp_get: level %d, name %d, len %d\n",
943bd670b35SErik Nordmark 	    (int)optp->level, (int)optp->name, (int)optp->len));
944bd670b35SErik Nordmark 	qreply(q, mpctl);
945bd670b35SErik Nordmark 
946bd670b35SErik Nordmark 	return (mp2ctl);
947bd670b35SErik Nordmark }
948bd670b35SErik Nordmark 
949bd670b35SErik Nordmark /*
950bd670b35SErik Nordmark  * Remove IPv6 DCEs which refer to an ifindex that is going away.
951bd670b35SErik Nordmark  * This is not required for correctness, but it avoids netstat -d
952bd670b35SErik Nordmark  * showing stale stuff that will never be used.
953bd670b35SErik Nordmark  */
954bd670b35SErik Nordmark void
955bd670b35SErik Nordmark dce_cleanup(uint_t ifindex, ip_stack_t *ipst)
956bd670b35SErik Nordmark {
957bd670b35SErik Nordmark 	uint_t	i;
958bd670b35SErik Nordmark 	dcb_t	*dcb;
959bd670b35SErik Nordmark 	dce_t	*dce, *nextdce;
960bd670b35SErik Nordmark 
961bd670b35SErik Nordmark 	for (i = 0; i < ipst->ips_dce_hashsize; i++) {
962bd670b35SErik Nordmark 		dcb = &ipst->ips_dce_hash_v6[i];
963bd670b35SErik Nordmark 		rw_enter(&dcb->dcb_lock, RW_WRITER);
964bd670b35SErik Nordmark 
965bd670b35SErik Nordmark 		for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) {
966bd670b35SErik Nordmark 			nextdce = dce->dce_next;
967bd670b35SErik Nordmark 			if (dce->dce_ifindex == ifindex) {
968bd670b35SErik Nordmark 				dce_delete_locked(dcb, dce);
969bd670b35SErik Nordmark 				dce_refrele(dce);
970bd670b35SErik Nordmark 			}
971bd670b35SErik Nordmark 		}
972bd670b35SErik Nordmark 		rw_exit(&dcb->dcb_lock);
973bd670b35SErik Nordmark 	}
974bd670b35SErik Nordmark }
975