1bd670b35SErik Nordmark /* 2bd670b35SErik Nordmark * CDDL HEADER START 3bd670b35SErik Nordmark * 4bd670b35SErik Nordmark * The contents of this file are subject to the terms of the 5bd670b35SErik Nordmark * Common Development and Distribution License (the "License"). 6bd670b35SErik Nordmark * You may not use this file except in compliance with the License. 7bd670b35SErik Nordmark * 8bd670b35SErik Nordmark * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9bd670b35SErik Nordmark * or http://www.opensolaris.org/os/licensing. 10bd670b35SErik Nordmark * See the License for the specific language governing permissions 11bd670b35SErik Nordmark * and limitations under the License. 12bd670b35SErik Nordmark * 13bd670b35SErik Nordmark * When distributing Covered Code, include this CDDL HEADER in each 14bd670b35SErik Nordmark * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15bd670b35SErik Nordmark * If applicable, add the following below this CDDL HEADER, with the 16bd670b35SErik Nordmark * fields enclosed by brackets "[]" replaced with your own identifying 17bd670b35SErik Nordmark * information: Portions Copyright [yyyy] [name of copyright owner] 18bd670b35SErik Nordmark * 19bd670b35SErik Nordmark * CDDL HEADER END 20bd670b35SErik Nordmark */ 21bd670b35SErik Nordmark 22bd670b35SErik Nordmark /* 231eee170aSErik Nordmark * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 247c6d7024SJerry Jelinek * Copyright (c) 2012, Joyent, Inc. All rights reserved. 25bd670b35SErik Nordmark */ 26bd670b35SErik Nordmark 27bd670b35SErik Nordmark #include <sys/types.h> 28bd670b35SErik Nordmark #include <sys/stream.h> 29bd670b35SErik Nordmark #include <sys/strsun.h> 30bd670b35SErik Nordmark #include <sys/zone.h> 31bd670b35SErik Nordmark #include <sys/ddi.h> 327c6d7024SJerry Jelinek #include <sys/disp.h> 33bd670b35SErik Nordmark #include <sys/sunddi.h> 34bd670b35SErik Nordmark #include <sys/cmn_err.h> 35bd670b35SErik Nordmark #include <sys/debug.h> 36bd670b35SErik Nordmark #include <sys/atomic.h> 377c6d7024SJerry Jelinek #include <sys/callb.h> 38bd670b35SErik Nordmark #define _SUN_TPI_VERSION 2 39bd670b35SErik Nordmark #include <sys/tihdr.h> 40bd670b35SErik Nordmark 41bd670b35SErik Nordmark #include <inet/common.h> 42bd670b35SErik Nordmark #include <inet/mi.h> 43bd670b35SErik Nordmark #include <inet/mib2.h> 44bd670b35SErik Nordmark #include <inet/snmpcom.h> 45bd670b35SErik Nordmark 46bd670b35SErik Nordmark #include <netinet/ip6.h> 47bd670b35SErik Nordmark #include <netinet/icmp6.h> 48bd670b35SErik Nordmark 49bd670b35SErik Nordmark #include <inet/ip.h> 50bd670b35SErik Nordmark #include <inet/ip_impl.h> 51bd670b35SErik Nordmark #include <inet/ip6.h> 52bd670b35SErik Nordmark #include <inet/ip6_asp.h> 53bd670b35SErik Nordmark #include <inet/ip_multi.h> 54bd670b35SErik Nordmark #include <inet/ip_if.h> 55bd670b35SErik Nordmark #include <inet/ip_ire.h> 56bd670b35SErik Nordmark #include <inet/ip_ftable.h> 57bd670b35SErik Nordmark #include <inet/ip_rts.h> 58bd670b35SErik Nordmark #include <inet/ip_ndp.h> 59bd670b35SErik Nordmark #include <inet/ipclassifier.h> 60bd670b35SErik Nordmark #include <inet/ip_listutils.h> 61bd670b35SErik Nordmark 62bd670b35SErik Nordmark #include <sys/sunddi.h> 63bd670b35SErik Nordmark 64bd670b35SErik Nordmark /* 65bd670b35SErik Nordmark * Routines for handling destination cache entries. 66bd670b35SErik Nordmark * There is always one DCEF_DEFAULT for each ip_stack_t created at init time. 67bd670b35SErik Nordmark * That entry holds both the IP ident value and the dce generation number. 68bd670b35SErik Nordmark * 69bd670b35SErik Nordmark * Any time a DCE is changed significantly (different path MTU, but NOT 70bd670b35SErik Nordmark * different ULP info!), the dce_generation number is increased. 71bd670b35SErik Nordmark * Also, when a new DCE is created, the dce_generation number in the default 72bd670b35SErik Nordmark * DCE is bumped. That allows the dce_t information to be cached efficiently 73bd670b35SErik Nordmark * as long as the entity caching the dce_t also caches the dce_generation, 74bd670b35SErik Nordmark * and compares the cached generation to detect any changes. 75bd670b35SErik Nordmark * Furthermore, when a DCE is deleted, if there are any outstanding references 76bd670b35SErik Nordmark * to the DCE it will be marked as condemned. The condemned mark is 77bd670b35SErik Nordmark * a designated generation number which is never otherwise used, hence 78bd670b35SErik Nordmark * the single comparison with the generation number captures that as well. 79bd670b35SErik Nordmark * 80bd670b35SErik Nordmark * An example of code which caches is as follows: 81bd670b35SErik Nordmark * 82bd670b35SErik Nordmark * if (mystruct->my_dce_generation != mystruct->my_dce->dce_generation) { 83bd670b35SErik Nordmark * The DCE has changed 84bd670b35SErik Nordmark * mystruct->my_dce = dce_lookup_pkt(mp, ixa, 85bd670b35SErik Nordmark * &mystruct->my_dce_generation); 86bd670b35SErik Nordmark * Not needed in practice, since we have the default DCE: 87bd670b35SErik Nordmark * if (DCE_IS_CONDEMNED(mystruct->my_dce)) 88bd670b35SErik Nordmark * return failure; 89bd670b35SErik Nordmark * } 90bd670b35SErik Nordmark * 91bd670b35SErik Nordmark * Note that for IPv6 link-local addresses we record the ifindex since the 92bd670b35SErik Nordmark * link-locals are not globally unique. 93bd670b35SErik Nordmark */ 94bd670b35SErik Nordmark 95bd670b35SErik Nordmark /* 96bd670b35SErik Nordmark * Hash bucket structure for DCEs 97bd670b35SErik Nordmark */ 98bd670b35SErik Nordmark typedef struct dcb_s { 99bd670b35SErik Nordmark krwlock_t dcb_lock; 100bd670b35SErik Nordmark uint32_t dcb_cnt; 101bd670b35SErik Nordmark dce_t *dcb_dce; 102bd670b35SErik Nordmark } dcb_t; 103bd670b35SErik Nordmark 104bd670b35SErik Nordmark static void dce_delete_locked(dcb_t *, dce_t *); 105bd670b35SErik Nordmark static void dce_make_condemned(dce_t *); 106bd670b35SErik Nordmark 107bd670b35SErik Nordmark static kmem_cache_t *dce_cache; 1087c6d7024SJerry Jelinek static kthread_t *dce_reclaim_thread; 1097c6d7024SJerry Jelinek static kmutex_t dce_reclaim_lock; 1107c6d7024SJerry Jelinek static kcondvar_t dce_reclaim_cv; 1117c6d7024SJerry Jelinek static int dce_reclaim_shutdown; 112bd670b35SErik Nordmark 1137c6d7024SJerry Jelinek /* Global so it can be tuned in /etc/system. This must be a power of two. */ 1147c6d7024SJerry Jelinek uint_t ip_dce_hash_size = 1024; 1157c6d7024SJerry Jelinek 1167c6d7024SJerry Jelinek /* The time in seconds between executions of the IP DCE reclaim worker. */ 1177c6d7024SJerry Jelinek uint_t ip_dce_reclaim_interval = 60; 1187c6d7024SJerry Jelinek 1197c6d7024SJerry Jelinek /* The factor of the DCE threshold at which to start hard reclaims */ 1207c6d7024SJerry Jelinek uint_t ip_dce_reclaim_threshold_hard = 2; 121bd670b35SErik Nordmark 122bd670b35SErik Nordmark /* Operates on a uint64_t */ 123bd670b35SErik Nordmark #define RANDOM_HASH(p) ((p) ^ ((p)>>16) ^ ((p)>>32) ^ ((p)>>48)) 124bd670b35SErik Nordmark 125bd670b35SErik Nordmark /* 126bd670b35SErik Nordmark * Reclaim a fraction of dce's in the dcb. 127bd670b35SErik Nordmark * For now we have a higher probability to delete DCEs without DCE_PMTU. 128bd670b35SErik Nordmark */ 129bd670b35SErik Nordmark static void 130bd670b35SErik Nordmark dcb_reclaim(dcb_t *dcb, ip_stack_t *ipst, uint_t fraction) 131bd670b35SErik Nordmark { 132bd670b35SErik Nordmark uint_t fraction_pmtu = fraction*4; 133bd670b35SErik Nordmark uint_t hash; 134bd670b35SErik Nordmark dce_t *dce, *nextdce; 1357c6d7024SJerry Jelinek hrtime_t seed = gethrtime(); 1367c6d7024SJerry Jelinek uint_t retained = 0; 1377c6d7024SJerry Jelinek uint_t max = ipst->ips_ip_dce_reclaim_threshold; 1387c6d7024SJerry Jelinek 1397c6d7024SJerry Jelinek max *= ip_dce_reclaim_threshold_hard; 140bd670b35SErik Nordmark 141bd670b35SErik Nordmark rw_enter(&dcb->dcb_lock, RW_WRITER); 142bd670b35SErik Nordmark for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) { 143bd670b35SErik Nordmark nextdce = dce->dce_next; 144bd670b35SErik Nordmark /* Clear DCEF_PMTU if the pmtu is too old */ 145bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 146bd670b35SErik Nordmark if ((dce->dce_flags & DCEF_PMTU) && 147d3d50737SRafael Vanoni TICK_TO_SEC(ddi_get_lbolt64()) - dce->dce_last_change_time > 148bd670b35SErik Nordmark ipst->ips_ip_pathmtu_interval) { 149bd670b35SErik Nordmark dce->dce_flags &= ~DCEF_PMTU; 150bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 151bd670b35SErik Nordmark dce_increment_generation(dce); 152bd670b35SErik Nordmark } else { 153bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 154bd670b35SErik Nordmark } 1557c6d7024SJerry Jelinek 1567c6d7024SJerry Jelinek if (max == 0 || retained < max) { 1577c6d7024SJerry Jelinek hash = RANDOM_HASH((uint64_t)((uintptr_t)dce | seed)); 1587c6d7024SJerry Jelinek 159bd670b35SErik Nordmark if (dce->dce_flags & DCEF_PMTU) { 1607c6d7024SJerry Jelinek if (hash % fraction_pmtu != 0) { 1617c6d7024SJerry Jelinek retained++; 162bd670b35SErik Nordmark continue; 1637c6d7024SJerry Jelinek } 164bd670b35SErik Nordmark } else { 1657c6d7024SJerry Jelinek if (hash % fraction != 0) { 1667c6d7024SJerry Jelinek retained++; 167bd670b35SErik Nordmark continue; 168bd670b35SErik Nordmark } 1697c6d7024SJerry Jelinek } 1707c6d7024SJerry Jelinek } 171bd670b35SErik Nordmark 172bd670b35SErik Nordmark IP_STAT(ipst, ip_dce_reclaim_deleted); 173bd670b35SErik Nordmark dce_delete_locked(dcb, dce); 174bd670b35SErik Nordmark dce_refrele(dce); 175bd670b35SErik Nordmark } 176bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 177bd670b35SErik Nordmark } 178bd670b35SErik Nordmark 179bd670b35SErik Nordmark /* 180bd670b35SErik Nordmark * kmem_cache callback to free up memory. 181bd670b35SErik Nordmark * 182bd670b35SErik Nordmark */ 183bd670b35SErik Nordmark static void 184bd670b35SErik Nordmark ip_dce_reclaim_stack(ip_stack_t *ipst) 185bd670b35SErik Nordmark { 186bd670b35SErik Nordmark int i; 187bd670b35SErik Nordmark 188bd670b35SErik Nordmark IP_STAT(ipst, ip_dce_reclaim_calls); 189bd670b35SErik Nordmark for (i = 0; i < ipst->ips_dce_hashsize; i++) { 190bd670b35SErik Nordmark dcb_reclaim(&ipst->ips_dce_hash_v4[i], ipst, 191bd670b35SErik Nordmark ipst->ips_ip_dce_reclaim_fraction); 192bd670b35SErik Nordmark 193bd670b35SErik Nordmark dcb_reclaim(&ipst->ips_dce_hash_v6[i], ipst, 194bd670b35SErik Nordmark ipst->ips_ip_dce_reclaim_fraction); 195bd670b35SErik Nordmark } 196bd670b35SErik Nordmark 197bd670b35SErik Nordmark /* 198bd670b35SErik Nordmark * Walk all CONNs that can have a reference on an ire, nce or dce. 199bd670b35SErik Nordmark * Get them to update any stale references to drop any refholds they 200bd670b35SErik Nordmark * have. 201bd670b35SErik Nordmark */ 202bd670b35SErik Nordmark ipcl_walk(conn_ixa_cleanup, (void *)B_FALSE, ipst); 203bd670b35SErik Nordmark } 204bd670b35SErik Nordmark 205bd670b35SErik Nordmark /* 2067c6d7024SJerry Jelinek * Called by dce_reclaim_worker() below, and no one else. Typically this will 2077c6d7024SJerry Jelinek * mean that the number of entries in the hash buckets has exceeded a tunable 2087c6d7024SJerry Jelinek * threshold. 209bd670b35SErik Nordmark */ 2107c6d7024SJerry Jelinek static void 2117c6d7024SJerry Jelinek ip_dce_reclaim(void) 212bd670b35SErik Nordmark { 213bd670b35SErik Nordmark netstack_handle_t nh; 214bd670b35SErik Nordmark netstack_t *ns; 2154ba231ceSKacheong Poon ip_stack_t *ipst; 216bd670b35SErik Nordmark 2177c6d7024SJerry Jelinek ASSERT(curthread == dce_reclaim_thread); 2187c6d7024SJerry Jelinek 219bd670b35SErik Nordmark netstack_next_init(&nh); 220bd670b35SErik Nordmark while ((ns = netstack_next(&nh)) != NULL) { 2214ba231ceSKacheong Poon /* 2224ba231ceSKacheong Poon * netstack_next() can return a netstack_t with a NULL 2234ba231ceSKacheong Poon * netstack_ip at boot time. 2244ba231ceSKacheong Poon */ 2254ba231ceSKacheong Poon if ((ipst = ns->netstack_ip) == NULL) { 2264ba231ceSKacheong Poon netstack_rele(ns); 2274ba231ceSKacheong Poon continue; 2284ba231ceSKacheong Poon } 2297c6d7024SJerry Jelinek if (atomic_swap_uint(&ipst->ips_dce_reclaim_needed, 0) != 0) 2304ba231ceSKacheong Poon ip_dce_reclaim_stack(ipst); 231bd670b35SErik Nordmark netstack_rele(ns); 232bd670b35SErik Nordmark } 233bd670b35SErik Nordmark netstack_next_fini(&nh); 234bd670b35SErik Nordmark } 235bd670b35SErik Nordmark 2367c6d7024SJerry Jelinek /* ARGSUSED */ 2377c6d7024SJerry Jelinek static void 2387c6d7024SJerry Jelinek dce_reclaim_worker(void *arg) 2397c6d7024SJerry Jelinek { 2407c6d7024SJerry Jelinek callb_cpr_t cprinfo; 2417c6d7024SJerry Jelinek 2427c6d7024SJerry Jelinek CALLB_CPR_INIT(&cprinfo, &dce_reclaim_lock, callb_generic_cpr, 2437c6d7024SJerry Jelinek "dce_reclaim_worker"); 2447c6d7024SJerry Jelinek 2457c6d7024SJerry Jelinek mutex_enter(&dce_reclaim_lock); 2467c6d7024SJerry Jelinek while (!dce_reclaim_shutdown) { 2477c6d7024SJerry Jelinek CALLB_CPR_SAFE_BEGIN(&cprinfo); 2487c6d7024SJerry Jelinek (void) cv_timedwait(&dce_reclaim_cv, &dce_reclaim_lock, 2497c6d7024SJerry Jelinek ddi_get_lbolt() + ip_dce_reclaim_interval * hz); 2507c6d7024SJerry Jelinek CALLB_CPR_SAFE_END(&cprinfo, &dce_reclaim_lock); 2517c6d7024SJerry Jelinek 2527c6d7024SJerry Jelinek if (dce_reclaim_shutdown) 2537c6d7024SJerry Jelinek break; 2547c6d7024SJerry Jelinek 2557c6d7024SJerry Jelinek mutex_exit(&dce_reclaim_lock); 2567c6d7024SJerry Jelinek ip_dce_reclaim(); 2577c6d7024SJerry Jelinek mutex_enter(&dce_reclaim_lock); 2587c6d7024SJerry Jelinek } 2597c6d7024SJerry Jelinek 2607c6d7024SJerry Jelinek ASSERT(MUTEX_HELD(&dce_reclaim_lock)); 2617c6d7024SJerry Jelinek dce_reclaim_thread = NULL; 2627c6d7024SJerry Jelinek dce_reclaim_shutdown = 0; 2637c6d7024SJerry Jelinek cv_broadcast(&dce_reclaim_cv); 2647c6d7024SJerry Jelinek CALLB_CPR_EXIT(&cprinfo); /* drops the lock */ 2657c6d7024SJerry Jelinek 2667c6d7024SJerry Jelinek thread_exit(); 2677c6d7024SJerry Jelinek } 2687c6d7024SJerry Jelinek 269bd670b35SErik Nordmark void 270bd670b35SErik Nordmark dce_g_init(void) 271bd670b35SErik Nordmark { 272bd670b35SErik Nordmark dce_cache = kmem_cache_create("dce_cache", 2737c6d7024SJerry Jelinek sizeof (dce_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 2747c6d7024SJerry Jelinek 2757c6d7024SJerry Jelinek mutex_init(&dce_reclaim_lock, NULL, MUTEX_DEFAULT, NULL); 2767c6d7024SJerry Jelinek cv_init(&dce_reclaim_cv, NULL, CV_DEFAULT, NULL); 2777c6d7024SJerry Jelinek 2787c6d7024SJerry Jelinek dce_reclaim_thread = thread_create(NULL, 0, dce_reclaim_worker, 2797c6d7024SJerry Jelinek NULL, 0, &p0, TS_RUN, minclsyspri); 280bd670b35SErik Nordmark } 281bd670b35SErik Nordmark 282bd670b35SErik Nordmark void 283bd670b35SErik Nordmark dce_g_destroy(void) 284bd670b35SErik Nordmark { 2857c6d7024SJerry Jelinek mutex_enter(&dce_reclaim_lock); 2867c6d7024SJerry Jelinek dce_reclaim_shutdown = 1; 2877c6d7024SJerry Jelinek cv_signal(&dce_reclaim_cv); 2887c6d7024SJerry Jelinek while (dce_reclaim_thread != NULL) 2897c6d7024SJerry Jelinek cv_wait(&dce_reclaim_cv, &dce_reclaim_lock); 2907c6d7024SJerry Jelinek mutex_exit(&dce_reclaim_lock); 2917c6d7024SJerry Jelinek 2927c6d7024SJerry Jelinek cv_destroy(&dce_reclaim_cv); 2937c6d7024SJerry Jelinek mutex_destroy(&dce_reclaim_lock); 2947c6d7024SJerry Jelinek 295bd670b35SErik Nordmark kmem_cache_destroy(dce_cache); 296bd670b35SErik Nordmark } 297bd670b35SErik Nordmark 298bd670b35SErik Nordmark /* 299bd670b35SErik Nordmark * Allocate a default DCE and a hash table for per-IP address DCEs 300bd670b35SErik Nordmark */ 301bd670b35SErik Nordmark void 302bd670b35SErik Nordmark dce_stack_init(ip_stack_t *ipst) 303bd670b35SErik Nordmark { 304bd670b35SErik Nordmark int i; 305bd670b35SErik Nordmark 306bd670b35SErik Nordmark ipst->ips_dce_default = kmem_cache_alloc(dce_cache, KM_SLEEP); 307bd670b35SErik Nordmark bzero(ipst->ips_dce_default, sizeof (dce_t)); 308bd670b35SErik Nordmark ipst->ips_dce_default->dce_flags = DCEF_DEFAULT; 309bd670b35SErik Nordmark ipst->ips_dce_default->dce_generation = DCE_GENERATION_INITIAL; 310d3d50737SRafael Vanoni ipst->ips_dce_default->dce_last_change_time = 311d3d50737SRafael Vanoni TICK_TO_SEC(ddi_get_lbolt64()); 312bd670b35SErik Nordmark ipst->ips_dce_default->dce_refcnt = 1; /* Should never go away */ 313bd670b35SErik Nordmark ipst->ips_dce_default->dce_ipst = ipst; 314bd670b35SErik Nordmark 315bd670b35SErik Nordmark /* This must be a power of two since we are using IRE_ADDR_HASH macro */ 3167c6d7024SJerry Jelinek ipst->ips_dce_hashsize = ip_dce_hash_size; 317bd670b35SErik Nordmark ipst->ips_dce_hash_v4 = kmem_zalloc(ipst->ips_dce_hashsize * 318bd670b35SErik Nordmark sizeof (dcb_t), KM_SLEEP); 319bd670b35SErik Nordmark ipst->ips_dce_hash_v6 = kmem_zalloc(ipst->ips_dce_hashsize * 320bd670b35SErik Nordmark sizeof (dcb_t), KM_SLEEP); 321bd670b35SErik Nordmark for (i = 0; i < ipst->ips_dce_hashsize; i++) { 322bd670b35SErik Nordmark rw_init(&ipst->ips_dce_hash_v4[i].dcb_lock, NULL, RW_DEFAULT, 323bd670b35SErik Nordmark NULL); 324bd670b35SErik Nordmark rw_init(&ipst->ips_dce_hash_v6[i].dcb_lock, NULL, RW_DEFAULT, 325bd670b35SErik Nordmark NULL); 326bd670b35SErik Nordmark } 327bd670b35SErik Nordmark } 328bd670b35SErik Nordmark 329bd670b35SErik Nordmark void 330bd670b35SErik Nordmark dce_stack_destroy(ip_stack_t *ipst) 331bd670b35SErik Nordmark { 332bd670b35SErik Nordmark int i; 333bd670b35SErik Nordmark for (i = 0; i < ipst->ips_dce_hashsize; i++) { 334bd670b35SErik Nordmark rw_destroy(&ipst->ips_dce_hash_v4[i].dcb_lock); 335bd670b35SErik Nordmark rw_destroy(&ipst->ips_dce_hash_v6[i].dcb_lock); 336bd670b35SErik Nordmark } 337bd670b35SErik Nordmark kmem_free(ipst->ips_dce_hash_v4, 338bd670b35SErik Nordmark ipst->ips_dce_hashsize * sizeof (dcb_t)); 339bd670b35SErik Nordmark ipst->ips_dce_hash_v4 = NULL; 340bd670b35SErik Nordmark kmem_free(ipst->ips_dce_hash_v6, 341bd670b35SErik Nordmark ipst->ips_dce_hashsize * sizeof (dcb_t)); 342bd670b35SErik Nordmark ipst->ips_dce_hash_v6 = NULL; 343bd670b35SErik Nordmark ipst->ips_dce_hashsize = 0; 344bd670b35SErik Nordmark 345bd670b35SErik Nordmark ASSERT(ipst->ips_dce_default->dce_refcnt == 1); 346bd670b35SErik Nordmark kmem_cache_free(dce_cache, ipst->ips_dce_default); 347bd670b35SErik Nordmark ipst->ips_dce_default = NULL; 348bd670b35SErik Nordmark } 349bd670b35SErik Nordmark 350bd670b35SErik Nordmark /* When any DCE is good enough */ 351bd670b35SErik Nordmark dce_t * 352bd670b35SErik Nordmark dce_get_default(ip_stack_t *ipst) 353bd670b35SErik Nordmark { 354bd670b35SErik Nordmark dce_t *dce; 355bd670b35SErik Nordmark 356bd670b35SErik Nordmark dce = ipst->ips_dce_default; 357bd670b35SErik Nordmark dce_refhold(dce); 358bd670b35SErik Nordmark return (dce); 359bd670b35SErik Nordmark } 360bd670b35SErik Nordmark 361bd670b35SErik Nordmark /* 362bd670b35SErik Nordmark * Generic for IPv4 and IPv6. 363bd670b35SErik Nordmark * 364bd670b35SErik Nordmark * Used by callers that need to cache e.g., the datapath 365bd670b35SErik Nordmark * Returns the generation number in the last argument. 366bd670b35SErik Nordmark */ 367bd670b35SErik Nordmark dce_t * 368bd670b35SErik Nordmark dce_lookup_pkt(mblk_t *mp, ip_xmit_attr_t *ixa, uint_t *generationp) 369bd670b35SErik Nordmark { 370bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_IS_IPV4) { 371bd670b35SErik Nordmark /* 372bd670b35SErik Nordmark * If we have a source route we need to look for the final 373bd670b35SErik Nordmark * destination in the source route option. 374bd670b35SErik Nordmark */ 375bd670b35SErik Nordmark ipaddr_t final_dst; 376bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)mp->b_rptr; 377bd670b35SErik Nordmark 378bd670b35SErik Nordmark final_dst = ip_get_dst(ipha); 379bd670b35SErik Nordmark return (dce_lookup_v4(final_dst, ixa->ixa_ipst, generationp)); 380bd670b35SErik Nordmark } else { 381bd670b35SErik Nordmark uint_t ifindex; 382bd670b35SErik Nordmark /* 383bd670b35SErik Nordmark * If we have a routing header we need to look for the final 384bd670b35SErik Nordmark * destination in the routing extension header. 385bd670b35SErik Nordmark */ 386bd670b35SErik Nordmark in6_addr_t final_dst; 387bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)mp->b_rptr; 388bd670b35SErik Nordmark 389bd670b35SErik Nordmark final_dst = ip_get_dst_v6(ip6h, mp, NULL); 390bd670b35SErik Nordmark ifindex = 0; 391bd670b35SErik Nordmark if (IN6_IS_ADDR_LINKSCOPE(&final_dst) && ixa->ixa_nce != NULL) { 392bd670b35SErik Nordmark ifindex = ixa->ixa_nce->nce_common->ncec_ill-> 393bd670b35SErik Nordmark ill_phyint->phyint_ifindex; 394bd670b35SErik Nordmark } 395bd670b35SErik Nordmark return (dce_lookup_v6(&final_dst, ifindex, ixa->ixa_ipst, 396bd670b35SErik Nordmark generationp)); 397bd670b35SErik Nordmark } 398bd670b35SErik Nordmark } 399bd670b35SErik Nordmark 400bd670b35SErik Nordmark /* 401bd670b35SErik Nordmark * Used by callers that need to cache e.g., the datapath 402bd670b35SErik Nordmark * Returns the generation number in the last argument. 403bd670b35SErik Nordmark */ 404bd670b35SErik Nordmark dce_t * 405bd670b35SErik Nordmark dce_lookup_v4(ipaddr_t dst, ip_stack_t *ipst, uint_t *generationp) 406bd670b35SErik Nordmark { 407bd670b35SErik Nordmark uint_t hash; 408bd670b35SErik Nordmark dcb_t *dcb; 409bd670b35SErik Nordmark dce_t *dce; 410bd670b35SErik Nordmark 411bd670b35SErik Nordmark /* Set *generationp before dropping the lock(s) that allow additions */ 412bd670b35SErik Nordmark if (generationp != NULL) 413bd670b35SErik Nordmark *generationp = ipst->ips_dce_default->dce_generation; 414bd670b35SErik Nordmark 415bd670b35SErik Nordmark hash = IRE_ADDR_HASH(dst, ipst->ips_dce_hashsize); 416bd670b35SErik Nordmark dcb = &ipst->ips_dce_hash_v4[hash]; 417bd670b35SErik Nordmark rw_enter(&dcb->dcb_lock, RW_READER); 418bd670b35SErik Nordmark for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) { 419bd670b35SErik Nordmark if (dce->dce_v4addr == dst) { 420bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 421bd670b35SErik Nordmark if (!DCE_IS_CONDEMNED(dce)) { 422bd670b35SErik Nordmark dce_refhold(dce); 423bd670b35SErik Nordmark if (generationp != NULL) 424bd670b35SErik Nordmark *generationp = dce->dce_generation; 425bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 426bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 427bd670b35SErik Nordmark return (dce); 428bd670b35SErik Nordmark } 429bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 430bd670b35SErik Nordmark } 431bd670b35SErik Nordmark } 432bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 433bd670b35SErik Nordmark /* Not found */ 434bd670b35SErik Nordmark dce = ipst->ips_dce_default; 435bd670b35SErik Nordmark dce_refhold(dce); 436bd670b35SErik Nordmark return (dce); 437bd670b35SErik Nordmark } 438bd670b35SErik Nordmark 439bd670b35SErik Nordmark /* 440bd670b35SErik Nordmark * Used by callers that need to cache e.g., the datapath 441bd670b35SErik Nordmark * Returns the generation number in the last argument. 442bd670b35SErik Nordmark * ifindex should only be set for link-locals 443bd670b35SErik Nordmark */ 444bd670b35SErik Nordmark dce_t * 445bd670b35SErik Nordmark dce_lookup_v6(const in6_addr_t *dst, uint_t ifindex, ip_stack_t *ipst, 446bd670b35SErik Nordmark uint_t *generationp) 447bd670b35SErik Nordmark { 448bd670b35SErik Nordmark uint_t hash; 449bd670b35SErik Nordmark dcb_t *dcb; 450bd670b35SErik Nordmark dce_t *dce; 451bd670b35SErik Nordmark 452bd670b35SErik Nordmark /* Set *generationp before dropping the lock(s) that allow additions */ 453bd670b35SErik Nordmark if (generationp != NULL) 454bd670b35SErik Nordmark *generationp = ipst->ips_dce_default->dce_generation; 455bd670b35SErik Nordmark 456bd670b35SErik Nordmark hash = IRE_ADDR_HASH_V6(*dst, ipst->ips_dce_hashsize); 457bd670b35SErik Nordmark dcb = &ipst->ips_dce_hash_v6[hash]; 458bd670b35SErik Nordmark rw_enter(&dcb->dcb_lock, RW_READER); 459bd670b35SErik Nordmark for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) { 460bd670b35SErik Nordmark if (IN6_ARE_ADDR_EQUAL(&dce->dce_v6addr, dst) && 461bd670b35SErik Nordmark dce->dce_ifindex == ifindex) { 462bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 463bd670b35SErik Nordmark if (!DCE_IS_CONDEMNED(dce)) { 464bd670b35SErik Nordmark dce_refhold(dce); 465bd670b35SErik Nordmark if (generationp != NULL) 466bd670b35SErik Nordmark *generationp = dce->dce_generation; 467bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 468bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 469bd670b35SErik Nordmark return (dce); 470bd670b35SErik Nordmark } 471bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 472bd670b35SErik Nordmark } 473bd670b35SErik Nordmark } 474bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 475bd670b35SErik Nordmark /* Not found */ 476bd670b35SErik Nordmark dce = ipst->ips_dce_default; 477bd670b35SErik Nordmark dce_refhold(dce); 478bd670b35SErik Nordmark return (dce); 479bd670b35SErik Nordmark } 480bd670b35SErik Nordmark 481bd670b35SErik Nordmark /* 482bd670b35SErik Nordmark * Atomically looks for a non-default DCE, and if not found tries to create one. 483bd670b35SErik Nordmark * If there is no memory it returns NULL. 484bd670b35SErik Nordmark * When an entry is created we increase the generation number on 485bd670b35SErik Nordmark * the default DCE so that conn_ip_output will detect there is a new DCE. 486bd670b35SErik Nordmark */ 487bd670b35SErik Nordmark dce_t * 488bd670b35SErik Nordmark dce_lookup_and_add_v4(ipaddr_t dst, ip_stack_t *ipst) 489bd670b35SErik Nordmark { 490bd670b35SErik Nordmark uint_t hash; 491bd670b35SErik Nordmark dcb_t *dcb; 492bd670b35SErik Nordmark dce_t *dce; 493bd670b35SErik Nordmark 494bd670b35SErik Nordmark hash = IRE_ADDR_HASH(dst, ipst->ips_dce_hashsize); 495bd670b35SErik Nordmark dcb = &ipst->ips_dce_hash_v4[hash]; 4967c6d7024SJerry Jelinek /* 4977c6d7024SJerry Jelinek * Assuming that we get fairly even distribution across all of the 4987c6d7024SJerry Jelinek * buckets, once one bucket is overly full, prune the whole cache. 4997c6d7024SJerry Jelinek */ 5007c6d7024SJerry Jelinek if (dcb->dcb_cnt > ipst->ips_ip_dce_reclaim_threshold) 5017c6d7024SJerry Jelinek atomic_or_uint(&ipst->ips_dce_reclaim_needed, 1); 502bd670b35SErik Nordmark rw_enter(&dcb->dcb_lock, RW_WRITER); 503bd670b35SErik Nordmark for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) { 504bd670b35SErik Nordmark if (dce->dce_v4addr == dst) { 505bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 506bd670b35SErik Nordmark if (!DCE_IS_CONDEMNED(dce)) { 507bd670b35SErik Nordmark dce_refhold(dce); 508bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 509bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 510bd670b35SErik Nordmark return (dce); 511bd670b35SErik Nordmark } 512bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 513bd670b35SErik Nordmark } 514bd670b35SErik Nordmark } 515bd670b35SErik Nordmark dce = kmem_cache_alloc(dce_cache, KM_NOSLEEP); 516bd670b35SErik Nordmark if (dce == NULL) { 517bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 518bd670b35SErik Nordmark return (NULL); 519bd670b35SErik Nordmark } 520bd670b35SErik Nordmark bzero(dce, sizeof (dce_t)); 521bd670b35SErik Nordmark dce->dce_ipst = ipst; /* No netstack_hold */ 522bd670b35SErik Nordmark dce->dce_v4addr = dst; 523bd670b35SErik Nordmark dce->dce_generation = DCE_GENERATION_INITIAL; 524bd670b35SErik Nordmark dce->dce_ipversion = IPV4_VERSION; 525d3d50737SRafael Vanoni dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64()); 526bd670b35SErik Nordmark dce_refhold(dce); /* For the hash list */ 527bd670b35SErik Nordmark 528bd670b35SErik Nordmark /* Link into list */ 529bd670b35SErik Nordmark if (dcb->dcb_dce != NULL) 530bd670b35SErik Nordmark dcb->dcb_dce->dce_ptpn = &dce->dce_next; 531bd670b35SErik Nordmark dce->dce_next = dcb->dcb_dce; 532bd670b35SErik Nordmark dce->dce_ptpn = &dcb->dcb_dce; 533bd670b35SErik Nordmark dcb->dcb_dce = dce; 534bd670b35SErik Nordmark dce->dce_bucket = dcb; 535*1a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&dcb->dcb_cnt); 536bd670b35SErik Nordmark dce_refhold(dce); /* For the caller */ 537bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 538bd670b35SErik Nordmark 539bd670b35SErik Nordmark /* Initialize dce_ident to be different than for the last packet */ 540bd670b35SErik Nordmark dce->dce_ident = ipst->ips_dce_default->dce_ident + 1; 541bd670b35SErik Nordmark 542bd670b35SErik Nordmark dce_increment_generation(ipst->ips_dce_default); 543bd670b35SErik Nordmark return (dce); 544bd670b35SErik Nordmark } 545bd670b35SErik Nordmark 546bd670b35SErik Nordmark /* 547bd670b35SErik Nordmark * Atomically looks for a non-default DCE, and if not found tries to create one. 548bd670b35SErik Nordmark * If there is no memory it returns NULL. 549bd670b35SErik Nordmark * When an entry is created we increase the generation number on 550bd670b35SErik Nordmark * the default DCE so that conn_ip_output will detect there is a new DCE. 551bd670b35SErik Nordmark * ifindex should only be used with link-local addresses. 552bd670b35SErik Nordmark */ 553bd670b35SErik Nordmark dce_t * 554bd670b35SErik Nordmark dce_lookup_and_add_v6(const in6_addr_t *dst, uint_t ifindex, ip_stack_t *ipst) 555bd670b35SErik Nordmark { 556bd670b35SErik Nordmark uint_t hash; 557bd670b35SErik Nordmark dcb_t *dcb; 558bd670b35SErik Nordmark dce_t *dce; 559bd670b35SErik Nordmark 560bd670b35SErik Nordmark /* We should not create entries for link-locals w/o an ifindex */ 561bd670b35SErik Nordmark ASSERT(!(IN6_IS_ADDR_LINKSCOPE(dst)) || ifindex != 0); 562bd670b35SErik Nordmark 563bd670b35SErik Nordmark hash = IRE_ADDR_HASH_V6(*dst, ipst->ips_dce_hashsize); 564bd670b35SErik Nordmark dcb = &ipst->ips_dce_hash_v6[hash]; 5657c6d7024SJerry Jelinek /* 5667c6d7024SJerry Jelinek * Assuming that we get fairly even distribution across all of the 5677c6d7024SJerry Jelinek * buckets, once one bucket is overly full, prune the whole cache. 5687c6d7024SJerry Jelinek */ 5697c6d7024SJerry Jelinek if (dcb->dcb_cnt > ipst->ips_ip_dce_reclaim_threshold) 5707c6d7024SJerry Jelinek atomic_or_uint(&ipst->ips_dce_reclaim_needed, 1); 571bd670b35SErik Nordmark rw_enter(&dcb->dcb_lock, RW_WRITER); 572bd670b35SErik Nordmark for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) { 573bd670b35SErik Nordmark if (IN6_ARE_ADDR_EQUAL(&dce->dce_v6addr, dst) && 574bd670b35SErik Nordmark dce->dce_ifindex == ifindex) { 575bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 576bd670b35SErik Nordmark if (!DCE_IS_CONDEMNED(dce)) { 577bd670b35SErik Nordmark dce_refhold(dce); 578bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 579bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 580bd670b35SErik Nordmark return (dce); 581bd670b35SErik Nordmark } 582bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 583bd670b35SErik Nordmark } 584bd670b35SErik Nordmark } 585bd670b35SErik Nordmark 586bd670b35SErik Nordmark dce = kmem_cache_alloc(dce_cache, KM_NOSLEEP); 587bd670b35SErik Nordmark if (dce == NULL) { 588bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 589bd670b35SErik Nordmark return (NULL); 590bd670b35SErik Nordmark } 591bd670b35SErik Nordmark bzero(dce, sizeof (dce_t)); 592bd670b35SErik Nordmark dce->dce_ipst = ipst; /* No netstack_hold */ 593bd670b35SErik Nordmark dce->dce_v6addr = *dst; 594bd670b35SErik Nordmark dce->dce_ifindex = ifindex; 595bd670b35SErik Nordmark dce->dce_generation = DCE_GENERATION_INITIAL; 596bd670b35SErik Nordmark dce->dce_ipversion = IPV6_VERSION; 597d3d50737SRafael Vanoni dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64()); 598bd670b35SErik Nordmark dce_refhold(dce); /* For the hash list */ 599bd670b35SErik Nordmark 600bd670b35SErik Nordmark /* Link into list */ 601bd670b35SErik Nordmark if (dcb->dcb_dce != NULL) 602bd670b35SErik Nordmark dcb->dcb_dce->dce_ptpn = &dce->dce_next; 603bd670b35SErik Nordmark dce->dce_next = dcb->dcb_dce; 604bd670b35SErik Nordmark dce->dce_ptpn = &dcb->dcb_dce; 605bd670b35SErik Nordmark dcb->dcb_dce = dce; 606bd670b35SErik Nordmark dce->dce_bucket = dcb; 607*1a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&dcb->dcb_cnt); 608bd670b35SErik Nordmark dce_refhold(dce); /* For the caller */ 609bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 610bd670b35SErik Nordmark 611bd670b35SErik Nordmark /* Initialize dce_ident to be different than for the last packet */ 612bd670b35SErik Nordmark dce->dce_ident = ipst->ips_dce_default->dce_ident + 1; 613bd670b35SErik Nordmark dce_increment_generation(ipst->ips_dce_default); 614bd670b35SErik Nordmark return (dce); 615bd670b35SErik Nordmark } 616bd670b35SErik Nordmark 617bd670b35SErik Nordmark /* 618bd670b35SErik Nordmark * Set/update uinfo. Creates a per-destination dce if none exists. 619bd670b35SErik Nordmark * 620bd670b35SErik Nordmark * Note that we do not bump the generation number here. 621bd670b35SErik Nordmark * New connections will find the new uinfo. 622bd670b35SErik Nordmark * 623bd670b35SErik Nordmark * The only use of this (tcp, sctp using iulp_t) is to set rtt+rtt_sd. 624bd670b35SErik Nordmark */ 625bd670b35SErik Nordmark static void 626bd670b35SErik Nordmark dce_setuinfo(dce_t *dce, iulp_t *uinfo) 627bd670b35SErik Nordmark { 628bd670b35SErik Nordmark /* 629bd670b35SErik Nordmark * Update the round trip time estimate and/or the max frag size 630bd670b35SErik Nordmark * and/or the slow start threshold. 631bd670b35SErik Nordmark * 632bd670b35SErik Nordmark * We serialize multiple advises using dce_lock. 633bd670b35SErik Nordmark */ 634bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 635bd670b35SErik Nordmark /* Gard against setting to zero */ 636bd670b35SErik Nordmark if (uinfo->iulp_rtt != 0) { 637bd670b35SErik Nordmark /* 638bd670b35SErik Nordmark * If there is no old cached values, initialize them 639bd670b35SErik Nordmark * conservatively. Set them to be (1.5 * new value). 640bd670b35SErik Nordmark */ 641bd670b35SErik Nordmark if (dce->dce_uinfo.iulp_rtt != 0) { 642bd670b35SErik Nordmark dce->dce_uinfo.iulp_rtt = (dce->dce_uinfo.iulp_rtt + 643bd670b35SErik Nordmark uinfo->iulp_rtt) >> 1; 644bd670b35SErik Nordmark } else { 645bd670b35SErik Nordmark dce->dce_uinfo.iulp_rtt = uinfo->iulp_rtt + 646bd670b35SErik Nordmark (uinfo->iulp_rtt >> 1); 647bd670b35SErik Nordmark } 648bd670b35SErik Nordmark if (dce->dce_uinfo.iulp_rtt_sd != 0) { 649bd670b35SErik Nordmark dce->dce_uinfo.iulp_rtt_sd = 650bd670b35SErik Nordmark (dce->dce_uinfo.iulp_rtt_sd + 651bd670b35SErik Nordmark uinfo->iulp_rtt_sd) >> 1; 652bd670b35SErik Nordmark } else { 653bd670b35SErik Nordmark dce->dce_uinfo.iulp_rtt_sd = uinfo->iulp_rtt_sd + 654bd670b35SErik Nordmark (uinfo->iulp_rtt_sd >> 1); 655bd670b35SErik Nordmark } 656bd670b35SErik Nordmark } 657bd670b35SErik Nordmark if (uinfo->iulp_mtu != 0) { 658bd670b35SErik Nordmark if (dce->dce_flags & DCEF_PMTU) { 659bd670b35SErik Nordmark dce->dce_pmtu = MIN(uinfo->iulp_mtu, dce->dce_pmtu); 660bd670b35SErik Nordmark } else { 661bd670b35SErik Nordmark dce->dce_pmtu = MIN(uinfo->iulp_mtu, IP_MAXPACKET); 662bd670b35SErik Nordmark dce->dce_flags |= DCEF_PMTU; 663bd670b35SErik Nordmark } 664d3d50737SRafael Vanoni dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64()); 665bd670b35SErik Nordmark } 666bd670b35SErik Nordmark if (uinfo->iulp_ssthresh != 0) { 667bd670b35SErik Nordmark if (dce->dce_uinfo.iulp_ssthresh != 0) 668bd670b35SErik Nordmark dce->dce_uinfo.iulp_ssthresh = 669bd670b35SErik Nordmark (uinfo->iulp_ssthresh + 670bd670b35SErik Nordmark dce->dce_uinfo.iulp_ssthresh) >> 1; 671bd670b35SErik Nordmark else 672bd670b35SErik Nordmark dce->dce_uinfo.iulp_ssthresh = uinfo->iulp_ssthresh; 673bd670b35SErik Nordmark } 674bd670b35SErik Nordmark /* We have uinfo for sure */ 675bd670b35SErik Nordmark dce->dce_flags |= DCEF_UINFO; 676bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 677bd670b35SErik Nordmark } 678bd670b35SErik Nordmark 679bd670b35SErik Nordmark 680bd670b35SErik Nordmark int 681bd670b35SErik Nordmark dce_update_uinfo_v4(ipaddr_t dst, iulp_t *uinfo, ip_stack_t *ipst) 682bd670b35SErik Nordmark { 683bd670b35SErik Nordmark dce_t *dce; 684bd670b35SErik Nordmark 685bd670b35SErik Nordmark dce = dce_lookup_and_add_v4(dst, ipst); 686bd670b35SErik Nordmark if (dce == NULL) 687bd670b35SErik Nordmark return (ENOMEM); 688bd670b35SErik Nordmark 689bd670b35SErik Nordmark dce_setuinfo(dce, uinfo); 690bd670b35SErik Nordmark dce_refrele(dce); 691bd670b35SErik Nordmark return (0); 692bd670b35SErik Nordmark } 693bd670b35SErik Nordmark 694bd670b35SErik Nordmark int 695bd670b35SErik Nordmark dce_update_uinfo_v6(const in6_addr_t *dst, uint_t ifindex, iulp_t *uinfo, 696bd670b35SErik Nordmark ip_stack_t *ipst) 697bd670b35SErik Nordmark { 698bd670b35SErik Nordmark dce_t *dce; 699bd670b35SErik Nordmark 700bd670b35SErik Nordmark dce = dce_lookup_and_add_v6(dst, ifindex, ipst); 701bd670b35SErik Nordmark if (dce == NULL) 702bd670b35SErik Nordmark return (ENOMEM); 703bd670b35SErik Nordmark 704bd670b35SErik Nordmark dce_setuinfo(dce, uinfo); 705bd670b35SErik Nordmark dce_refrele(dce); 706bd670b35SErik Nordmark return (0); 707bd670b35SErik Nordmark } 708bd670b35SErik Nordmark 709bd670b35SErik Nordmark /* Common routine for IPv4 and IPv6 */ 710bd670b35SErik Nordmark int 711bd670b35SErik Nordmark dce_update_uinfo(const in6_addr_t *dst, uint_t ifindex, iulp_t *uinfo, 712bd670b35SErik Nordmark ip_stack_t *ipst) 713bd670b35SErik Nordmark { 714bd670b35SErik Nordmark ipaddr_t dst4; 715bd670b35SErik Nordmark 716bd670b35SErik Nordmark if (IN6_IS_ADDR_V4MAPPED_ANY(dst)) { 717bd670b35SErik Nordmark IN6_V4MAPPED_TO_IPADDR(dst, dst4); 718bd670b35SErik Nordmark return (dce_update_uinfo_v4(dst4, uinfo, ipst)); 719bd670b35SErik Nordmark } else { 720bd670b35SErik Nordmark return (dce_update_uinfo_v6(dst, ifindex, uinfo, ipst)); 721bd670b35SErik Nordmark } 722bd670b35SErik Nordmark } 723bd670b35SErik Nordmark 724bd670b35SErik Nordmark static void 725bd670b35SErik Nordmark dce_make_condemned(dce_t *dce) 726bd670b35SErik Nordmark { 727bd670b35SErik Nordmark ip_stack_t *ipst = dce->dce_ipst; 728bd670b35SErik Nordmark 729bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 730bd670b35SErik Nordmark ASSERT(!DCE_IS_CONDEMNED(dce)); 731bd670b35SErik Nordmark dce->dce_generation = DCE_GENERATION_CONDEMNED; 732bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 733bd670b35SErik Nordmark /* Count how many condemned dces for kmem_cache callback */ 734*1a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&ipst->ips_num_dce_condemned); 735bd670b35SErik Nordmark } 736bd670b35SErik Nordmark 737bd670b35SErik Nordmark /* 738bd670b35SErik Nordmark * Increment the generation avoiding the special condemned value 739bd670b35SErik Nordmark */ 740bd670b35SErik Nordmark void 741bd670b35SErik Nordmark dce_increment_generation(dce_t *dce) 742bd670b35SErik Nordmark { 743bd670b35SErik Nordmark uint_t generation; 744bd670b35SErik Nordmark 745bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 746bd670b35SErik Nordmark if (!DCE_IS_CONDEMNED(dce)) { 747bd670b35SErik Nordmark generation = dce->dce_generation + 1; 748bd670b35SErik Nordmark if (generation == DCE_GENERATION_CONDEMNED) 749bd670b35SErik Nordmark generation = DCE_GENERATION_INITIAL; 750bd670b35SErik Nordmark ASSERT(generation != DCE_GENERATION_VERIFY); 751bd670b35SErik Nordmark dce->dce_generation = generation; 752bd670b35SErik Nordmark } 753bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 754bd670b35SErik Nordmark } 755bd670b35SErik Nordmark 756bd670b35SErik Nordmark /* 757bd670b35SErik Nordmark * Increment the generation number on all dces that have a path MTU and 7581eee170aSErik Nordmark * the default DCE. Used when ill_mtu or ill_mc_mtu changes. 759bd670b35SErik Nordmark */ 760bd670b35SErik Nordmark void 761bd670b35SErik Nordmark dce_increment_all_generations(boolean_t isv6, ip_stack_t *ipst) 762bd670b35SErik Nordmark { 763bd670b35SErik Nordmark int i; 764bd670b35SErik Nordmark dcb_t *dcb; 765bd670b35SErik Nordmark dce_t *dce; 766bd670b35SErik Nordmark 767bd670b35SErik Nordmark for (i = 0; i < ipst->ips_dce_hashsize; i++) { 768bd670b35SErik Nordmark if (isv6) 769bd670b35SErik Nordmark dcb = &ipst->ips_dce_hash_v6[i]; 770bd670b35SErik Nordmark else 771bd670b35SErik Nordmark dcb = &ipst->ips_dce_hash_v4[i]; 772bd670b35SErik Nordmark rw_enter(&dcb->dcb_lock, RW_WRITER); 773bd670b35SErik Nordmark for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) { 774bd670b35SErik Nordmark if (DCE_IS_CONDEMNED(dce)) 775bd670b35SErik Nordmark continue; 776bd670b35SErik Nordmark dce_increment_generation(dce); 777bd670b35SErik Nordmark } 778bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 779bd670b35SErik Nordmark } 780bd670b35SErik Nordmark dce_increment_generation(ipst->ips_dce_default); 781bd670b35SErik Nordmark } 782bd670b35SErik Nordmark 783bd670b35SErik Nordmark /* 784bd670b35SErik Nordmark * Caller needs to do a dce_refrele since we can't do the 785bd670b35SErik Nordmark * dce_refrele under dcb_lock. 786bd670b35SErik Nordmark */ 787bd670b35SErik Nordmark static void 788bd670b35SErik Nordmark dce_delete_locked(dcb_t *dcb, dce_t *dce) 789bd670b35SErik Nordmark { 790bd670b35SErik Nordmark dce->dce_bucket = NULL; 791bd670b35SErik Nordmark *dce->dce_ptpn = dce->dce_next; 792bd670b35SErik Nordmark if (dce->dce_next != NULL) 793bd670b35SErik Nordmark dce->dce_next->dce_ptpn = dce->dce_ptpn; 794bd670b35SErik Nordmark dce->dce_ptpn = NULL; 795bd670b35SErik Nordmark dce->dce_next = NULL; 796*1a5e258fSJosef 'Jeff' Sipek atomic_dec_32(&dcb->dcb_cnt); 797bd670b35SErik Nordmark dce_make_condemned(dce); 798bd670b35SErik Nordmark } 799bd670b35SErik Nordmark 800bd670b35SErik Nordmark static void 801bd670b35SErik Nordmark dce_inactive(dce_t *dce) 802bd670b35SErik Nordmark { 803bd670b35SErik Nordmark ip_stack_t *ipst = dce->dce_ipst; 804bd670b35SErik Nordmark 805bd670b35SErik Nordmark ASSERT(!(dce->dce_flags & DCEF_DEFAULT)); 806bd670b35SErik Nordmark ASSERT(dce->dce_ptpn == NULL); 807bd670b35SErik Nordmark ASSERT(dce->dce_bucket == NULL); 808bd670b35SErik Nordmark 809bd670b35SErik Nordmark /* Count how many condemned dces for kmem_cache callback */ 810bd670b35SErik Nordmark if (DCE_IS_CONDEMNED(dce)) 811*1a5e258fSJosef 'Jeff' Sipek atomic_dec_32(&ipst->ips_num_dce_condemned); 812bd670b35SErik Nordmark 813bd670b35SErik Nordmark kmem_cache_free(dce_cache, dce); 814bd670b35SErik Nordmark } 815bd670b35SErik Nordmark 816bd670b35SErik Nordmark void 817bd670b35SErik Nordmark dce_refrele(dce_t *dce) 818bd670b35SErik Nordmark { 819bd670b35SErik Nordmark ASSERT(dce->dce_refcnt != 0); 820*1a5e258fSJosef 'Jeff' Sipek if (atomic_dec_32_nv(&dce->dce_refcnt) == 0) 821bd670b35SErik Nordmark dce_inactive(dce); 822bd670b35SErik Nordmark } 823bd670b35SErik Nordmark 824bd670b35SErik Nordmark void 825bd670b35SErik Nordmark dce_refhold(dce_t *dce) 826bd670b35SErik Nordmark { 827*1a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&dce->dce_refcnt); 828bd670b35SErik Nordmark ASSERT(dce->dce_refcnt != 0); 829bd670b35SErik Nordmark } 830bd670b35SErik Nordmark 831bd670b35SErik Nordmark /* No tracing support yet hence the same as the above functions */ 832bd670b35SErik Nordmark void 833bd670b35SErik Nordmark dce_refrele_notr(dce_t *dce) 834bd670b35SErik Nordmark { 835bd670b35SErik Nordmark ASSERT(dce->dce_refcnt != 0); 836*1a5e258fSJosef 'Jeff' Sipek if (atomic_dec_32_nv(&dce->dce_refcnt) == 0) 837bd670b35SErik Nordmark dce_inactive(dce); 838bd670b35SErik Nordmark } 839bd670b35SErik Nordmark 840bd670b35SErik Nordmark void 841bd670b35SErik Nordmark dce_refhold_notr(dce_t *dce) 842bd670b35SErik Nordmark { 843*1a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&dce->dce_refcnt); 844bd670b35SErik Nordmark ASSERT(dce->dce_refcnt != 0); 845bd670b35SErik Nordmark } 846bd670b35SErik Nordmark 847bd670b35SErik Nordmark /* Report both the IPv4 and IPv6 DCEs. */ 848bd670b35SErik Nordmark mblk_t * 849bd670b35SErik Nordmark ip_snmp_get_mib2_ip_dce(queue_t *q, mblk_t *mpctl, ip_stack_t *ipst) 850bd670b35SErik Nordmark { 851bd670b35SErik Nordmark struct opthdr *optp; 852bd670b35SErik Nordmark mblk_t *mp2ctl; 853bd670b35SErik Nordmark dest_cache_entry_t dest_cache; 854bd670b35SErik Nordmark mblk_t *mp_tail = NULL; 855bd670b35SErik Nordmark dce_t *dce; 856bd670b35SErik Nordmark dcb_t *dcb; 857bd670b35SErik Nordmark int i; 858bd670b35SErik Nordmark uint64_t current_time; 859bd670b35SErik Nordmark 860d3d50737SRafael Vanoni current_time = TICK_TO_SEC(ddi_get_lbolt64()); 861bd670b35SErik Nordmark 862bd670b35SErik Nordmark /* 863bd670b35SErik Nordmark * make a copy of the original message 864bd670b35SErik Nordmark */ 865bd670b35SErik Nordmark mp2ctl = copymsg(mpctl); 866bd670b35SErik Nordmark 867bd670b35SErik Nordmark /* First we do IPv4 entries */ 868bd670b35SErik Nordmark optp = (struct opthdr *)&mpctl->b_rptr[ 869bd670b35SErik Nordmark sizeof (struct T_optmgmt_ack)]; 870bd670b35SErik Nordmark optp->level = MIB2_IP; 871bd670b35SErik Nordmark optp->name = EXPER_IP_DCE; 872bd670b35SErik Nordmark 873bd670b35SErik Nordmark for (i = 0; i < ipst->ips_dce_hashsize; i++) { 874bd670b35SErik Nordmark dcb = &ipst->ips_dce_hash_v4[i]; 875bd670b35SErik Nordmark rw_enter(&dcb->dcb_lock, RW_READER); 876bd670b35SErik Nordmark for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) { 877bd670b35SErik Nordmark dest_cache.DestIpv4Address = dce->dce_v4addr; 878bd670b35SErik Nordmark dest_cache.DestFlags = dce->dce_flags; 879bd670b35SErik Nordmark if (dce->dce_flags & DCEF_PMTU) 880bd670b35SErik Nordmark dest_cache.DestPmtu = dce->dce_pmtu; 881bd670b35SErik Nordmark else 882bd670b35SErik Nordmark dest_cache.DestPmtu = 0; 883bd670b35SErik Nordmark dest_cache.DestIdent = dce->dce_ident; 884bd670b35SErik Nordmark dest_cache.DestIfindex = 0; 885bd670b35SErik Nordmark dest_cache.DestAge = current_time - 886bd670b35SErik Nordmark dce->dce_last_change_time; 887bd670b35SErik Nordmark if (!snmp_append_data2(mpctl->b_cont, &mp_tail, 888bd670b35SErik Nordmark (char *)&dest_cache, (int)sizeof (dest_cache))) { 889bd670b35SErik Nordmark ip1dbg(("ip_snmp_get_mib2_ip_dce: " 890bd670b35SErik Nordmark "failed to allocate %u bytes\n", 891bd670b35SErik Nordmark (uint_t)sizeof (dest_cache))); 892bd670b35SErik Nordmark } 893bd670b35SErik Nordmark } 894bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 895bd670b35SErik Nordmark } 896bd670b35SErik Nordmark optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); 897bd670b35SErik Nordmark ip3dbg(("ip_snmp_get: level %d, name %d, len %d\n", 898bd670b35SErik Nordmark (int)optp->level, (int)optp->name, (int)optp->len)); 899bd670b35SErik Nordmark qreply(q, mpctl); 900bd670b35SErik Nordmark 901bd670b35SErik Nordmark if (mp2ctl == NULL) { 902bd670b35SErik Nordmark /* Copymsg failed above */ 903bd670b35SErik Nordmark return (NULL); 904bd670b35SErik Nordmark } 905bd670b35SErik Nordmark 906bd670b35SErik Nordmark /* Now for IPv6 */ 907bd670b35SErik Nordmark mpctl = mp2ctl; 908bd670b35SErik Nordmark mp_tail = NULL; 909bd670b35SErik Nordmark mp2ctl = copymsg(mpctl); 910bd670b35SErik Nordmark optp = (struct opthdr *)&mpctl->b_rptr[ 911bd670b35SErik Nordmark sizeof (struct T_optmgmt_ack)]; 912bd670b35SErik Nordmark optp->level = MIB2_IP6; 913bd670b35SErik Nordmark optp->name = EXPER_IP_DCE; 914bd670b35SErik Nordmark 915bd670b35SErik Nordmark for (i = 0; i < ipst->ips_dce_hashsize; i++) { 916bd670b35SErik Nordmark dcb = &ipst->ips_dce_hash_v6[i]; 917bd670b35SErik Nordmark rw_enter(&dcb->dcb_lock, RW_READER); 918bd670b35SErik Nordmark for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) { 919bd670b35SErik Nordmark dest_cache.DestIpv6Address = dce->dce_v6addr; 920bd670b35SErik Nordmark dest_cache.DestFlags = dce->dce_flags; 921bd670b35SErik Nordmark if (dce->dce_flags & DCEF_PMTU) 922bd670b35SErik Nordmark dest_cache.DestPmtu = dce->dce_pmtu; 923bd670b35SErik Nordmark else 924bd670b35SErik Nordmark dest_cache.DestPmtu = 0; 925bd670b35SErik Nordmark dest_cache.DestIdent = dce->dce_ident; 926bd670b35SErik Nordmark if (IN6_IS_ADDR_LINKSCOPE(&dce->dce_v6addr)) 927bd670b35SErik Nordmark dest_cache.DestIfindex = dce->dce_ifindex; 928bd670b35SErik Nordmark else 929bd670b35SErik Nordmark dest_cache.DestIfindex = 0; 930bd670b35SErik Nordmark dest_cache.DestAge = current_time - 931bd670b35SErik Nordmark dce->dce_last_change_time; 932bd670b35SErik Nordmark if (!snmp_append_data2(mpctl->b_cont, &mp_tail, 933bd670b35SErik Nordmark (char *)&dest_cache, (int)sizeof (dest_cache))) { 934bd670b35SErik Nordmark ip1dbg(("ip_snmp_get_mib2_ip_dce: " 935bd670b35SErik Nordmark "failed to allocate %u bytes\n", 936bd670b35SErik Nordmark (uint_t)sizeof (dest_cache))); 937bd670b35SErik Nordmark } 938bd670b35SErik Nordmark } 939bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 940bd670b35SErik Nordmark } 941bd670b35SErik Nordmark optp->len = (t_uscalar_t)msgdsize(mpctl->b_cont); 942bd670b35SErik Nordmark ip3dbg(("ip_snmp_get: level %d, name %d, len %d\n", 943bd670b35SErik Nordmark (int)optp->level, (int)optp->name, (int)optp->len)); 944bd670b35SErik Nordmark qreply(q, mpctl); 945bd670b35SErik Nordmark 946bd670b35SErik Nordmark return (mp2ctl); 947bd670b35SErik Nordmark } 948bd670b35SErik Nordmark 949bd670b35SErik Nordmark /* 950bd670b35SErik Nordmark * Remove IPv6 DCEs which refer to an ifindex that is going away. 951bd670b35SErik Nordmark * This is not required for correctness, but it avoids netstat -d 952bd670b35SErik Nordmark * showing stale stuff that will never be used. 953bd670b35SErik Nordmark */ 954bd670b35SErik Nordmark void 955bd670b35SErik Nordmark dce_cleanup(uint_t ifindex, ip_stack_t *ipst) 956bd670b35SErik Nordmark { 957bd670b35SErik Nordmark uint_t i; 958bd670b35SErik Nordmark dcb_t *dcb; 959bd670b35SErik Nordmark dce_t *dce, *nextdce; 960bd670b35SErik Nordmark 961bd670b35SErik Nordmark for (i = 0; i < ipst->ips_dce_hashsize; i++) { 962bd670b35SErik Nordmark dcb = &ipst->ips_dce_hash_v6[i]; 963bd670b35SErik Nordmark rw_enter(&dcb->dcb_lock, RW_WRITER); 964bd670b35SErik Nordmark 965bd670b35SErik Nordmark for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) { 966bd670b35SErik Nordmark nextdce = dce->dce_next; 967bd670b35SErik Nordmark if (dce->dce_ifindex == ifindex) { 968bd670b35SErik Nordmark dce_delete_locked(dcb, dce); 969bd670b35SErik Nordmark dce_refrele(dce); 970bd670b35SErik Nordmark } 971bd670b35SErik Nordmark } 972bd670b35SErik Nordmark rw_exit(&dcb->dcb_lock); 973bd670b35SErik Nordmark } 974bd670b35SErik Nordmark } 975