1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #ifndef _INET_IP_IRE_H 28 #define _INET_IP_IRE_H 29 30 #ifdef __cplusplus 31 extern "C" { 32 #endif 33 34 #define IPV6_LL_PREFIXLEN 10 /* Number of bits in link-local pref */ 35 36 #define IP_CACHE_TABLE_SIZE 256 37 #define IP_MASK_TABLE_SIZE (IP_ABITS + 1) /* 33 ptrs */ 38 39 #define IP6_FTABLE_HASH_SIZE 32 /* size of each hash table in ptrs */ 40 #define IP6_CACHE_TABLE_SIZE 256 41 #define IP6_MASK_TABLE_SIZE (IPV6_ABITS + 1) /* 129 ptrs */ 42 43 /* 44 * We use the common modulo hash function. In ip_ire_init(), we make 45 * sure that the cache table size is always a power of 2. That's why 46 * we can use & instead of %. Also note that we try hard to make sure 47 * the lower bits of an address capture most info from the whole address. 48 * The reason being that since our hash table is probably a lot smaller 49 * than 2^32 buckets so the lower bits are the most important. 50 */ 51 #define IRE_ADDR_HASH(addr, table_size) \ 52 (((addr) ^ ((addr) >> 8) ^ ((addr) >> 16) ^ ((addr) >> 24)) & \ 53 ((table_size) - 1)) 54 55 /* 56 * To make a byte-order neutral hash for IPv6, just take all the 57 * bytes in the bottom 32 bits into account. 58 */ 59 #define IRE_ADDR_HASH_V6(addr, table_size) \ 60 IRE_ADDR_HASH((addr).s6_addr32[3], table_size) 61 62 /* This assumes that the ftable size is a power of 2. */ 63 #define IRE_ADDR_MASK_HASH_V6(addr, mask, table_size) \ 64 ((((addr).s6_addr8[8] & (mask).s6_addr8[8]) ^ \ 65 ((addr).s6_addr8[9] & (mask).s6_addr8[9]) ^ \ 66 ((addr).s6_addr8[10] & (mask).s6_addr8[10]) ^ \ 67 ((addr).s6_addr8[13] & (mask).s6_addr8[13]) ^ \ 68 ((addr).s6_addr8[14] & (mask).s6_addr8[14]) ^ \ 69 ((addr).s6_addr8[15] & (mask).s6_addr8[15])) & ((table_size) - 1)) 70 71 /* 72 * match parameter definitions for IRE lookup routines. 73 */ 74 #define MATCH_IRE_DSTONLY 0x0000 /* Match just the address */ 75 #define MATCH_IRE_TYPE 0x0001 /* Match IRE type */ 76 #define MATCH_IRE_SRC 0x0002 /* Match IRE source address */ 77 #define MATCH_IRE_MASK 0x0004 /* Match IRE mask */ 78 #define MATCH_IRE_WQ 0x0008 /* Match IRE ire_stq to write queue */ 79 #define MATCH_IRE_GW 0x0010 /* Match IRE gateway */ 80 #define MATCH_IRE_IPIF 0x0020 /* Match IRE ipif */ 81 #define MATCH_IRE_RECURSIVE 0x0040 /* Do recursive lookup if necessary */ 82 #define MATCH_IRE_DEFAULT 0x0080 /* Return default route if no route */ 83 /* found. */ 84 #define MATCH_IRE_RJ_BHOLE 0x0100 /* During lookup if we hit an ire */ 85 /* with RTF_REJECT or RTF_BLACKHOLE, */ 86 /* return the ire. No recursive */ 87 /* lookup should be done. */ 88 #define MATCH_IRE_IHANDLE 0x0200 /* Match IRE on ihandle */ 89 #define MATCH_IRE_MARK_TESTHIDDEN 0x0400 /* Match IRE_MARK_TESTHIDDEN IREs */ 90 91 /* 92 * MATCH_IRE_PARENT is used whenever we unconditionally want to get the 93 * parent IRE (sire) while recursively searching IREs for an offsubnet 94 * destination. With this flag, even if no IRE_CACHETABLE or IRE_INTERFACE 95 * is found to help resolving IRE_OFFSUBNET in lookup routines, the 96 * IRE_OFFSUBNET sire, if any, is returned to the caller. 97 */ 98 /* UNUSED 0x0800 */ 99 #define MATCH_IRE_ILL 0x1000 /* Match IRE on the ill */ 100 101 #define MATCH_IRE_PARENT 0x2000 /* Match parent ire, if any, */ 102 /* even if ire is not matched. */ 103 #define MATCH_IRE_ZONEONLY 0x4000 /* Match IREs in specified zone, ie */ 104 /* don't match IRE_LOCALs from other */ 105 /* zones or shared IREs */ 106 #define MATCH_IRE_MARK_PRIVATE_ADDR 0x8000 /* Match IRE ire_marks with */ 107 /* IRE_MARK_PRIVATE_ADDR. */ 108 #define MATCH_IRE_SECATTR 0x10000 /* Match gateway security attributes */ 109 #define MATCH_IRE_COMPLETE 0x20000 /* ire_ftable_lookup() can return */ 110 /* IRE_CACHE entry only if it is */ 111 /* ND_REACHABLE */ 112 113 /* 114 * Any ire to nce association is long term, and 115 * the refhold and refrele may be done by different 116 * threads. So all cases of making or breaking ire to 117 * nce association should all effectively use the NOTR variants. 118 * To understand the *effectively* part read on. 119 * 120 * ndp_lookup() and ndp_add_v4()/ndp_add_v6() implicitly do 121 * NCE_REFHOLD. So wherever we make ire to nce association after 122 * calling these functions, we effectively want to end up with 123 * NCE_REFHOLD_NOTR. We call this macro to achieve this effect. This 124 * macro changes a NCE_REFHOLD to a NCE_REFHOLD_NOTR. The macro's 125 * NCE_REFRELE cancels off ndp_lookup[ndp_add]'s implicit NCE_REFHOLD, 126 * and what you are left with is a NCE_REFHOLD_NOTR 127 */ 128 #define NCE_REFHOLD_TO_REFHOLD_NOTR(nce) { \ 129 NCE_REFHOLD_NOTR(nce); \ 130 NCE_REFRELE(nce); \ 131 } 132 133 /* 134 * find the next ire_t entry in the ire_next chain starting at ire 135 * that is not CONDEMNED. ire is set to NULL if we reach the end of the list. 136 * Caller must hold the ire_bucket lock. 137 */ 138 139 #define IRE_FIND_NEXT_ORIGIN(ire) { \ 140 while ((ire) != NULL && ((ire)->ire_marks & IRE_MARK_CONDEMNED))\ 141 (ire) = (ire)->ire_next; \ 142 } 143 144 145 /* Structure for ire_cache_count() */ 146 typedef struct { 147 int icc_total; /* Total number of IRE_CACHE */ 148 int icc_unused; /* # off/no PMTU unused since last reclaim */ 149 int icc_offlink; /* # offlink without PMTU information */ 150 int icc_pmtu; /* # offlink with PMTU information */ 151 int icc_onlink; /* # onlink */ 152 } ire_cache_count_t; 153 154 /* 155 * Structure for ire_cache_reclaim(). Each field is a fraction i.e. 1 meaning 156 * reclaim all, N meaning reclaim 1/Nth of all entries, 0 meaning reclaim none. 157 * 158 * The comment below (and for other netstack_t references) refers 159 * to the fact that we only do netstack_hold in particular cases, 160 * such as the references from open streams (ill_t and conn_t's 161 * pointers). Internally within IP we rely on IP's ability to cleanup e.g. 162 * ire_t's when an ill goes away. 163 */ 164 typedef struct { 165 int icr_unused; /* Fraction for unused since last reclaim */ 166 int icr_offlink; /* Fraction for offlink without PMTU info */ 167 int icr_pmtu; /* Fraction for offlink with PMTU info */ 168 int icr_onlink; /* Fraction for onlink */ 169 ip_stack_t *icr_ipst; /* Does not have a netstack_hold */ 170 } ire_cache_reclaim_t; 171 172 /* 173 * We use atomics so that we get an accurate accounting on the ires. 174 * Otherwise we can't determine leaks correctly. 175 */ 176 #define BUMP_IRE_STATS(ire_stats, x) atomic_add_64(&(ire_stats).x, 1) 177 178 #ifdef _KERNEL 179 /* 180 * Structure for passing args for the IRE cache lookup functions. 181 */ 182 typedef struct ire_ctable_args_s { 183 void *ict_addr; 184 void *ict_gateway; 185 int ict_type; 186 const ipif_t *ict_ipif; 187 zoneid_t ict_zoneid; 188 const ts_label_t *ict_tsl; 189 int ict_flags; 190 ip_stack_t *ict_ipst; 191 queue_t *ict_wq; 192 } ire_ctable_args_t; 193 194 struct ts_label_s; 195 struct nce_s; 196 197 extern ipaddr_t ip_plen_to_mask(uint_t); 198 extern in6_addr_t *ip_plen_to_mask_v6(uint_t, in6_addr_t *); 199 200 extern int ip_ire_advise(queue_t *, mblk_t *, cred_t *); 201 extern int ip_ire_delete(queue_t *, mblk_t *, cred_t *); 202 extern boolean_t ip_ire_clookup_and_delete(ipaddr_t, ipif_t *, ip_stack_t *); 203 extern void ip_ire_clookup_and_delete_v6(const in6_addr_t *, 204 ip_stack_t *); 205 206 extern void ip_ire_req(queue_t *, mblk_t *); 207 208 extern int ip_mask_to_plen(ipaddr_t); 209 extern int ip_mask_to_plen_v6(const in6_addr_t *); 210 211 extern ire_t *ipif_to_ire(const ipif_t *); 212 extern ire_t *ipif_to_ire_v6(const ipif_t *); 213 214 extern int ire_add(ire_t **, queue_t *, mblk_t *, ipsq_func_t, boolean_t); 215 extern void ire_add_then_send(queue_t *, ire_t *, mblk_t *); 216 extern int ire_add_v6(ire_t **, queue_t *, mblk_t *, ipsq_func_t); 217 extern int ire_atomic_start(irb_t *irb_ptr, ire_t *ire, queue_t *q, 218 mblk_t *mp, ipsq_func_t func); 219 extern void ire_atomic_end(irb_t *irb_ptr, ire_t *ire); 220 221 extern void ire_cache_count(ire_t *, char *); 222 extern ire_t *ire_cache_lookup(ipaddr_t, zoneid_t, 223 const struct ts_label_s *, ip_stack_t *); 224 extern ire_t *ire_cache_lookup_simple(ipaddr_t, ip_stack_t *); 225 extern ire_t *ire_cache_lookup_v6(const in6_addr_t *, zoneid_t, 226 const struct ts_label_s *, ip_stack_t *); 227 extern void ire_cache_reclaim(ire_t *, char *); 228 229 extern ire_t *ire_create_mp(uchar_t *, uchar_t *, uchar_t *, uchar_t *, 230 uint_t, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *, ipaddr_t, 231 uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, 232 ip_stack_t *); 233 extern ire_t *ire_create(uchar_t *, uchar_t *, uchar_t *, uchar_t *, 234 uint_t *, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *, 235 ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *, 236 tsol_gcgrp_t *, ip_stack_t *); 237 238 extern ire_t **ire_check_and_create_bcast(ipif_t *, ipaddr_t, 239 ire_t **, int); 240 extern ire_t **ire_create_bcast(ipif_t *, ipaddr_t, ire_t **); 241 extern ire_t *ire_init(ire_t *, uchar_t *, uchar_t *, uchar_t *, uchar_t *, 242 uint_t *, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *, 243 ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *, 244 tsol_gcgrp_t *, ip_stack_t *); 245 246 extern boolean_t ire_init_common(ire_t *, uint_t *, struct nce_s *, queue_t *, 247 queue_t *, ushort_t, ipif_t *, uint32_t, uint32_t, uint32_t, uchar_t, 248 const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); 249 250 extern ire_t *ire_create_v6(const in6_addr_t *, const in6_addr_t *, 251 const in6_addr_t *, const in6_addr_t *, uint_t *, struct nce_s *, queue_t *, 252 queue_t *, ushort_t, ipif_t *, 253 const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *, 254 tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); 255 256 extern ire_t *ire_create_mp_v6(const in6_addr_t *, const in6_addr_t *, 257 const in6_addr_t *, const in6_addr_t *, struct nce_s *, queue_t *, 258 queue_t *, ushort_t, ipif_t *, 259 const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *, 260 tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); 261 262 263 extern void ire_clookup_delete_cache_gw(ipaddr_t, zoneid_t, 264 ip_stack_t *); 265 extern void ire_clookup_delete_cache_gw_v6(const in6_addr_t *, zoneid_t, 266 ip_stack_t *); 267 268 extern ire_t *ire_ctable_lookup(ipaddr_t, ipaddr_t, int, const ipif_t *, 269 zoneid_t, const struct ts_label_s *, int, ip_stack_t *); 270 271 extern ire_t *ire_ctable_lookup_v6(const in6_addr_t *, const in6_addr_t *, 272 int, const ipif_t *, zoneid_t, const struct ts_label_s *, int, 273 ip_stack_t *); 274 275 extern void ire_delete(ire_t *); 276 extern void ire_delete_cache_gw(ire_t *, char *); 277 extern void ire_delete_cache_gw_v6(ire_t *, char *); 278 extern void ire_delete_cache_v6(ire_t *, char *); 279 extern void ire_delete_v6(ire_t *); 280 281 extern void ire_expire(ire_t *, char *); 282 283 extern void ire_flush_cache_v4(ire_t *, int); 284 extern void ire_flush_cache_v6(ire_t *, int); 285 286 extern ire_t *ire_ftable_lookup_v6(const in6_addr_t *, const in6_addr_t *, 287 const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t, 288 uint32_t, const struct ts_label_s *, int, ip_stack_t *); 289 290 extern ire_t *ire_ihandle_lookup_onlink(ire_t *); 291 extern ire_t *ire_ihandle_lookup_offlink(ire_t *, ire_t *); 292 extern ire_t *ire_ihandle_lookup_offlink_v6(ire_t *, ire_t *); 293 294 extern boolean_t ire_local_same_lan(ire_t *, ire_t *); 295 extern boolean_t ire_local_ok_across_zones(ire_t *, zoneid_t, void *, 296 const struct ts_label_s *, ip_stack_t *); 297 298 extern ire_t *ire_lookup_local(zoneid_t, ip_stack_t *); 299 extern ire_t *ire_lookup_local_v6(zoneid_t, ip_stack_t *); 300 301 extern ire_t *ire_lookup_multi(ipaddr_t, zoneid_t, ip_stack_t *); 302 extern ire_t *ire_lookup_multi_v6(const in6_addr_t *, zoneid_t, 303 ip_stack_t *); 304 305 extern void ire_refrele(ire_t *); 306 extern void ire_refrele_notr(ire_t *); 307 extern ire_t *ire_route_lookup(ipaddr_t, ipaddr_t, ipaddr_t, int, 308 const ipif_t *, ire_t **, zoneid_t, const struct ts_label_s *, int, 309 ip_stack_t *); 310 311 extern ire_t *ire_route_lookup_v6(const in6_addr_t *, const in6_addr_t *, 312 const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t, 313 const struct ts_label_s *, int, ip_stack_t *); 314 315 extern ill_t *ire_to_ill(const ire_t *); 316 317 extern void ire_walk(pfv_t, void *, ip_stack_t *); 318 extern void ire_walk_ill(uint_t, uint_t, pfv_t, void *, ill_t *); 319 extern void ire_walk_ill_v4(uint_t, uint_t, pfv_t, void *, ill_t *); 320 extern void ire_walk_ill_v6(uint_t, uint_t, pfv_t, void *, ill_t *); 321 extern void ire_walk_v4(pfv_t, void *, zoneid_t, ip_stack_t *); 322 extern void ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, 323 pfv_t func, void *arg, size_t ftbl_sz, size_t htbl_sz, 324 irb_t **ipftbl, size_t ctbl_sz, irb_t *ipctbl, ill_t *ill, 325 zoneid_t zoneid, ip_stack_t *); 326 extern void ire_walk_v6(pfv_t, void *, zoneid_t, ip_stack_t *); 327 328 extern boolean_t ire_multirt_lookup(ire_t **, ire_t **, uint32_t, 329 const struct ts_label_s *, ip_stack_t *); 330 extern boolean_t ire_multirt_need_resolve(ipaddr_t, 331 const struct ts_label_s *, ip_stack_t *); 332 extern boolean_t ire_multirt_lookup_v6(ire_t **, ire_t **, uint32_t, 333 const struct ts_label_s *, ip_stack_t *); 334 extern boolean_t ire_multirt_need_resolve_v6(const in6_addr_t *, 335 const struct ts_label_s *, ip_stack_t *); 336 337 extern ire_t *ipif_lookup_multi_ire(ipif_t *, ipaddr_t); 338 extern ire_t *ipif_lookup_multi_ire_v6(ipif_t *, const in6_addr_t *); 339 340 extern ire_t *ire_get_next_bcast_ire(ire_t *, ire_t *); 341 extern ire_t *ire_get_next_default_ire(ire_t *, ire_t *); 342 343 extern void ire_arpresolve(ire_t *); 344 extern void ire_freemblk(ire_t *); 345 extern boolean_t ire_match_args(ire_t *, ipaddr_t, ipaddr_t, ipaddr_t, 346 int, const ipif_t *, zoneid_t, uint32_t, const struct ts_label_s *, int, 347 queue_t *); 348 extern int ire_nce_init(ire_t *, struct nce_s *); 349 extern boolean_t ire_walk_ill_match(uint_t, uint_t, ire_t *, ill_t *, 350 zoneid_t, ip_stack_t *); 351 extern ire_t *ire_arpresolve_lookup(ipaddr_t, ipaddr_t, ipif_t *, zoneid_t, 352 ip_stack_t *, queue_t *); 353 354 #endif /* _KERNEL */ 355 356 #ifdef __cplusplus 357 } 358 #endif 359 360 #endif /* _INET_IP_IRE_H */ 361