/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

#ifndef	_INET_IP_IRE_H
#define	_INET_IP_IRE_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef	__cplusplus
extern "C" {
#endif

#define	IPV6_LL_PREFIXLEN	10	/* Number of bits in link-local prefix */

#define	IP_CACHE_TABLE_SIZE	256
#define	IP_MRTUN_TABLE_SIZE	256	/* Mobile IP reverse tunnel table */
					/* size.  Only used by mipagent. */
#define	IP_SRCIF_TABLE_SIZE	256	/* Per-interface routing table size */
#define	IP_MASK_TABLE_SIZE	(IP_ABITS + 1)		/* 33 ptrs */

#define	IP6_FTABLE_HASH_SIZE	32	/* size of each hash table in ptrs */
#define	IP6_CACHE_TABLE_SIZE	256
#define	IP6_MASK_TABLE_SIZE	(IPV6_ABITS + 1)	/* 129 ptrs */

/*
 * We use the common modulo hash function.  In ip_ire_init() we make sure
 * that the cache table size is always a power of 2; that is why we can
 * use & instead of %.  Also note that we try hard to make sure the lower
 * bits of an address capture most of the information from the whole
 * address, because our hash table is much smaller than 2^32 buckets and
 * the lower bits are therefore the most important.
 */
#define	IRE_ADDR_HASH(addr, table_size) \
	(((addr) ^ ((addr) >> 8) ^ ((addr) >> 16) ^ ((addr) >> 24)) & \
	((table_size) - 1))

/*
 * Exclusive-or those bytes that are likely to contain the MAC
 * address.  Assumes EUI-64 format for good hashing.
 */
#define	IRE_ADDR_HASH_V6(addr, table_size) \
	(((addr).s6_addr32[3] ^ \
	(((addr).s6_addr32[3] ^ (addr).s6_addr32[2]) >> 12)) & \
	((table_size) - 1))

/* This assumes that the ftable size is a power of 2. */
#define	IRE_ADDR_MASK_HASH_V6(addr, mask, table_size) \
	((((addr).s6_addr8[8] & (mask).s6_addr8[8]) ^ \
	((addr).s6_addr8[9] & (mask).s6_addr8[9]) ^ \
	((addr).s6_addr8[10] & (mask).s6_addr8[10]) ^ \
	((addr).s6_addr8[13] & (mask).s6_addr8[13]) ^ \
	((addr).s6_addr8[14] & (mask).s6_addr8[14]) ^ \
	((addr).s6_addr8[15] & (mask).s6_addr8[15])) & ((table_size) - 1))
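
/*
 * Illustrative sketch (hypothetical usage, not part of the original
 * header): given a destination address dst, a v4 cache bucket could be
 * selected as
 *
 *	irb_t *irb = &ip_cache_table[IRE_ADDR_HASH(dst,
 *	    ip_cache_table_size)];
 *
 * The & in these macros is only correct because the table sizes are
 * kept at powers of 2.
 */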

/*
 * Match parameter definitions for IRE lookup routines.
 */
#define	MATCH_IRE_DSTONLY	0x0000	/* Match just the address */
#define	MATCH_IRE_TYPE		0x0001	/* Match IRE type */
#define	MATCH_IRE_SRC		0x0002	/* Match IRE source address */
#define	MATCH_IRE_MASK		0x0004	/* Match IRE mask */
#define	MATCH_IRE_WQ		0x0008	/* Match IRE Write Q */
#define	MATCH_IRE_GW		0x0010	/* Match IRE gateway */
#define	MATCH_IRE_IPIF		0x0020	/* Match IRE ipif */
#define	MATCH_IRE_RECURSIVE	0x0040	/* Do recursive lookup if necessary */
#define	MATCH_IRE_DEFAULT	0x0080	/* Return default route if no route */
					/* found. */
#define	MATCH_IRE_RJ_BHOLE	0x0100	/* During lookup, if we hit an ire */
					/* with RTF_REJECT or RTF_BLACKHOLE, */
					/* return the ire.  No recursive */
					/* lookup should be done. */
#define	MATCH_IRE_IHANDLE	0x0200	/* Match IRE on ihandle */
#define	MATCH_IRE_MARK_HIDDEN	0x0400	/* Match IRE ire_marks with */
					/* IRE_MARK_HIDDEN. */
/*
 * MATCH_IRE_ILL is used whenever we want to specifically match an IRE
 * whose ire_ipif->ipif_ill or (ill_t *)ire_stq->q_ptr matches a given
 * ill.  When MATCH_IRE_ILL is used to locate an IRE_CACHE, it implies
 * that the packet will not be load balanced.  This is normally used
 * by in.mpathd to send out failure detection probes.
 *
 * MATCH_IRE_ILL_GROUP is used whenever we are not specific about which
 * interface (ill) the packet should be sent out on.  This implies that
 * the packet is subject to load balancing and may go out on any
 * interface in the group.  When there is only one interface in the
 * group, MATCH_IRE_ILL_GROUP becomes MATCH_IRE_ILL.  Most of the code
 * uses MATCH_IRE_ILL_GROUP; MATCH_IRE_ILL is used only in the few cases
 * where we want to disable load balancing.
 *
 * MATCH_IRE_PARENT is used whenever we unconditionally want to get the
 * parent IRE (sire) while recursively searching IREs for an offsubnet
 * destination.  With this flag, even if no IRE_CACHETABLE or
 * IRE_INTERFACE is found to help resolve the IRE_OFFSUBNET in the
 * lookup routines, the IRE_OFFSUBNET sire, if any, is returned to the
 * caller.
 */
#define	MATCH_IRE_ILL_GROUP	0x0800	/* Match IRE on ill or the ill_group. */
#define	MATCH_IRE_ILL		0x1000	/* Match IRE on the ill only */

#define	MATCH_IRE_PARENT	0x2000	/* Match parent ire, if any, */
					/* even if ire is not matched. */
#define	MATCH_IRE_ZONEONLY	0x4000	/* Match IREs in specified zone, ie */
					/* don't match IRE_LOCALs from other */
					/* zones or shared IREs */
#define	MATCH_IRE_MARK_PRIVATE_ADDR	0x8000	/* Match IRE ire_marks with */
						/* IRE_MARK_PRIVATE_ADDR. */
#define	MATCH_IRE_SECATTR	0x10000	/* Match gateway security attributes */
#define	MATCH_IRE_COMPLETE	0x20000	/* ire_ftable_lookup() can return */
					/* IRE_CACHE entry only if it is */
					/* ND_REACHABLE */
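
/*
 * Illustrative sketch (hypothetical usage, not part of the original
 * header): a cache-table lookup that must match both the IRE type and
 * the ill of a given ipif (e.g. an in.mpathd probe, where load
 * balancing across the ill group is undesirable) could be issued as
 *
 *	ire = ire_ctable_lookup(dst, 0, IRE_CACHE, ipif, zoneid, NULL,
 *	    MATCH_IRE_TYPE | MATCH_IRE_ILL);
 *
 * whereas ordinary forwarding paths would pass MATCH_IRE_ILL_GROUP so
 * the packet may leave on any interface in the group.
 */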

/*
 * Any ire to nce association is long term, and the refhold and refrele
 * may be done by different threads.  So all cases of making or breaking
 * an ire to nce association should effectively use the NOTR variants.
 * To understand the *effectively* part, read on.
 *
 * ndp_lookup() and ndp_add() implicitly do an NCE_REFHOLD.  So wherever
 * we make an ire to nce association after calling these functions, we
 * effectively want to end up with an NCE_REFHOLD_NOTR.  We call this
 * macro to achieve that effect; it changes an NCE_REFHOLD into an
 * NCE_REFHOLD_NOTR.  The macro's NCE_REFRELE cancels out the implicit
 * NCE_REFHOLD from ndp_lookup()/ndp_add(), and what you are left with
 * is an NCE_REFHOLD_NOTR.
 */
#define	NCE_REFHOLD_TO_REFHOLD_NOTR(nce) {	\
	NCE_REFHOLD_NOTR(nce);			\
	NCE_REFRELE(nce);			\
}

/*
 * Find the next ire_t entry in the ire_next chain starting at ire
 * that is not CONDEMNED.  ire is set to NULL if we reach the end of
 * the list.  The caller must hold the ire_bucket lock.
 */
#define	IRE_FIND_NEXT_ORIGIN(ire) { \
	while ((ire) != NULL && ((ire)->ire_marks & IRE_MARK_CONDEMNED))\
		(ire) = (ire)->ire_next; \
}


/* Structure for ire_cache_count() */
typedef struct {
	int	icc_total;	/* Total number of IRE_CACHE */
	int	icc_unused;	/* # off/no PMTU unused since last reclaim */
	int	icc_offlink;	/* # offlink without PMTU information */
	int	icc_pmtu;	/* # offlink with PMTU information */
	int	icc_onlink;	/* # onlink */
} ire_cache_count_t;

/*
 * Structure for ire_cache_reclaim().  Each field is a fraction: 1 means
 * reclaim all, N means reclaim 1/Nth of all entries, and 0 means
 * reclaim none.
 */
typedef struct {
	int	icr_unused;	/* Fraction for unused since last reclaim */
	int	icr_offlink;	/* Fraction for offlink without PMTU info */
	int	icr_pmtu;	/* Fraction for offlink with PMTU info */
	int	icr_onlink;	/* Fraction for onlink */
} ire_cache_reclaim_t;

typedef struct {
	uint64_t ire_stats_alloced;	/* # of ires alloced */
	uint64_t ire_stats_freed;	/* # of ires freed */
	uint64_t ire_stats_inserted;	/* # of ires inserted in the bucket */
	uint64_t ire_stats_deleted;	/* # of ires deleted from the bucket */
} ire_stats_t;

extern ire_stats_t ire_stats_v4;
extern uint32_t ip_cache_table_size;
extern uint32_t ip6_cache_table_size;
extern irb_t *ip_cache_table;
extern uint32_t ip6_ftable_hash_size;

/*
 * We use atomics so that we get accurate accounting of the ires.
 * Otherwise we can't detect leaks correctly.
 */
#define	BUMP_IRE_STATS(ire_stats, x)	atomic_add_64(&(ire_stats).x, 1)
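
/*
 * Illustrative sketch (hypothetical usage, not part of the original
 * header): bumping the v4 allocation counter when an ire is created
 * would look like
 *
 *	BUMP_IRE_STATS(ire_stats_v4, ire_stats_alloced);
 *
 * which expands to an atomic_add_64() on that field, so concurrent
 * updaters never lose a count.
 */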

extern irb_t *ip_forwarding_table_v6[];
extern irb_t *ip_cache_table_v6;
extern irb_t *ip_mrtun_table;
extern irb_t *ip_srcif_table;
extern kmutex_t ire_ft_init_lock;
extern kmutex_t ire_mrtun_lock;
extern kmutex_t ire_srcif_table_lock;
extern ire_stats_t ire_stats_v6;
extern uint_t ire_mrtun_count;
extern uint_t ire_srcif_table_count;

#ifdef _KERNEL
struct ts_label_s;

extern ipaddr_t ip_plen_to_mask(uint_t);
extern in6_addr_t *ip_plen_to_mask_v6(uint_t, in6_addr_t *);

extern int ip_ire_advise(queue_t *, mblk_t *, cred_t *);
extern int ip_ire_delete(queue_t *, mblk_t *, cred_t *);
extern boolean_t ip_ire_clookup_and_delete(ipaddr_t, ipif_t *);
extern void ip_ire_clookup_and_delete_v6(const in6_addr_t *);

extern int ip_ire_report(queue_t *, mblk_t *, caddr_t, cred_t *);
extern int ip_ire_report_mrtun(queue_t *, mblk_t *, caddr_t, cred_t *);
extern int ip_ire_report_srcif(queue_t *, mblk_t *, caddr_t, cred_t *);
extern int ip_ire_report_v6(queue_t *, mblk_t *, caddr_t, cred_t *);
extern void ire_report_ftable(ire_t *, char *);

extern void ip_ire_req(queue_t *, mblk_t *);

extern int ip_mask_to_plen(ipaddr_t);
extern int ip_mask_to_plen_v6(const in6_addr_t *);

extern ire_t *ipif_to_ire(const ipif_t *);
extern ire_t *ipif_to_ire_v6(const ipif_t *);

extern int ire_add(ire_t **, queue_t *, mblk_t *, ipsq_func_t, boolean_t);
extern int ire_add_mrtun(ire_t **, queue_t *, mblk_t *, ipsq_func_t);
extern void ire_add_then_send(queue_t *, ire_t *, mblk_t *);
extern int ire_add_v6(ire_t **, queue_t *, mblk_t *, ipsq_func_t);
extern int ire_atomic_start(irb_t *irb_ptr, ire_t *ire, queue_t *q,
    mblk_t *mp, ipsq_func_t func);
extern void ire_atomic_end(irb_t *irb_ptr, ire_t *ire);

extern void ire_cache_count(ire_t *, char *);
extern ire_t *ire_cache_lookup(ipaddr_t, zoneid_t,
    const struct ts_label_s *);
extern ire_t *ire_cache_lookup_v6(const in6_addr_t *, zoneid_t,
    const struct ts_label_s *);
extern void ire_cache_reclaim(ire_t *, char *);

extern void ire_check_bcast_present(ipif_t *, ipaddr_t, int, boolean_t *,
    boolean_t *);

extern ire_t *ire_create_mp(uchar_t *, uchar_t *, uchar_t *, uchar_t *,
    uchar_t *, uint_t, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *,
    ipif_t *, ill_t *, ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *,
    tsol_gc_t *, tsol_gcgrp_t *);

extern ire_t *ire_create(uchar_t *, uchar_t *, uchar_t *, uchar_t *,
    uchar_t *, uint_t *, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *,
    ipif_t *, ill_t *, ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *,
    tsol_gc_t *, tsol_gcgrp_t *);

extern ire_t **ire_check_and_create_bcast(ipif_t *, ipaddr_t,
    ire_t **, int);
extern ire_t **ire_create_bcast(ipif_t *, ipaddr_t, ire_t **);
extern ire_t *ire_init(ire_t *, uchar_t *, uchar_t *, uchar_t *,
    uchar_t *, uchar_t *, uint_t *, mblk_t *, queue_t *, queue_t *, ushort_t,
    mblk_t *, ipif_t *, ill_t *, ipaddr_t, uint32_t, uint32_t, uint32_t,
    const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *);

extern boolean_t ire_init_common(ire_t *, uint_t *, mblk_t *, queue_t *,
    queue_t *, ushort_t, mblk_t *, ipif_t *, ill_t *, uint32_t,
    uint32_t, uint32_t, uchar_t, const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *);

extern ire_t *ire_create_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, const in6_addr_t *, uint_t *, mblk_t *, queue_t *,
    queue_t *, ushort_t, mblk_t *, ipif_t *,
    const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *,
    tsol_gc_t *, tsol_gcgrp_t *);

extern ire_t *ire_create_mp_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, const in6_addr_t *, mblk_t *, queue_t *,
    queue_t *, ushort_t, mblk_t *, ipif_t *,
    const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *,
    tsol_gc_t *, tsol_gcgrp_t *);

extern ire_t *ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, const in6_addr_t *, uint_t *, mblk_t *, queue_t *,
    queue_t *, ushort_t, mblk_t *, ipif_t *,
    const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *,
    tsol_gc_t *, tsol_gcgrp_t *);

extern void ire_clookup_delete_cache_gw(ipaddr_t, zoneid_t);
extern void ire_clookup_delete_cache_gw_v6(const in6_addr_t *, zoneid_t);

extern ire_t *ire_ctable_lookup(ipaddr_t, ipaddr_t, int, const ipif_t *,
    zoneid_t, const struct ts_label_s *, int);

extern ire_t *ire_ctable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
    int, const ipif_t *, zoneid_t, const struct ts_label_s *, int);

extern void ire_delete(ire_t *);
extern void ire_delete_cache_gw(ire_t *, char *);
extern void ire_delete_cache_gw_v6(ire_t *, char *);
extern void ire_delete_cache_v6(ire_t *, char *);
extern void ire_delete_srcif(ire_t *);
extern void ire_delete_v6(ire_t *);

extern void ire_expire(ire_t *, char *);
extern void ire_fastpath_flush(ire_t *, void *);
extern boolean_t ire_fastpath_update(ire_t *, void *);

extern void ire_flush_cache_v4(ire_t *, int);
extern void ire_flush_cache_v6(ire_t *, int);

extern ire_t *ire_ftable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t,
    uint32_t, const struct ts_label_s *, int);

extern ire_t *ire_ihandle_lookup_onlink(ire_t *);
extern ire_t *ire_ihandle_lookup_offlink(ire_t *, ire_t *);
extern ire_t *ire_ihandle_lookup_offlink_v6(ire_t *, ire_t *);

extern boolean_t ire_local_same_ill_group(ire_t *, ire_t *);
extern boolean_t ire_local_ok_across_zones(ire_t *, zoneid_t, void *,
    const struct ts_label_s *tsl);

extern ire_t *ire_lookup_local(zoneid_t);
extern ire_t *ire_lookup_local_v6(zoneid_t);

extern ire_t *ire_lookup_multi(ipaddr_t, zoneid_t);
extern ire_t *ire_lookup_multi_v6(const in6_addr_t *, zoneid_t);

extern ire_t *ire_mrtun_lookup(ipaddr_t, ill_t *);

extern void ire_refrele(ire_t *);
extern void ire_refrele_notr(ire_t *);
extern ire_t *ire_route_lookup(ipaddr_t, ipaddr_t, ipaddr_t, int,
    const ipif_t *, ire_t **, zoneid_t, const struct ts_label_s *, int);

extern ire_t *ire_route_lookup_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t,
    const struct ts_label_s *, int);

extern ire_t *ire_srcif_table_lookup(ipaddr_t, int, ipif_t *, ill_t *, int);
extern ill_t *ire_to_ill(const ire_t *);

extern void ire_walk(pfv_t, void *);
extern void ire_walk_ill(uint_t, uint_t, pfv_t, void *, ill_t *);
extern void ire_walk_ill_mrtun(uint_t, uint_t, pfv_t, void *, ill_t *);
extern void ire_walk_ill_v4(uint_t, uint_t, pfv_t, void *, ill_t *);
extern void ire_walk_ill_v6(uint_t, uint_t, pfv_t, void *, ill_t *);
extern void ire_walk_v4(pfv_t, void *, zoneid_t);
extern void ire_walk_ill_tables(uint_t match_flags, uint_t ire_type,
    pfv_t func, void *arg, size_t ftbl_sz, size_t htbl_sz,
    irb_t **ipftbl, size_t ctbl_sz, irb_t *ipctbl, ill_t *ill,
    zoneid_t zoneid);
extern void ire_walk_srcif_table_v4(pfv_t, void *);
extern void ire_walk_v6(pfv_t, void *, zoneid_t);

extern boolean_t ire_multirt_lookup(ire_t **, ire_t **, uint32_t,
    const struct ts_label_s *);
extern boolean_t ire_multirt_need_resolve(ipaddr_t,
    const struct ts_label_s *);
extern boolean_t ire_multirt_lookup_v6(ire_t **, ire_t **, uint32_t,
    const struct ts_label_s *);
extern boolean_t ire_multirt_need_resolve_v6(const in6_addr_t *,
    const struct ts_label_s *);

extern ire_t *ipif_lookup_multi_ire(ipif_t *, ipaddr_t);
extern ire_t *ipif_lookup_multi_ire_v6(ipif_t *, const in6_addr_t *);

extern void ire_fastpath_list_dispatch(ill_t *,
    boolean_t (*)(ire_t *, void *), void *);
extern void ire_fastpath_list_delete(ill_t *, ire_t *);

extern mblk_t *ip_nexthop_route(const struct sockaddr *, char *);
extern mblk_t *ip_nexthop(const struct sockaddr *, const char *);

extern ire_t *ire_get_next_bcast_ire(ire_t *, ire_t *);
extern ire_t *ire_get_next_default_ire(ire_t *, ire_t *);

extern void ire_arpresolve(ire_t *, ill_t *);
extern void ire_freemblk(ire_t *);
extern void ire_fastpath(ire_t *);
extern boolean_t ire_match_args(ire_t *, ipaddr_t, ipaddr_t, ipaddr_t,
    int, const ipif_t *, zoneid_t, uint32_t, const struct ts_label_s *, int);
extern int ire_nce_init(ire_t *, mblk_t *, mblk_t *);
extern boolean_t ire_walk_ill_match(uint_t, uint_t, ire_t *, ill_t *,
    zoneid_t);

#endif /* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif /* _INET_IP_IRE_H */