xref: /titanic_44/usr/src/uts/common/inet/ip_ire.h (revision 749f21d359d8fbd020c974a1a5227316221bfc9c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27 
28 #ifndef	_INET_IP_IRE_H
29 #define	_INET_IP_IRE_H
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #ifdef	__cplusplus
34 extern "C" {
35 #endif
36 
#define	IPV6_LL_PREFIXLEN	10	/* Number of bits in link-local pref */

/*
 * Sizes of the IPv4 and IPv6 IRE tables.  The hash macros in this file
 * reduce their result with & (table_size - 1), so any size that is
 * handed to them must be a power of 2.  The mask tables are sized from
 * the address width (IP_ABITS / IPV6_ABITS, defined elsewhere) plus one.
 */
#define	IP_FTABLE_HASH_SIZE	32	/* size of each hash table in ptrs */
#define	IP_CACHE_TABLE_SIZE	256
#define	IP_MRTUN_TABLE_SIZE	256	/* Mobile IP reverse tunnel table */
					/* size. Only used by mipagent */
#define	IP_SRCIF_TABLE_SIZE	256	/* Per interface routing table size */
#define	IP_MASK_TABLE_SIZE	(IP_ABITS + 1)		/* 33 ptrs */

#define	IP6_FTABLE_HASH_SIZE	32	/* size of each hash table in ptrs */
#define	IP6_CACHE_TABLE_SIZE	256
#define	IP6_MASK_TABLE_SIZE	(IPV6_ABITS + 1)	/* 129 ptrs */
49 
/*
 * Hash an IPv4 address into a table of table_size buckets.
 *
 * We use the common modulo hash function.  In ip_ire_init(), we make
 * sure that the cache table size is always a power of 2.  That's why
 * we can use & instead of %.  Because the hash table is probably a lot
 * smaller than 2^32 buckets, only the lower bits of the result survive
 * the mask.  The address is therefore XOR-ed with itself shifted right
 * by 8, 16 and 24 bits, so that the lower bits capture most of the
 * information in the whole address.
 *
 * addr is evaluated four times; do not pass an expression with side
 * effects.
 */
#define	IRE_ADDR_HASH(addr, table_size) \
	(((addr) ^ ((addr) >> 8) ^ ((addr) >> 16) ^ ((addr) >> 24)) &	\
	((table_size) - 1))
61 
/*
 * Hash an IPv6 address into a table of table_size buckets.
 *
 * Exclusive-or those bytes that are likely to contain the MAC
 * address.  Assumes EUI-64 format for good hashing.  Only the last two
 * 32-bit words of the address (s6_addr32[2] and s6_addr32[3]) take part
 * in the hash.  The result is reduced with & (table_size - 1), so
 * table_size must be a power of 2.
 *
 * addr is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define	IRE_ADDR_HASH_V6(addr, table_size)				\
	(((addr).s6_addr32[3] ^						\
	(((addr).s6_addr32[3] ^ (addr).s6_addr32[2]) >> 12)) &		\
	((table_size) - 1))
/*
 * Hash a masked IPv6 address into a table of table_size buckets by
 * XOR-ing bytes 8, 9, 10, 13, 14 and 15 of (addr & mask).
 * This assumes that the ftable size is a power of 2.
 *
 * NOTE(review): bytes 11 and 12 are left out, presumably because they
 * are the constant 0xff/0xfe filler of an EUI-64 interface identifier
 * and so add nothing to the hash -- confirm.
 *
 * addr and mask are each evaluated six times; do not pass expressions
 * with side effects.
 */
#define	IRE_ADDR_MASK_HASH_V6(addr, mask, table_size)			\
	((((addr).s6_addr8[8] & (mask).s6_addr8[8]) ^			\
	((addr).s6_addr8[9] & (mask).s6_addr8[9]) ^			\
	((addr).s6_addr8[10] & (mask).s6_addr8[10]) ^			\
	((addr).s6_addr8[13] & (mask).s6_addr8[13]) ^			\
	((addr).s6_addr8[14] & (mask).s6_addr8[14]) ^			\
	((addr).s6_addr8[15] & (mask).s6_addr8[15])) & ((table_size) - 1))
78 
/*
 * match parameter definitions for
 * IRE lookup routines.
 *
 * MATCH_IRE_DSTONLY is 0, i.e. the absence of every other flag.  Each
 * of the remaining flags is a distinct single bit, so they can be
 * OR-ed together.
 */
#define	MATCH_IRE_DSTONLY	0x0000	/* Match just the address */
#define	MATCH_IRE_TYPE		0x0001	/* Match IRE type */
#define	MATCH_IRE_SRC		0x0002	/* Match IRE source address */
#define	MATCH_IRE_MASK		0x0004	/* Match IRE mask */
#define	MATCH_IRE_WQ		0x0008	/* Match IRE Write Q */
#define	MATCH_IRE_GW		0x0010	/* Match IRE gateway */
#define	MATCH_IRE_IPIF		0x0020	/* Match IRE ipif */
#define	MATCH_IRE_RECURSIVE	0x0040	/* Do recursive lookup if necessary */
#define	MATCH_IRE_DEFAULT	0x0080	/* Return default route if no route */
					/* found. */
#define	MATCH_IRE_RJ_BHOLE	0x0100	/* During lookup if we hit an ire */
					/* with RTF_REJECT or RTF_BLACKHOLE, */
					/* return the ire. No recursive */
					/* lookup should be done. */
#define	MATCH_IRE_IHANDLE	0x0200	/* Match IRE on ihandle */
#define	MATCH_IRE_MARK_HIDDEN	0x0400	/* Match IRE ire_marks with */
					/* IRE_MARK_HIDDEN. */
/*
 * MATCH_IRE_ILL is used whenever we want to specifically match an IRE
 * whose ire_ipif->ipif_ill or (ill_t *)ire_stq->q_ptr matches a given
 * ill. When MATCH_IRE_ILL is used to locate an IRE_CACHE, it implies
 * that the packet will not be load balanced. This is normally used
 * by in.mpathd to send out failure detection probes.
 *
 * MATCH_IRE_ILL_GROUP is used whenever we are not specific about which
 * interface (ill) the packet should be sent out. This implies that the
 * packets will be subjected to load balancing and it might go out on
 * any interface in the group. When there is only one interface in the
 * group, MATCH_IRE_ILL_GROUP becomes MATCH_IRE_ILL. Most of the code
 * uses MATCH_IRE_ILL_GROUP and MATCH_IRE_ILL is used in very few cases
 * where we want to disable load balancing.
 *
 * MATCH_IRE_PARENT is used whenever we unconditionally want to get the
 * parent IRE (sire) while recursively searching IREs for an offsubnet
 * destination. With this flag, even if no IRE_CACHETABLE or IRE_INTERFACE
 * is found to help resolving IRE_OFFSUBNET in lookup routines, the
 * IRE_OFFSUBNET sire, if any, is returned to the caller.
 */
#define	MATCH_IRE_ILL_GROUP	0x0800	/* Match IRE on ill or the ill_group. */
#define	MATCH_IRE_ILL		0x1000	/* Match IRE on the ill only */

#define	MATCH_IRE_PARENT	0x2000	/* Match parent ire, if any, */
					/* even if ire is not matched. */
#define	MATCH_IRE_ZONEONLY	0x4000	/* Match IREs in specified zone, ie */
					/* don't match IRE_LOCALs from other */
					/* zones or shared IREs */
#define	MATCH_IRE_MARK_PRIVATE_ADDR	0x8000	/* Match IRE ire_marks with */
						/* IRE_MARK_PRIVATE_ADDR. */
131 
/*
 * Structure for ire_cache_count().  Holds one counter per category of
 * IRE_CACHE entry.
 * NOTE(review): presumably handed to ire_cache_count() through its
 * char * argument -- confirm against the definition.
 */
typedef struct {
	int	icc_total;	/* Total number of IRE_CACHE */
	int	icc_unused;	/* # off/no PMTU unused since last reclaim */
	int	icc_offlink;	/* # offlink without PMTU information */
	int	icc_pmtu;	/* # offlink with PMTU information */
	int	icc_onlink;	/* # onlink */
} ire_cache_count_t;
140 
/*
 * Structure for ire_cache_reclaim(). Each field is a fraction i.e. 1 meaning
 * reclaim all, N meaning reclaim 1/Nth of all entries, 0 meaning reclaim none.
 * There is one fraction per category of cache entry.
 */
typedef struct {
	int	icr_unused;	/* Fraction for unused since last reclaim */
	int	icr_offlink;	/* Fraction for offlink without PMTU info */
	int	icr_pmtu;	/* Fraction for offlink with PMTU info */
	int	icr_onlink;	/* Fraction for onlink */
} ire_cache_reclaim_t;
151 
/*
 * IRE accounting counters.  One instance is declared per IP version
 * (ire_stats_v4 here, ire_stats_v6 further down in this file).
 */
typedef struct {
	uint64_t ire_stats_alloced;	/* # of ires alloced */
	uint64_t ire_stats_freed;	/* # of ires freed */
	uint64_t ire_stats_inserted;	/* # of ires inserted in the bucket */
	uint64_t ire_stats_deleted;	/* # of ires deleted from the bucket */
} ire_stats_t;

extern ire_stats_t ire_stats_v4;

/*
 * Increment counter x of the ire_stats_t object ire_stats by one.
 *
 * We use atomics so that we get an accurate accounting on the ires.
 * Otherwise we can't determine leaks correctly.
 *
 * ire_stats must be an lvalue of type ire_stats_t (not a pointer), and
 * x one of its member names, e.g.
 *	BUMP_IRE_STATS(ire_stats_v4, ire_stats_alloced);
 */
#define	BUMP_IRE_STATS(ire_stats, x) atomic_add_64(&(ire_stats).x, 1)
166 
167 extern irb_t *ip_forwarding_table_v6[];
168 extern irb_t *ip_cache_table_v6;
169 extern irb_t *ip_mrtun_table;
170 extern irb_t *ip_srcif_table;
171 extern kmutex_t ire_ft_init_lock;
172 extern kmutex_t	ire_mrtun_lock;
173 extern kmutex_t ire_srcif_table_lock;
174 extern ire_stats_t ire_stats_v6;
175 extern uint_t	ire_mrtun_count;
176 extern uint_t ire_srcif_table_count;
177 
178 #ifdef _KERNEL
179 extern	ipaddr_t	ip_plen_to_mask(uint_t);
180 extern	in6_addr_t	*ip_plen_to_mask_v6(uint_t, in6_addr_t *);
181 
182 extern	int	ip_ire_advise(queue_t *, mblk_t *, cred_t *);
183 extern	int	ip_ire_delete(queue_t *, mblk_t *, cred_t *);
184 extern	boolean_t ip_ire_clookup_and_delete(ipaddr_t, ipif_t *);
185 extern	void	ip_ire_clookup_and_delete_v6(const in6_addr_t *);
186 
187 extern	int	ip_ire_report(queue_t *, mblk_t *, caddr_t, cred_t *);
188 extern	int	ip_ire_report_mrtun(queue_t *, mblk_t *, caddr_t, cred_t *);
189 extern	int	ip_ire_report_srcif(queue_t *, mblk_t *, caddr_t, cred_t *);
190 extern	int	ip_ire_report_v6(queue_t *, mblk_t *, caddr_t, cred_t *);
191 
192 extern	void	ip_ire_req(queue_t *, mblk_t *);
193 
194 extern	int	ip_mask_to_plen(ipaddr_t);
195 extern	int	ip_mask_to_plen_v6(const in6_addr_t *);
196 
197 extern	ire_t	*ipif_to_ire(ipif_t *);
198 extern	ire_t	*ipif_to_ire_v6(ipif_t *);
199 
200 extern	int	ire_add(ire_t **, queue_t *, mblk_t *, ipsq_func_t);
201 extern	int	ire_add_mrtun(ire_t **, queue_t *, mblk_t *, ipsq_func_t);
202 extern	void	ire_add_then_send(queue_t *, ire_t *, mblk_t *);
203 extern	int	ire_add_v6(ire_t **, queue_t *, mblk_t *, ipsq_func_t);
204 extern	int	ire_atomic_start(irb_t *irb_ptr, ire_t *ire, queue_t *q,
205     mblk_t *mp, ipsq_func_t func);
206 extern	void	ire_atomic_end(irb_t *irb_ptr, ire_t *ire);
207 
208 extern	void	ire_cache_count(ire_t *, char *);
209 extern	ire_t	*ire_cache_lookup(ipaddr_t, zoneid_t);
210 extern	ire_t	*ire_cache_lookup_v6(const in6_addr_t *, zoneid_t);
211 extern	void	ire_cache_reclaim(ire_t *, char *);
212 
213 extern	void	ire_check_bcast_present(ipif_t *, ipaddr_t, int, boolean_t *,
214     boolean_t *);
215 extern	ire_t	*ire_create_mp(uchar_t *, uchar_t *, uchar_t *, uchar_t *,
216     uchar_t *, uint_t, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *,
217     ipif_t *, ill_t *, ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *);
218 
219 extern	ire_t	*ire_create(uchar_t *, uchar_t *, uchar_t *, uchar_t *,
220     uchar_t *, uint_t *, mblk_t *, queue_t *, queue_t *, ushort_t, mblk_t *,
221     ipif_t *, ill_t *, ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *);
222 
223 extern	ire_t	**ire_check_and_create_bcast(ipif_t *, ipaddr_t,
224     ire_t **, int);
225 extern	ire_t	**ire_create_bcast(ipif_t *, ipaddr_t, ire_t **);
226 extern	ire_t	*ire_init(ire_t *, uchar_t *, uchar_t *, uchar_t *,
227     uchar_t *, uchar_t *, uint_t *, mblk_t *, queue_t *, queue_t *, ushort_t,
228     mblk_t *, ipif_t *, ill_t *, ipaddr_t, uint32_t, uint32_t, uint32_t,
229     const iulp_t *);
230 
231 extern	void	ire_init_common(ire_t *, uint_t *, mblk_t *, queue_t *,
232     queue_t *, ushort_t, mblk_t *, ipif_t *, ill_t *, uint32_t,
233     uint32_t, uint32_t, uchar_t, const iulp_t *);
234 
235 extern	ire_t	*ire_create_v6(const in6_addr_t *, const in6_addr_t *,
236     const in6_addr_t *, const in6_addr_t *, uint_t *, mblk_t *, queue_t *,
237     queue_t *, ushort_t, mblk_t *, ipif_t *,
238     const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *);
239 
240 extern	ire_t	*ire_create_mp_v6(const in6_addr_t *, const in6_addr_t *,
241     const in6_addr_t *, const in6_addr_t *, mblk_t *, queue_t *,
242     queue_t *, ushort_t, mblk_t *, ipif_t *,
243     const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *);
244 
245 extern	ire_t	*ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *,
246     const in6_addr_t *, const in6_addr_t *, uint_t *, mblk_t *, queue_t *,
247     queue_t *, ushort_t, mblk_t *, ipif_t *,
248     const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *);
249 
250 extern	ire_t	*ire_ctable_lookup(ipaddr_t, ipaddr_t, int, ipif_t *,
251     zoneid_t, int);
252 
253 extern	ire_t	*ire_ctable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
254     int, ipif_t *, zoneid_t, int);
255 
256 extern	void	ire_delete(ire_t *);
257 extern	void	ire_delete_cache_gw(ire_t *, char *);
258 extern	void	ire_delete_cache_gw_v6(ire_t *, char *);
259 extern	void	ire_delete_cache_v6(ire_t *, char *);
260 extern	void	ire_delete_srcif(ire_t *);
261 extern	void	ire_delete_v6(ire_t *);
262 
263 extern	void	ire_expire(ire_t *, char *);
264 extern	void	ire_fastpath_flush(ire_t *, void *);
265 extern	boolean_t ire_fastpath_update(ire_t *, void *);
266 
267 extern	void	ire_flush_cache_v4(ire_t *, int);
268 extern	void	ire_flush_cache_v6(ire_t *, int);
269 
270 extern	ire_t	*ire_ftable_lookup(ipaddr_t, ipaddr_t, ipaddr_t, int, ipif_t *,
271     ire_t **, zoneid_t, uint32_t, int);
272 
273 extern	ire_t	*ire_ftable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
274     const in6_addr_t *, int, ipif_t *, ire_t **, zoneid_t, uint32_t, int);
275 
276 extern	ire_t	*ire_ihandle_lookup_onlink(ire_t *);
277 extern	ire_t	*ire_ihandle_lookup_offlink(ire_t *, ire_t *);
278 extern	ire_t	*ire_ihandle_lookup_offlink_v6(ire_t *, ire_t *);
279 
280 extern	ire_t 	*ire_lookup_local(zoneid_t);
281 extern	ire_t 	*ire_lookup_local_v6(zoneid_t);
282 
283 extern  ire_t	*ire_lookup_multi(ipaddr_t, zoneid_t);
284 extern  ire_t	*ire_lookup_multi_v6(const in6_addr_t *, zoneid_t);
285 
286 extern ire_t	*ire_mrtun_lookup(ipaddr_t, ill_t *);
287 
288 extern	void	ire_refrele(ire_t *);
289 extern	void	ire_refrele_notr(ire_t *);
290 extern	ire_t	*ire_route_lookup(ipaddr_t, ipaddr_t, ipaddr_t, int, ipif_t *,
291     ire_t **, zoneid_t, int);
292 
293 extern	ire_t	*ire_route_lookup_v6(const in6_addr_t *, const in6_addr_t *,
294     const in6_addr_t *, int, ipif_t *, ire_t **, zoneid_t, int);
295 
296 extern	ire_t	*ire_srcif_table_lookup(ipaddr_t, int, ipif_t *, ill_t *, int);
297 extern ill_t	*ire_to_ill(ire_t *);
298 
299 extern	void	ire_walk(pfv_t, char *);
300 extern	void	ire_walk_ill(uint_t, uint_t, pfv_t, char *, ill_t *);
301 extern	void	ire_walk_ill_mrtun(uint_t, uint_t, pfv_t, void *, ill_t *);
302 extern	void	ire_walk_ill_v4(uint_t, uint_t, pfv_t, char *, ill_t *);
303 extern	void	ire_walk_ill_v6(uint_t, uint_t, pfv_t, char *, ill_t *);
304 extern	void	ire_walk_v4(pfv_t, char *, zoneid_t);
305 extern	void	ire_walk_srcif_table_v4(pfv_t, char *);
306 extern	void	ire_walk_v6(pfv_t, char *, zoneid_t);
307 
308 extern boolean_t	ire_multirt_lookup(ire_t **, ire_t **, uint32_t);
309 extern boolean_t	ire_multirt_need_resolve(ipaddr_t);
310 extern boolean_t	ire_multirt_lookup_v6(ire_t **, ire_t **, uint32_t);
311 extern boolean_t	ire_multirt_need_resolve_v6(const in6_addr_t *);
312 
313 extern ire_t	*ipif_lookup_multi_ire(ipif_t *, ipaddr_t);
314 extern ire_t	*ipif_lookup_multi_ire_v6(ipif_t *, const in6_addr_t *);
315 
316 extern void	ire_fastpath_list_dispatch(ill_t *,
317     boolean_t (*)(ire_t *, void *), void *);
318 extern void	ire_fastpath_list_delete(ill_t *, ire_t *);
319 
320 extern mblk_t *ip_nexthop_route(const struct sockaddr *, char *);
321 extern mblk_t *ip_nexthop(const struct sockaddr *, const char *);
322 
323 extern ire_t	*ire_get_next_bcast_ire(ire_t *, ire_t *);
324 extern ire_t	*ire_get_next_default_ire(ire_t *, ire_t *);
325 
326 #endif /* _KERNEL */
327 
328 #ifdef	__cplusplus
329 }
330 #endif
331 
332 #endif	/* _INET_IP_IRE_H */
333