/* xref: /titanic_52/usr/src/uts/common/inet/ip_ire.h (revision 5c45adf04db8ffdcb5dd969bb5203ff9b17677db) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

27 #ifndef	_INET_IP_IRE_H
28 #define	_INET_IP_IRE_H
29 
30 #ifdef	__cplusplus
31 extern "C" {
32 #endif
33 
#define	IPV6_LL_PREFIXLEN	10	/* Bits in the link-local prefix */

#define	IP_CACHE_TABLE_SIZE	256
#define	IP_MASK_TABLE_SIZE	(IP_ABITS + 1)		/* 33 ptrs */

#define	IP6_FTABLE_HASH_SIZE	32	/* size of each hash table in ptrs */
#define	IP6_CACHE_TABLE_SIZE	256
#define	IP6_MASK_TABLE_SIZE	(IPV6_ABITS + 1)	/* 129 ptrs */

/*
 * We use the common modulo hash function.  In ip_ire_init(), we make
 * sure that the cache table size is always a power of 2.  That's why
 * we can use & instead of %.  Also note that we try hard to make sure
 * the lower bits of an address capture most info from the whole address:
 * our hash table is probably a lot smaller than 2^32 buckets, so the
 * lower bits are the most important.
 *
 * The macro XORs addr with itself shifted right by 8, 16 and 24 bits, so
 * for a 32-bit addr the low-order byte of the result mixes all four bytes.
 * addr is expanded four times; do not pass an expression with side effects.
 */
#define	IRE_ADDR_HASH(addr, table_size) \
	(((addr) ^ ((addr) >> 8) ^ ((addr) >> 16) ^ ((addr) >> 24)) &	\
	((table_size) - 1))

/*
 * To make a byte-order neutral hash for IPv6, just take all the
 * bytes in the bottom 32 bits into account.  Only (addr).s6_addr32[3]
 * is fed to IRE_ADDR_HASH(); the other three 32-bit words of the address
 * do not contribute.
 */
#define	IRE_ADDR_HASH_V6(addr, table_size)				\
	IRE_ADDR_HASH((addr).s6_addr32[3], table_size)

/*
 * This assumes that the ftable size is a power of 2.
 * We include some high-order bytes to avoid all IRE_LOCALs in the same
 * bucket for performance reasons.
 *
 * The hash is the XOR of (addr & mask) for bytes 0, 1, 6, 7, 8, 9, 10, 13,
 * 14 and 15 of the address, reduced with & (table_size - 1).  Bytes 2-5,
 * 11 and 12 are not used.  addr and mask are expanded many times; do not
 * pass expressions with side effects.
 */
#define	IRE_ADDR_MASK_HASH_V6(addr, mask, table_size)			\
	((((addr).s6_addr8[0] & (mask).s6_addr8[0]) ^			\
	((addr).s6_addr8[1] & (mask).s6_addr8[1]) ^			\
	((addr).s6_addr8[6] & (mask).s6_addr8[6]) ^			\
	((addr).s6_addr8[7] & (mask).s6_addr8[7]) ^			\
	((addr).s6_addr8[8] & (mask).s6_addr8[8]) ^			\
	((addr).s6_addr8[9] & (mask).s6_addr8[9]) ^			\
	((addr).s6_addr8[10] & (mask).s6_addr8[10]) ^			\
	((addr).s6_addr8[13] & (mask).s6_addr8[13]) ^			\
	((addr).s6_addr8[14] & (mask).s6_addr8[14]) ^			\
	((addr).s6_addr8[15] & (mask).s6_addr8[15])) & ((table_size) - 1))

/*
 * Evaluates to the bits of ire_type that fall within
 * IRE_HOST | IRE_PREFIX | IRE_DEFAULT | IRE_IF_ALL | IRE_BROADCAST,
 * i.e. non-zero exactly when ire_type has at least one of those bits set.
 */
#define	IRE_HIDDEN_TYPE(ire_type) ((ire_type) &			\
	(IRE_HOST | IRE_PREFIX | IRE_DEFAULT | IRE_IF_ALL | IRE_BROADCAST))

/*
 * match parameter definitions for IRE lookup routines.
 * Each flag other than MATCH_IRE_DSTONLY is a distinct bit, so flags can
 * be OR-ed together; MATCH_IRE_DSTONLY is the value with no bits set.
 */
#define	MATCH_IRE_DSTONLY	0x0000	/* Match just the address */
#define	MATCH_IRE_TYPE		0x0001	/* Match IRE type */
#define	MATCH_IRE_MASK		0x0002	/* Match IRE mask */
#define	MATCH_IRE_SHORTERMASK	0x0004	/* A mask shorter than the argument */
#define	MATCH_IRE_GW		0x0008	/* Match IRE gateway */
#define	MATCH_IRE_ILL		0x0010	/* Match IRE on the ill */
#define	MATCH_IRE_ZONEONLY	0x0020	/* Match IREs in specified zone, ie */
					/* don't match IRE_LOCALs from other */
					/* zones or shared IREs */
#define	MATCH_IRE_SECATTR	0x0040	/* Match gateway security attributes */
#define	MATCH_IRE_TESTHIDDEN	0x0080	/* Match ire_testhidden IREs */
#define	MATCH_IRE_SRC_ILL	0x0100	/* ire_ill uses a src address on ill */

#define	MAX_IRE_RECURSION	4	/* Max IREs in ire_route_recursive */


/*
 * We use atomics so that we get an accurate accounting on the ires.
 * Otherwise we can't determine leaks correctly.
 *
 * Adds 1 to member x of ire_stats through atomic_add_64().
 */
#define	BUMP_IRE_STATS(ire_stats, x) atomic_add_64(&(ire_stats).x, 1)

107 #ifdef _KERNEL
108 struct ts_label_s;
109 struct nce_s;
110 /*
111  * structure for passing args between ire_ftable_lookup and ire_find_best_route
112  */
113 typedef struct ire_ftable_args_s {
114 	in6_addr_t		ift_addr_v6;
115 	in6_addr_t		ift_mask_v6;
116 	in6_addr_t		ift_gateway_v6;
117 #define	ift_addr		V4_PART_OF_V6(ift_addr_v6)
118 #define	ift_mask		V4_PART_OF_V6(ift_mask_v6)
119 #define	ift_gateway		V4_PART_OF_V6(ift_gateway_v6)
120 	int			ift_type;
121 	const ill_t		*ift_ill;
122 	zoneid_t		ift_zoneid;
123 	const ts_label_t	*ift_tsl;
124 	int			ift_flags;
125 	ire_t			*ift_best_ire;
126 } ire_ftable_args_t;
127 
128 extern	ipaddr_t	ip_plen_to_mask(uint_t);
129 extern	in6_addr_t	*ip_plen_to_mask_v6(uint_t, in6_addr_t *);
130 
131 extern	int	ip_ire_advise(queue_t *, mblk_t *, cred_t *);
132 extern	int	ip_ire_delete(queue_t *, mblk_t *, cred_t *);
133 extern	void	ip_ire_reclaim(void *);
134 
135 extern	int	ip_mask_to_plen(ipaddr_t);
136 extern	int	ip_mask_to_plen_v6(const in6_addr_t *);
137 
138 extern	ire_t	*ire_add(ire_t *);
139 extern	ire_t	*ire_add_v6(ire_t *);
140 extern	int	ire_atomic_start(irb_t *irb_ptr, ire_t *ire);
141 extern	void	ire_atomic_end(irb_t *irb_ptr, ire_t *ire);
142 
143 extern	ire_t	*ire_create(uchar_t *, uchar_t *, uchar_t *,
144     ushort_t, ill_t *, zoneid_t, uint_t, tsol_gc_t *, ip_stack_t *);
145 
146 extern	ire_t	**ire_create_bcast(ill_t *, ipaddr_t, zoneid_t, ire_t **);
147 extern	ire_t	*ire_create_if_clone(ire_t *, const in6_addr_t *, uint_t *);
148 extern	ire_t	*ire_lookup_bcast(ill_t *, ipaddr_t, zoneid_t);
149 extern	int	ire_init_v4(ire_t *, uchar_t *, uchar_t *, uchar_t *,
150     ushort_t, ill_t *, zoneid_t, uint_t, tsol_gc_t *, ip_stack_t *);
151 extern	int	ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *,
152     const in6_addr_t *, ushort_t, ill_t *, zoneid_t, uint_t, tsol_gc_t *,
153     ip_stack_t *);
154 
155 extern	int	ire_init_common(ire_t *, ushort_t, ill_t *, zoneid_t, uint_t,
156     uchar_t, tsol_gc_t *, ip_stack_t *);
157 
158 extern	ire_t	*ire_create_v6(const in6_addr_t *, const in6_addr_t *,
159     const in6_addr_t *, ushort_t, ill_t *, zoneid_t, uint_t,
160     tsol_gc_t *, ip_stack_t *);
161 
162 extern	void	ire_delete(ire_t *);
163 extern	void	ire_delete_v6(ire_t *);
164 
165 /*
166  * ire_pref used to make sure we don't set up routing loops in the ire_dep
167  * chain.
168  */
169 extern	int	ire_pref(ire_t *);
170 extern	boolean_t ire_dep_build(ire_t *[], uint_t [], uint_t);
171 extern	void	ire_dep_delete_if_clone(ire_t *);
172 extern	void	ire_dep_incr_generation(ire_t *);
173 extern	void	ire_dep_remove(ire_t *);
174 extern	void	ire_dep_unbuild(ire_t *[], uint_t);
175 extern	uint_t	ire_dep_validate_generations(ire_t *);
176 extern	void	ire_dep_invalidate_generations(ire_t *);
177 extern	boolean_t ire_determine_nce_capable(ire_t *);
178 
179 extern	void	ire_flush_cache_v4(ire_t *, int);
180 extern	void	ire_flush_cache_v6(ire_t *, int);
181 
182 extern	ire_t	*ire_ftable_lookup_v4(ipaddr_t, ipaddr_t, ipaddr_t, int,
183     const ill_t *, zoneid_t, const struct ts_label_s *, int, uint32_t,
184     ip_stack_t *, uint_t *);
185 extern	ire_t	*ire_ftable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
186     const in6_addr_t *, int, const ill_t *, zoneid_t,
187     const struct ts_label_s *, int, uint32_t, ip_stack_t *, uint_t *);
188 
189 extern	ire_t	*ire_ftable_lookup_simple_v4(ipaddr_t, uint32_t, ip_stack_t *,
190     uint_t *);
191 extern	ire_t	*ire_ftable_lookup_simple_v6(const in6_addr_t *, uint32_t,
192     ip_stack_t *, uint_t *);
193 
194 extern boolean_t ire_gateway_ok_zone_v4(ipaddr_t, zoneid_t, ill_t *,
195     const ts_label_t *, ip_stack_t *, boolean_t);
196 extern boolean_t ire_gateway_ok_zone_v6(const in6_addr_t *, zoneid_t, ill_t *,
197     const ts_label_t *, ip_stack_t *, boolean_t);
198 
199 extern ire_t	*ire_alt_local(ire_t *, zoneid_t, const ts_label_t *,
200     const ill_t *, uint_t *);
201 
202 extern  ill_t	*ire_lookup_multi_ill_v4(ipaddr_t, zoneid_t, ip_stack_t *,
203     boolean_t *, ipaddr_t *);
204 extern  ill_t	*ire_lookup_multi_ill_v6(const in6_addr_t *, zoneid_t,
205     ip_stack_t *, boolean_t *, in6_addr_t *);
206 
207 extern	ire_t	*ire_nexthop(ire_t *);
208 extern	ill_t	*ire_nexthop_ill(ire_t *);
209 extern	ill_t	*ire_nce_ill(ire_t *);
210 
211 extern	ire_t	*ire_reject(ip_stack_t *, boolean_t);
212 extern	ire_t	*ire_blackhole(ip_stack_t *, boolean_t);
213 extern	ire_t	*ire_multicast(ill_t *);
214 
215 /* The different ire_recvfn functions */
216 extern void	ire_recv_forward_v4(ire_t *, mblk_t *, void *,
217     ip_recv_attr_t *);
218 extern void	ire_recv_noroute_v4(ire_t *, mblk_t *, void *,
219     ip_recv_attr_t *);
220 extern void	ire_recv_broadcast_v4(ire_t *, mblk_t *, void *,
221     ip_recv_attr_t *);
222 extern void	ire_recv_multicast_v4(ire_t *, mblk_t *, void *,
223     ip_recv_attr_t *);
224 extern void	ire_recv_multirt_v4(ire_t *, mblk_t *, void *,
225     ip_recv_attr_t *);
226 extern void	ire_recv_loopback_v4(ire_t *, mblk_t *, void *,
227     ip_recv_attr_t *);
228 extern void	ire_recv_local_v4(ire_t *, mblk_t *, void *,
229     ip_recv_attr_t *);
230 extern void	ire_recv_noaccept_v4(ire_t *, mblk_t *, void *,
231     ip_recv_attr_t *);
232 
233 extern void	ire_recv_forward_v6(ire_t *, mblk_t *, void *,
234     ip_recv_attr_t *);
235 extern void	ire_recv_noroute_v6(ire_t *, mblk_t *, void *,
236     ip_recv_attr_t *);
237 extern void	ire_recv_multicast_v6(ire_t *, mblk_t *, void *,
238     ip_recv_attr_t *);
239 extern void	ire_recv_multirt_v6(ire_t *, mblk_t *, void *,
240     ip_recv_attr_t *);
241 extern void	ire_recv_loopback_v6(ire_t *, mblk_t *, void *,
242     ip_recv_attr_t *);
243 extern void	ire_recv_local_v6(ire_t *, mblk_t *, void *, ip_recv_attr_t *);
244 extern void	ire_recv_noaccept_v6(ire_t *, mblk_t *, void *,
245     ip_recv_attr_t *);
246 
247 extern	void	irb_refhold(irb_t *);
248 extern	void	irb_refhold_locked(irb_t *);
249 extern	void	irb_refrele(irb_t *);
250 extern  void	irb_increment_generation(irb_t *);
251 
252 extern	void	ire_refhold(ire_t *);
253 extern	void	ire_refhold_notr(ire_t *);
254 extern	void	ire_refhold_locked(ire_t *);
255 extern	void	ire_refrele(ire_t *);
256 extern	void	ire_refrele_notr(ire_t *);
257 extern	void	ire_make_condemned(ire_t *);
258 extern	boolean_t ire_no_good(ire_t *);
259 extern	nce_t	*ire_handle_condemned_nce(nce_t *, ire_t *, ipha_t *, ip6_t *,
260     boolean_t);
261 
262 extern ire_t   	*ire_round_robin(irb_t *, ire_ftable_args_t *, uint_t,
263     ire_t *, ip_stack_t *);
264 
265 extern ire_t	*ire_route_recursive_v4(ipaddr_t, uint_t, const ill_t *,
266     zoneid_t, const ts_label_t *, uint_t, uint_t, uint32_t, ip_stack_t *,
267     ipaddr_t *, tsol_ire_gw_secattr_t **, uint_t *);
268 extern ire_t	*ire_route_recursive_v6(const in6_addr_t *, uint_t,
269     const ill_t *, zoneid_t, const ts_label_t *, uint_t, uint_t, uint32_t,
270     ip_stack_t *, in6_addr_t *, tsol_ire_gw_secattr_t **, uint_t *);
271 extern ire_t	*ire_route_recursive_dstonly_v4(ipaddr_t, uint_t,
272     uint32_t, ip_stack_t *);
273 extern ire_t	*ire_route_recursive_dstonly_v6(const in6_addr_t *, uint_t,
274     uint32_t, ip_stack_t *);
275 extern ire_t	*ire_route_recursive_impl_v4(ire_t *ire, ipaddr_t, uint_t,
276     const ill_t *, zoneid_t, const ts_label_t *, uint_t, uint_t, uint32_t,
277     ip_stack_t *, ipaddr_t *, tsol_ire_gw_secattr_t **, uint_t *);
278 extern ire_t	*ire_route_recursive_impl_v6(ire_t *ire, const in6_addr_t *,
279     uint_t, const ill_t *, zoneid_t, const ts_label_t *, uint_t, uint_t,
280     uint32_t, ip_stack_t *, in6_addr_t *, tsol_ire_gw_secattr_t **, uint_t *);
281 
282 /* The different ire_sendfn functions */
283 extern int	ire_send_local_v4(ire_t *, mblk_t *, void *,
284     ip_xmit_attr_t *, uint32_t *);
285 extern int	ire_send_multirt_v4(ire_t *, mblk_t *, void *,
286     ip_xmit_attr_t *, uint32_t *);
287 extern int	ire_send_noroute_v4(ire_t *, mblk_t *, void *,
288     ip_xmit_attr_t *, uint32_t *);
289 extern int	ire_send_multicast_v4(ire_t *, mblk_t *, void *,
290     ip_xmit_attr_t *, uint32_t *);
291 extern int	ire_send_broadcast_v4(ire_t *, mblk_t *, void *,
292     ip_xmit_attr_t *, uint32_t *);
293 extern int	ire_send_wire_v4(ire_t *, mblk_t *, void *,
294     ip_xmit_attr_t *, uint32_t *);
295 extern int	ire_send_local_v6(ire_t *, mblk_t *, void *,
296     ip_xmit_attr_t *, uint32_t *);
297 extern int	ire_send_multirt_v6(ire_t *, mblk_t *, void *,
298     ip_xmit_attr_t *, uint32_t *);
299 extern int	ire_send_noroute_v6(ire_t *, mblk_t *, void *,
300     ip_xmit_attr_t *, uint32_t *);
301 extern int	ire_send_multicast_v6(ire_t *, mblk_t *, void *,
302     ip_xmit_attr_t *, uint32_t *);
303 extern int	ire_send_wire_v6(ire_t *, mblk_t *, void *,
304     ip_xmit_attr_t *, uint32_t *);
305 
306 extern nce_t	*ire_to_nce_pkt(ire_t *, mblk_t *);
307 extern nce_t	*ire_to_nce(ire_t *, ipaddr_t, const in6_addr_t *);
308 
309 /* Different ire_postfragfn functions */
310 extern int	ip_xmit(mblk_t *, struct nce_s *,
311     iaflags_t, uint_t, uint32_t, zoneid_t, zoneid_t, uintptr_t *);
312 extern int	ip_postfrag_loopcheck(mblk_t *, struct nce_s *,
313     iaflags_t, uint_t, uint32_t, zoneid_t, zoneid_t, uintptr_t *);
314 extern int	ip_postfrag_multirt_v4(mblk_t *, struct nce_s *,
315     iaflags_t, uint_t, uint32_t, zoneid_t, zoneid_t, uintptr_t *);
316 extern int	ip_postfrag_multirt_v6(mblk_t *, struct nce_s *,
317     iaflags_t, uint_t, uint32_t, zoneid_t, zoneid_t, uintptr_t *);
318 
319 extern void	ip_postfrag_loopback(mblk_t *, struct nce_s *,
320     iaflags_t, uint_t, zoneid_t);
321 extern int	ire_revalidate_nce(ire_t *);
322 
323 extern ire_t	*ip_select_route_pkt(mblk_t *, ip_xmit_attr_t *,
324     uint_t *, int *, boolean_t *);
325 extern ire_t	*ip_select_route(const in6_addr_t *, const in6_addr_t,
326     ip_xmit_attr_t *, uint_t *, in6_addr_t *, int *, boolean_t *);
327 extern ire_t	*ip_select_route_v4(ipaddr_t, ipaddr_t, ip_xmit_attr_t *,
328     uint_t *, ipaddr_t *, int *, boolean_t *);
329 extern ire_t	*ip_select_route_v6(const in6_addr_t *, const in6_addr_t,
330     ip_xmit_attr_t *, uint_t *, in6_addr_t *, int *, boolean_t *);
331 
332 extern	void	ire_walk(pfv_t, void *, ip_stack_t *);
333 extern	void	ire_walk_ill(uint_t, uint_t, pfv_t, void *, ill_t *);
334 extern	void	ire_walk_v4(pfv_t, void *, zoneid_t, ip_stack_t *);
335 extern  void	ire_walk_ill_tables(uint_t match_flags, uint_t ire_type,
336     pfv_t func, void *arg, size_t ftbl_sz, size_t htbl_sz,
337     irb_t **ipftbl, ill_t *ill,
338     zoneid_t zoneid, ip_stack_t *);
339 extern	void	ire_walk_v6(pfv_t, void *, zoneid_t, ip_stack_t *);
340 
341 extern boolean_t	ire_match_args(ire_t *, ipaddr_t, ipaddr_t, ipaddr_t,
342     int, const ill_t *, zoneid_t, const struct ts_label_s *, int);
343 extern boolean_t	ire_match_args_v6(ire_t *, const in6_addr_t *,
344     const in6_addr_t *, const in6_addr_t *, int, const ill_t *, zoneid_t,
345     const ts_label_t *, int);
346 
347 extern  struct nce_s	*arp_nce_init(ill_t *, in_addr_t, int);
348 extern  boolean_t	ire_walk_ill_match(uint_t, uint_t, ire_t *, ill_t *,
349     zoneid_t, ip_stack_t *);
350 extern  void ire_increment_generation(ire_t *);
351 extern  void ire_increment_multicast_generation(ip_stack_t *, boolean_t);
352 extern	void ire_rebind(ire_t *);
353 
354 #endif /* _KERNEL */
355 
356 #ifdef	__cplusplus
357 }
358 #endif
359 
360 #endif	/* _INET_IP_IRE_H */
361