xref: /linux/include/net/route.h (revision 2bd87951de659df3381ce083342aaf5b1ea24689)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * INET		An implementation of the TCP/IP protocol suite for the LINUX
4  *		operating system.  INET  is implemented using the  BSD Socket
5  *		interface as the means of communication with the user level.
6  *
7  *		Definitions for the IP router.
8  *
9  * Version:	@(#)route.h	1.0.4	05/27/93
10  *
11  * Authors:	Ross Biro
12  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
13  * Fixes:
14  *		Alan Cox	:	Reformatted. Added ip_rt_local()
15  *		Alan Cox	:	Support for TCP parameters.
16  *		Alexey Kuznetsov:	Major changes for new routing code.
17  *		Mike McLagan    :	Routing by source
18  *		Robert Olsson   :	Added rt_cache statistics
19  */
20 #ifndef _ROUTE_H
21 #define _ROUTE_H
22 
23 #include <net/dst.h>
24 #include <net/inetpeer.h>
25 #include <net/flow.h>
26 #include <net/inet_sock.h>
27 #include <net/ip_fib.h>
28 #include <net/arp.h>
29 #include <net/ndisc.h>
30 #include <linux/in_route.h>
31 #include <linux/rtnetlink.h>
32 #include <linux/rcupdate.h>
33 #include <linux/route.h>
34 #include <linux/ip.h>
35 #include <linux/cache.h>
36 #include <linux/security.h>
37 
38 static inline __u8 ip_sock_rt_scope(const struct sock *sk)
39 {
40 	if (sock_flag(sk, SOCK_LOCALROUTE))
41 		return RT_SCOPE_LINK;
42 
43 	return RT_SCOPE_UNIVERSE;
44 }
45 
46 static inline __u8 ip_sock_rt_tos(const struct sock *sk)
47 {
48 	return RT_TOS(READ_ONCE(inet_sk(sk)->tos));
49 }
50 
51 struct ip_tunnel_info;
52 struct fib_nh;
53 struct fib_info;
54 struct uncached_list;
55 struct rtable {
56 	struct dst_entry	dst;
57 
58 	int			rt_genid;
59 	unsigned int		rt_flags;
60 	__u16			rt_type;
61 	__u8			rt_is_input;
62 	__u8			rt_uses_gateway;
63 
64 	int			rt_iif;
65 
66 	u8			rt_gw_family;
67 	/* Info on neighbour */
68 	union {
69 		__be32		rt_gw4;
70 		struct in6_addr	rt_gw6;
71 	};
72 
73 	/* Miscellaneous cached information */
74 	u32			rt_mtu_locked:1,
75 				rt_pmtu:31;
76 };
77 
78 static inline bool rt_is_input_route(const struct rtable *rt)
79 {
80 	return rt->rt_is_input != 0;
81 }
82 
83 static inline bool rt_is_output_route(const struct rtable *rt)
84 {
85 	return rt->rt_is_input == 0;
86 }
87 
88 static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr)
89 {
90 	if (rt->rt_gw_family == AF_INET)
91 		return rt->rt_gw4;
92 	return daddr;
93 }
94 
95 struct ip_rt_acct {
96 	__u32 	o_bytes;
97 	__u32 	o_packets;
98 	__u32 	i_bytes;
99 	__u32 	i_packets;
100 };
101 
/*
 * Routing statistics counters: six for the input side (in_*) and two
 * for the output side (out_*).
 */
struct rt_cache_stat {
	unsigned int	in_slow_tot;
	unsigned int	in_slow_mc;
	unsigned int	in_no_route;
	unsigned int	in_brd;
	unsigned int	in_martian_dst;
	unsigned int	in_martian_src;
	unsigned int	out_slow_tot;
	unsigned int	out_slow_mc;
};
112 
113 extern struct ip_rt_acct __percpu *ip_rt_acct;
114 
115 struct in_device;
116 
117 int ip_rt_init(void);
118 void rt_cache_flush(struct net *net);
119 void rt_flush_dev(struct net_device *dev);
120 struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp,
121 					const struct sk_buff *skb);
122 struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp,
123 					    struct fib_result *res,
124 					    const struct sk_buff *skb);
125 
126 static inline struct rtable *__ip_route_output_key(struct net *net,
127 						   struct flowi4 *flp)
128 {
129 	return ip_route_output_key_hash(net, flp, NULL);
130 }
131 
/*
 * Output route lookup on behalf of an optional socket.  The inline
 * callers in this header pass a NULL @sk when no socket is involved.
 */
struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp,
				    const struct sock *sk);

/* Takes an existing dst (@dst_orig) and returns a dst_entry; defined elsewhere. */
struct dst_entry *ipv4_blackhole_route(struct net *net,
				       struct dst_entry *dst_orig);
136 
137 static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4 *flp)
138 {
139 	return ip_route_output_flow(net, flp, NULL);
140 }
141 
142 /* Simplistic IPv4 route lookup function.
143  * This is only suitable for some particular use cases: since the flowi4
144  * structure is only partially set, it may bypass some fib-rules.
145  */
146 static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
147 					     __be32 saddr, u8 tos, int oif,
148 					     __u8 scope)
149 {
150 	struct flowi4 fl4 = {
151 		.flowi4_oif = oif,
152 		.flowi4_tos = tos,
153 		.flowi4_scope = scope,
154 		.daddr = daddr,
155 		.saddr = saddr,
156 	};
157 
158 	return ip_route_output_key(net, &fl4);
159 }
160 
161 static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi4 *fl4,
162 						   const struct sock *sk,
163 						   __be32 daddr, __be32 saddr,
164 						   __be16 dport, __be16 sport,
165 						   __u8 proto, __u8 tos, int oif)
166 {
167 	flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
168 			   sk ? ip_sock_rt_scope(sk) : RT_SCOPE_UNIVERSE,
169 			   proto, sk ? inet_sk_flowi_flags(sk) : 0,
170 			   daddr, saddr, dport, sport, sock_net_uid(net, sk));
171 	if (sk)
172 		security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
173 	return ip_route_output_flow(net, fl4, sk);
174 }
175 
176 static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 *fl4,
177 						 __be32 daddr, __be32 saddr,
178 						 __be32 gre_key, __u8 tos, int oif)
179 {
180 	memset(fl4, 0, sizeof(*fl4));
181 	fl4->flowi4_oif = oif;
182 	fl4->daddr = daddr;
183 	fl4->saddr = saddr;
184 	fl4->flowi4_tos = tos;
185 	fl4->flowi4_proto = IPPROTO_GRE;
186 	fl4->fl4_gre_key = gre_key;
187 	return ip_route_output_key(net, fl4);
188 }
189 int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
190 			  u8 tos, struct net_device *dev,
191 			  struct in_device *in_dev, u32 *itag);
192 int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
193 			 u8 tos, struct net_device *devin);
194 int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
195 		      u8 tos, struct net_device *devin,
196 		      const struct sk_buff *hint);
197 
198 static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
199 				 u8 tos, struct net_device *devin)
200 {
201 	int err;
202 
203 	rcu_read_lock();
204 	err = ip_route_input_noref(skb, dst, src, tos, devin);
205 	if (!err) {
206 		skb_dst_force(skb);
207 		if (!skb_dst(skb))
208 			err = -EINVAL;
209 	}
210 	rcu_read_unlock();
211 
212 	return err;
213 }
214 
215 void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif,
216 		      u8 protocol);
217 void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu);
218 void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol);
219 void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk);
220 void ip_rt_send_redirect(struct sk_buff *skb);
221 
222 unsigned int inet_addr_type(struct net *net, __be32 addr);
223 unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id);
224 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
225 				__be32 addr);
226 unsigned int inet_addr_type_dev_table(struct net *net,
227 				      const struct net_device *dev,
228 				      __be32 addr);
229 void ip_rt_multicast_event(struct in_device *);
230 int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt);
231 void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
232 struct rtable *rt_dst_alloc(struct net_device *dev,
233 			    unsigned int flags, u16 type, bool noxfrm);
234 struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);
235 
236 struct in_ifaddr;
237 void fib_add_ifaddr(struct in_ifaddr *);
238 void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
239 void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric);
240 
241 void rt_add_uncached_list(struct rtable *rt);
242 void rt_del_uncached_list(struct rtable *rt);
243 
244 int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
245 		       u32 table_id, struct fib_info *fi,
246 		       int *fa_index, int fa_start, unsigned int flags);
247 
248 static inline void ip_rt_put(struct rtable *rt)
249 {
250 	/* dst_release() accepts a NULL parameter.
251 	 * We rely on dst being first structure in struct rtable
252 	 */
253 	BUILD_BUG_ON(offsetof(struct rtable, dst) != 0);
254 	dst_release(&rt->dst);
255 }
256 
257 #define IPTOS_RT_MASK	(IPTOS_TOS_MASK & ~3)
258 
259 extern const __u8 ip_tos2prio[16];
260 
261 static inline char rt_tos2priority(u8 tos)
262 {
263 	return ip_tos2prio[IPTOS_TOS(tos)>>1];
264 }
265 
266 /* ip_route_connect() and ip_route_newports() work in tandem whilst
267  * binding a socket for a new outgoing connection.
268  *
269  * In order to use IPSEC properly, we must, in the end, have a
270  * route that was looked up using all available keys including source
271  * and destination ports.
272  *
273  * However, if a source port needs to be allocated (the user specified
274  * a wildcard source port) we need to obtain addressing information
275  * in order to perform that allocation.
276  *
277  * So ip_route_connect() looks up a route using wildcarded source and
278  * destination ports in the key, simply so that we can get a pair of
279  * addresses to use for port allocation.
280  *
281  * Later, once the ports are allocated, ip_route_newports() will make
282  * another route lookup if needed to make sure we catch any IPSEC
283  * rules keyed on the port information.
284  *
285  * The callers allocate the flow key on their stack, and must pass in
286  * the same flowi4 object to both the ip_route_connect() and the
287  * ip_route_newports() calls.
288  */
289 
290 static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
291 					 __be32 src, int oif, u8 protocol,
292 					 __be16 sport, __be16 dport,
293 					 const struct sock *sk)
294 {
295 	__u8 flow_flags = 0;
296 
297 	if (inet_test_bit(TRANSPARENT, sk))
298 		flow_flags |= FLOWI_FLAG_ANYSRC;
299 
300 	flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
301 			   ip_sock_rt_scope(sk), protocol, flow_flags, dst,
302 			   src, dport, sport, sk->sk_uid);
303 }
304 
305 static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
306 					      __be32 src, int oif, u8 protocol,
307 					      __be16 sport, __be16 dport,
308 					      const struct sock *sk)
309 {
310 	struct net *net = sock_net(sk);
311 	struct rtable *rt;
312 
313 	ip_route_connect_init(fl4, dst, src, oif, protocol, sport, dport, sk);
314 
315 	if (!dst || !src) {
316 		rt = __ip_route_output_key(net, fl4);
317 		if (IS_ERR(rt))
318 			return rt;
319 		ip_rt_put(rt);
320 		flowi4_update_output(fl4, oif, fl4->daddr, fl4->saddr);
321 	}
322 	security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
323 	return ip_route_output_flow(net, fl4, sk);
324 }
325 
326 static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt,
327 					       __be16 orig_sport, __be16 orig_dport,
328 					       __be16 sport, __be16 dport,
329 					       const struct sock *sk)
330 {
331 	if (sport != orig_sport || dport != orig_dport) {
332 		fl4->fl4_dport = dport;
333 		fl4->fl4_sport = sport;
334 		ip_rt_put(rt);
335 		flowi4_update_output(fl4, sk->sk_bound_dev_if, fl4->daddr,
336 				     fl4->saddr);
337 		security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
338 		return ip_route_output_flow(sock_net(sk), fl4, sk);
339 	}
340 	return rt;
341 }
342 
343 static inline int inet_iif(const struct sk_buff *skb)
344 {
345 	struct rtable *rt = skb_rtable(skb);
346 
347 	if (rt && rt->rt_iif)
348 		return rt->rt_iif;
349 
350 	return skb->skb_iif;
351 }
352 
353 static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
354 {
355 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
356 	struct net *net = dev_net(dst->dev);
357 
358 	if (hoplimit == 0)
359 		hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
360 	return hoplimit;
361 }
362 
363 static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
364 					     __be32 daddr)
365 {
366 	struct neighbour *neigh;
367 
368 	neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr);
369 	if (unlikely(!neigh))
370 		neigh = __neigh_create(&arp_tbl, &daddr, dev, false);
371 
372 	return neigh;
373 }
374 
375 static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
376 						struct sk_buff *skb,
377 						bool *is_v6gw)
378 {
379 	struct net_device *dev = rt->dst.dev;
380 	struct neighbour *neigh;
381 
382 	if (likely(rt->rt_gw_family == AF_INET)) {
383 		neigh = ip_neigh_gw4(dev, rt->rt_gw4);
384 	} else if (rt->rt_gw_family == AF_INET6) {
385 		neigh = ip_neigh_gw6(dev, &rt->rt_gw6);
386 		*is_v6gw = true;
387 	} else {
388 		neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
389 	}
390 	return neigh;
391 }
392 
393 #endif	/* _ROUTE_H */
394