xref: /linux/include/net/dst.h (revision 37a93dd5c49b5fda807fd204edf2547c3493319c)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * net/dst.h	Protocol independent destination cache definitions.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#ifndef _NET_DST_H
#define _NET_DST_H

#include <net/dst_ops.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
#include <linux/bug.h>
#include <linux/jiffies.h>
#include <linux/refcount.h>
#include <linux/rcuref.h>
#include <net/neighbour.h>
#include <asm/processor.h>
#include <linux/indirect_call_wrapper.h>

struct sk_buff;

struct dst_entry {
	union {
		struct net_device       *dev;
		struct net_device __rcu *dev_rcu;
	};
	struct  dst_ops	        *ops;
	unsigned long		_metrics;
	unsigned long           expires;
#ifdef CONFIG_XFRM
	struct xfrm_state	*xfrm;
#else
	void			*__pad1;
#endif
	int			(*input)(struct sk_buff *);
	int			(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);

	unsigned short		flags;
#define DST_NOXFRM		0x0002
#define DST_NOPOLICY		0x0004
#define DST_NOCOUNT		0x0008
#define DST_FAKE_RTABLE		0x0010
#define DST_XFRM_TUNNEL		0x0020
#define DST_XFRM_QUEUE		0x0040
#define DST_METADATA		0x0080

	/* A non-zero value of dst->obsolete forces by-hand validation
	 * of the route entry.  Positive values are set by the generic
	 * dst layer to indicate that the entry has been forcefully
	 * destroyed.
	 *
	 * Negative values are used by the implementation layer code to
	 * force invocation of the dst_ops->check() method.
	 */
	short			obsolete;
#define DST_OBSOLETE_NONE	0
#define DST_OBSOLETE_DEAD	2
#define DST_OBSOLETE_FORCE_CHK	-1
#define DST_OBSOLETE_KILL	-2
	unsigned short		header_len;	/* more space at head required */
	unsigned short		trailer_len;	/* space to reserve at tail */

	/*
	 * __rcuref wants to be on a different cache line from
	 * input/output/ops or performance tanks badly
	 */
#ifdef CONFIG_64BIT
	rcuref_t		__rcuref;	/* 64-bit offset 64 */
#endif
	int			__use;
	unsigned long		lastuse;
	struct rcu_head		rcu_head;
	short			error;
	short			__pad;
	__u32			tclassid;
#ifndef CONFIG_64BIT
	struct lwtunnel_state   *lwtstate;
	rcuref_t		__rcuref;	/* 32-bit offset 64 */
#endif
	netdevice_tracker	dev_tracker;
	/*
	 * Used by rtable and rt6_info. Moves lwtstate into the next cache
	 * line on 64bit so that lwtstate does not cause false sharing with
	 * the contended __rcuref. This also keeps the frequently accessed
	 * members of rtable and rt6_info out of the __rcuref cache line.
	 */
	struct list_head	rt_uncached;
	struct uncached_list	*rt_uncached_list;
#ifdef CONFIG_64BIT
	struct lwtunnel_state   *lwtstate;
#endif
};

struct dst_metrics {
	u32		metrics[RTAX_MAX];
	refcount_t	refcnt;
} __aligned(4);		/* Low pointer bits contain DST_METRICS_FLAGS */
extern const struct dst_metrics dst_default_metrics;

u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);

#define DST_METRICS_READ_ONLY		0x1UL
#define DST_METRICS_REFCOUNTED		0x2UL
#define DST_METRICS_FLAGS		0x3UL
#define __DST_METRICS_PTR(Y)	\
	((u32 *)((Y) & ~DST_METRICS_FLAGS))
#define DST_METRICS_PTR(X)	__DST_METRICS_PTR((X)->_metrics)
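
/*
 * Sketch of the packing scheme: struct dst_metrics is at least 4-byte
 * aligned, so the two low bits of _metrics are free to carry
 * DST_METRICS_FLAGS.  Decoding then looks like this (illustrative only,
 * not a helper defined in this file):
 *
 *	unsigned long v = dst->_metrics;
 *	u32 *metrics = (u32 *)(v & ~DST_METRICS_FLAGS);
 *	bool ro = v & DST_METRICS_READ_ONLY;
 */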

static inline bool dst_metrics_read_only(const struct dst_entry *dst)
{
	return dst->_metrics & DST_METRICS_READ_ONLY;
}

void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old);

static inline void dst_destroy_metrics_generic(struct dst_entry *dst)
{
	unsigned long val = dst->_metrics;
	if (!(val & DST_METRICS_READ_ONLY))
		__dst_destroy_metrics_generic(dst, val);
}

static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst)
{
	unsigned long p = dst->_metrics;

	BUG_ON(!p);

	if (p & DST_METRICS_READ_ONLY)
		return dst->ops->cow_metrics(dst, p);
	return __DST_METRICS_PTR(p);
}

/* This may only be invoked before the entry has reached global
 * visibility.
 */
static inline void dst_init_metrics(struct dst_entry *dst,
				    const u32 *src_metrics,
				    bool read_only)
{
	dst->_metrics = ((unsigned long) src_metrics) |
		(read_only ? DST_METRICS_READ_ONLY : 0);
}
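
/*
 * Typical initialization sketch: a freshly allocated entry points at the
 * shared read-only defaults, as the generic setup path does:
 *
 *	dst_init_metrics(dst, dst_default_metrics.metrics, true);
 *
 * A later dst_metrics_write_ptr() call then goes through ->cow_metrics()
 * to obtain a private writable copy.
 */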

static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src)
{
	u32 *dst_metrics = dst_metrics_write_ptr(dest);

	if (dst_metrics) {
		u32 *src_metrics = DST_METRICS_PTR(src);

		memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32));
	}
}

static inline u32 *dst_metrics_ptr(struct dst_entry *dst)
{
	return DST_METRICS_PTR(dst);
}

static inline u32
dst_metric_raw(const struct dst_entry *dst, const int metric)
{
	u32 *p = DST_METRICS_PTR(dst);

	return p[metric-1];
}

static inline u32
dst_metric(const struct dst_entry *dst, const int metric)
{
	WARN_ON_ONCE(metric == RTAX_HOPLIMIT ||
		     metric == RTAX_ADVMSS ||
		     metric == RTAX_MTU);
	return dst_metric_raw(dst, metric);
}

static inline u32
dst_metric_advmss(const struct dst_entry *dst)
{
	u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS);

	if (!advmss)
		advmss = dst->ops->default_advmss(dst);

	return advmss;
}

static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
{
	u32 *p = dst_metrics_write_ptr(dst);

	if (p)
		p[metric-1] = val;
}
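
/*
 * Read/write sketch (RTAX_INITCWND picked arbitrarily):
 *
 *	u32 cwnd = dst_metric(dst, RTAX_INITCWND);
 *	dst_metric_set(dst, RTAX_INITCWND, 10);
 *
 * Note the p[metric-1] indexing above: RTAX_* identifiers are 1-based
 * while the metrics array is 0-based.
 */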

/* Kernel-internal feature bits that are unallocated in user space. */
#define DST_FEATURE_ECN_CA	(1U << 31)

#define DST_FEATURE_MASK	(DST_FEATURE_ECN_CA)
#define DST_FEATURE_ECN_MASK	(DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN)

static inline u32
dst_feature(const struct dst_entry *dst, u32 feature)
{
	return dst_metric(dst, RTAX_FEATURES) & feature;
}

INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *));
INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *));
static inline u32 dst_mtu(const struct dst_entry *dst)
{
	return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst);
}

/* Variant of dst_mtu() for IPv4 users. */
static inline u32 dst4_mtu(const struct dst_entry *dst)
{
	return INDIRECT_CALL_1(dst->ops->mtu, ipv4_mtu, dst);
}

/* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
{
	return msecs_to_jiffies(dst_metric(dst, metric));
}
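
/*
 * Sketch: TCP-style consumers read RTT metrics already converted from
 * the milliseconds of the user ABI into jiffies, e.g.
 *
 *	unsigned long rtt = dst_metric_rtt(dst, RTAX_RTT);
 */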

static inline int
dst_metric_locked(const struct dst_entry *dst, int metric)
{
	return dst_metric(dst, RTAX_LOCK) & (1 << metric);
}
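
/*
 * Sketch: a metric locked from user space ("ip route ... lock mtu") must
 * not be rewritten by the kernel, so PMTU-style updaters check the lock
 * bit first:
 *
 *	if (!dst_metric_locked(dst, RTAX_MTU))
 *		dst_metric_set(dst, RTAX_MTU, new_mtu);
 */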

static inline void dst_hold(struct dst_entry *dst)
{
	/*
	 * If your kernel compilation stops here, please check
	 * the placement of __rcuref in struct dst_entry
	 */
	BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63);
	WARN_ON(!rcuref_get(&dst->__rcuref));
}

static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
{
	if (unlikely(time != READ_ONCE(dst->lastuse))) {
		dst->__use++;
		WRITE_ONCE(dst->lastuse, time);
	}
}

static inline struct dst_entry *dst_clone(struct dst_entry *dst)
{
	if (dst)
		dst_hold(dst);
	return dst;
}

void dst_release(struct dst_entry *dst);

void dst_release_immediate(struct dst_entry *dst);
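
/*
 * Refcounting sketch: every successful dst_hold()/dst_clone() pairs with
 * exactly one dst_release().  dst_release_immediate() is reserved for
 * entries that never became visible to other CPUs and frees without
 * waiting for an RCU grace period.
 *
 *	dst_hold(dst);
 *	... use dst ...
 *	dst_release(dst);
 */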

static inline void refdst_drop(unsigned long refdst)
{
	if (!(refdst & SKB_DST_NOREF))
		dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK));
}

/**
 * skb_dst_drop - drops skb dst
 * @skb: buffer
 *
 * Drops dst reference count if a reference was taken.
 */
static inline void skb_dst_drop(struct sk_buff *skb)
{
	if (skb->_skb_refdst) {
		refdst_drop(skb->_skb_refdst);
		skb->_skb_refdst = 0UL;
	}
}

static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
{
	nskb->slow_gro |= !!refdst;
	nskb->_skb_refdst = refdst;
	if (!(nskb->_skb_refdst & SKB_DST_NOREF))
		dst_clone(skb_dst(nskb));
}

static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
{
	__skb_dst_copy(nskb, oskb->_skb_refdst);
}

/**
 * dst_hold_safe - Take a reference on a dst if possible
 * @dst: pointer to dst entry
 *
 * This helper returns false if it could not safely
 * take a reference on a dst.
 */
static inline bool dst_hold_safe(struct dst_entry *dst)
{
	return rcuref_get(&dst->__rcuref);
}

/**
 * skb_dst_force - makes sure skb dst is refcounted
 * @skb: buffer
 *
 * If dst is not yet refcounted and not destroyed, grab a ref on it.
 * Returns: true if dst is refcounted.
 */
static inline bool skb_dst_force(struct sk_buff *skb)
{
	if (skb_dst_is_noref(skb)) {
		struct dst_entry *dst = skb_dst(skb);

		WARN_ON(!rcu_read_lock_held());
		if (!dst_hold_safe(dst))
			dst = NULL;

		skb->_skb_refdst = (unsigned long)dst;
		skb->slow_gro |= !!dst;
	}

	return skb->_skb_refdst != 0UL;
}
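
/*
 * Usage sketch: a noref dst is only valid inside the RCU section that
 * produced it, so take a real reference before the skb is queued past
 * rcu_read_unlock():
 *
 *	rcu_read_lock();
 *	... obtain noref dst, attach it to skb ...
 *	forced = skb_dst_force(skb);
 *	rcu_read_unlock();
 *	if (!forced)
 *		kfree_skb(skb);
 */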


/**
 *	__skb_tunnel_rx - prepare skb for rx reinsert
 *	@skb: buffer
 *	@dev: tunnel device
 *	@net: netns for packet i/o
 *
 *	After decapsulation, the packet is going to re-enter (netif_rx()) our
 *	stack, so perform some cleanups (no accounting is done).
 */
static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
				   struct net *net)
{
	skb->dev = dev;

	/*
	 * Clear hash so that we can recalculate the hash for the
	 * encapsulated packet, unless we have already determined the hash
	 * over the L4 4-tuple.
	 */
	skb_clear_hash_if_not_l4(skb);
	skb_set_queue_mapping(skb, 0);
	skb_scrub_packet(skb, !net_eq(net, dev_net(dev)));
}

/**
 *	skb_tunnel_rx - prepare skb for rx reinsert
 *	@skb: buffer
 *	@dev: tunnel device
 *	@net: netns for packet i/o
 *
 *	After decapsulation, the packet is going to re-enter (netif_rx()) our
 *	stack, so perform some cleanups and the rx accounting.
 *	Note: this accounting is not SMP safe.
 */
static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
				 struct net *net)
{
	DEV_STATS_INC(dev, rx_packets);
	DEV_STATS_ADD(dev, rx_bytes, skb->len);
	__skb_tunnel_rx(skb, dev, net);
}
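
/*
 * Receive-path sketch, roughly what an ipip-style driver does after
 * stripping the outer header (tunnel->dev and tunnel->net stand in for
 * the driver's own state):
 *
 *	skb->protocol = htons(ETH_P_IP);
 *	skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
 *	netif_rx(skb);
 */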

static inline u32 dst_tclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
	const struct dst_entry *dst;

	dst = skb_dst(skb);
	if (dst)
		return dst->tclassid;
#endif
	return 0;
}

int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static inline int dst_discard(struct sk_buff *skb)
{
	return dst_discard_out(&init_net, skb->sk, skb);
}
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
		int initial_obsolete, unsigned short flags);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
	      struct net_device *dev, int initial_obsolete,
	      unsigned short flags);
void dst_dev_put(struct dst_entry *dst);

static inline void dst_confirm(struct dst_entry *dst)
{
}

static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
{
	struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr);
	return IS_ERR(n) ? NULL : n;
}

static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst,
						     struct sk_buff *skb)
{
	struct neighbour *n;

	if (WARN_ON_ONCE(!dst->ops->neigh_lookup))
		return NULL;

	n = dst->ops->neigh_lookup(dst, skb, NULL);

	return IS_ERR(n) ? NULL : n;
}

static inline void dst_confirm_neigh(const struct dst_entry *dst,
				     const void *daddr)
{
	if (dst->ops->confirm_neigh)
		dst->ops->confirm_neigh(dst, daddr);
}

static inline void dst_link_failure(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	if (dst && dst->ops && dst->ops->link_failure)
		dst->ops->link_failure(skb);
}

static inline void dst_set_expires(struct dst_entry *dst, int timeout)
{
	unsigned long old, expires = jiffies + timeout;

	/* 0 means "no expiry"; nudge a wrapped value so the expiry sticks. */
	if (expires == 0)
		expires = 1;

	old = READ_ONCE(dst->expires);

	if (!old || time_before(expires, old))
		WRITE_ONCE(dst->expires, expires);
}
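
/*
 * Sketch: @timeout is in jiffies relative to now, and the helper only
 * ever moves an existing expiry closer, e.g.
 *
 *	dst_set_expires(dst, 10 * HZ);	expires roughly 10s from now
 */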

static inline unsigned int dst_dev_overhead(struct dst_entry *dst,
					    struct sk_buff *skb)
{
	if (likely(dst))
		return LL_RESERVED_SPACE(dst->dev);

	return skb->mac_len;
}

INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *,
					 struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
					 struct sk_buff *));
/* Output packet to network from transport.  */
static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->output),
				  ip6_output, ip_output,
				  net, sk, skb);
}

INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *));
/* Input packet from network to transport.  */
static inline int dst_input(struct sk_buff *skb)
{
	return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->input),
				  ip6_input, ip_local_deliver, skb);
}
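
/*
 * Sketch: once a route lookup has attached a dst, transmit and local
 * delivery are each a single indirect call through the hooks above:
 *
 *	skb_dst_set(skb, dst);
 *	err = dst_output(net, sk, skb);	resolves to ip_output for IPv4
 */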

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
							  u32));
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							   u32));
static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
{
	if (READ_ONCE(dst->obsolete))
		dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check,
					 ipv4_dst_check, dst, cookie);
	return dst;
}
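
/*
 * Revalidation sketch in the style of sk_dst_check(): callers that cache
 * a dst also cache the cookie from lookup time and treat a NULL result
 * as "route changed, look it up again" (cached_dst/cached_cookie are the
 * caller's own state):
 *
 *	dst = dst_check(cached_dst, cached_cookie);
 *	if (!dst)
 *		... release the stale entry and re-route ...
 */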

/* Flags for the flags argument of xfrm_lookup(). */
enum {
	XFRM_LOOKUP_ICMP = 1 << 0,
	XFRM_LOOKUP_QUEUE = 1 << 1,
	XFRM_LOOKUP_KEEP_DST_REF = 1 << 2,
};

struct flowi;
#ifndef CONFIG_XFRM
static inline struct dst_entry *xfrm_lookup(struct net *net,
					    struct dst_entry *dst_orig,
					    const struct flowi *fl,
					    const struct sock *sk,
					    int flags)
{
	return dst_orig;
}

static inline struct dst_entry *
xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig,
		      const struct flowi *fl, const struct sock *sk,
		      int flags, u32 if_id)
{
	return dst_orig;
}

static inline struct dst_entry *xfrm_lookup_route(struct net *net,
						  struct dst_entry *dst_orig,
						  const struct flowi *fl,
						  const struct sock *sk,
						  int flags)
{
	return dst_orig;
}

static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
{
	return NULL;
}

#else
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl, const struct sock *sk,
			      int flags);

struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
					struct dst_entry *dst_orig,
					const struct flowi *fl,
					const struct sock *sk, int flags,
					u32 if_id);

struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
				    const struct flowi *fl, const struct sock *sk,
				    int flags);
/* An skb with this dst attached needs transformation if dst->xfrm is valid */
static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
{
	return dst->xfrm;
}
#endif

static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst->ops->update_pmtu)
		dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
}

/* Update dst pmtu but do not confirm the neighbour. */
static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst->ops->update_pmtu)
		dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
}
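
/*
 * Tunnel-xmit sketch: if the outer route's MTU minus the encapsulation
 * overhead is too small for this packet, feed the reduced MTU back into
 * the inner route (headroom names the tunnel's own per-packet overhead,
 * introduced here only for illustration):
 *
 *	mtu = dst_mtu(dst) - headroom;
 *	if (skb->len > mtu)
 *		skb_dst_update_pmtu_no_confirm(skb, mtu);
 */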

static inline struct net_device *dst_dev(const struct dst_entry *dst)
{
	return READ_ONCE(dst->dev);
}

static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
{
	return rcu_dereference(dst->dev_rcu);
}

static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst)
{
	return dev_net_rcu(dst_dev_rcu(dst));
}

static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
{
	return dst_dev(skb_dst(skb));
}

static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb)
{
	return dst_dev_rcu(skb_dst(skb));
}

static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
{
	return dev_net(skb_dst_dev(skb));
}

static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
{
	return dev_net_rcu(skb_dst_dev_rcu(skb));
}
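
/*
 * Accessor sketch: the _rcu variants must run under rcu_read_lock(),
 * while the non-RCU variants rely on the caller holding a dst reference:
 *
 *	rcu_read_lock();
 *	net = skb_dst_dev_net_rcu(skb);
 *	... use net ...
 *	rcu_read_unlock();
 */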

struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu, bool confirm_neigh);
void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old);
struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst,
					     struct sk_buff *skb,
					     const void *daddr);
unsigned int dst_blackhole_mtu(const struct dst_entry *dst);

#endif /* _NET_DST_H */