xref: /linux/include/net/dst.h (revision 9f9581ba74a931843c6d807ecfeaff9fb8c1b731)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * net/dst.h	Protocol independent destination cache definitions.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  */
8 
9 #ifndef _NET_DST_H
10 #define _NET_DST_H
11 
12 #include <net/dst_ops.h>
13 #include <linux/netdevice.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/rcupdate.h>
16 #include <linux/bug.h>
17 #include <linux/jiffies.h>
18 #include <linux/refcount.h>
19 #include <linux/rcuref.h>
20 #include <net/neighbour.h>
21 #include <asm/processor.h>
22 #include <linux/indirect_call_wrapper.h>
23 
24 struct sk_buff;
25 
/*
 * Protocol independent destination cache entry.
 *
 * The inline helpers in this header operate on these fields: device
 * accessors, tagged metrics pointer, input/output handlers, reference
 * counting and expiry.  Field order matters: see the cache line notes
 * next to __rcuref and rt_uncached below.
 */
26 struct dst_entry {
	/* One pointer, two views: plain (dst_dev()) and __rcu annotated
	 * (dst_dev_rcu()).
	 */
27 	union {
28 		struct net_device       *dev;
29 		struct net_device __rcu *dev_rcu;
30 	};
	/* Per-protocol callbacks (mtu, check, cow_metrics, ...). */
31 	struct  dst_ops	        *ops;
	/* Pointer to a u32 metrics array with DST_METRICS_FLAGS kept in the
	 * low bits; decode it with DST_METRICS_PTR().
	 */
32 	unsigned long		_metrics;
	/* Jiffies value maintained by dst_set_expires(), which treats 0 as
	 * "not set" and never stores 0 itself.
	 */
33 	unsigned long           expires;
34 #ifdef CONFIG_XFRM
	/* Returned by dst_xfrm(). */
35 	struct xfrm_state	*xfrm;
36 #else
37 	void			*__pad1;
38 #endif
	/* Handlers invoked through dst_input() and dst_output(). */
39 	int			(*input)(struct sk_buff *);
40 	int			(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
41 
	/* Bitmask of the DST_* flag values defined right below. */
42 	unsigned short		flags;
43 #define DST_NOXFRM		0x0002
44 #define DST_NOPOLICY		0x0004
45 #define DST_NOCOUNT		0x0008
46 #define DST_FAKE_RTABLE		0x0010
47 #define DST_XFRM_TUNNEL		0x0020
48 #define DST_XFRM_QUEUE		0x0040
49 #define DST_METADATA		0x0080
50 
51 	/* A non-zero value of dst->obsolete forces by-hand validation
52 	 * of the route entry.  Positive values are set by the generic
53 	 * dst layer to indicate that the entry has been forcefully
54 	 * destroyed.
55 	 *
56 	 * Negative values are used by the implementation layer code to
57 	 * force invocation of the dst_ops->check() method.
58 	 */
59 	short			obsolete;
60 #define DST_OBSOLETE_NONE	0
61 #define DST_OBSOLETE_DEAD	2
62 #define DST_OBSOLETE_FORCE_CHK	-1
63 #define DST_OBSOLETE_KILL	-2
64 	unsigned short		header_len;	/* more space at head required */
65 	unsigned short		trailer_len;	/* space to reserve at tail */
66 
67 	/*
68 	 * __rcuref wants to be on a different cache line from
69 	 * input/output/ops or performance tanks badly
70 	 */
71 #ifdef CONFIG_64BIT
	/* Reference count taken by dst_hold() / dst_hold_safe(); dst_hold()
	 * build-checks that its offset is a multiple of 64.
	 */
72 	rcuref_t		__rcuref;	/* 64-bit offset 64 */
73 #endif
	/* __use and lastuse are updated together by dst_use_noref(). */
74 	int			__use;
75 	unsigned long		lastuse;
76 	struct rcu_head		rcu_head;
77 	short			error;
78 	short			__pad;
	/* Value returned by dst_tclassid() when CONFIG_IP_ROUTE_CLASSID
	 * is enabled.
	 */
79 	__u32			tclassid;
80 #ifndef CONFIG_64BIT
81 	struct lwtunnel_state   *lwtstate;
82 	rcuref_t		__rcuref;	/* 32-bit offset 64 */
83 #endif
84 	netdevice_tracker	dev_tracker;
85 
86 	/*
87 	 * Used by rtable and rt6_info. Moves lwtstate into the next cache
88 	 * line on 64bit so that lwtstate does not cause false sharing with
89 	 * __rcuref under contention of __rcuref. This also puts the
90 	 * frequently accessed members of rtable and rt6_info out of the
91 	 * __rcuref cache line.
92 	 */
93 	struct list_head	rt_uncached;
94 	struct uncached_list	*rt_uncached_list;
95 #ifdef CONFIG_64BIT
96 	struct lwtunnel_state   *lwtstate;
97 #endif
98 };
99 
/*
 * Metrics storage: one u32 slot per RTAX_* metric (the accessors in this
 * header index it with "metric - 1") plus a reference count.
 * NOTE(review): refcnt presumably only matters for pointers tagged
 * DST_METRICS_REFCOUNTED - confirm against the users of that flag.
 *
 * The 4-byte alignment keeps the two low bits of a pointer to this
 * structure free for the DST_METRICS_FLAGS tag bits.
 */
100 struct dst_metrics {
101 	u32		metrics[RTAX_MAX];
102 	refcount_t	refcnt;
103 } __aligned(4);		/* Low pointer bits contain DST_METRICS_FLAGS */
/* Shared default metrics instance, defined outside this header. */
104 extern const struct dst_metrics dst_default_metrics;
105 
/* Generic copy-on-write helper for metrics; @old is the tagged _metrics
 * word.  NOTE(review): implementation is not visible in this header.
 */
106 u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
107 
/*
 * Tag bits stored in the low bits of dst_entry::_metrics.
 * DST_METRICS_FLAGS is the mask covering both tag bits;
 * __DST_METRICS_PTR() strips them from a raw word and DST_METRICS_PTR()
 * does the same for the word stored in a dst_entry.
 */
108 #define DST_METRICS_READ_ONLY		0x1UL
109 #define DST_METRICS_REFCOUNTED		0x2UL
110 #define DST_METRICS_FLAGS		0x3UL
111 #define __DST_METRICS_PTR(Y)	\
112 	((u32 *)((Y) & ~DST_METRICS_FLAGS))
113 #define DST_METRICS_PTR(X)	__DST_METRICS_PTR((X)->_metrics)
114 
115 static inline bool dst_metrics_read_only(const struct dst_entry *dst)
116 {
117 	return dst->_metrics & DST_METRICS_READ_ONLY;
118 }
119 
120 void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old);
121 
122 static inline void dst_destroy_metrics_generic(struct dst_entry *dst)
123 {
124 	unsigned long val = dst->_metrics;
125 	if (!(val & DST_METRICS_READ_ONLY))
126 		__dst_destroy_metrics_generic(dst, val);
127 }
128 
129 static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst)
130 {
131 	unsigned long p = dst->_metrics;
132 
133 	BUG_ON(!p);
134 
135 	if (p & DST_METRICS_READ_ONLY)
136 		return dst->ops->cow_metrics(dst, p);
137 	return __DST_METRICS_PTR(p);
138 }
139 
140 /* This may only be invoked before the entry has reached global
141  * visibility.
142  */
143 static inline void dst_init_metrics(struct dst_entry *dst,
144 				    const u32 *src_metrics,
145 				    bool read_only)
146 {
147 	dst->_metrics = ((unsigned long) src_metrics) |
148 		(read_only ? DST_METRICS_READ_ONLY : 0);
149 }
150 
151 static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src)
152 {
153 	u32 *dst_metrics = dst_metrics_write_ptr(dest);
154 
155 	if (dst_metrics) {
156 		u32 *src_metrics = DST_METRICS_PTR(src);
157 
158 		memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32));
159 	}
160 }
161 
162 static inline u32 *dst_metrics_ptr(struct dst_entry *dst)
163 {
164 	return DST_METRICS_PTR(dst);
165 }
166 
167 static inline u32
168 dst_metric_raw(const struct dst_entry *dst, const int metric)
169 {
170 	u32 *p = DST_METRICS_PTR(dst);
171 
172 	return p[metric-1];
173 }
174 
175 static inline u32
176 dst_metric(const struct dst_entry *dst, const int metric)
177 {
178 	WARN_ON_ONCE(metric == RTAX_HOPLIMIT ||
179 		     metric == RTAX_ADVMSS ||
180 		     metric == RTAX_MTU);
181 	return dst_metric_raw(dst, metric);
182 }
183 
184 static inline u32
185 dst_metric_advmss(const struct dst_entry *dst)
186 {
187 	u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS);
188 
189 	if (!advmss)
190 		advmss = dst->ops->default_advmss(dst);
191 
192 	return advmss;
193 }
194 
195 static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
196 {
197 	u32 *p = dst_metrics_write_ptr(dst);
198 
199 	if (p)
200 		p[metric-1] = val;
201 }
202 
203 /* Kernel-internal feature bits that are unallocated in user space. */
204 #define DST_FEATURE_ECN_CA	(1U << 31)
205 
/* DST_FEATURE_MASK: all kernel-internal feature bits defined above.
 * DST_FEATURE_ECN_MASK: the internal ECN_CA bit combined with the
 * RTAX_FEATURE_ECN bit.  Both are meant to be tested against the
 * RTAX_FEATURES metric, e.g. through dst_feature().
 */
206 #define DST_FEATURE_MASK	(DST_FEATURE_ECN_CA)
207 #define DST_FEATURE_ECN_MASK	(DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN)
208 
209 static inline u32
210 dst_feature(const struct dst_entry *dst, u32 feature)
211 {
212 	return dst_metric(dst, RTAX_FEATURES) & feature;
213 }
214 
215 INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *));
216 INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *));
217 static inline u32 dst_mtu(const struct dst_entry *dst)
218 {
219 	return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst);
220 }
221 
222 /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
223 static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
224 {
225 	return msecs_to_jiffies(dst_metric(dst, metric));
226 }
227 
228 static inline int
229 dst_metric_locked(const struct dst_entry *dst, int metric)
230 {
231 	return dst_metric(dst, RTAX_LOCK) & (1 << metric);
232 }
233 
234 static inline void dst_hold(struct dst_entry *dst)
235 {
236 	/*
237 	 * If your kernel compilation stops here, please check
238 	 * the placement of __rcuref in struct dst_entry
239 	 */
240 	BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63);
241 	WARN_ON(!rcuref_get(&dst->__rcuref));
242 }
243 
244 static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
245 {
246 	if (unlikely(time != READ_ONCE(dst->lastuse))) {
247 		dst->__use++;
248 		WRITE_ONCE(dst->lastuse, time);
249 	}
250 }
251 
252 static inline struct dst_entry *dst_clone(struct dst_entry *dst)
253 {
254 	if (dst)
255 		dst_hold(dst);
256 	return dst;
257 }
258 
/* Give back a reference on @dst; this is what refdst_drop() calls for a
 * refcounted skb dst.  Implemented outside this header.
 */
259 void dst_release(struct dst_entry *dst);
260 
/* NOTE(review): presumably the variant of dst_release() that does not
 * defer the final teardown - confirm against the implementation.
 */
261 void dst_release_immediate(struct dst_entry *dst);
262 
263 static inline void refdst_drop(unsigned long refdst)
264 {
265 	if (!(refdst & SKB_DST_NOREF))
266 		dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK));
267 }
268 
269 /**
270  * skb_dst_drop - drops skb dst
271  * @skb: buffer
272  *
273  * Drops dst reference count if a reference was taken.
274  */
275 static inline void skb_dst_drop(struct sk_buff *skb)
276 {
277 	if (skb->_skb_refdst) {
278 		refdst_drop(skb->_skb_refdst);
279 		skb->_skb_refdst = 0UL;
280 	}
281 }
282 
283 static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
284 {
285 	nskb->slow_gro |= !!refdst;
286 	nskb->_skb_refdst = refdst;
287 	if (!(nskb->_skb_refdst & SKB_DST_NOREF))
288 		dst_clone(skb_dst(nskb));
289 }
290 
291 static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
292 {
293 	__skb_dst_copy(nskb, oskb->_skb_refdst);
294 }
295 
296 /**
297  * dst_hold_safe - Take a reference on a dst if possible
298  * @dst: pointer to dst entry
299  *
300  * This helper returns false if it could not safely
301  * take a reference on a dst.
302  */
303 static inline bool dst_hold_safe(struct dst_entry *dst)
304 {
305 	return rcuref_get(&dst->__rcuref);
306 }
307 
308 /**
309  * skb_dst_force - makes sure skb dst is refcounted
310  * @skb: buffer
311  *
312  * If dst is not yet refcounted and not destroyed, grab a ref on it.
313  * Returns: true if dst is refcounted.
314  */
315 static inline bool skb_dst_force(struct sk_buff *skb)
316 {
317 	if (skb_dst_is_noref(skb)) {
318 		struct dst_entry *dst = skb_dst(skb);
319 
320 		WARN_ON(!rcu_read_lock_held());
321 		if (!dst_hold_safe(dst))
322 			dst = NULL;
323 
324 		skb->_skb_refdst = (unsigned long)dst;
325 		skb->slow_gro |= !!dst;
326 	}
327 
328 	return skb->_skb_refdst != 0UL;
329 }
330 
331 
332 /**
333  *	__skb_tunnel_rx - prepare skb for rx reinsert
334  *	@skb: buffer
335  *	@dev: tunnel device
336  *	@net: netns for packet i/o
337  *
338  *	After decapsulation, packet is going to re-enter (netif_rx()) our stack,
339  *	so make some cleanups. (no accounting done)
340  */
341 static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
342 				   struct net *net)
343 {
344 	skb->dev = dev;
345 
346 	/*
347 	 * Clear hash so that we can recalculate the hash for the
348 	 * encapsulated packet, unless we have already determine the hash
349 	 * over the L4 4-tuple.
350 	 */
351 	skb_clear_hash_if_not_l4(skb);
352 	skb_set_queue_mapping(skb, 0);
353 	skb_scrub_packet(skb, !net_eq(net, dev_net(dev)));
354 }
355 
356 /**
357  *	skb_tunnel_rx - prepare skb for rx reinsert
358  *	@skb: buffer
359  *	@dev: tunnel device
360  *	@net: netns for packet i/o
361  *
362  *	After decapsulation, packet is going to re-enter (netif_rx()) our stack,
363  *	so make some cleanups, and perform accounting.
364  *	Note: this accounting is not SMP safe.
365  */
366 static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
367 				 struct net *net)
368 {
369 	DEV_STATS_INC(dev, rx_packets);
370 	DEV_STATS_ADD(dev, rx_bytes, skb->len);
371 	__skb_tunnel_rx(skb, dev, net);
372 }
373 
374 static inline u32 dst_tclassid(const struct sk_buff *skb)
375 {
376 #ifdef CONFIG_IP_ROUTE_CLASSID
377 	const struct dst_entry *dst;
378 
379 	dst = skb_dst(skb);
380 	if (dst)
381 		return dst->tclassid;
382 #endif
383 	return 0;
384 }
385 
386 int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
387 static inline int dst_discard(struct sk_buff *skb)
388 {
389 	return dst_discard_out(&init_net, skb->sk, skb);
390 }
/*
 * Lifecycle entry points implemented outside this header.
 * NOTE(review): judging by the signatures, dst_alloc() allocates and
 * initialises an entry for @ops/@dev while dst_init() initialises a
 * caller-provided one; @initial_obsolete and @flags presumably seed
 * dst_entry::obsolete and dst_entry::flags - confirm against the
 * implementation.
 */
391 void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
392 		int initial_obsolete, unsigned short flags);
393 void dst_init(struct dst_entry *dst, struct dst_ops *ops,
394 	      struct net_device *dev, int initial_obsolete,
395 	      unsigned short flags);
/* NOTE(review): presumably detaches @dst from its device - confirm. */
396 void dst_dev_put(struct dst_entry *dst);
397 
/* Intentionally empty: calling this has no effect on @dst.  Neighbour
 * confirmation that does something goes through dst_confirm_neigh().
 */
398 static inline void dst_confirm(struct dst_entry *dst)
399 {
400 }
401 
402 static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
403 {
404 	struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr);
405 	return IS_ERR(n) ? NULL : n;
406 }
407 
408 static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst,
409 						     struct sk_buff *skb)
410 {
411 	struct neighbour *n;
412 
413 	if (WARN_ON_ONCE(!dst->ops->neigh_lookup))
414 		return NULL;
415 
416 	n = dst->ops->neigh_lookup(dst, skb, NULL);
417 
418 	return IS_ERR(n) ? NULL : n;
419 }
420 
421 static inline void dst_confirm_neigh(const struct dst_entry *dst,
422 				     const void *daddr)
423 {
424 	if (dst->ops->confirm_neigh)
425 		dst->ops->confirm_neigh(dst, daddr);
426 }
427 
428 static inline void dst_link_failure(struct sk_buff *skb)
429 {
430 	struct dst_entry *dst = skb_dst(skb);
431 	if (dst && dst->ops && dst->ops->link_failure)
432 		dst->ops->link_failure(skb);
433 }
434 
435 static inline void dst_set_expires(struct dst_entry *dst, int timeout)
436 {
437 	unsigned long old, expires = jiffies + timeout;
438 
439 	if (expires == 0)
440 		expires = 1;
441 
442 	old = READ_ONCE(dst->expires);
443 
444 	if (!old || time_before(expires, old))
445 		WRITE_ONCE(dst->expires, expires);
446 }
447 
448 static inline unsigned int dst_dev_overhead(struct dst_entry *dst,
449 					    struct sk_buff *skb)
450 {
451 	if (likely(dst))
452 		return LL_RESERVED_SPACE(dst->dev);
453 
454 	return skb->mac_len;
455 }
456 
457 INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *,
458 					 struct sk_buff *));
459 INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
460 					 struct sk_buff *));
461 /* Output packet to network from transport.  */
462 static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
463 {
464 	return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->output),
465 				  ip6_output, ip_output,
466 				  net, sk, skb);
467 }
468 
469 INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *));
470 INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *));
471 /* Input packet from network to transport.  */
472 static inline int dst_input(struct sk_buff *skb)
473 {
474 	return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->input),
475 				  ip6_input, ip_local_deliver, skb);
476 }
477 
478 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
479 							  u32));
480 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
481 							   u32));
482 static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
483 {
484 	if (READ_ONCE(dst->obsolete))
485 		dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check,
486 					 ipv4_dst_check, dst, cookie);
487 	return dst;
488 }
489 
490 /* Flags for xfrm_lookup flags argument. */
/* Each value is a distinct bit so that they can be OR-ed together.
 * NOTE(review): the meaning of each flag is defined by the xfrm lookup
 * implementation, which is not visible from this header.
 */
491 enum {
492 	XFRM_LOOKUP_ICMP = 1 << 0,
493 	XFRM_LOOKUP_QUEUE = 1 << 1,
494 	XFRM_LOOKUP_KEEP_DST_REF = 1 << 2,
495 };
496 
497 struct flowi;
498 #ifndef CONFIG_XFRM
/* !CONFIG_XFRM stub: no lookup is performed, @dst_orig is handed back
 * untouched and every other argument is ignored.
 */
static inline struct dst_entry *
xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
	    const struct flowi *fl, const struct sock *sk, int flags)
{
	return dst_orig;
}
507 
508 static inline struct dst_entry *
509 xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig,
510 		      const struct flowi *fl, const struct sock *sk,
511 		      int flags, u32 if_id)
512 {
513 	return dst_orig;
514 }
515 
/* !CONFIG_XFRM stub: no lookup is performed, @dst_orig is handed back
 * untouched and every other argument is ignored.
 */
static inline struct dst_entry *
xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
		  const struct flowi *fl, const struct sock *sk, int flags)
{
	return dst_orig;
}
524 
525 static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
526 {
527 	return NULL;
528 }
529 
530 #else
/*
 * CONFIG_XFRM builds: the lookup functions are implemented outside this
 * header.  They take the original dst, a flow description, an optional
 * socket and a flags word (see the XFRM_LOOKUP_* values above) and
 * return a dst.
 * NOTE(review): ownership of @dst_orig and the error convention of the
 * return value are not visible here - confirm against the
 * implementation before relying on them.
 */
531 struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
532 			      const struct flowi *fl, const struct sock *sk,
533 			      int flags);
534 
/* Same as xfrm_lookup() with an additional @if_id argument. */
535 struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
536 					struct dst_entry *dst_orig,
537 					const struct flowi *fl,
538 					const struct sock *sk, int flags,
539 					u32 if_id);
540 
/* Same argument list as xfrm_lookup(). */
541 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
542 				    const struct flowi *fl, const struct sock *sk,
543 				    int flags);
544 
545 /* skb attached with this dst needs transformation if dst->xfrm is valid */
546 static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
547 {
548 	return dst->xfrm;
549 }
550 #endif
551 
552 static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
553 {
554 	struct dst_entry *dst = skb_dst(skb);
555 
556 	if (dst && dst->ops->update_pmtu)
557 		dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
558 }
559 
560 /* update dst pmtu but not do neighbor confirm */
561 static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
562 {
563 	struct dst_entry *dst = skb_dst(skb);
564 
565 	if (dst && dst->ops->update_pmtu)
566 		dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
567 }
568 
569 static inline struct net_device *dst_dev(const struct dst_entry *dst)
570 {
571 	return READ_ONCE(dst->dev);
572 }
573 
574 static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
575 {
576 	return rcu_dereference(dst->dev_rcu);
577 }
578 
/* Return the netns of the device of @dst, using the RCU accessors
 * dst_dev_rcu() and dev_net_rcu().
 */
static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst)
{
	struct net_device *dev = dst_dev_rcu(dst);

	return dev_net_rcu(dev);
}
583 
/* Return the device of the dst attached to @skb (see dst_dev()).  The
 * dst is used without a NULL check.
 */
static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
{
	const struct dst_entry *dst = skb_dst(skb);

	return dst_dev(dst);
}
588 
/* RCU flavour of skb_dst_dev(): return the device of the dst attached
 * to @skb via dst_dev_rcu().  The dst is used without a NULL check.
 */
static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb)
{
	const struct dst_entry *dst = skb_dst(skb);

	return dst_dev_rcu(dst);
}
593 
/* Return the netns of the device of the dst attached to @skb. */
static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
{
	struct net_device *dev = skb_dst_dev(skb);

	return dev_net(dev);
}
598 
/* RCU flavour of skb_dst_dev_net(): resolve the device with
 * skb_dst_dev_rcu() and its netns with dev_net_rcu().
 */
static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
{
	struct net_device *dev = skb_dst_dev_rcu(skb);

	return dev_net_rcu(dev);
}
603 
/*
 * "Blackhole" helpers implemented outside this header.
 * NOTE(review): their signatures parallel the dst_ops callbacks used in
 * this file (check, update_pmtu, cow_metrics, neigh_lookup, mtu), so
 * they are presumably meant to be plugged into a dst_ops table for
 * blackhole routes - confirm against the implementation and its users.
 */
604 struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
605 void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
606 			       struct sk_buff *skb, u32 mtu, bool confirm_neigh);
607 void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
608 			    struct sk_buff *skb);
609 u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old);
610 struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst,
611 					     struct sk_buff *skb,
612 					     const void *daddr);
613 unsigned int dst_blackhole_mtu(const struct dst_entry *dst);
614 
615 #endif /* _NET_DST_H */
616