/* SPDX-License-Identifier: GPL-2.0 */
/*
 * net/dst.h	Protocol independent destination cache definitions.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#ifndef _NET_DST_H
#define _NET_DST_H

#include <net/dst_ops.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
#include <linux/bug.h>
#include <linux/jiffies.h>
#include <linux/refcount.h>
#include <linux/rcuref.h>
#include <net/neighbour.h>
#include <asm/processor.h>
#include <linux/indirect_call_wrapper.h>

struct sk_buff;

struct dst_entry {
	union {
		struct net_device	*dev;
		struct net_device __rcu	*dev_rcu;
	};
	struct dst_ops		*ops;
	unsigned long		_metrics;
	unsigned long		expires;
#ifdef CONFIG_XFRM
	struct xfrm_state	*xfrm;
#else
	void			*__pad1;
#endif
	int			(*input)(struct sk_buff *);
	int			(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);

	unsigned short		flags;
#define DST_NOXFRM		0x0002
#define DST_NOPOLICY		0x0004
#define DST_NOCOUNT		0x0008
#define DST_FAKE_RTABLE		0x0010
#define DST_XFRM_TUNNEL		0x0020
#define DST_XFRM_QUEUE		0x0040
#define DST_METADATA		0x0080

	/* A non-zero value of dst->obsolete forces by-hand validation
	 * of the route entry.  Positive values are set by the generic
	 * dst layer to indicate that the entry has been forcefully
	 * destroyed.
	 *
	 * Negative values are used by the implementation layer code to
	 * force invocation of the dst_ops->check() method.
	 */
	short			obsolete;
#define DST_OBSOLETE_NONE	0
#define DST_OBSOLETE_DEAD	2
#define DST_OBSOLETE_FORCE_CHK	-1
#define DST_OBSOLETE_KILL	-2
	unsigned short		header_len;	/* more space at head required */
	unsigned short		trailer_len;	/* space to reserve at tail */

	/*
	 * __rcuref wants to be on a different cache line from
	 * input/output/ops or performance tanks badly
	 */
#ifdef CONFIG_64BIT
	rcuref_t		__rcuref;	/* 64-bit offset 64 */
#endif
	int			__use;
	unsigned long		lastuse;
	struct rcu_head		rcu_head;
	short			error;
	short			__pad;
	__u32			tclassid;
#ifndef CONFIG_64BIT
	struct lwtunnel_state	*lwtstate;
	rcuref_t		__rcuref;	/* 32-bit offset 64 */
#endif
	netdevice_tracker	dev_tracker;

	/*
	 * Used by rtable and rt6_info. Moves lwtstate into the next cache
	 * line on 64bit so that lwtstate does not cause false sharing with
	 * __rcuref under contention of __rcuref. This also puts the
	 * frequently accessed members of rtable and rt6_info out of the
	 * __rcuref cache line.
	 */
	struct list_head	rt_uncached;
	struct uncached_list	*rt_uncached_list;
#ifdef CONFIG_64BIT
	struct lwtunnel_state	*lwtstate;
#endif
};
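
/* Layout note (illustrative, not part of the original header): dst_hold()
 * below build-asserts that __rcuref sits at a 64-byte-aligned offset,
 *
 *	BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63);
 *
 * keeping refcount updates off the cache line that holds the read-mostly
 * ops/input/output fields, as the comments above describe.
 */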

struct dst_metrics {
	u32		metrics[RTAX_MAX];
	refcount_t	refcnt;
} __aligned(4);		/* Low pointer bits contain DST_METRICS_FLAGS */
extern const struct dst_metrics dst_default_metrics;

u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);

#define DST_METRICS_READ_ONLY	0x1UL
#define DST_METRICS_REFCOUNTED	0x2UL
#define DST_METRICS_FLAGS	0x3UL
#define __DST_METRICS_PTR(Y)	\
	((u32 *)((Y) & ~DST_METRICS_FLAGS))
#define DST_METRICS_PTR(X)	__DST_METRICS_PTR((X)->_metrics)

static inline bool dst_metrics_read_only(const struct dst_entry *dst)
{
	return dst->_metrics & DST_METRICS_READ_ONLY;
}

void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old);

static inline void dst_destroy_metrics_generic(struct dst_entry *dst)
{
	unsigned long val = dst->_metrics;
	if (!(val & DST_METRICS_READ_ONLY))
		__dst_destroy_metrics_generic(dst, val);
}

static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst)
{
	unsigned long p = dst->_metrics;

	BUG_ON(!p);

	if (p & DST_METRICS_READ_ONLY)
		return dst->ops->cow_metrics(dst, p);
	return __DST_METRICS_PTR(p);
}

/* This may only be invoked before the entry has reached global
 * visibility.
 */
static inline void dst_init_metrics(struct dst_entry *dst,
				    const u32 *src_metrics,
				    bool read_only)
{
	dst->_metrics = ((unsigned long) src_metrics) |
		(read_only ? DST_METRICS_READ_ONLY : 0);
}

static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src)
{
	u32 *dst_metrics = dst_metrics_write_ptr(dest);

	if (dst_metrics) {
		u32 *src_metrics = DST_METRICS_PTR(src);

		memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32));
	}
}

static inline u32 *dst_metrics_ptr(struct dst_entry *dst)
{
	return DST_METRICS_PTR(dst);
}

static inline u32
dst_metric_raw(const struct dst_entry *dst, const int metric)
{
	u32 *p = DST_METRICS_PTR(dst);

	return p[metric-1];
}

static inline u32
dst_metric(const struct dst_entry *dst, const int metric)
{
	WARN_ON_ONCE(metric == RTAX_HOPLIMIT ||
		     metric == RTAX_ADVMSS ||
		     metric == RTAX_MTU);
	return dst_metric_raw(dst, metric);
}

static inline u32
dst_metric_advmss(const struct dst_entry *dst)
{
	u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS);

	if (!advmss)
		advmss = dst->ops->default_advmss(dst);

	return advmss;
}

static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
{
	u32 *p = dst_metrics_write_ptr(dst);

	if (p)
		p[metric-1] = val;
}
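
/* Illustrative sketch (not part of this header): dst->_metrics packs a
 * pointer to a u32 metrics array together with DST_METRICS_FLAGS in its low
 * bits.  Entries typically start out pointing at the shared, read-only
 * dst_default_metrics, and the first write copies the array via
 * dst_ops->cow_metrics().  A hypothetical setup/update path might look like:
 *
 *	dst_init_metrics(dst, dst_default_metrics.metrics, true);
 *	...
 *	dst_metric_set(dst, RTAX_MTU, 1400);	// read-only -> ->cow_metrics()
 */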

/* Kernel-internal feature bits that are unallocated in user space. */
#define DST_FEATURE_ECN_CA	(1U << 31)

#define DST_FEATURE_MASK	(DST_FEATURE_ECN_CA)
#define DST_FEATURE_ECN_MASK	(DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN)

static inline u32
dst_feature(const struct dst_entry *dst, u32 feature)
{
	return dst_metric(dst, RTAX_FEATURES) & feature;
}

INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *));
INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *));
static inline u32 dst_mtu(const struct dst_entry *dst)
{
	return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst);
}

/* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
{
	return msecs_to_jiffies(dst_metric(dst, metric));
}

static inline int
dst_metric_locked(const struct dst_entry *dst, int metric)
{
	return dst_metric(dst, RTAX_LOCK) & (1 << metric);
}

static inline void dst_hold(struct dst_entry *dst)
{
	/*
	 * If your kernel compilation stops here, please check
	 * the placement of __rcuref in struct dst_entry
	 */
	BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63);
	WARN_ON(!rcuref_get(&dst->__rcuref));
}

static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
{
	if (unlikely(time != READ_ONCE(dst->lastuse))) {
		dst->__use++;
		WRITE_ONCE(dst->lastuse, time);
	}
}

static inline struct dst_entry *dst_clone(struct dst_entry *dst)
{
	if (dst)
		dst_hold(dst);
	return dst;
}

void dst_release(struct dst_entry *dst);

void dst_release_immediate(struct dst_entry *dst);

static inline void refdst_drop(unsigned long refdst)
{
	if (!(refdst & SKB_DST_NOREF))
		dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK));
}

/**
 * skb_dst_drop - drops skb dst
 * @skb: buffer
 *
 * Drops dst reference count if a reference was taken.
 */
static inline void skb_dst_drop(struct sk_buff *skb)
{
	if (skb->_skb_refdst) {
		refdst_drop(skb->_skb_refdst);
		skb->_skb_refdst = 0UL;
	}
}

static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
{
	nskb->slow_gro |= !!refdst;
	nskb->_skb_refdst = refdst;
	if (!(nskb->_skb_refdst & SKB_DST_NOREF))
		dst_clone(skb_dst(nskb));
}

static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
{
	__skb_dst_copy(nskb, oskb->_skb_refdst);
}

/**
 * dst_hold_safe - Take a reference on a dst if possible
 * @dst: pointer to dst entry
 *
 * This helper returns false if it could not safely
 * take a reference on a dst.
 */
static inline bool dst_hold_safe(struct dst_entry *dst)
{
	return rcuref_get(&dst->__rcuref);
}
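
/* Illustrative sketch (not part of this header): a caller that wants to keep
 * a dst beyond the current RCU section takes a real reference and releases it
 * later.  Assuming a hypothetical noref lookup done under rcu_read_lock():
 *
 *	rcu_read_lock();
 *	dst = lookup_route_noref(...);		// hypothetical helper
 *	if (dst && !dst_hold_safe(dst))
 *		dst = NULL;			// entry was concurrently freed
 *	rcu_read_unlock();
 *	...
 *	if (dst)
 *		dst_release(dst);		// drop the reference when done
 */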

/**
 * skb_dst_force - makes sure skb dst is refcounted
 * @skb: buffer
 *
 * If dst is not yet refcounted and not destroyed, grab a ref on it.
 * Returns: true if dst is refcounted.
 */
static inline bool skb_dst_force(struct sk_buff *skb)
{
	if (skb_dst_is_noref(skb)) {
		struct dst_entry *dst = skb_dst(skb);

		WARN_ON(!rcu_read_lock_held());
		if (!dst_hold_safe(dst))
			dst = NULL;

		skb->_skb_refdst = (unsigned long)dst;
		skb->slow_gro |= !!dst;
	}

	return skb->_skb_refdst != 0UL;
}


/**
 * __skb_tunnel_rx - prepare skb for rx reinsert
 * @skb: buffer
 * @dev: tunnel device
 * @net: netns for packet i/o
 *
 * After decapsulation, packet is going to re-enter (netif_rx()) our stack,
 * so make some cleanups. (no accounting done)
 */
static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
				   struct net *net)
{
	skb->dev = dev;

	/*
	 * Clear hash so that we can recalculate the hash for the
	 * encapsulated packet, unless we have already determined the hash
	 * over the L4 4-tuple.
	 */
	skb_clear_hash_if_not_l4(skb);
	skb_set_queue_mapping(skb, 0);
	skb_scrub_packet(skb, !net_eq(net, dev_net(dev)));
}

/**
 * skb_tunnel_rx - prepare skb for rx reinsert
 * @skb: buffer
 * @dev: tunnel device
 * @net: netns for packet i/o
 *
 * After decapsulation, packet is going to re-enter (netif_rx()) our stack,
 * so make some cleanups, and perform accounting.
 * Note: this accounting is not SMP safe.
 */
static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
				 struct net *net)
{
	DEV_STATS_INC(dev, rx_packets);
	DEV_STATS_ADD(dev, rx_bytes, skb->len);
	__skb_tunnel_rx(skb, dev, net);
}

static inline u32 dst_tclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
	const struct dst_entry *dst;

	dst = skb_dst(skb);
	if (dst)
		return dst->tclassid;
#endif
	return 0;
}

int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static inline int dst_discard(struct sk_buff *skb)
{
	return dst_discard_out(&init_net, skb->sk, skb);
}
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
		int initial_obsolete, unsigned short flags);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
	      struct net_device *dev, int initial_obsolete,
	      unsigned short flags);
void dst_dev_put(struct dst_entry *dst);

static inline void dst_confirm(struct dst_entry *dst)
{
}

static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
{
	struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr);
	return IS_ERR(n) ? NULL : n;
}

static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst,
						     struct sk_buff *skb)
{
	struct neighbour *n;

	if (WARN_ON_ONCE(!dst->ops->neigh_lookup))
		return NULL;

	n = dst->ops->neigh_lookup(dst, skb, NULL);

	return IS_ERR(n) ? NULL : n;
}
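
/* Illustrative sketch (not part of this header): resolving the L2 neighbour
 * for a route, assuming a hypothetical IPv4 next-hop variable:
 *
 *	struct neighbour *n = dst_neigh_lookup(dst, &next_hop);
 *
 *	if (!n)
 *		return -ENOBUFS;
 *	...
 *	neigh_release(n);	// the lookup returned a referenced neighbour
 */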

static inline void dst_confirm_neigh(const struct dst_entry *dst,
				     const void *daddr)
{
	if (dst->ops->confirm_neigh)
		dst->ops->confirm_neigh(dst, daddr);
}

static inline void dst_link_failure(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	if (dst && dst->ops && dst->ops->link_failure)
		dst->ops->link_failure(skb);
}

static inline void dst_set_expires(struct dst_entry *dst, int timeout)
{
	unsigned long old, expires = jiffies + timeout;

	if (expires == 0)
		expires = 1;

	old = READ_ONCE(dst->expires);

	if (!old || time_before(expires, old))
		WRITE_ONCE(dst->expires, expires);
}

static inline unsigned int dst_dev_overhead(struct dst_entry *dst,
					    struct sk_buff *skb)
{
	if (likely(dst))
		return LL_RESERVED_SPACE(dst->dev);

	return skb->mac_len;
}

INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *,
					 struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
					struct sk_buff *));
/* Output packet to network from transport. */
static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->output),
				  ip6_output, ip_output,
				  net, sk, skb);
}

INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *));
/* Input packet from network to transport. */
static inline int dst_input(struct sk_buff *skb)
{
	return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->input),
				  ip6_input, ip_local_deliver, skb);
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
							   u32));
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							   u32));
static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
{
	if (READ_ONCE(dst->obsolete))
		dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check,
					 ipv4_dst_check, dst, cookie);
	return dst;
}
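
/* Illustrative sketch (not part of this header): revalidating a cached route
 * before use.  dst_check() returns the entry unchanged while dst->obsolete is
 * zero and otherwise defers to dst_ops->check(), which may return NULL when
 * the cached entry is stale.  Assuming a hypothetical per-socket cache:
 *
 *	dst = dst_check(sk_cache->dst, sk_cache->cookie);
 *	if (!dst)
 *		dst = relookup_and_cache(sk_cache);	// hypothetical helper
 */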

/* Flags for xfrm_lookup flags argument. */
enum {
	XFRM_LOOKUP_ICMP = 1 << 0,
	XFRM_LOOKUP_QUEUE = 1 << 1,
	XFRM_LOOKUP_KEEP_DST_REF = 1 << 2,
};

struct flowi;
#ifndef CONFIG_XFRM
static inline struct dst_entry *xfrm_lookup(struct net *net,
					    struct dst_entry *dst_orig,
					    const struct flowi *fl,
					    const struct sock *sk,
					    int flags)
{
	return dst_orig;
}

static inline struct dst_entry *
xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig,
		      const struct flowi *fl, const struct sock *sk,
		      int flags, u32 if_id)
{
	return dst_orig;
}

static inline struct dst_entry *xfrm_lookup_route(struct net *net,
						  struct dst_entry *dst_orig,
						  const struct flowi *fl,
						  const struct sock *sk,
						  int flags)
{
	return dst_orig;
}

static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
{
	return NULL;
}

#else
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl, const struct sock *sk,
			      int flags);

struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
					struct dst_entry *dst_orig,
					const struct flowi *fl,
					const struct sock *sk, int flags,
					u32 if_id);

struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
				    const struct flowi *fl, const struct sock *sk,
				    int flags);

/* skb attached with this dst needs transformation if dst->xfrm is valid */
static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
{
	return dst->xfrm;
}
#endif

static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst->ops->update_pmtu)
		dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
}

/* update dst pmtu but do not confirm the neighbour */
static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst->ops->update_pmtu)
		dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
}

static inline struct net_device *dst_dev(const struct dst_entry *dst)
{
	return READ_ONCE(dst->dev);
}

static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
{
	return rcu_dereference(dst->dev_rcu);
}

static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst)
{
	return dev_net_rcu(dst_dev_rcu(dst));
}

static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
{
	return dst_dev(skb_dst(skb));
}

static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb)
{
	return dst_dev_rcu(skb_dst(skb));
}

static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
{
	return dev_net(skb_dst_dev(skb));
}

static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
{
	return dev_net_rcu(skb_dst_dev_rcu(skb));
}
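
/* Illustrative sketch (not part of this header): the *_rcu accessors above
 * must be called inside an RCU read-side critical section, e.g.
 *
 *	rcu_read_lock();
 *	dev = skb_dst_dev_rcu(skb);
 *	net = dev_net_rcu(dev);
 *	... use dev/net without sleeping ...
 *	rcu_read_unlock();
 */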

struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu, bool confirm_neigh);
void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old);
struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst,
					     struct sk_buff *skb,
					     const void *daddr);
unsigned int dst_blackhole_mtu(const struct dst_entry *dst);

#endif /* _NET_DST_H */