/* SPDX-License-Identifier: GPL-2.0 */
/*
 * net/dst.h	Protocol independent destination cache definitions.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#ifndef _NET_DST_H
#define _NET_DST_H

#include <net/dst_ops.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
#include <linux/bug.h>
#include <linux/jiffies.h>
#include <linux/refcount.h>
#include <linux/rcuref.h>
#include <net/neighbour.h>
#include <asm/processor.h>
#include <linux/indirect_call_wrapper.h>

struct sk_buff;

struct dst_entry {
	union {
		struct net_device	*dev;
		struct net_device __rcu	*dev_rcu;
	};
	struct dst_ops		*ops;
	unsigned long		_metrics;
	unsigned long		expires;
#ifdef CONFIG_XFRM
	struct xfrm_state	*xfrm;
#else
	void			*__pad1;
#endif
	int			(*input)(struct sk_buff *);
	int			(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);

	unsigned short		flags;
#define DST_NOXFRM		0x0002
#define DST_NOPOLICY		0x0004
#define DST_NOCOUNT		0x0008
#define DST_FAKE_RTABLE		0x0010
#define DST_XFRM_TUNNEL		0x0020
#define DST_XFRM_QUEUE		0x0040
#define DST_METADATA		0x0080

	/* A non-zero value of dst->obsolete forces by-hand validation
	 * of the route entry.  Positive values are set by the generic
	 * dst layer to indicate that the entry has been forcefully
	 * destroyed.
	 *
	 * Negative values are used by the implementation layer code to
	 * force invocation of the dst_ops->check() method.
	 */
	short			obsolete;
#define DST_OBSOLETE_NONE	0
#define DST_OBSOLETE_DEAD	2
#define DST_OBSOLETE_FORCE_CHK	-1
#define DST_OBSOLETE_KILL	-2
	unsigned short		header_len;	/* more space at head required */
	unsigned short		trailer_len;	/* space to reserve at tail */

	/*
	 * __rcuref wants to be on a different cache line from
	 * input/output/ops or performance tanks badly
	 */
#ifdef CONFIG_64BIT
	rcuref_t		__rcuref;	/* 64-bit offset 64 */
#endif
	int			__use;
	unsigned long		lastuse;
	struct rcu_head		rcu_head;
	short			error;
	short			__pad;
	__u32			tclassid;
#ifndef CONFIG_64BIT
	struct lwtunnel_state	*lwtstate;
	rcuref_t		__rcuref;	/* 32-bit offset 64 */
#endif
	netdevice_tracker	dev_tracker;

	/*
	 * Used by rtable and rt6_info. Moves lwtstate into the next cache
	 * line on 64bit so that lwtstate does not cause false sharing with
	 * __rcuref under contention of __rcuref. This also puts the
	 * frequently accessed members of rtable and rt6_info out of the
	 * __rcuref cache line.
	 */
	struct list_head	rt_uncached;
	struct uncached_list	*rt_uncached_list;
#ifdef CONFIG_64BIT
	struct lwtunnel_state	*lwtstate;
#endif
};
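
/*
 * Usage sketch (illustrative only, not part of the API proper): the typical
 * refcounted lifetime of a dst_entry. dst_alloc() returns an entry holding
 * one reference; each additional user takes its own with dst_hold() or
 * dst_clone() and drops it with dst_release(). 'my_dst_ops' is a
 * hypothetical dst_ops instance:
 *
 *	struct dst_entry *dst;
 *
 *	dst = dst_alloc(&my_dst_ops, dev, DST_OBSOLETE_NONE, 0);
 *	if (!dst)
 *		return -ENOBUFS;
 *	dst_hold(dst);			// second user takes its own ref
 *	...
 *	dst_release(dst);		// second user done
 *	dst_release(dst);		// drop the initial reference
 */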

struct dst_metrics {
	u32		metrics[RTAX_MAX];
	refcount_t	refcnt;
} __aligned(4);		/* Low pointer bits contain DST_METRICS_FLAGS */
extern const struct dst_metrics dst_default_metrics;

u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);

#define DST_METRICS_READ_ONLY	0x1UL
#define DST_METRICS_REFCOUNTED	0x2UL
#define DST_METRICS_FLAGS	0x3UL
#define __DST_METRICS_PTR(Y)	\
	((u32 *)((Y) & ~DST_METRICS_FLAGS))
#define DST_METRICS_PTR(X)	__DST_METRICS_PTR((X)->_metrics)

static inline bool dst_metrics_read_only(const struct dst_entry *dst)
{
	return dst->_metrics & DST_METRICS_READ_ONLY;
}

void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old);

static inline void dst_destroy_metrics_generic(struct dst_entry *dst)
{
	unsigned long val = dst->_metrics;

	if (!(val & DST_METRICS_READ_ONLY))
		__dst_destroy_metrics_generic(dst, val);
}

static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst)
{
	unsigned long p = dst->_metrics;

	BUG_ON(!p);

	if (p & DST_METRICS_READ_ONLY)
		return dst->ops->cow_metrics(dst, p);
	return __DST_METRICS_PTR(p);
}

/* This may only be invoked before the entry has reached global
 * visibility.
 */
static inline void dst_init_metrics(struct dst_entry *dst,
				    const u32 *src_metrics,
				    bool read_only)
{
	dst->_metrics = ((unsigned long) src_metrics) |
		(read_only ? DST_METRICS_READ_ONLY : 0);
}

static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src)
{
	u32 *dst_metrics = dst_metrics_write_ptr(dest);

	if (dst_metrics) {
		u32 *src_metrics = DST_METRICS_PTR(src);

		memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32));
	}
}

static inline u32 *dst_metrics_ptr(struct dst_entry *dst)
{
	return DST_METRICS_PTR(dst);
}

static inline u32
dst_metric_raw(const struct dst_entry *dst, const int metric)
{
	u32 *p = DST_METRICS_PTR(dst);

	return p[metric-1];
}

static inline u32
dst_metric(const struct dst_entry *dst, const int metric)
{
	WARN_ON_ONCE(metric == RTAX_HOPLIMIT ||
		     metric == RTAX_ADVMSS ||
		     metric == RTAX_MTU);
	return dst_metric_raw(dst, metric);
}

static inline u32
dst_metric_advmss(const struct dst_entry *dst)
{
	u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS);

	if (!advmss)
		advmss = dst->ops->default_advmss(dst);

	return advmss;
}

static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
{
	u32 *p = dst_metrics_write_ptr(dst);

	if (p)
		p[metric-1] = val;
}
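
/*
 * Usage sketch (illustrative only): reads never copy the metrics array,
 * while the first write to a READ_ONLY array transparently COWs it via
 * dst->ops->cow_metrics() (see dst_metrics_write_ptr() above):
 *
 *	u32 win = dst_metric(dst, RTAX_WINDOW);		// plain read, no copy
 *
 *	dst_metric_set(dst, RTAX_WINDOW, win + 1);	// may COW first
 */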

/* Kernel-internal feature bits that are unallocated in user space.
 */
#define DST_FEATURE_ECN_CA	(1U << 31)

#define DST_FEATURE_MASK	(DST_FEATURE_ECN_CA)
#define DST_FEATURE_ECN_MASK	(DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN)

static inline u32
dst_feature(const struct dst_entry *dst, u32 feature)
{
	return dst_metric(dst, RTAX_FEATURES) & feature;
}

INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *));
INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *));
static inline u32 dst_mtu(const struct dst_entry *dst)
{
	return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst);
}

/* Variant of dst_mtu() for IPv4 users. */
static inline u32 dst4_mtu(const struct dst_entry *dst)
{
	return INDIRECT_CALL_1(dst->ops->mtu, ipv4_mtu, dst);
}

/* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
{
	return msecs_to_jiffies(dst_metric(dst, metric));
}

static inline int
dst_metric_locked(const struct dst_entry *dst, int metric)
{
	return dst_metric(dst, RTAX_LOCK) & (1 << metric);
}

static inline void dst_hold(struct dst_entry *dst)
{
	/*
	 * If your kernel compilation stops here, please check
	 * the placement of __rcuref in struct dst_entry
	 */
	BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63);
	WARN_ON(!rcuref_get(&dst->__rcuref));
}

static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
{
	if (unlikely(time != READ_ONCE(dst->lastuse))) {
		dst->__use++;
		WRITE_ONCE(dst->lastuse, time);
	}
}

static inline struct dst_entry *dst_clone(struct dst_entry *dst)
{
	if (dst)
		dst_hold(dst);
	return dst;
}

void dst_release(struct dst_entry *dst);

void dst_release_immediate(struct dst_entry *dst);

static inline void refdst_drop(unsigned long refdst)
{
	if (!(refdst & SKB_DST_NOREF))
		dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK));
}

/**
 * skb_dst_drop - drops skb dst
 * @skb: buffer
 *
 * Drops dst reference count if a reference was taken.
 */
static inline void skb_dst_drop(struct sk_buff *skb)
{
	if (skb->_skb_refdst) {
		refdst_drop(skb->_skb_refdst);
		skb->_skb_refdst = 0UL;
	}
}

static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
{
	nskb->slow_gro |= !!refdst;
	nskb->_skb_refdst = refdst;
	if (!(nskb->_skb_refdst & SKB_DST_NOREF))
		dst_clone(skb_dst(nskb));
}

static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
{
	__skb_dst_copy(nskb, oskb->_skb_refdst);
}

/**
 * dst_hold_safe - Take a reference on a dst if possible
 * @dst: pointer to dst entry
 *
 * This helper returns false if it could not safely
 * take a reference on a dst.
 */
static inline bool dst_hold_safe(struct dst_entry *dst)
{
	return rcuref_get(&dst->__rcuref);
}
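
/*
 * Usage sketch (illustrative only): the intended pattern for grabbing a
 * reference on a dst found under RCU. A false return from dst_hold_safe()
 * means the entry is already on its way to destruction, so it must not be
 * used once the RCU section ends. 'cached_dst' is hypothetical:
 *
 *	rcu_read_lock();
 *	dst = rcu_dereference(cached_dst);
 *	if (dst && !dst_hold_safe(dst))
 *		dst = NULL;
 *	rcu_read_unlock();
 */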

/**
 * skb_dst_force - makes sure skb dst is refcounted
 * @skb: buffer
 *
 * If dst is not yet refcounted and not destroyed, grab a ref on it.
 * Returns: true if dst is refcounted.
 */
static inline bool skb_dst_force(struct sk_buff *skb)
{
	if (skb_dst_is_noref(skb)) {
		struct dst_entry *dst = skb_dst(skb);

		WARN_ON(!rcu_read_lock_held());
		if (!dst_hold_safe(dst))
			dst = NULL;

		skb->_skb_refdst = (unsigned long)dst;
		skb->slow_gro |= !!dst;
	}

	return skb->_skb_refdst != 0UL;
}

/**
 * __skb_tunnel_rx - prepare skb for rx reinsert
 * @skb: buffer
 * @dev: tunnel device
 * @net: netns for packet i/o
 *
 * After decapsulation, packet is going to re-enter (netif_rx()) our stack,
 * so make some cleanups. (no accounting done)
 */
static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
				   struct net *net)
{
	skb->dev = dev;

	/*
	 * Clear hash so that we can recalculate the hash for the
	 * encapsulated packet, unless we have already determined the hash
	 * over the L4 4-tuple.
	 */
	skb_clear_hash_if_not_l4(skb);
	skb_set_queue_mapping(skb, 0);
	skb_scrub_packet(skb, !net_eq(net, dev_net(dev)));
}

/**
 * skb_tunnel_rx - prepare skb for rx reinsert
 * @skb: buffer
 * @dev: tunnel device
 * @net: netns for packet i/o
 *
 * After decapsulation, packet is going to re-enter (netif_rx()) our stack,
 * so make some cleanups, and perform accounting.
 * Note: this accounting is not SMP safe.
 */
static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
				 struct net *net)
{
	DEV_STATS_INC(dev, rx_packets);
	DEV_STATS_ADD(dev, rx_bytes, skb->len);
	__skb_tunnel_rx(skb, dev, net);
}

static inline u32 dst_tclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
	const struct dst_entry *dst;

	dst = skb_dst(skb);
	if (dst)
		return dst->tclassid;
#endif
	return 0;
}

int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static inline int dst_discard(struct sk_buff *skb)
{
	return dst_discard_out(&init_net, skb->sk, skb);
}

void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
		int initial_obsolete, unsigned short flags);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
	      struct net_device *dev, int initial_obsolete,
	      unsigned short flags);
void dst_dev_put(struct dst_entry *dst);

static inline void dst_confirm(struct dst_entry *dst)
{
}

static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
{
	struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr);

	return IS_ERR(n) ? NULL : n;
}
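
/*
 * Usage sketch (illustrative only): the neighbour returned by
 * dst_neigh_lookup() carries its own reference, which the caller must
 * drop with neigh_release() once done:
 *
 *	struct neighbour *n = dst_neigh_lookup(dst, &ip_hdr(skb)->daddr);
 *
 *	if (n) {
 *		...
 *		neigh_release(n);
 *	}
 */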

static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst,
						     struct sk_buff *skb)
{
	struct neighbour *n;

	if (WARN_ON_ONCE(!dst->ops->neigh_lookup))
		return NULL;

	n = dst->ops->neigh_lookup(dst, skb, NULL);

	return IS_ERR(n) ? NULL : n;
}

static inline void dst_confirm_neigh(const struct dst_entry *dst,
				     const void *daddr)
{
	if (dst->ops->confirm_neigh)
		dst->ops->confirm_neigh(dst, daddr);
}

static inline void dst_link_failure(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst->ops && dst->ops->link_failure)
		dst->ops->link_failure(skb);
}

static inline void dst_set_expires(struct dst_entry *dst, int timeout)
{
	unsigned long old, expires = jiffies + timeout;

	if (expires == 0)
		expires = 1;

	old = READ_ONCE(dst->expires);

	if (!old || time_before(expires, old))
		WRITE_ONCE(dst->expires, expires);
}

static inline unsigned int dst_dev_overhead(struct dst_entry *dst,
					    struct sk_buff *skb)
{
	if (likely(dst))
		return LL_RESERVED_SPACE(dst->dev);

	return skb->mac_len;
}

INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *,
					 struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
					struct sk_buff *));
/* Output packet to network from transport. */
static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->output),
				  ip6_output, ip_output,
				  net, sk, skb);
}

INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *));
/* Input packet from network to transport. */
static inline int dst_input(struct sk_buff *skb)
{
	return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->input),
				  ip6_input, ip_local_deliver, skb);
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
							   u32));
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							    u32));
static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
{
	if (READ_ONCE(dst->obsolete))
		dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check,
					 ipv4_dst_check, dst, cookie);
	return dst;
}
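
/*
 * Usage sketch (illustrative only): cached routes are revalidated with
 * dst_check() before reuse; a NULL result means the cached entry is stale
 * and a fresh lookup is needed. 'sk_cached_dst' and 'relookup_route()' are
 * hypothetical:
 *
 *	dst = dst_check(sk_cached_dst, cookie);
 *	if (!dst)
 *		dst = relookup_route(net, fl);
 */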

/* Flags for xfrm_lookup flags argument. */
enum {
	XFRM_LOOKUP_ICMP = 1 << 0,
	XFRM_LOOKUP_QUEUE = 1 << 1,
	XFRM_LOOKUP_KEEP_DST_REF = 1 << 2,
};

struct flowi;
#ifndef CONFIG_XFRM
static inline struct dst_entry *xfrm_lookup(struct net *net,
					    struct dst_entry *dst_orig,
					    const struct flowi *fl,
					    const struct sock *sk,
					    int flags)
{
	return dst_orig;
}

static inline struct dst_entry *
xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig,
		      const struct flowi *fl, const struct sock *sk,
		      int flags, u32 if_id)
{
	return dst_orig;
}

static inline struct dst_entry *xfrm_lookup_route(struct net *net,
						  struct dst_entry *dst_orig,
						  const struct flowi *fl,
						  const struct sock *sk,
						  int flags)
{
	return dst_orig;
}

static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
{
	return NULL;
}

#else
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl, const struct sock *sk,
			      int flags);

struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
					struct dst_entry *dst_orig,
					const struct flowi *fl,
					const struct sock *sk, int flags,
					u32 if_id);

struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
				    const struct flowi *fl, const struct sock *sk,
				    int flags);

/* skb attached with this dst needs transformation if dst->xfrm is valid */
static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
{
	return dst->xfrm;
}
#endif

static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst->ops->update_pmtu)
		dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
}

/* Update dst pmtu but do not trigger neighbor confirmation */
static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst->ops->update_pmtu)
		dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
}

static inline struct net_device *dst_dev(const struct dst_entry *dst)
{
	return READ_ONCE(dst->dev);
}

static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
{
	return rcu_dereference(dst->dev_rcu);
}

static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst)
{
	return dev_net_rcu(dst_dev_rcu(dst));
}

static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
{
	return dst_dev(skb_dst(skb));
}

static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb)
{
	return dst_dev_rcu(skb_dst(skb));
}

static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
{
	return dev_net(skb_dst_dev(skb));
}

static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
{
	return dev_net_rcu(skb_dst_dev_rcu(skb));
}
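
/*
 * Usage sketch (illustrative only): the *_rcu accessors must run inside an
 * RCU read-side section, e.g. to read the egress device's MTU without
 * taking a device reference:
 *
 *	rcu_read_lock();
 *	mtu = READ_ONCE(dst_dev_rcu(dst)->mtu);
 *	rcu_read_unlock();
 */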

struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu, bool confirm_neigh);
void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old);
struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst,
					     struct sk_buff *skb,
					     const void *daddr);
unsigned int dst_blackhole_mtu(const struct dst_entry *dst);

#endif /* _NET_DST_H */