1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Generic nexthop implementation 4 * 5 * Copyright (c) 2017-19 Cumulus Networks 6 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> 7 */ 8 9 #ifndef __LINUX_NEXTHOP_H 10 #define __LINUX_NEXTHOP_H 11 12 #include <linux/netdevice.h> 13 #include <linux/notifier.h> 14 #include <linux/route.h> 15 #include <linux/types.h> 16 #include <net/ip_fib.h> 17 #include <net/ip6_fib.h> 18 #include <net/netlink.h> 19 20 #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK 21 22 struct nexthop; 23 24 struct nh_config { 25 u32 nh_id; 26 27 u8 nh_family; 28 u8 nh_protocol; 29 u8 nh_blackhole; 30 u8 nh_fdb; 31 u32 nh_flags; 32 33 int nh_ifindex; 34 struct net_device *dev; 35 36 union { 37 __be32 ipv4; 38 struct in6_addr ipv6; 39 } gw; 40 41 struct nlattr *nh_grp; 42 u16 nh_grp_type; 43 u16 nh_grp_res_num_buckets; 44 unsigned long nh_grp_res_idle_timer; 45 unsigned long nh_grp_res_unbalanced_timer; 46 bool nh_grp_res_has_num_buckets; 47 bool nh_grp_res_has_idle_timer; 48 bool nh_grp_res_has_unbalanced_timer; 49 50 bool nh_hw_stats; 51 52 struct nlattr *nh_encap; 53 u16 nh_encap_type; 54 55 u32 nlflags; 56 struct nl_info nlinfo; 57 }; 58 59 struct nh_info { 60 struct hlist_node dev_hash; /* entry on netns devhash */ 61 struct nexthop *nh_parent; 62 63 u8 family; 64 bool reject_nh; 65 bool fdb_nh; 66 67 union { 68 struct fib_nh_common fib_nhc; 69 struct fib_nh fib_nh; 70 struct fib6_nh fib6_nh; 71 }; 72 }; 73 74 struct nh_res_bucket { 75 struct nh_grp_entry __rcu *nh_entry; 76 atomic_long_t used_time; 77 unsigned long migrated_time; 78 bool occupied; 79 u8 nh_flags; 80 }; 81 82 struct nh_res_table { 83 struct net *net; 84 u32 nhg_id; 85 struct delayed_work upkeep_dw; 86 87 /* List of NHGEs that have too few buckets ("uw" for underweight). 88 * Reclaimed buckets will be given to entries in this list. 89 */ 90 struct list_head uw_nh_entries; 91 unsigned long unbalanced_since; 92 93 u32 idle_timer; 94 u32 unbalanced_timer; 95 96 u16 num_nh_buckets; 97 struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); 98 }; 99 100 struct nh_grp_entry_stats { 101 u64_stats_t packets; 102 struct u64_stats_sync syncp; 103 }; 104 105 struct nh_grp_entry { 106 struct nexthop *nh; 107 struct nh_grp_entry_stats __percpu *stats; 108 u16 weight; 109 110 union { 111 struct { 112 atomic_t upper_bound; 113 } hthr; 114 struct { 115 /* Member on uw_nh_entries. */ 116 struct list_head uw_nh_entry; 117 118 u16 count_buckets; 119 u16 wants_buckets; 120 } res; 121 }; 122 123 struct list_head nh_list; 124 struct nexthop *nh_parent; /* nexthop of group with this entry */ 125 u64 packets_hw; 126 }; 127 128 struct nh_group { 129 struct nh_group *spare; /* spare group for removals */ 130 u16 num_nh; 131 bool is_multipath; 132 bool hash_threshold; 133 bool resilient; 134 bool fdb_nh; 135 bool has_v4; 136 bool hw_stats; 137 138 struct nh_res_table __rcu *res_table; 139 struct nh_grp_entry nh_entries[] __counted_by(num_nh); 140 }; 141 142 struct nexthop { 143 struct rb_node rb_node; /* entry on netns rbtree */ 144 struct list_head fi_list; /* v4 entries using nh */ 145 struct list_head f6i_list; /* v6 entries using nh */ 146 struct list_head fdb_list; /* fdb entries using this nh */ 147 struct list_head grp_list; /* nh group entries using this nh */ 148 struct net *net; 149 150 u32 id; 151 152 u8 protocol; /* app managing this nh */ 153 u8 nh_flags; 154 bool is_group; 155 bool dead; 156 spinlock_t lock; /* protect dead and f6i_list */ 157 158 refcount_t refcnt; 159 struct rcu_head rcu; 160 161 union { 162 struct nh_info __rcu *nh_info; 163 struct nh_group __rcu *nh_grp; 164 }; 165 }; 166 167 enum nexthop_event_type { 168 NEXTHOP_EVENT_DEL, 169 NEXTHOP_EVENT_REPLACE, 170 NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, 171 NEXTHOP_EVENT_BUCKET_REPLACE, 172 NEXTHOP_EVENT_HW_STATS_REPORT_DELTA, 173 }; 174 175 enum nh_notifier_info_type { 176 NH_NOTIFIER_INFO_TYPE_SINGLE, 177 NH_NOTIFIER_INFO_TYPE_GRP, 178 NH_NOTIFIER_INFO_TYPE_RES_TABLE, 179 NH_NOTIFIER_INFO_TYPE_RES_BUCKET, 180 NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS, 181 }; 182 183 struct nh_notifier_single_info { 184 struct net_device *dev; 185 u8 gw_family; 186 union { 187 __be32 ipv4; 188 struct in6_addr ipv6; 189 }; 190 u32 id; 191 u8 is_reject:1, 192 is_fdb:1, 193 has_encap:1; 194 }; 195 196 struct nh_notifier_grp_entry_info { 197 u16 weight; 198 struct nh_notifier_single_info nh; 199 }; 200 201 struct nh_notifier_grp_info { 202 u16 num_nh; 203 bool is_fdb; 204 bool hw_stats; 205 struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); 206 }; 207 208 struct nh_notifier_res_bucket_info { 209 u16 bucket_index; 210 unsigned int idle_timer_ms; 211 bool force; 212 struct nh_notifier_single_info old_nh; 213 struct nh_notifier_single_info new_nh; 214 }; 215 216 struct nh_notifier_res_table_info { 217 u16 num_nh_buckets; 218 bool hw_stats; 219 struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); 220 }; 221 222 struct nh_notifier_grp_hw_stats_entry_info { 223 u32 id; 224 u64 packets; 225 }; 226 227 struct nh_notifier_grp_hw_stats_info { 228 u16 num_nh; 229 bool hw_stats_used; 230 struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh); 231 }; 232 233 struct nh_notifier_info { 234 struct net *net; 235 struct netlink_ext_ack *extack; 236 u32 id; 237 enum nh_notifier_info_type type; 238 union { 239 struct nh_notifier_single_info *nh; 240 struct nh_notifier_grp_info *nh_grp; 241 struct nh_notifier_res_table_info *nh_res_table; 242 struct nh_notifier_res_bucket_info *nh_res_bucket; 243 struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats; 244 }; 245 }; 246 247 int register_nexthop_notifier(struct net *net, struct notifier_block *nb, 248 struct netlink_ext_ack *extack); 249 int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); 250 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); 251 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); 252 void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, 253 bool offload, bool trap); 254 void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, 255 unsigned long *activity); 256 void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info, 257 unsigned int nh_idx, 258 u64 delta_packets); 259 260 /* caller is holding rcu or rtnl; no reference taken to nexthop */ 261 struct nexthop *nexthop_find_by_id(struct net *net, u32 id); 262 void nexthop_free_rcu(struct rcu_head *head); 263 264 static inline bool nexthop_get(struct nexthop *nh) 265 { 266 return refcount_inc_not_zero(&nh->refcnt); 267 } 268 269 static inline void nexthop_put(struct nexthop *nh) 270 { 271 if (refcount_dec_and_test(&nh->refcnt)) 272 call_rcu_hurry(&nh->rcu, nexthop_free_rcu); 273 } 274 275 static inline bool nexthop_cmp(const struct nexthop *nh1, 276 const struct nexthop *nh2) 277 { 278 return nh1 == nh2; 279 } 280 281 static inline bool nexthop_is_fdb(const struct nexthop *nh) 282 { 283 if (nh->is_group) { 284 const struct nh_group *nh_grp; 285 286 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 287 return nh_grp->fdb_nh; 288 } else { 289 const struct nh_info *nhi; 290 291 nhi = rcu_dereference_rtnl(nh->nh_info); 292 return nhi->fdb_nh; 293 } 294 } 295 296 static inline bool nexthop_has_v4(const struct nexthop *nh) 297 { 298 if (nh->is_group) { 299 struct nh_group *nh_grp; 300 301 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 302 return nh_grp->has_v4; 303 } 304 return false; 305 } 306 307 static inline bool nexthop_is_multipath(const struct nexthop *nh) 308 { 309 if (nh->is_group) { 310 struct nh_group *nh_grp; 311 312 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 313 return nh_grp->is_multipath; 314 } 315 return false; 316 } 317 318 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); 319 320 static inline unsigned int nexthop_num_path(const struct nexthop *nh) 321 { 322 unsigned int rc = 1; 323 324 if (nh->is_group) { 325 struct nh_group *nh_grp; 326 327 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 328 if (nh_grp->is_multipath) 329 rc = nh_grp->num_nh; 330 } 331 332 return rc; 333 } 334 335 static inline 336 struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) 337 { 338 /* for_nexthops macros in fib_semantics.c grabs a pointer to 339 * the nexthop before checking nhsel 340 */ 341 if (nhsel >= nhg->num_nh) 342 return NULL; 343 344 return nhg->nh_entries[nhsel].nh; 345 } 346 347 static inline 348 int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, 349 u8 rt_family) 350 { 351 struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); 352 int i; 353 354 for (i = 0; i < nhg->num_nh; i++) { 355 struct nexthop *nhe = nhg->nh_entries[i].nh; 356 struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); 357 struct fib_nh_common *nhc = &nhi->fib_nhc; 358 int weight = nhg->nh_entries[i].weight; 359 360 if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) 361 return -EMSGSIZE; 362 } 363 364 return 0; 365 } 366 367 /* called with rcu lock */ 368 static inline bool nexthop_is_blackhole(const struct nexthop *nh) 369 { 370 const struct nh_info *nhi; 371 372 if (nh->is_group) { 373 struct nh_group *nh_grp; 374 375 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 376 if (nh_grp->num_nh > 1) 377 return false; 378 379 nh = nh_grp->nh_entries[0].nh; 380 } 381 382 nhi = rcu_dereference_rtnl(nh->nh_info); 383 return nhi->reject_nh; 384 } 385 386 static inline void nexthop_path_fib_result(struct fib_result *res, int hash) 387 { 388 struct nh_info *nhi; 389 struct nexthop *nh; 390 391 nh = nexthop_select_path(res->fi->nh, hash); 392 nhi = rcu_dereference(nh->nh_info); 393 res->nhc = &nhi->fib_nhc; 394 } 395 396 /* called with rcu read lock or rtnl held */ 397 static inline 398 struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) 399 { 400 struct nh_info *nhi; 401 402 BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); 403 BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); 404 405 if (nh->is_group) { 406 struct nh_group *nh_grp; 407 408 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 409 if (nh_grp->is_multipath) { 410 nh = nexthop_mpath_select(nh_grp, nhsel); 411 if (!nh) 412 return NULL; 413 } 414 } 415 416 nhi = rcu_dereference_rtnl(nh->nh_info); 417 return &nhi->fib_nhc; 418 } 419 420 /* called from fib_table_lookup with rcu_lock */ 421 static inline 422 struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, 423 int fib_flags, 424 const struct flowi4 *flp, 425 int *nhsel) 426 { 427 struct nh_info *nhi; 428 429 if (nh->is_group) { 430 struct nh_group *nhg = rcu_dereference(nh->nh_grp); 431 int i; 432 433 for (i = 0; i < nhg->num_nh; i++) { 434 struct nexthop *nhe = nhg->nh_entries[i].nh; 435 436 nhi = rcu_dereference(nhe->nh_info); 437 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { 438 *nhsel = i; 439 return &nhi->fib_nhc; 440 } 441 } 442 } else { 443 nhi = rcu_dereference(nh->nh_info); 444 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { 445 *nhsel = 0; 446 return &nhi->fib_nhc; 447 } 448 } 449 450 return NULL; 451 } 452 453 static inline bool nexthop_uses_dev(const struct nexthop *nh, 454 const struct net_device *dev) 455 { 456 struct nh_info *nhi; 457 458 if (nh->is_group) { 459 struct nh_group *nhg = rcu_dereference(nh->nh_grp); 460 int i; 461 462 for (i = 0; i < nhg->num_nh; i++) { 463 struct nexthop *nhe = nhg->nh_entries[i].nh; 464 465 nhi = rcu_dereference(nhe->nh_info); 466 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) 467 return true; 468 } 469 } else { 470 nhi = rcu_dereference(nh->nh_info); 471 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) 472 return true; 473 } 474 475 return false; 476 } 477 478 static inline unsigned int fib_info_num_path(const struct fib_info *fi) 479 { 480 if (unlikely(fi->nh)) 481 return nexthop_num_path(fi->nh); 482 483 return fi->fib_nhs; 484 } 485 486 int fib_check_nexthop(struct nexthop *nh, u8 scope, 487 struct netlink_ext_ack *extack); 488 489 static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) 490 { 491 if (unlikely(fi->nh)) 492 return nexthop_fib_nhc(fi->nh, nhsel); 493 494 return &fi->fib_nh[nhsel].nh_common; 495 } 496 497 /* only used when fib_nh is built into fib_info */ 498 static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) 499 { 500 WARN_ON(fi->nh); 501 502 return &fi->fib_nh[nhsel]; 503 } 504 505 /* 506 * IPv6 variants 507 */ 508 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, 509 struct netlink_ext_ack *extack); 510 511 /* Caller should either hold rcu_read_lock(), or RTNL. */ 512 static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) 513 { 514 struct nh_info *nhi; 515 516 if (nh->is_group) { 517 struct nh_group *nh_grp; 518 519 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 520 nh = nexthop_mpath_select(nh_grp, 0); 521 if (!nh) 522 return NULL; 523 } 524 525 nhi = rcu_dereference_rtnl(nh->nh_info); 526 if (nhi->family == AF_INET6) 527 return &nhi->fib6_nh; 528 529 return NULL; 530 } 531 532 static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) 533 { 534 struct fib6_nh *fib6_nh; 535 536 fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; 537 return fib6_nh->fib_nh_dev; 538 } 539 540 static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) 541 { 542 struct nexthop *nh = res->f6i->nh; 543 struct nh_info *nhi; 544 545 nh = nexthop_select_path(nh, hash); 546 547 nhi = rcu_dereference_rtnl(nh->nh_info); 548 if (nhi->reject_nh) { 549 res->fib6_type = RTN_BLACKHOLE; 550 res->fib6_flags |= RTF_REJECT; 551 res->nh = nexthop_fib6_nh(nh); 552 } else { 553 res->nh = &nhi->fib6_nh; 554 } 555 } 556 557 int nexthop_for_each_fib6_nh(struct nexthop *nh, 558 int (*cb)(struct fib6_nh *nh, void *arg), 559 void *arg); 560 561 static inline int nexthop_get_family(struct nexthop *nh) 562 { 563 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); 564 565 return nhi->family; 566 } 567 568 static inline 569 struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) 570 { 571 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); 572 573 return &nhi->fib_nhc; 574 } 575 576 static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, 577 int hash) 578 { 579 struct nh_info *nhi; 580 struct nexthop *nhp; 581 582 nhp = nexthop_select_path(nh, hash); 583 if (unlikely(!nhp)) 584 return NULL; 585 nhi = rcu_dereference(nhp->nh_info); 586 return &nhi->fib_nhc; 587 } 588 #endif 589