1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Generic nexthop implementation 4 * 5 * Copyright (c) 2017-19 Cumulus Networks 6 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> 7 */ 8 9 #ifndef __LINUX_NEXTHOP_H 10 #define __LINUX_NEXTHOP_H 11 12 #include <linux/netdevice.h> 13 #include <linux/notifier.h> 14 #include <linux/route.h> 15 #include <linux/types.h> 16 #include <net/ip_fib.h> 17 #include <net/ip6_fib.h> 18 #include <net/netlink.h> 19 20 #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK 21 22 struct nexthop; 23 24 struct nh_config { 25 u32 nh_id; 26 27 u8 nh_family; 28 u8 nh_protocol; 29 u8 nh_blackhole; 30 u8 nh_fdb; 31 u32 nh_flags; 32 33 int nh_ifindex; 34 struct net_device *dev; 35 36 union { 37 __be32 ipv4; 38 struct in6_addr ipv6; 39 } gw; 40 41 struct nlattr *nh_grp; 42 u16 nh_grp_type; 43 u16 nh_grp_res_num_buckets; 44 unsigned long nh_grp_res_idle_timer; 45 unsigned long nh_grp_res_unbalanced_timer; 46 bool nh_grp_res_has_num_buckets; 47 bool nh_grp_res_has_idle_timer; 48 bool nh_grp_res_has_unbalanced_timer; 49 50 bool nh_hw_stats; 51 52 struct nlattr *nh_encap; 53 u16 nh_encap_type; 54 55 u32 nlflags; 56 struct nl_info nlinfo; 57 }; 58 59 struct nh_info { 60 struct hlist_node dev_hash; /* entry on netns devhash */ 61 struct nexthop *nh_parent; 62 63 u8 family; 64 bool reject_nh; 65 bool fdb_nh; 66 67 union { 68 struct fib_nh_common fib_nhc; 69 struct fib_nh fib_nh; 70 struct fib6_nh fib6_nh; 71 }; 72 }; 73 74 struct nh_res_bucket { 75 struct nh_grp_entry __rcu *nh_entry; 76 atomic_long_t used_time; 77 unsigned long migrated_time; 78 bool occupied; 79 u8 nh_flags; 80 }; 81 82 struct nh_res_table { 83 struct net *net; 84 u32 nhg_id; 85 struct delayed_work upkeep_dw; 86 87 /* List of NHGEs that have too few buckets ("uw" for underweight). 88 * Reclaimed buckets will be given to entries in this list. 89 */ 90 struct list_head uw_nh_entries; 91 unsigned long unbalanced_since; 92 93 u32 idle_timer; 94 u32 unbalanced_timer; 95 96 u16 num_nh_buckets; 97 struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); 98 }; 99 100 struct nh_grp_entry_stats { 101 u64_stats_t packets; 102 struct u64_stats_sync syncp; 103 }; 104 105 struct nh_grp_entry { 106 struct nexthop *nh; 107 struct nh_grp_entry_stats __percpu *stats; 108 u8 weight; 109 110 union { 111 struct { 112 atomic_t upper_bound; 113 } hthr; 114 struct { 115 /* Member on uw_nh_entries. */ 116 struct list_head uw_nh_entry; 117 118 u16 count_buckets; 119 u16 wants_buckets; 120 } res; 121 }; 122 123 struct list_head nh_list; 124 struct nexthop *nh_parent; /* nexthop of group with this entry */ 125 u64 packets_hw; 126 }; 127 128 struct nh_group { 129 struct nh_group *spare; /* spare group for removals */ 130 u16 num_nh; 131 bool is_multipath; 132 bool hash_threshold; 133 bool resilient; 134 bool fdb_nh; 135 bool has_v4; 136 bool hw_stats; 137 138 struct nh_res_table __rcu *res_table; 139 struct nh_grp_entry nh_entries[] __counted_by(num_nh); 140 }; 141 142 struct nexthop { 143 struct rb_node rb_node; /* entry on netns rbtree */ 144 struct list_head fi_list; /* v4 entries using nh */ 145 struct list_head f6i_list; /* v6 entries using nh */ 146 struct list_head fdb_list; /* fdb entries using this nh */ 147 struct list_head grp_list; /* nh group entries using this nh */ 148 struct net *net; 149 150 u32 id; 151 152 u8 protocol; /* app managing this nh */ 153 u8 nh_flags; 154 bool is_group; 155 156 refcount_t refcnt; 157 struct rcu_head rcu; 158 159 union { 160 struct nh_info __rcu *nh_info; 161 struct nh_group __rcu *nh_grp; 162 }; 163 }; 164 165 enum nexthop_event_type { 166 NEXTHOP_EVENT_DEL, 167 NEXTHOP_EVENT_REPLACE, 168 NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, 169 NEXTHOP_EVENT_BUCKET_REPLACE, 170 NEXTHOP_EVENT_HW_STATS_REPORT_DELTA, 171 }; 172 173 enum nh_notifier_info_type { 174 NH_NOTIFIER_INFO_TYPE_SINGLE, 175 NH_NOTIFIER_INFO_TYPE_GRP, 176 NH_NOTIFIER_INFO_TYPE_RES_TABLE, 177 NH_NOTIFIER_INFO_TYPE_RES_BUCKET, 178 NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS, 179 }; 180 181 struct nh_notifier_single_info { 182 struct net_device *dev; 183 u8 gw_family; 184 union { 185 __be32 ipv4; 186 struct in6_addr ipv6; 187 }; 188 u32 id; 189 u8 is_reject:1, 190 is_fdb:1, 191 has_encap:1; 192 }; 193 194 struct nh_notifier_grp_entry_info { 195 u8 weight; 196 struct nh_notifier_single_info nh; 197 }; 198 199 struct nh_notifier_grp_info { 200 u16 num_nh; 201 bool is_fdb; 202 bool hw_stats; 203 struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); 204 }; 205 206 struct nh_notifier_res_bucket_info { 207 u16 bucket_index; 208 unsigned int idle_timer_ms; 209 bool force; 210 struct nh_notifier_single_info old_nh; 211 struct nh_notifier_single_info new_nh; 212 }; 213 214 struct nh_notifier_res_table_info { 215 u16 num_nh_buckets; 216 bool hw_stats; 217 struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); 218 }; 219 220 struct nh_notifier_grp_hw_stats_entry_info { 221 u32 id; 222 u64 packets; 223 }; 224 225 struct nh_notifier_grp_hw_stats_info { 226 u16 num_nh; 227 bool hw_stats_used; 228 struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh); 229 }; 230 231 struct nh_notifier_info { 232 struct net *net; 233 struct netlink_ext_ack *extack; 234 u32 id; 235 enum nh_notifier_info_type type; 236 union { 237 struct nh_notifier_single_info *nh; 238 struct nh_notifier_grp_info *nh_grp; 239 struct nh_notifier_res_table_info *nh_res_table; 240 struct nh_notifier_res_bucket_info *nh_res_bucket; 241 struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats; 242 }; 243 }; 244 245 int register_nexthop_notifier(struct net *net, struct notifier_block *nb, 246 struct netlink_ext_ack *extack); 247 int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); 248 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); 249 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); 250 void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, 251 bool offload, bool trap); 252 void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, 253 unsigned long *activity); 254 void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info, 255 unsigned int nh_idx, 256 u64 delta_packets); 257 258 /* caller is holding rcu or rtnl; no reference taken to nexthop */ 259 struct nexthop *nexthop_find_by_id(struct net *net, u32 id); 260 void nexthop_free_rcu(struct rcu_head *head); 261 262 static inline bool nexthop_get(struct nexthop *nh) 263 { 264 return refcount_inc_not_zero(&nh->refcnt); 265 } 266 267 static inline void nexthop_put(struct nexthop *nh) 268 { 269 if (refcount_dec_and_test(&nh->refcnt)) 270 call_rcu(&nh->rcu, nexthop_free_rcu); 271 } 272 273 static inline bool nexthop_cmp(const struct nexthop *nh1, 274 const struct nexthop *nh2) 275 { 276 return nh1 == nh2; 277 } 278 279 static inline bool nexthop_is_fdb(const struct nexthop *nh) 280 { 281 if (nh->is_group) { 282 const struct nh_group *nh_grp; 283 284 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 285 return nh_grp->fdb_nh; 286 } else { 287 const struct nh_info *nhi; 288 289 nhi = rcu_dereference_rtnl(nh->nh_info); 290 return nhi->fdb_nh; 291 } 292 } 293 294 static inline bool nexthop_has_v4(const struct nexthop *nh) 295 { 296 if (nh->is_group) { 297 struct nh_group *nh_grp; 298 299 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 300 return nh_grp->has_v4; 301 } 302 return false; 303 } 304 305 static inline bool nexthop_is_multipath(const struct nexthop *nh) 306 { 307 if (nh->is_group) { 308 struct nh_group *nh_grp; 309 310 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 311 return nh_grp->is_multipath; 312 } 313 return false; 314 } 315 316 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); 317 318 static inline unsigned int nexthop_num_path(const struct nexthop *nh) 319 { 320 unsigned int rc = 1; 321 322 if (nh->is_group) { 323 struct nh_group *nh_grp; 324 325 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 326 if (nh_grp->is_multipath) 327 rc = nh_grp->num_nh; 328 } 329 330 return rc; 331 } 332 333 static inline 334 struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) 335 { 336 /* for_nexthops macros in fib_semantics.c grabs a pointer to 337 * the nexthop before checking nhsel 338 */ 339 if (nhsel >= nhg->num_nh) 340 return NULL; 341 342 return nhg->nh_entries[nhsel].nh; 343 } 344 345 static inline 346 int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, 347 u8 rt_family) 348 { 349 struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); 350 int i; 351 352 for (i = 0; i < nhg->num_nh; i++) { 353 struct nexthop *nhe = nhg->nh_entries[i].nh; 354 struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); 355 struct fib_nh_common *nhc = &nhi->fib_nhc; 356 int weight = nhg->nh_entries[i].weight; 357 358 if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) 359 return -EMSGSIZE; 360 } 361 362 return 0; 363 } 364 365 /* called with rcu lock */ 366 static inline bool nexthop_is_blackhole(const struct nexthop *nh) 367 { 368 const struct nh_info *nhi; 369 370 if (nh->is_group) { 371 struct nh_group *nh_grp; 372 373 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 374 if (nh_grp->num_nh > 1) 375 return false; 376 377 nh = nh_grp->nh_entries[0].nh; 378 } 379 380 nhi = rcu_dereference_rtnl(nh->nh_info); 381 return nhi->reject_nh; 382 } 383 384 static inline void nexthop_path_fib_result(struct fib_result *res, int hash) 385 { 386 struct nh_info *nhi; 387 struct nexthop *nh; 388 389 nh = nexthop_select_path(res->fi->nh, hash); 390 nhi = rcu_dereference(nh->nh_info); 391 res->nhc = &nhi->fib_nhc; 392 } 393 394 /* called with rcu read lock or rtnl held */ 395 static inline 396 struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) 397 { 398 struct nh_info *nhi; 399 400 BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); 401 BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); 402 403 if (nh->is_group) { 404 struct nh_group *nh_grp; 405 406 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 407 if (nh_grp->is_multipath) { 408 nh = nexthop_mpath_select(nh_grp, nhsel); 409 if (!nh) 410 return NULL; 411 } 412 } 413 414 nhi = rcu_dereference_rtnl(nh->nh_info); 415 return &nhi->fib_nhc; 416 } 417 418 /* called from fib_table_lookup with rcu_lock */ 419 static inline 420 struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, 421 int fib_flags, 422 const struct flowi4 *flp, 423 int *nhsel) 424 { 425 struct nh_info *nhi; 426 427 if (nh->is_group) { 428 struct nh_group *nhg = rcu_dereference(nh->nh_grp); 429 int i; 430 431 for (i = 0; i < nhg->num_nh; i++) { 432 struct nexthop *nhe = nhg->nh_entries[i].nh; 433 434 nhi = rcu_dereference(nhe->nh_info); 435 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { 436 *nhsel = i; 437 return &nhi->fib_nhc; 438 } 439 } 440 } else { 441 nhi = rcu_dereference(nh->nh_info); 442 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { 443 *nhsel = 0; 444 return &nhi->fib_nhc; 445 } 446 } 447 448 return NULL; 449 } 450 451 static inline bool nexthop_uses_dev(const struct nexthop *nh, 452 const struct net_device *dev) 453 { 454 struct nh_info *nhi; 455 456 if (nh->is_group) { 457 struct nh_group *nhg = rcu_dereference(nh->nh_grp); 458 int i; 459 460 for (i = 0; i < nhg->num_nh; i++) { 461 struct nexthop *nhe = nhg->nh_entries[i].nh; 462 463 nhi = rcu_dereference(nhe->nh_info); 464 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) 465 return true; 466 } 467 } else { 468 nhi = rcu_dereference(nh->nh_info); 469 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) 470 return true; 471 } 472 473 return false; 474 } 475 476 static inline unsigned int fib_info_num_path(const struct fib_info *fi) 477 { 478 if (unlikely(fi->nh)) 479 return nexthop_num_path(fi->nh); 480 481 return fi->fib_nhs; 482 } 483 484 int fib_check_nexthop(struct nexthop *nh, u8 scope, 485 struct netlink_ext_ack *extack); 486 487 static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) 488 { 489 if (unlikely(fi->nh)) 490 return nexthop_fib_nhc(fi->nh, nhsel); 491 492 return &fi->fib_nh[nhsel].nh_common; 493 } 494 495 /* only used when fib_nh is built into fib_info */ 496 static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) 497 { 498 WARN_ON(fi->nh); 499 500 return &fi->fib_nh[nhsel]; 501 } 502 503 /* 504 * IPv6 variants 505 */ 506 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, 507 struct netlink_ext_ack *extack); 508 509 /* Caller should either hold rcu_read_lock(), or RTNL. */ 510 static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) 511 { 512 struct nh_info *nhi; 513 514 if (nh->is_group) { 515 struct nh_group *nh_grp; 516 517 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 518 nh = nexthop_mpath_select(nh_grp, 0); 519 if (!nh) 520 return NULL; 521 } 522 523 nhi = rcu_dereference_rtnl(nh->nh_info); 524 if (nhi->family == AF_INET6) 525 return &nhi->fib6_nh; 526 527 return NULL; 528 } 529 530 static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) 531 { 532 struct fib6_nh *fib6_nh; 533 534 fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; 535 return fib6_nh->fib_nh_dev; 536 } 537 538 static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) 539 { 540 struct nexthop *nh = res->f6i->nh; 541 struct nh_info *nhi; 542 543 nh = nexthop_select_path(nh, hash); 544 545 nhi = rcu_dereference_rtnl(nh->nh_info); 546 if (nhi->reject_nh) { 547 res->fib6_type = RTN_BLACKHOLE; 548 res->fib6_flags |= RTF_REJECT; 549 res->nh = nexthop_fib6_nh(nh); 550 } else { 551 res->nh = &nhi->fib6_nh; 552 } 553 } 554 555 int nexthop_for_each_fib6_nh(struct nexthop *nh, 556 int (*cb)(struct fib6_nh *nh, void *arg), 557 void *arg); 558 559 static inline int nexthop_get_family(struct nexthop *nh) 560 { 561 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); 562 563 return nhi->family; 564 } 565 566 static inline 567 struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) 568 { 569 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); 570 571 return &nhi->fib_nhc; 572 } 573 574 static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, 575 int hash) 576 { 577 struct nh_info *nhi; 578 struct nexthop *nhp; 579 580 nhp = nexthop_select_path(nh, hash); 581 if (unlikely(!nhp)) 582 return NULL; 583 nhi = rcu_dereference(nhp->nh_info); 584 return &nhi->fib_nhc; 585 } 586 #endif 587