1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Generic nexthop implementation 4 * 5 * Copyright (c) 2017-19 Cumulus Networks 6 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> 7 */ 8 9 #ifndef __LINUX_NEXTHOP_H 10 #define __LINUX_NEXTHOP_H 11 12 #include <linux/netdevice.h> 13 #include <linux/notifier.h> 14 #include <linux/route.h> 15 #include <linux/types.h> 16 #include <net/ip_fib.h> 17 #include <net/ip6_fib.h> 18 #include <net/netlink.h> 19 20 #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK 21 22 struct nexthop; 23 24 struct nh_config { 25 u32 nh_id; 26 27 u8 nh_family; 28 u8 nh_protocol; 29 u8 nh_blackhole; 30 u8 nh_fdb; 31 u32 nh_flags; 32 33 int nh_ifindex; 34 struct net_device *dev; 35 36 union { 37 __be32 ipv4; 38 struct in6_addr ipv6; 39 } gw; 40 41 struct nlattr *nh_grp; 42 u16 nh_grp_type; 43 u16 nh_grp_res_num_buckets; 44 unsigned long nh_grp_res_idle_timer; 45 unsigned long nh_grp_res_unbalanced_timer; 46 bool nh_grp_res_has_num_buckets; 47 bool nh_grp_res_has_idle_timer; 48 bool nh_grp_res_has_unbalanced_timer; 49 50 struct nlattr *nh_encap; 51 u16 nh_encap_type; 52 53 u32 nlflags; 54 struct nl_info nlinfo; 55 }; 56 57 struct nh_info { 58 struct hlist_node dev_hash; /* entry on netns devhash */ 59 struct nexthop *nh_parent; 60 61 u8 family; 62 bool reject_nh; 63 bool fdb_nh; 64 65 union { 66 struct fib_nh_common fib_nhc; 67 struct fib_nh fib_nh; 68 struct fib6_nh fib6_nh; 69 }; 70 }; 71 72 struct nh_res_bucket { 73 struct nh_grp_entry __rcu *nh_entry; 74 atomic_long_t used_time; 75 unsigned long migrated_time; 76 bool occupied; 77 u8 nh_flags; 78 }; 79 80 struct nh_res_table { 81 struct net *net; 82 u32 nhg_id; 83 struct delayed_work upkeep_dw; 84 85 /* List of NHGEs that have too few buckets ("uw" for underweight). 86 * Reclaimed buckets will be given to entries in this list. 87 */ 88 struct list_head uw_nh_entries; 89 unsigned long unbalanced_since; 90 91 u32 idle_timer; 92 u32 unbalanced_timer; 93 94 u16 num_nh_buckets; 95 struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); 96 }; 97 98 struct nh_grp_entry { 99 struct nexthop *nh; 100 u8 weight; 101 102 union { 103 struct { 104 atomic_t upper_bound; 105 } hthr; 106 struct { 107 /* Member on uw_nh_entries. */ 108 struct list_head uw_nh_entry; 109 110 u16 count_buckets; 111 u16 wants_buckets; 112 } res; 113 }; 114 115 struct list_head nh_list; 116 struct nexthop *nh_parent; /* nexthop of group with this entry */ 117 }; 118 119 struct nh_group { 120 struct nh_group *spare; /* spare group for removals */ 121 u16 num_nh; 122 bool is_multipath; 123 bool hash_threshold; 124 bool resilient; 125 bool fdb_nh; 126 bool has_v4; 127 128 struct nh_res_table __rcu *res_table; 129 struct nh_grp_entry nh_entries[] __counted_by(num_nh); 130 }; 131 132 struct nexthop { 133 struct rb_node rb_node; /* entry on netns rbtree */ 134 struct list_head fi_list; /* v4 entries using nh */ 135 struct list_head f6i_list; /* v6 entries using nh */ 136 struct list_head fdb_list; /* fdb entries using this nh */ 137 struct list_head grp_list; /* nh group entries using this nh */ 138 struct net *net; 139 140 u32 id; 141 142 u8 protocol; /* app managing this nh */ 143 u8 nh_flags; 144 bool is_group; 145 146 refcount_t refcnt; 147 struct rcu_head rcu; 148 149 union { 150 struct nh_info __rcu *nh_info; 151 struct nh_group __rcu *nh_grp; 152 }; 153 }; 154 155 enum nexthop_event_type { 156 NEXTHOP_EVENT_DEL, 157 NEXTHOP_EVENT_REPLACE, 158 NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, 159 NEXTHOP_EVENT_BUCKET_REPLACE, 160 }; 161 162 enum nh_notifier_info_type { 163 NH_NOTIFIER_INFO_TYPE_SINGLE, 164 NH_NOTIFIER_INFO_TYPE_GRP, 165 NH_NOTIFIER_INFO_TYPE_RES_TABLE, 166 NH_NOTIFIER_INFO_TYPE_RES_BUCKET, 167 }; 168 169 struct nh_notifier_single_info { 170 struct net_device *dev; 171 u8 gw_family; 172 union { 173 __be32 ipv4; 174 struct in6_addr ipv6; 175 }; 176 u8 is_reject:1, 177 is_fdb:1, 178 has_encap:1; 179 }; 180 181 struct nh_notifier_grp_entry_info { 182 u8 weight; 183 u32 id; 184 struct nh_notifier_single_info nh; 185 }; 186 187 struct nh_notifier_grp_info { 188 u16 num_nh; 189 bool is_fdb; 190 struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); 191 }; 192 193 struct nh_notifier_res_bucket_info { 194 u16 bucket_index; 195 unsigned int idle_timer_ms; 196 bool force; 197 struct nh_notifier_single_info old_nh; 198 struct nh_notifier_single_info new_nh; 199 }; 200 201 struct nh_notifier_res_table_info { 202 u16 num_nh_buckets; 203 struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); 204 }; 205 206 struct nh_notifier_info { 207 struct net *net; 208 struct netlink_ext_ack *extack; 209 u32 id; 210 enum nh_notifier_info_type type; 211 union { 212 struct nh_notifier_single_info *nh; 213 struct nh_notifier_grp_info *nh_grp; 214 struct nh_notifier_res_table_info *nh_res_table; 215 struct nh_notifier_res_bucket_info *nh_res_bucket; 216 }; 217 }; 218 219 int register_nexthop_notifier(struct net *net, struct notifier_block *nb, 220 struct netlink_ext_ack *extack); 221 int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); 222 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); 223 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); 224 void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, 225 bool offload, bool trap); 226 void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, 227 unsigned long *activity); 228 229 /* caller is holding rcu or rtnl; no reference taken to nexthop */ 230 struct nexthop *nexthop_find_by_id(struct net *net, u32 id); 231 void nexthop_free_rcu(struct rcu_head *head); 232 233 static inline bool nexthop_get(struct nexthop *nh) 234 { 235 return refcount_inc_not_zero(&nh->refcnt); 236 } 237 238 static inline void nexthop_put(struct nexthop *nh) 239 { 240 if (refcount_dec_and_test(&nh->refcnt)) 241 call_rcu(&nh->rcu, nexthop_free_rcu); 242 } 243 244 static inline bool nexthop_cmp(const struct nexthop *nh1, 245 const struct nexthop *nh2) 246 { 247 return nh1 == nh2; 248 } 249 250 static inline bool nexthop_is_fdb(const struct nexthop *nh) 251 { 252 if (nh->is_group) { 253 const struct nh_group *nh_grp; 254 255 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 256 return nh_grp->fdb_nh; 257 } else { 258 const struct nh_info *nhi; 259 260 nhi = rcu_dereference_rtnl(nh->nh_info); 261 return nhi->fdb_nh; 262 } 263 } 264 265 static inline bool nexthop_has_v4(const struct nexthop *nh) 266 { 267 if (nh->is_group) { 268 struct nh_group *nh_grp; 269 270 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 271 return nh_grp->has_v4; 272 } 273 return false; 274 } 275 276 static inline bool nexthop_is_multipath(const struct nexthop *nh) 277 { 278 if (nh->is_group) { 279 struct nh_group *nh_grp; 280 281 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 282 return nh_grp->is_multipath; 283 } 284 return false; 285 } 286 287 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); 288 289 static inline unsigned int nexthop_num_path(const struct nexthop *nh) 290 { 291 unsigned int rc = 1; 292 293 if (nh->is_group) { 294 struct nh_group *nh_grp; 295 296 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 297 if (nh_grp->is_multipath) 298 rc = nh_grp->num_nh; 299 } 300 301 return rc; 302 } 303 304 static inline 305 struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) 306 { 307 /* for_nexthops macros in fib_semantics.c grabs a pointer to 308 * the nexthop before checking nhsel 309 */ 310 if (nhsel >= nhg->num_nh) 311 return NULL; 312 313 return nhg->nh_entries[nhsel].nh; 314 } 315 316 static inline 317 int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, 318 u8 rt_family) 319 { 320 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 321 int i; 322 323 for (i = 0; i < nhg->num_nh; i++) { 324 struct nexthop *nhe = nhg->nh_entries[i].nh; 325 struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); 326 struct fib_nh_common *nhc = &nhi->fib_nhc; 327 int weight = nhg->nh_entries[i].weight; 328 329 if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) 330 return -EMSGSIZE; 331 } 332 333 return 0; 334 } 335 336 /* called with rcu lock */ 337 static inline bool nexthop_is_blackhole(const struct nexthop *nh) 338 { 339 const struct nh_info *nhi; 340 341 if (nh->is_group) { 342 struct nh_group *nh_grp; 343 344 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 345 if (nh_grp->num_nh > 1) 346 return false; 347 348 nh = nh_grp->nh_entries[0].nh; 349 } 350 351 nhi = rcu_dereference_rtnl(nh->nh_info); 352 return nhi->reject_nh; 353 } 354 355 static inline void nexthop_path_fib_result(struct fib_result *res, int hash) 356 { 357 struct nh_info *nhi; 358 struct nexthop *nh; 359 360 nh = nexthop_select_path(res->fi->nh, hash); 361 nhi = rcu_dereference(nh->nh_info); 362 res->nhc = &nhi->fib_nhc; 363 } 364 365 /* called with rcu read lock or rtnl held */ 366 static inline 367 struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) 368 { 369 struct nh_info *nhi; 370 371 BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); 372 BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); 373 374 if (nh->is_group) { 375 struct nh_group *nh_grp; 376 377 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 378 if (nh_grp->is_multipath) { 379 nh = nexthop_mpath_select(nh_grp, nhsel); 380 if (!nh) 381 return NULL; 382 } 383 } 384 385 nhi = rcu_dereference_rtnl(nh->nh_info); 386 return &nhi->fib_nhc; 387 } 388 389 /* called from fib_table_lookup with rcu_lock */ 390 static inline 391 struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, 392 int fib_flags, 393 const struct flowi4 *flp, 394 int *nhsel) 395 { 396 struct nh_info *nhi; 397 398 if (nh->is_group) { 399 struct nh_group *nhg = rcu_dereference(nh->nh_grp); 400 int i; 401 402 for (i = 0; i < nhg->num_nh; i++) { 403 struct nexthop *nhe = nhg->nh_entries[i].nh; 404 405 nhi = rcu_dereference(nhe->nh_info); 406 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { 407 *nhsel = i; 408 return &nhi->fib_nhc; 409 } 410 } 411 } else { 412 nhi = rcu_dereference(nh->nh_info); 413 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { 414 *nhsel = 0; 415 return &nhi->fib_nhc; 416 } 417 } 418 419 return NULL; 420 } 421 422 static inline bool nexthop_uses_dev(const struct nexthop *nh, 423 const struct net_device *dev) 424 { 425 struct nh_info *nhi; 426 427 if (nh->is_group) { 428 struct nh_group *nhg = rcu_dereference(nh->nh_grp); 429 int i; 430 431 for (i = 0; i < nhg->num_nh; i++) { 432 struct nexthop *nhe = nhg->nh_entries[i].nh; 433 434 nhi = rcu_dereference(nhe->nh_info); 435 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) 436 return true; 437 } 438 } else { 439 nhi = rcu_dereference(nh->nh_info); 440 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) 441 return true; 442 } 443 444 return false; 445 } 446 447 static inline unsigned int fib_info_num_path(const struct fib_info *fi) 448 { 449 if (unlikely(fi->nh)) 450 return nexthop_num_path(fi->nh); 451 452 return fi->fib_nhs; 453 } 454 455 int fib_check_nexthop(struct nexthop *nh, u8 scope, 456 struct netlink_ext_ack *extack); 457 458 static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) 459 { 460 if (unlikely(fi->nh)) 461 return nexthop_fib_nhc(fi->nh, nhsel); 462 463 return &fi->fib_nh[nhsel].nh_common; 464 } 465 466 /* only used when fib_nh is built into fib_info */ 467 static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) 468 { 469 WARN_ON(fi->nh); 470 471 return &fi->fib_nh[nhsel]; 472 } 473 474 /* 475 * IPv6 variants 476 */ 477 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, 478 struct netlink_ext_ack *extack); 479 480 /* Caller should either hold rcu_read_lock(), or RTNL. */ 481 static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) 482 { 483 struct nh_info *nhi; 484 485 if (nh->is_group) { 486 struct nh_group *nh_grp; 487 488 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 489 nh = nexthop_mpath_select(nh_grp, 0); 490 if (!nh) 491 return NULL; 492 } 493 494 nhi = rcu_dereference_rtnl(nh->nh_info); 495 if (nhi->family == AF_INET6) 496 return &nhi->fib6_nh; 497 498 return NULL; 499 } 500 501 static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) 502 { 503 struct fib6_nh *fib6_nh; 504 505 fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; 506 return fib6_nh->fib_nh_dev; 507 } 508 509 static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) 510 { 511 struct nexthop *nh = res->f6i->nh; 512 struct nh_info *nhi; 513 514 nh = nexthop_select_path(nh, hash); 515 516 nhi = rcu_dereference_rtnl(nh->nh_info); 517 if (nhi->reject_nh) { 518 res->fib6_type = RTN_BLACKHOLE; 519 res->fib6_flags |= RTF_REJECT; 520 res->nh = nexthop_fib6_nh(nh); 521 } else { 522 res->nh = &nhi->fib6_nh; 523 } 524 } 525 526 int nexthop_for_each_fib6_nh(struct nexthop *nh, 527 int (*cb)(struct fib6_nh *nh, void *arg), 528 void *arg); 529 530 static inline int nexthop_get_family(struct nexthop *nh) 531 { 532 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); 533 534 return nhi->family; 535 } 536 537 static inline 538 struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) 539 { 540 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); 541 542 return &nhi->fib_nhc; 543 } 544 545 static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, 546 int hash) 547 { 548 struct nh_info *nhi; 549 struct nexthop *nhp; 550 551 nhp = nexthop_select_path(nh, hash); 552 if (unlikely(!nhp)) 553 return NULL; 554 nhi = rcu_dereference(nhp->nh_info); 555 return &nhi->fib_nhc; 556 } 557 #endif 558