1 // SPDX-License-Identifier: GPL-2.0 2 /* Generic nexthop implementation 3 * 4 * Copyright (c) 2017-19 Cumulus Networks 5 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> 6 */ 7 8 #include <linux/nexthop.h> 9 #include <linux/rtnetlink.h> 10 #include <linux/slab.h> 11 #include <net/arp.h> 12 #include <net/ipv6_stubs.h> 13 #include <net/lwtunnel.h> 14 #include <net/ndisc.h> 15 #include <net/nexthop.h> 16 #include <net/route.h> 17 #include <net/sock.h> 18 19 static void remove_nexthop(struct net *net, struct nexthop *nh, 20 struct nl_info *nlinfo); 21 22 #define NH_DEV_HASHBITS 8 23 #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) 24 25 static const struct nla_policy rtm_nh_policy_new[] = { 26 [NHA_ID] = { .type = NLA_U32 }, 27 [NHA_GROUP] = { .type = NLA_BINARY }, 28 [NHA_GROUP_TYPE] = { .type = NLA_U16 }, 29 [NHA_BLACKHOLE] = { .type = NLA_FLAG }, 30 [NHA_OIF] = { .type = NLA_U32 }, 31 [NHA_GATEWAY] = { .type = NLA_BINARY }, 32 [NHA_ENCAP_TYPE] = { .type = NLA_U16 }, 33 [NHA_ENCAP] = { .type = NLA_NESTED }, 34 [NHA_FDB] = { .type = NLA_FLAG }, 35 }; 36 37 static const struct nla_policy rtm_nh_policy_get[] = { 38 [NHA_ID] = { .type = NLA_U32 }, 39 }; 40 41 static const struct nla_policy rtm_nh_policy_dump[] = { 42 [NHA_OIF] = { .type = NLA_U32 }, 43 [NHA_GROUPS] = { .type = NLA_FLAG }, 44 [NHA_MASTER] = { .type = NLA_U32 }, 45 [NHA_FDB] = { .type = NLA_FLAG }, 46 }; 47 48 static bool nexthop_notifiers_is_empty(struct net *net) 49 { 50 return !net->nexthop.notifier_chain.head; 51 } 52 53 static void 54 __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, 55 const struct nexthop *nh) 56 { 57 struct nh_info *nhi = rtnl_dereference(nh->nh_info); 58 59 nh_info->dev = nhi->fib_nhc.nhc_dev; 60 nh_info->gw_family = nhi->fib_nhc.nhc_gw_family; 61 if (nh_info->gw_family == AF_INET) 62 nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4; 63 else if (nh_info->gw_family == AF_INET6) 64 nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; 65 66 nh_info->is_reject = 
nhi->reject_nh; 67 nh_info->is_fdb = nhi->fdb_nh; 68 nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; 69 } 70 71 static int nh_notifier_single_info_init(struct nh_notifier_info *info, 72 const struct nexthop *nh) 73 { 74 info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); 75 if (!info->nh) 76 return -ENOMEM; 77 78 __nh_notifier_single_info_init(info->nh, nh); 79 80 return 0; 81 } 82 83 static void nh_notifier_single_info_fini(struct nh_notifier_info *info) 84 { 85 kfree(info->nh); 86 } 87 88 static int nh_notifier_grp_info_init(struct nh_notifier_info *info, 89 const struct nexthop *nh) 90 { 91 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 92 u16 num_nh = nhg->num_nh; 93 int i; 94 95 info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), 96 GFP_KERNEL); 97 if (!info->nh_grp) 98 return -ENOMEM; 99 100 info->nh_grp->num_nh = num_nh; 101 info->nh_grp->is_fdb = nhg->fdb_nh; 102 103 for (i = 0; i < num_nh; i++) { 104 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 105 106 info->nh_grp->nh_entries[i].id = nhge->nh->id; 107 info->nh_grp->nh_entries[i].weight = nhge->weight; 108 __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, 109 nhge->nh); 110 } 111 112 return 0; 113 } 114 115 static void nh_notifier_grp_info_fini(struct nh_notifier_info *info) 116 { 117 kfree(info->nh_grp); 118 } 119 120 static int nh_notifier_info_init(struct nh_notifier_info *info, 121 const struct nexthop *nh) 122 { 123 info->id = nh->id; 124 info->is_grp = nh->is_group; 125 126 if (info->is_grp) 127 return nh_notifier_grp_info_init(info, nh); 128 else 129 return nh_notifier_single_info_init(info, nh); 130 } 131 132 static void nh_notifier_info_fini(struct nh_notifier_info *info) 133 { 134 if (info->is_grp) 135 nh_notifier_grp_info_fini(info); 136 else 137 nh_notifier_single_info_fini(info); 138 } 139 140 static int call_nexthop_notifiers(struct net *net, 141 enum nexthop_event_type event_type, 142 struct nexthop *nh, 143 struct netlink_ext_ack *extack) 144 { 145 
struct nh_notifier_info info = { 146 .net = net, 147 .extack = extack, 148 }; 149 int err; 150 151 ASSERT_RTNL(); 152 153 if (nexthop_notifiers_is_empty(net)) 154 return 0; 155 156 err = nh_notifier_info_init(&info, nh); 157 if (err) { 158 NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); 159 return err; 160 } 161 162 err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, 163 event_type, &info); 164 nh_notifier_info_fini(&info); 165 166 return notifier_to_errno(err); 167 } 168 169 static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, 170 enum nexthop_event_type event_type, 171 struct nexthop *nh, 172 struct netlink_ext_ack *extack) 173 { 174 struct nh_notifier_info info = { 175 .net = net, 176 .extack = extack, 177 }; 178 int err; 179 180 err = nh_notifier_info_init(&info, nh); 181 if (err) 182 return err; 183 184 err = nb->notifier_call(nb, event_type, &info); 185 nh_notifier_info_fini(&info); 186 187 return notifier_to_errno(err); 188 } 189 190 static unsigned int nh_dev_hashfn(unsigned int val) 191 { 192 unsigned int mask = NH_DEV_HASHSIZE - 1; 193 194 return (val ^ 195 (val >> NH_DEV_HASHBITS) ^ 196 (val >> (NH_DEV_HASHBITS * 2))) & mask; 197 } 198 199 static void nexthop_devhash_add(struct net *net, struct nh_info *nhi) 200 { 201 struct net_device *dev = nhi->fib_nhc.nhc_dev; 202 struct hlist_head *head; 203 unsigned int hash; 204 205 WARN_ON(!dev); 206 207 hash = nh_dev_hashfn(dev->ifindex); 208 head = &net->nexthop.devhash[hash]; 209 hlist_add_head(&nhi->dev_hash, head); 210 } 211 212 static void nexthop_free_mpath(struct nexthop *nh) 213 { 214 struct nh_group *nhg; 215 int i; 216 217 nhg = rcu_dereference_raw(nh->nh_grp); 218 for (i = 0; i < nhg->num_nh; ++i) { 219 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 220 221 WARN_ON(!list_empty(&nhge->nh_list)); 222 nexthop_put(nhge->nh); 223 } 224 225 WARN_ON(nhg->spare == nhg); 226 227 kfree(nhg->spare); 228 kfree(nhg); 229 } 230 231 static void 
nexthop_free_single(struct nexthop *nh) 232 { 233 struct nh_info *nhi; 234 235 nhi = rcu_dereference_raw(nh->nh_info); 236 switch (nhi->family) { 237 case AF_INET: 238 fib_nh_release(nh->net, &nhi->fib_nh); 239 break; 240 case AF_INET6: 241 ipv6_stub->fib6_nh_release(&nhi->fib6_nh); 242 break; 243 } 244 kfree(nhi); 245 } 246 247 void nexthop_free_rcu(struct rcu_head *head) 248 { 249 struct nexthop *nh = container_of(head, struct nexthop, rcu); 250 251 if (nh->is_group) 252 nexthop_free_mpath(nh); 253 else 254 nexthop_free_single(nh); 255 256 kfree(nh); 257 } 258 EXPORT_SYMBOL_GPL(nexthop_free_rcu); 259 260 static struct nexthop *nexthop_alloc(void) 261 { 262 struct nexthop *nh; 263 264 nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL); 265 if (nh) { 266 INIT_LIST_HEAD(&nh->fi_list); 267 INIT_LIST_HEAD(&nh->f6i_list); 268 INIT_LIST_HEAD(&nh->grp_list); 269 INIT_LIST_HEAD(&nh->fdb_list); 270 } 271 return nh; 272 } 273 274 static struct nh_group *nexthop_grp_alloc(u16 num_nh) 275 { 276 struct nh_group *nhg; 277 278 nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL); 279 if (nhg) 280 nhg->num_nh = num_nh; 281 282 return nhg; 283 } 284 285 static void nh_base_seq_inc(struct net *net) 286 { 287 while (++net->nexthop.seq == 0) 288 ; 289 } 290 291 /* no reference taken; rcu lock or rtnl must be held */ 292 struct nexthop *nexthop_find_by_id(struct net *net, u32 id) 293 { 294 struct rb_node **pp, *parent = NULL, *next; 295 296 pp = &net->nexthop.rb_root.rb_node; 297 while (1) { 298 struct nexthop *nh; 299 300 next = rcu_dereference_raw(*pp); 301 if (!next) 302 break; 303 parent = next; 304 305 nh = rb_entry(parent, struct nexthop, rb_node); 306 if (id < nh->id) 307 pp = &next->rb_left; 308 else if (id > nh->id) 309 pp = &next->rb_right; 310 else 311 return nh; 312 } 313 return NULL; 314 } 315 EXPORT_SYMBOL_GPL(nexthop_find_by_id); 316 317 /* used for auto id allocation; called with rtnl held */ 318 static u32 nh_find_unused_id(struct net *net) 319 { 320 u32 
id_start = net->nexthop.last_id_allocated; 321 322 while (1) { 323 net->nexthop.last_id_allocated++; 324 if (net->nexthop.last_id_allocated == id_start) 325 break; 326 327 if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated)) 328 return net->nexthop.last_id_allocated; 329 } 330 return 0; 331 } 332 333 static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) 334 { 335 struct nexthop_grp *p; 336 size_t len = nhg->num_nh * sizeof(*p); 337 struct nlattr *nla; 338 u16 group_type = 0; 339 int i; 340 341 if (nhg->mpath) 342 group_type = NEXTHOP_GRP_TYPE_MPATH; 343 344 if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type)) 345 goto nla_put_failure; 346 347 nla = nla_reserve(skb, NHA_GROUP, len); 348 if (!nla) 349 goto nla_put_failure; 350 351 p = nla_data(nla); 352 for (i = 0; i < nhg->num_nh; ++i) { 353 p->id = nhg->nh_entries[i].nh->id; 354 p->weight = nhg->nh_entries[i].weight - 1; 355 p += 1; 356 } 357 358 return 0; 359 360 nla_put_failure: 361 return -EMSGSIZE; 362 } 363 364 static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, 365 int event, u32 portid, u32 seq, unsigned int nlflags) 366 { 367 struct fib6_nh *fib6_nh; 368 struct fib_nh *fib_nh; 369 struct nlmsghdr *nlh; 370 struct nh_info *nhi; 371 struct nhmsg *nhm; 372 373 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags); 374 if (!nlh) 375 return -EMSGSIZE; 376 377 nhm = nlmsg_data(nlh); 378 nhm->nh_family = AF_UNSPEC; 379 nhm->nh_flags = nh->nh_flags; 380 nhm->nh_protocol = nh->protocol; 381 nhm->nh_scope = 0; 382 nhm->resvd = 0; 383 384 if (nla_put_u32(skb, NHA_ID, nh->id)) 385 goto nla_put_failure; 386 387 if (nh->is_group) { 388 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 389 390 if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB)) 391 goto nla_put_failure; 392 if (nla_put_nh_group(skb, nhg)) 393 goto nla_put_failure; 394 goto out; 395 } 396 397 nhi = rtnl_dereference(nh->nh_info); 398 nhm->nh_family = nhi->family; 399 if (nhi->reject_nh) { 400 if (nla_put_flag(skb, 
NHA_BLACKHOLE)) 401 goto nla_put_failure; 402 goto out; 403 } else if (nhi->fdb_nh) { 404 if (nla_put_flag(skb, NHA_FDB)) 405 goto nla_put_failure; 406 } else { 407 const struct net_device *dev; 408 409 dev = nhi->fib_nhc.nhc_dev; 410 if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex)) 411 goto nla_put_failure; 412 } 413 414 nhm->nh_scope = nhi->fib_nhc.nhc_scope; 415 switch (nhi->family) { 416 case AF_INET: 417 fib_nh = &nhi->fib_nh; 418 if (fib_nh->fib_nh_gw_family && 419 nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4)) 420 goto nla_put_failure; 421 break; 422 423 case AF_INET6: 424 fib6_nh = &nhi->fib6_nh; 425 if (fib6_nh->fib_nh_gw_family && 426 nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6)) 427 goto nla_put_failure; 428 break; 429 } 430 431 if (nhi->fib_nhc.nhc_lwtstate && 432 lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate, 433 NHA_ENCAP, NHA_ENCAP_TYPE) < 0) 434 goto nla_put_failure; 435 436 out: 437 nlmsg_end(skb, nlh); 438 return 0; 439 440 nla_put_failure: 441 nlmsg_cancel(skb, nlh); 442 return -EMSGSIZE; 443 } 444 445 static size_t nh_nlmsg_size_grp(struct nexthop *nh) 446 { 447 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 448 size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh; 449 450 return nla_total_size(sz) + 451 nla_total_size(2); /* NHA_GROUP_TYPE */ 452 } 453 454 static size_t nh_nlmsg_size_single(struct nexthop *nh) 455 { 456 struct nh_info *nhi = rtnl_dereference(nh->nh_info); 457 size_t sz; 458 459 /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE 460 * are mutually exclusive 461 */ 462 sz = nla_total_size(4); /* NHA_OIF */ 463 464 switch (nhi->family) { 465 case AF_INET: 466 if (nhi->fib_nh.fib_nh_gw_family) 467 sz += nla_total_size(4); /* NHA_GATEWAY */ 468 break; 469 470 case AF_INET6: 471 /* NHA_GATEWAY */ 472 if (nhi->fib6_nh.fib_nh_gw_family) 473 sz += nla_total_size(sizeof(const struct in6_addr)); 474 break; 475 } 476 477 if (nhi->fib_nhc.nhc_lwtstate) { 478 sz += 
lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate); 479 sz += nla_total_size(2); /* NHA_ENCAP_TYPE */ 480 } 481 482 return sz; 483 } 484 485 static size_t nh_nlmsg_size(struct nexthop *nh) 486 { 487 size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg)); 488 489 sz += nla_total_size(4); /* NHA_ID */ 490 491 if (nh->is_group) 492 sz += nh_nlmsg_size_grp(nh); 493 else 494 sz += nh_nlmsg_size_single(nh); 495 496 return sz; 497 } 498 499 static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) 500 { 501 unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0; 502 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 503 struct sk_buff *skb; 504 int err = -ENOBUFS; 505 506 skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any()); 507 if (!skb) 508 goto errout; 509 510 err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags); 511 if (err < 0) { 512 /* -EMSGSIZE implies BUG in nh_nlmsg_size() */ 513 WARN_ON(err == -EMSGSIZE); 514 kfree_skb(skb); 515 goto errout; 516 } 517 518 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP, 519 info->nlh, gfp_any()); 520 return; 521 errout: 522 if (err < 0) 523 rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err); 524 } 525 526 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, 527 bool *is_fdb, struct netlink_ext_ack *extack) 528 { 529 if (nh->is_group) { 530 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 531 532 /* nested multipath (group within a group) is not 533 * supported 534 */ 535 if (nhg->mpath) { 536 NL_SET_ERR_MSG(extack, 537 "Multipath group can not be a nexthop within a group"); 538 return false; 539 } 540 *is_fdb = nhg->fdb_nh; 541 } else { 542 struct nh_info *nhi = rtnl_dereference(nh->nh_info); 543 544 if (nhi->reject_nh && npaths > 1) { 545 NL_SET_ERR_MSG(extack, 546 "Blackhole nexthop can not be used in a group with more than 1 path"); 547 return false; 548 } 549 *is_fdb = nhi->fdb_nh; 550 } 551 552 return true; 553 } 554 555 static int nh_check_attr_fdb_group(struct nexthop 
*nh, u8 *nh_family, 556 struct netlink_ext_ack *extack) 557 { 558 struct nh_info *nhi; 559 560 nhi = rtnl_dereference(nh->nh_info); 561 562 if (!nhi->fdb_nh) { 563 NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops"); 564 return -EINVAL; 565 } 566 567 if (*nh_family == AF_UNSPEC) { 568 *nh_family = nhi->family; 569 } else if (*nh_family != nhi->family) { 570 NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops"); 571 return -EINVAL; 572 } 573 574 return 0; 575 } 576 577 static int nh_check_attr_group(struct net *net, 578 struct nlattr *tb[], size_t tb_size, 579 struct netlink_ext_ack *extack) 580 { 581 unsigned int len = nla_len(tb[NHA_GROUP]); 582 u8 nh_family = AF_UNSPEC; 583 struct nexthop_grp *nhg; 584 unsigned int i, j; 585 u8 nhg_fdb = 0; 586 587 if (!len || len & (sizeof(struct nexthop_grp) - 1)) { 588 NL_SET_ERR_MSG(extack, 589 "Invalid length for nexthop group attribute"); 590 return -EINVAL; 591 } 592 593 /* convert len to number of nexthop ids */ 594 len /= sizeof(*nhg); 595 596 nhg = nla_data(tb[NHA_GROUP]); 597 for (i = 0; i < len; ++i) { 598 if (nhg[i].resvd1 || nhg[i].resvd2) { 599 NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0"); 600 return -EINVAL; 601 } 602 if (nhg[i].weight > 254) { 603 NL_SET_ERR_MSG(extack, "Invalid value for weight"); 604 return -EINVAL; 605 } 606 for (j = i + 1; j < len; ++j) { 607 if (nhg[i].id == nhg[j].id) { 608 NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group"); 609 return -EINVAL; 610 } 611 } 612 } 613 614 if (tb[NHA_FDB]) 615 nhg_fdb = 1; 616 nhg = nla_data(tb[NHA_GROUP]); 617 for (i = 0; i < len; ++i) { 618 struct nexthop *nh; 619 bool is_fdb_nh; 620 621 nh = nexthop_find_by_id(net, nhg[i].id); 622 if (!nh) { 623 NL_SET_ERR_MSG(extack, "Invalid nexthop id"); 624 return -EINVAL; 625 } 626 if (!valid_group_nh(nh, len, &is_fdb_nh, extack)) 627 return -EINVAL; 628 629 if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack)) 630 return 
-EINVAL; 631 632 if (!nhg_fdb && is_fdb_nh) { 633 NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); 634 return -EINVAL; 635 } 636 } 637 for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) { 638 if (!tb[i]) 639 continue; 640 if (i == NHA_FDB) 641 continue; 642 NL_SET_ERR_MSG(extack, 643 "No other attributes can be set in nexthop groups"); 644 return -EINVAL; 645 } 646 647 return 0; 648 } 649 650 static bool ipv6_good_nh(const struct fib6_nh *nh) 651 { 652 int state = NUD_REACHABLE; 653 struct neighbour *n; 654 655 rcu_read_lock_bh(); 656 657 n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); 658 if (n) 659 state = n->nud_state; 660 661 rcu_read_unlock_bh(); 662 663 return !!(state & NUD_VALID); 664 } 665 666 static bool ipv4_good_nh(const struct fib_nh *nh) 667 { 668 int state = NUD_REACHABLE; 669 struct neighbour *n; 670 671 rcu_read_lock_bh(); 672 673 n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, 674 (__force u32)nh->fib_nh_gw4); 675 if (n) 676 state = n->nud_state; 677 678 rcu_read_unlock_bh(); 679 680 return !!(state & NUD_VALID); 681 } 682 683 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) 684 { 685 struct nexthop *rc = NULL; 686 struct nh_group *nhg; 687 int i; 688 689 if (!nh->is_group) 690 return nh; 691 692 nhg = rcu_dereference(nh->nh_grp); 693 for (i = 0; i < nhg->num_nh; ++i) { 694 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 695 struct nh_info *nhi; 696 697 if (hash > atomic_read(&nhge->upper_bound)) 698 continue; 699 700 nhi = rcu_dereference(nhge->nh->nh_info); 701 if (nhi->fdb_nh) 702 return nhge->nh; 703 704 /* nexthops always check if it is good and does 705 * not rely on a sysctl for this behavior 706 */ 707 switch (nhi->family) { 708 case AF_INET: 709 if (ipv4_good_nh(&nhi->fib_nh)) 710 return nhge->nh; 711 break; 712 case AF_INET6: 713 if (ipv6_good_nh(&nhi->fib6_nh)) 714 return nhge->nh; 715 break; 716 } 717 718 if (!rc) 719 rc = nhge->nh; 720 } 721 722 return rc; 723 } 724 
EXPORT_SYMBOL_GPL(nexthop_select_path); 725 726 int nexthop_for_each_fib6_nh(struct nexthop *nh, 727 int (*cb)(struct fib6_nh *nh, void *arg), 728 void *arg) 729 { 730 struct nh_info *nhi; 731 int err; 732 733 if (nh->is_group) { 734 struct nh_group *nhg; 735 int i; 736 737 nhg = rcu_dereference_rtnl(nh->nh_grp); 738 for (i = 0; i < nhg->num_nh; i++) { 739 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 740 741 nhi = rcu_dereference_rtnl(nhge->nh->nh_info); 742 err = cb(&nhi->fib6_nh, arg); 743 if (err) 744 return err; 745 } 746 } else { 747 nhi = rcu_dereference_rtnl(nh->nh_info); 748 err = cb(&nhi->fib6_nh, arg); 749 if (err) 750 return err; 751 } 752 753 return 0; 754 } 755 EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh); 756 757 static int check_src_addr(const struct in6_addr *saddr, 758 struct netlink_ext_ack *extack) 759 { 760 if (!ipv6_addr_any(saddr)) { 761 NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects"); 762 return -EINVAL; 763 } 764 return 0; 765 } 766 767 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, 768 struct netlink_ext_ack *extack) 769 { 770 struct nh_info *nhi; 771 bool is_fdb_nh; 772 773 /* fib6_src is unique to a fib6_info and limits the ability to cache 774 * routes in fib6_nh within a nexthop that is potentially shared 775 * across multiple fib entries. If the config wants to use source 776 * routing it can not use nexthop objects. mlxsw also does not allow 777 * fib6_src on routes. 
778 */ 779 if (cfg && check_src_addr(&cfg->fc_src, extack) < 0) 780 return -EINVAL; 781 782 if (nh->is_group) { 783 struct nh_group *nhg; 784 785 nhg = rtnl_dereference(nh->nh_grp); 786 if (nhg->has_v4) 787 goto no_v4_nh; 788 is_fdb_nh = nhg->fdb_nh; 789 } else { 790 nhi = rtnl_dereference(nh->nh_info); 791 if (nhi->family == AF_INET) 792 goto no_v4_nh; 793 is_fdb_nh = nhi->fdb_nh; 794 } 795 796 if (is_fdb_nh) { 797 NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); 798 return -EINVAL; 799 } 800 801 return 0; 802 no_v4_nh: 803 NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop"); 804 return -EINVAL; 805 } 806 EXPORT_SYMBOL_GPL(fib6_check_nexthop); 807 808 /* if existing nexthop has ipv6 routes linked to it, need 809 * to verify this new spec works with ipv6 810 */ 811 static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new, 812 struct netlink_ext_ack *extack) 813 { 814 struct fib6_info *f6i; 815 816 if (list_empty(&old->f6i_list)) 817 return 0; 818 819 list_for_each_entry(f6i, &old->f6i_list, nh_list) { 820 if (check_src_addr(&f6i->fib6_src.addr, extack) < 0) 821 return -EINVAL; 822 } 823 824 return fib6_check_nexthop(new, NULL, extack); 825 } 826 827 static int nexthop_check_scope(struct nh_info *nhi, u8 scope, 828 struct netlink_ext_ack *extack) 829 { 830 if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) { 831 NL_SET_ERR_MSG(extack, 832 "Route with host scope can not have a gateway"); 833 return -EINVAL; 834 } 835 836 if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) { 837 NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop"); 838 return -EINVAL; 839 } 840 841 return 0; 842 } 843 844 /* Invoked by fib add code to verify nexthop by id is ok with 845 * config for prefix; parts of fib_check_nh not done when nexthop 846 * object is used. 
847 */ 848 int fib_check_nexthop(struct nexthop *nh, u8 scope, 849 struct netlink_ext_ack *extack) 850 { 851 struct nh_info *nhi; 852 int err = 0; 853 854 if (nh->is_group) { 855 struct nh_group *nhg; 856 857 nhg = rtnl_dereference(nh->nh_grp); 858 if (nhg->fdb_nh) { 859 NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); 860 err = -EINVAL; 861 goto out; 862 } 863 864 if (scope == RT_SCOPE_HOST) { 865 NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); 866 err = -EINVAL; 867 goto out; 868 } 869 870 /* all nexthops in a group have the same scope */ 871 nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info); 872 err = nexthop_check_scope(nhi, scope, extack); 873 } else { 874 nhi = rtnl_dereference(nh->nh_info); 875 if (nhi->fdb_nh) { 876 NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); 877 err = -EINVAL; 878 goto out; 879 } 880 err = nexthop_check_scope(nhi, scope, extack); 881 } 882 883 out: 884 return err; 885 } 886 887 static int fib_check_nh_list(struct nexthop *old, struct nexthop *new, 888 struct netlink_ext_ack *extack) 889 { 890 struct fib_info *fi; 891 892 list_for_each_entry(fi, &old->fi_list, nh_list) { 893 int err; 894 895 err = fib_check_nexthop(new, fi->fib_scope, extack); 896 if (err) 897 return err; 898 } 899 return 0; 900 } 901 902 static void nh_group_rebalance(struct nh_group *nhg) 903 { 904 int total = 0; 905 int w = 0; 906 int i; 907 908 for (i = 0; i < nhg->num_nh; ++i) 909 total += nhg->nh_entries[i].weight; 910 911 for (i = 0; i < nhg->num_nh; ++i) { 912 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 913 int upper_bound; 914 915 w += nhge->weight; 916 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; 917 atomic_set(&nhge->upper_bound, upper_bound); 918 } 919 } 920 921 static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, 922 struct nl_info *nlinfo) 923 { 924 struct nh_grp_entry *nhges, *new_nhges; 925 struct nexthop *nhp = nhge->nh_parent; 926 struct 
netlink_ext_ack extack; 927 struct nexthop *nh = nhge->nh; 928 struct nh_group *nhg, *newg; 929 int i, j, err; 930 931 WARN_ON(!nh); 932 933 nhg = rtnl_dereference(nhp->nh_grp); 934 newg = nhg->spare; 935 936 /* last entry, keep it visible and remove the parent */ 937 if (nhg->num_nh == 1) { 938 remove_nexthop(net, nhp, nlinfo); 939 return; 940 } 941 942 newg->has_v4 = false; 943 newg->mpath = nhg->mpath; 944 newg->fdb_nh = nhg->fdb_nh; 945 newg->num_nh = nhg->num_nh; 946 947 /* copy old entries to new except the one getting removed */ 948 nhges = nhg->nh_entries; 949 new_nhges = newg->nh_entries; 950 for (i = 0, j = 0; i < nhg->num_nh; ++i) { 951 struct nh_info *nhi; 952 953 /* current nexthop getting removed */ 954 if (nhg->nh_entries[i].nh == nh) { 955 newg->num_nh--; 956 continue; 957 } 958 959 nhi = rtnl_dereference(nhges[i].nh->nh_info); 960 if (nhi->family == AF_INET) 961 newg->has_v4 = true; 962 963 list_del(&nhges[i].nh_list); 964 new_nhges[j].nh_parent = nhges[i].nh_parent; 965 new_nhges[j].nh = nhges[i].nh; 966 new_nhges[j].weight = nhges[i].weight; 967 list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list); 968 j++; 969 } 970 971 nh_group_rebalance(newg); 972 rcu_assign_pointer(nhp->nh_grp, newg); 973 974 list_del(&nhge->nh_list); 975 nexthop_put(nhge->nh); 976 977 err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack); 978 if (err) 979 pr_err("%s\n", extack._msg); 980 981 if (nlinfo) 982 nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); 983 } 984 985 static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, 986 struct nl_info *nlinfo) 987 { 988 struct nh_grp_entry *nhge, *tmp; 989 990 list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) 991 remove_nh_grp_entry(net, nhge, nlinfo); 992 993 /* make sure all see the newly published array before releasing rtnl */ 994 synchronize_net(); 995 } 996 997 static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) 998 { 999 struct nh_group *nhg = 
rcu_dereference_rtnl(nh->nh_grp); 1000 int i, num_nh = nhg->num_nh; 1001 1002 for (i = 0; i < num_nh; ++i) { 1003 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1004 1005 if (WARN_ON(!nhge->nh)) 1006 continue; 1007 1008 list_del_init(&nhge->nh_list); 1009 } 1010 } 1011 1012 /* not called for nexthop replace */ 1013 static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) 1014 { 1015 struct fib6_info *f6i, *tmp; 1016 bool do_flush = false; 1017 struct fib_info *fi; 1018 1019 list_for_each_entry(fi, &nh->fi_list, nh_list) { 1020 fi->fib_flags |= RTNH_F_DEAD; 1021 do_flush = true; 1022 } 1023 if (do_flush) 1024 fib_flush(net); 1025 1026 /* ip6_del_rt removes the entry from this list hence the _safe */ 1027 list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { 1028 /* __ip6_del_rt does a release, so do a hold here */ 1029 fib6_info_hold(f6i); 1030 ipv6_stub->ip6_del_rt(net, f6i, 1031 !net->ipv4.sysctl_nexthop_compat_mode); 1032 } 1033 } 1034 1035 static void __remove_nexthop(struct net *net, struct nexthop *nh, 1036 struct nl_info *nlinfo) 1037 { 1038 __remove_nexthop_fib(net, nh); 1039 1040 if (nh->is_group) { 1041 remove_nexthop_group(nh, nlinfo); 1042 } else { 1043 struct nh_info *nhi; 1044 1045 nhi = rtnl_dereference(nh->nh_info); 1046 if (nhi->fib_nhc.nhc_dev) 1047 hlist_del(&nhi->dev_hash); 1048 1049 remove_nexthop_from_groups(net, nh, nlinfo); 1050 } 1051 } 1052 1053 static void remove_nexthop(struct net *net, struct nexthop *nh, 1054 struct nl_info *nlinfo) 1055 { 1056 call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL); 1057 1058 /* remove from the tree */ 1059 rb_erase(&nh->rb_node, &net->nexthop.rb_root); 1060 1061 if (nlinfo) 1062 nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo); 1063 1064 __remove_nexthop(net, nh, nlinfo); 1065 nh_base_seq_inc(net); 1066 1067 nexthop_put(nh); 1068 } 1069 1070 /* if any FIB entries reference this nexthop, any dst entries 1071 * need to be regenerated 1072 */ 1073 static void nh_rt_cache_flush(struct net 
*net, struct nexthop *nh) 1074 { 1075 struct fib6_info *f6i; 1076 1077 if (!list_empty(&nh->fi_list)) 1078 rt_cache_flush(net); 1079 1080 list_for_each_entry(f6i, &nh->f6i_list, nh_list) 1081 ipv6_stub->fib6_update_sernum(net, f6i); 1082 } 1083 1084 static int replace_nexthop_grp(struct net *net, struct nexthop *old, 1085 struct nexthop *new, 1086 struct netlink_ext_ack *extack) 1087 { 1088 struct nh_group *oldg, *newg; 1089 int i, err; 1090 1091 if (!new->is_group) { 1092 NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); 1093 return -EINVAL; 1094 } 1095 1096 err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); 1097 if (err) 1098 return err; 1099 1100 oldg = rtnl_dereference(old->nh_grp); 1101 newg = rtnl_dereference(new->nh_grp); 1102 1103 /* update parents - used by nexthop code for cleanup */ 1104 for (i = 0; i < newg->num_nh; i++) 1105 newg->nh_entries[i].nh_parent = old; 1106 1107 rcu_assign_pointer(old->nh_grp, newg); 1108 1109 for (i = 0; i < oldg->num_nh; i++) 1110 oldg->nh_entries[i].nh_parent = new; 1111 1112 rcu_assign_pointer(new->nh_grp, oldg); 1113 1114 return 0; 1115 } 1116 1117 static void nh_group_v4_update(struct nh_group *nhg) 1118 { 1119 struct nh_grp_entry *nhges; 1120 bool has_v4 = false; 1121 int i; 1122 1123 nhges = nhg->nh_entries; 1124 for (i = 0; i < nhg->num_nh; i++) { 1125 struct nh_info *nhi; 1126 1127 nhi = rtnl_dereference(nhges[i].nh->nh_info); 1128 if (nhi->family == AF_INET) 1129 has_v4 = true; 1130 } 1131 nhg->has_v4 = has_v4; 1132 } 1133 1134 static int replace_nexthop_single(struct net *net, struct nexthop *old, 1135 struct nexthop *new, 1136 struct netlink_ext_ack *extack) 1137 { 1138 u8 old_protocol, old_nh_flags; 1139 struct nh_info *oldi, *newi; 1140 struct nh_grp_entry *nhge; 1141 int err; 1142 1143 if (new->is_group) { 1144 NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group."); 1145 return -EINVAL; 1146 } 1147 1148 err = call_nexthop_notifiers(net, 
NEXTHOP_EVENT_REPLACE, new, extack); 1149 if (err) 1150 return err; 1151 1152 /* Hardware flags were set on 'old' as 'new' is not in the red-black 1153 * tree. Therefore, inherit the flags from 'old' to 'new'. 1154 */ 1155 new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP); 1156 1157 oldi = rtnl_dereference(old->nh_info); 1158 newi = rtnl_dereference(new->nh_info); 1159 1160 newi->nh_parent = old; 1161 oldi->nh_parent = new; 1162 1163 old_protocol = old->protocol; 1164 old_nh_flags = old->nh_flags; 1165 1166 old->protocol = new->protocol; 1167 old->nh_flags = new->nh_flags; 1168 1169 rcu_assign_pointer(old->nh_info, newi); 1170 rcu_assign_pointer(new->nh_info, oldi); 1171 1172 /* Send a replace notification for all the groups using the nexthop. */ 1173 list_for_each_entry(nhge, &old->grp_list, nh_list) { 1174 struct nexthop *nhp = nhge->nh_parent; 1175 1176 err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, 1177 extack); 1178 if (err) 1179 goto err_notify; 1180 } 1181 1182 /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially 1183 * update IPv4 indication in all the groups using the nexthop. 
1184 */ 1185 if (oldi->family == AF_INET && newi->family == AF_INET6) { 1186 list_for_each_entry(nhge, &old->grp_list, nh_list) { 1187 struct nexthop *nhp = nhge->nh_parent; 1188 struct nh_group *nhg; 1189 1190 nhg = rtnl_dereference(nhp->nh_grp); 1191 nh_group_v4_update(nhg); 1192 } 1193 } 1194 1195 return 0; 1196 1197 err_notify: 1198 rcu_assign_pointer(new->nh_info, newi); 1199 rcu_assign_pointer(old->nh_info, oldi); 1200 old->nh_flags = old_nh_flags; 1201 old->protocol = old_protocol; 1202 oldi->nh_parent = old; 1203 newi->nh_parent = new; 1204 list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) { 1205 struct nexthop *nhp = nhge->nh_parent; 1206 1207 call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack); 1208 } 1209 call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack); 1210 return err; 1211 } 1212 1213 static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, 1214 struct nl_info *info) 1215 { 1216 struct fib6_info *f6i; 1217 1218 if (!list_empty(&nh->fi_list)) { 1219 struct fib_info *fi; 1220 1221 /* expectation is a few fib_info per nexthop and then 1222 * a lot of routes per fib_info. 
So mark the fib_info
	 * and then walk the fib tables once
	 */
	list_for_each_entry(fi, &nh->fi_list, nh_list)
		fi->nh_updated = true;

		fib_info_notify_update(net, info);

	list_for_each_entry(fi, &nh->fi_list, nh_list)
		fi->nh_updated = false;
	}

	list_for_each_entry(f6i, &nh->f6i_list, nh_list)
		ipv6_stub->fib6_rt_update(net, f6i, info);
}

/* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
 * linked to this nexthop and for all groups that the nexthop
 * is a member of
 */
static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
				   struct nl_info *info)
{
	struct nh_grp_entry *nhge;

	__nexthop_replace_notify(net, nh, info);

	/* also notify on behalf of every group this nexthop is in */
	list_for_each_entry(nhge, &nh->grp_list, nh_list)
		__nexthop_replace_notify(net, nhge->nh_parent, info);
}

/* Replace nexthop 'old' in place with 'new', keeping old's id.
 * All IPv4/IPv6 FIB entries and all groups referencing 'old' are
 * validated against the new definition before anything is changed;
 * on success the temporary 'new' object has been absorbed and is
 * released here. Caller holds RTNL.
 */
static int replace_nexthop(struct net *net, struct nexthop *old,
			   struct nexthop *new, struct netlink_ext_ack *extack)
{
	bool new_is_reject = false;
	struct nh_grp_entry *nhge;
	int err;

	/* check that existing FIB entries are ok with the
	 * new nexthop definition
	 */
	err = fib_check_nh_list(old, new, extack);
	if (err)
		return err;

	err = fib6_check_nh_list(old, new, extack);
	if (err)
		return err;

	if (!new->is_group) {
		struct nh_info *nhi = rtnl_dereference(new->nh_info);

		new_is_reject = nhi->reject_nh;
	}

	/* validate every group that currently contains 'old' */
	list_for_each_entry(nhge, &old->grp_list, nh_list) {
		/* if new nexthop is a blackhole, any groups using this
		 * nexthop cannot have more than 1 path
		 */
		if (new_is_reject &&
		    nexthop_num_path(nhge->nh_parent) > 1) {
			NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
			return -EINVAL;
		}

		err = fib_check_nh_list(nhge->nh_parent, new, extack);
		if (err)
			return err;

		err = fib6_check_nh_list(nhge->nh_parent, new, extack);
		if (err)
			return err;
	}

	if (old->is_group)
		err = replace_nexthop_grp(net, old, new, extack);
	else
		err = replace_nexthop_single(net, old, new, extack);

	if (!err) {
		/* drop cached routes that went through the old definition */
		nh_rt_cache_flush(net, old);

		/* 'new' was only a carrier for the configuration; tear it
		 * down and drop the reference taken at creation
		 */
		__remove_nexthop(net, new, NULL);
		nexthop_put(new);
	}

	return err;
}

/* called with rtnl_lock held */
static int insert_nexthop(struct net *net, struct nexthop *new_nh,
			  struct nh_config *cfg, struct netlink_ext_ack *extack)
{
	struct rb_node **pp, *parent = NULL, *next;
	struct rb_root *root = &net->nexthop.rb_root;
	bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
	bool create = !!(cfg->nlflags & NLM_F_CREATE);
	u32 new_id = new_nh->id;
	int replace_notify = 0;
	int rc = -EEXIST;

	/* walk the id-ordered rbtree to the insertion point or to an
	 * existing entry with the same id
	 */
	pp = &root->rb_node;
	while (1) {
		struct nexthop *nh;

		next = *pp;
		if (!next)
			break;

		parent = next;

		nh = rb_entry(parent, struct nexthop, rb_node);
		if (new_id < nh->id) {
			pp = &next->rb_left;
		} else if (new_id > nh->id) {
			pp = &next->rb_right;
		} else if (replace) {
			rc = replace_nexthop(net, nh, new_nh, extack);
			if (!rc) {
				new_nh = nh; /* send notification with old nh */
				replace_notify = 1;
			}
			goto out;
		} else {
			/* id already exists and not a replace */
			goto out;
		}
	}

	if (replace && !create) {
		NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
		rc = -ENOENT;
		goto out;
	}

	rb_link_node_rcu(&new_nh->rb_node, parent, pp);
	rb_insert_color(&new_nh->rb_node, root);

	/* give listeners a chance to veto; unlink again on failure */
	rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
	if (rc)
		rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);

out:
	if (!rc) {
		nh_base_seq_inc(net);
		nexthop_notify(RTM_NEWNEXTHOP,
new_nh, &cfg->nlinfo);
		/* compat mode: also resend affected routes after a replace */
		if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
			nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
	}

	return rc;
}

/* rtnl */
/* remove all nexthops tied to a device being deleted */
static void nexthop_flush_dev(struct net_device *dev)
{
	unsigned int hash = nh_dev_hashfn(dev->ifindex);
	struct net *net = dev_net(dev);
	struct hlist_head *head = &net->nexthop.devhash[hash];
	struct hlist_node *n;
	struct nh_info *nhi;

	/* _safe walk: remove_nexthop() unlinks the entry we stand on */
	hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
		if (nhi->fib_nhc.nhc_dev != dev)
			continue;

		remove_nexthop(net, nhi->nh_parent, NULL);
	}
}

/* rtnl; called when net namespace is deleted */
static void flush_all_nexthops(struct net *net)
{
	struct rb_root *root = &net->nexthop.rb_root;
	struct rb_node *node;
	struct nexthop *nh;

	while ((node = rb_first(root))) {
		nh = rb_entry(node, struct nexthop, rb_node);
		remove_nexthop(net, nh, NULL);
		cond_resched();
	}
}

/* Build a nexthop group from the NHA_GROUP attribute. Takes a
 * reference on each member nexthop and links the group into each
 * member's grp_list. Returns the new nexthop or ERR_PTR(); on any
 * failure all partially-taken references are dropped.
 */
static struct nexthop *nexthop_create_group(struct net *net,
					    struct nh_config *cfg)
{
	struct nlattr *grps_attr = cfg->nh_grp;
	struct nexthop_grp *entry = nla_data(grps_attr);
	u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
	struct nh_group *nhg;
	struct nexthop *nh;
	int i;

	if (WARN_ON(!num_nh))
		return ERR_PTR(-EINVAL);

	nh = nexthop_alloc();
	if (!nh)
		return ERR_PTR(-ENOMEM);

	nh->is_group = 1;

	nhg = nexthop_grp_alloc(num_nh);
	if (!nhg) {
		kfree(nh);
		return ERR_PTR(-ENOMEM);
	}

	/* spare group used for removals */
	nhg->spare = nexthop_grp_alloc(num_nh);
	if (!nhg->spare) {
		kfree(nhg);
		kfree(nh);
		return ERR_PTR(-ENOMEM);
	}
	/* the two groups point at each other so either can be active */
	nhg->spare->spare = nhg;

	for (i = 0; i < nhg->num_nh; ++i) {
		struct nexthop *nhe;
		struct nh_info *nhi;

		nhe = nexthop_find_by_id(net, entry[i].id);
		if (!nexthop_get(nhe))
			goto out_no_nh;

		nhi = rtnl_dereference(nhe->nh_info);
		if (nhi->family == AF_INET)
			nhg->has_v4 = true;

		nhg->nh_entries[i].nh = nhe;
		/* uapi weight is offset by one: 0 means weight 1 */
		nhg->nh_entries[i].weight = entry[i].weight + 1;
		list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
		nhg->nh_entries[i].nh_parent = nh;
	}

	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
		nhg->mpath = 1;
		nh_group_rebalance(nhg);
	}

	if (cfg->nh_fdb)
		nhg->fdb_nh = 1;

	rcu_assign_pointer(nh->nh_grp, nhg);

	return nh;

out_no_nh:
	/* entry i was never taken; unwind entries [0, i) only */
	for (i--; i >= 0; --i) {
		list_del(&nhg->nh_entries[i].nh_list);
		nexthop_put(nhg->nh_entries[i].nh);
	}

	kfree(nhg->spare);
	kfree(nhg);
	kfree(nh);

	return ERR_PTR(-ENOENT);
}

/* Initialize the embedded fib_nh for an IPv4 nexthop from cfg.
 * Returns 0 on success; on failure the fib_nh has been released.
 */
static int nh_create_ipv4(struct net *net, struct nexthop *nh,
			  struct nh_info *nhi, struct nh_config *cfg,
			  struct netlink_ext_ack *extack)
{
	struct fib_nh *fib_nh = &nhi->fib_nh;
	struct fib_config fib_cfg = {
		.fc_oif = cfg->nh_ifindex,
		.fc_gw4 = cfg->gw.ipv4,
		.fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
		.fc_flags = cfg->nh_flags,
		.fc_encap = cfg->nh_encap,
		.fc_encap_type = cfg->nh_encap_type,
	};
	u32 tb_id = (cfg->dev ?
l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
	int err;

	err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
	if (err) {
		fib_nh_release(net, fib_nh);
		goto out;
	}

	/* fdb nexthops are not validated against a table/device */
	if (nhi->fdb_nh)
		goto out;

	/* sets nh_dev if successful */
	err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
	if (!err) {
		nh->nh_flags = fib_nh->fib_nh_flags;
		fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
					  fib_nh->fib_nh_scope);
	} else {
		fib_nh_release(net, fib_nh);
	}
out:
	return err;
}

/* Initialize the embedded fib6_nh for an IPv6 nexthop from cfg via
 * the ipv6 stub. Returns 0 on success; on failure the fib6_nh has
 * been released.
 */
static int nh_create_ipv6(struct net *net, struct nexthop *nh,
			  struct nh_info *nhi, struct nh_config *cfg,
			  struct netlink_ext_ack *extack)
{
	struct fib6_nh *fib6_nh = &nhi->fib6_nh;
	struct fib6_config fib6_cfg = {
		.fc_table = l3mdev_fib_table(cfg->dev),
		.fc_ifindex = cfg->nh_ifindex,
		.fc_gateway = cfg->gw.ipv6,
		.fc_flags = cfg->nh_flags,
		.fc_encap = cfg->nh_encap,
		.fc_encap_type = cfg->nh_encap_type,
		.fc_is_fdb = cfg->nh_fdb,
	};
	int err;

	if (!ipv6_addr_any(&cfg->gw.ipv6))
		fib6_cfg.fc_flags |= RTF_GATEWAY;

	/* sets nh_dev if successful */
	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
				      extack);
	if (err)
		ipv6_stub->fib6_nh_release(fib6_nh);
	else
		nh->nh_flags = fib6_nh->fib_nh_flags;

	return err;
}

/* Create a single (non-group) nexthop from cfg. Allocates the
 * nexthop and its nh_info, performs family-specific init, and links
 * the result into the per-device hash (unless it is an fdb nexthop).
 * Returns the new nexthop or ERR_PTR().
 */
static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
				      struct netlink_ext_ack *extack)
{
	struct nh_info *nhi;
	struct nexthop *nh;
	int err = 0;

	nh = nexthop_alloc();
	if (!nh)
		return ERR_PTR(-ENOMEM);

	nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
	if (!nhi) {
		kfree(nh);
		return ERR_PTR(-ENOMEM);
	}

	nh->nh_flags = cfg->nh_flags;
	nh->net = net;

	nhi->nh_parent = nh;
	nhi->family = cfg->nh_family;
	nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;

	if (cfg->nh_fdb)
		nhi->fdb_nh = 1;

	if (cfg->nh_blackhole) {
		/* blackhole nexthops are anchored to the loopback device */
		nhi->reject_nh = 1;
		cfg->nh_ifindex = net->loopback_dev->ifindex;
	}

	switch (cfg->nh_family) {
	case AF_INET:
		err = nh_create_ipv4(net, nh, nhi, cfg, extack);
		break;
	case AF_INET6:
		err = nh_create_ipv6(net, nh, nhi, cfg, extack);
		break;
	}

	if (err) {
		kfree(nhi);
		kfree(nh);
		return ERR_PTR(err);
	}

	/* add the entry to the device based hash */
	if (!nhi->fdb_nh)
		nexthop_devhash_add(net, nhi);

	rcu_assign_pointer(nh->nh_info, nhi);

	return nh;
}

/* called with rtnl lock held */
static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct nexthop *nh;
	int err;

	if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
		NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
		return ERR_PTR(-EINVAL);
	}

	/* auto-assign an id when the user did not supply one */
	if (!cfg->nh_id) {
		cfg->nh_id = nh_find_unused_id(net);
		if (!cfg->nh_id) {
			NL_SET_ERR_MSG(extack, "No unused id");
			return ERR_PTR(-EINVAL);
		}
	}

	if (cfg->nh_grp)
		nh = nexthop_create_group(net, cfg);
	else
		nh = nexthop_create(net, cfg, extack);

	if (IS_ERR(nh))
		return nh;

	refcount_set(&nh->refcnt, 1);
	nh->id = cfg->nh_id;
	nh->protocol = cfg->nh_protocol;
	nh->net = net;

	err = insert_nexthop(net, nh, cfg, extack);
	if (err) {
		__remove_nexthop(net, nh, NULL);
		nexthop_put(nh);
		nh = ERR_PTR(err);
	}

	return nh;
}

/* Parse and validate an RTM_NEWNEXTHOP request into *cfg.
 * Returns 0 with cfg filled in, or a negative errno with extack set.
 */
static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
			    struct nlmsghdr *nlh, struct nh_config *cfg,
			    struct netlink_ext_ack *extack)
{
	struct nhmsg *nhm = nlmsg_data(nlh);
	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
	int err;

	err =
nlmsg_parse(nlh, sizeof(*nhm), tb,
			  ARRAY_SIZE(rtm_nh_policy_new) - 1,
			  rtm_nh_policy_new, extack);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (nhm->resvd || nhm->nh_scope) {
		NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
		goto out;
	}
	if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
		NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
		goto out;
	}

	/* AF_UNSPEC is only acceptable for group definitions */
	switch (nhm->nh_family) {
	case AF_INET:
	case AF_INET6:
		break;
	case AF_UNSPEC:
		if (tb[NHA_GROUP])
			break;
		fallthrough;
	default:
		NL_SET_ERR_MSG(extack, "Invalid address family");
		goto out;
	}

	memset(cfg, 0, sizeof(*cfg));
	cfg->nlflags = nlh->nlmsg_flags;
	cfg->nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->nlinfo.nlh = nlh;
	cfg->nlinfo.nl_net = net;

	cfg->nh_family = nhm->nh_family;
	cfg->nh_protocol = nhm->nh_protocol;
	cfg->nh_flags = nhm->nh_flags;

	if (tb[NHA_ID])
		cfg->nh_id = nla_get_u32(tb[NHA_ID]);

	if (tb[NHA_FDB]) {
		if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
		    tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
			NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
			goto out;
		}
		if (nhm->nh_flags) {
			NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
			goto out;
		}
		cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
	}

	if (tb[NHA_GROUP]) {
		if (nhm->nh_family != AF_UNSPEC) {
			NL_SET_ERR_MSG(extack, "Invalid family for group");
			goto out;
		}
		cfg->nh_grp = tb[NHA_GROUP];

		cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
		if (tb[NHA_GROUP_TYPE])
			cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);

		if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
			NL_SET_ERR_MSG(extack, "Invalid group type");
			goto out;
		}
		err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack);

		/* no other attributes should be set */
		goto out;
	}

	if (tb[NHA_BLACKHOLE]) {
		if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
		    tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
			NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
			goto out;
		}

		cfg->nh_blackhole = 1;
		err = 0;
		goto out;
	}

	if (!cfg->nh_fdb && !tb[NHA_OIF]) {
		NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
		goto out;
	}

	if (!cfg->nh_fdb && tb[NHA_OIF]) {
		cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
		if (cfg->nh_ifindex)
			cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);

		if (!cfg->dev) {
			NL_SET_ERR_MSG(extack, "Invalid device index");
			goto out;
		} else if (!(cfg->dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
			err = -ENETDOWN;
			goto out;
		} else if (!netif_carrier_ok(cfg->dev)) {
			NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
			err = -ENETDOWN;
			goto out;
		}
	}

	err = -EINVAL;
	if (tb[NHA_GATEWAY]) {
		struct nlattr *gwa = tb[NHA_GATEWAY];

		/* gateway length must match the request's address family */
		switch (cfg->nh_family) {
		case AF_INET:
			if (nla_len(gwa) != sizeof(u32)) {
				NL_SET_ERR_MSG(extack, "Invalid gateway");
				goto out;
			}
			cfg->gw.ipv4 = nla_get_be32(gwa);
			break;
		case AF_INET6:
			if (nla_len(gwa) != sizeof(struct in6_addr)) {
				NL_SET_ERR_MSG(extack, "Invalid gateway");
				goto out;
			}
			cfg->gw.ipv6 = nla_get_in6_addr(gwa);
			break;
		default:
			NL_SET_ERR_MSG(extack,
				       "Unknown address family for gateway");
			goto out;
		}
	} else {
		/* device only nexthop (no gateway) */
		if (cfg->nh_flags & RTNH_F_ONLINK) {
			NL_SET_ERR_MSG(extack,
				       "ONLINK flag can not be set for nexthop without a gateway");
			goto out;
		}
	}

	if (tb[NHA_ENCAP]) {
		cfg->nh_encap = tb[NHA_ENCAP];

		if (!tb[NHA_ENCAP_TYPE]) {
			NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
			goto out;
		}

		cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
		err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
		if (err < 0)
			goto out;

	} else if (tb[NHA_ENCAP_TYPE]) {
		NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
		goto out;
	}


	err = 0;
out:
	return err;
}

/* rtnl */
static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nh_config cfg;
	struct nexthop *nh;
	int err;

	err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
	if (!err) {
		nh = nexthop_add(net, &cfg, extack);
		if (IS_ERR(nh))
			err = PTR_ERR(nh);
	}

	return err;
}

/* Validate an RTM_GETNEXTHOP/RTM_DELNEXTHOP request: the only
 * acceptable payload is a non-zero NHA_ID, returned via *id.
 */
static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id,
				struct netlink_ext_ack *extack)
{
	struct nhmsg *nhm = nlmsg_data(nlh);
	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
	int err;

	err = nlmsg_parse(nlh, sizeof(*nhm), tb,
			  ARRAY_SIZE(rtm_nh_policy_get) - 1,
			  rtm_nh_policy_get, extack);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
		NL_SET_ERR_MSG(extack, "Invalid values in header");
		goto out;
	}

	if (!tb[NHA_ID]) {
		NL_SET_ERR_MSG(extack, "Nexthop id is missing");
		goto out;
	}

	*id = nla_get_u32(tb[NHA_ID]);
	if (!(*id))
		NL_SET_ERR_MSG(extack, "Invalid nexthop id");
	else
		err = 0;
out:
	return err;
}

/* rtnl */
static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
			   struct netlink_ext_ack *extack)
{
	struct net *net =
sock_net(skb->sk);
	struct nl_info nlinfo = {
		.nlh = nlh,
		.nl_net = net,
		.portid = NETLINK_CB(skb).portid,
	};
	struct nexthop *nh;
	int err;
	u32 id;

	err = nh_valid_get_del_req(nlh, &id, extack);
	if (err)
		return err;

	nh = nexthop_find_by_id(net, id);
	if (!nh)
		return -ENOENT;

	remove_nexthop(net, nh, &nlinfo);

	return 0;
}

/* rtnl */
static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct sk_buff *skb = NULL;
	struct nexthop *nh;
	int err;
	u32 id;

	err = nh_valid_get_del_req(nlh, &id, extack);
	if (err)
		return err;

	err = -ENOBUFS;
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		goto out;

	err = -ENOENT;
	nh = nexthop_find_by_id(net, id);
	if (!nh)
		goto errout_free;

	err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
			   nlh->nlmsg_seq, 0);
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		goto errout_free;
	}

	/* rtnl_unicast() consumes the skb regardless of outcome */
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
out:
	return err;
errout_free:
	kfree_skb(skb);
	goto out;
}

/* Return true if 'nh' should be skipped by the current dump filters
 * (group-only, family, device index, master device index).
 */
static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
			     bool group_filter, u8 family)
{
	const struct net_device *dev;
	const struct nh_info *nhi;

	if (group_filter && !nh->is_group)
		return true;

	if (!dev_idx && !master_idx && !family)
		return false;

	/* remaining filters only apply to single nexthops */
	if (nh->is_group)
		return true;

	nhi = rtnl_dereference(nh->nh_info);
	if (family && nhi->family != family)
		return true;

	dev = nhi->fib_nhc.nhc_dev;
	if (dev_idx && (!dev || dev->ifindex != dev_idx))
		return true;

	if (master_idx) {
		struct net_device *master;

		if (!dev)
			return true;

		master = netdev_master_upper_dev_get((struct net_device *)dev);
		if (!master || master->ifindex != master_idx)
			return true;
	}

	return false;
}

/* Parse dump-request filters (oif, master, groups-only, fdb) into the
 * caller's out parameters. Returns 0 or a negative errno.
 */
static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
			     int *master_idx, bool *group_filter,
			     bool *fdb_filter, struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)];
	struct nhmsg *nhm;
	int err;
	u32 idx;

	err = nlmsg_parse(nlh, sizeof(*nhm), tb,
			  ARRAY_SIZE(rtm_nh_policy_dump) - 1,
			  rtm_nh_policy_dump, NULL);
	if (err < 0)
		return err;

	if (tb[NHA_OIF]) {
		idx = nla_get_u32(tb[NHA_OIF]);
		if (idx > INT_MAX) {
			NL_SET_ERR_MSG(extack, "Invalid device index");
			return -EINVAL;
		}
		*dev_idx = idx;
	}
	if (tb[NHA_MASTER]) {
		idx = nla_get_u32(tb[NHA_MASTER]);
		if (idx > INT_MAX) {
			NL_SET_ERR_MSG(extack, "Invalid master device index");
			return -EINVAL;
		}
		*master_idx = idx;
	}
	*group_filter = nla_get_flag(tb[NHA_GROUPS]);
	*fdb_filter = nla_get_flag(tb[NHA_FDB]);

	nhm = nlmsg_data(nlh);
	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
		return -EINVAL;
	}

	return 0;
}

/* rtnl */
static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
{
	bool group_filter = false, fdb_filter = false;
	struct nhmsg *nhm = nlmsg_data(cb->nlh);
	int dev_filter_idx = 0, master_idx = 0;
	struct net *net = sock_net(skb->sk);
	struct rb_root *root = &net->nexthop.rb_root;
	struct rb_node *node;
	int idx = 0, s_idx;
	int err;

	err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
				&group_filter, &fdb_filter, cb);
	if (err
< 0)
		return err;

	/* resume the dump from where the previous pass stopped */
	s_idx = cb->args[0];
	for (node = rb_first(root); node; node = rb_next(node)) {
		struct nexthop *nh;

		if (idx < s_idx)
			goto cont;

		nh = rb_entry(node, struct nexthop, rb_node);
		if (nh_dump_filtered(nh, dev_filter_idx, master_idx,
				     group_filter, nhm->nh_family))
			goto cont;

		err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
				   NETLINK_CB(cb->skb).portid,
				   cb->nlh->nlmsg_seq, NLM_F_MULTI);
		if (err < 0) {
			/* partial skb: return what we have, resume later */
			if (likely(skb->len))
				goto out;

			goto out_err;
		}
cont:
		idx++;
	}

out:
	err = skb->len;
out_err:
	cb->args[0] = idx;
	cb->seq = net->nexthop.seq;
	nl_dump_check_consistent(cb, nlmsg_hdr(skb));

	return err;
}

/* Propagate a device MTU change to the IPv4 nexthops using it */
static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
	unsigned int hash = nh_dev_hashfn(dev->ifindex);
	struct net *net = dev_net(dev);
	struct hlist_head *head = &net->nexthop.devhash[hash];
	struct hlist_node *n;
	struct nh_info *nhi;

	hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
		if (nhi->fib_nhc.nhc_dev == dev) {
			if (nhi->family == AF_INET)
				fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
						   orig_mtu);
		}
	}
}

/* rtnl */
static int nh_netdev_event(struct notifier_block *this,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct netdev_notifier_info_ext *info_ext;

	switch (event) {
	case NETDEV_DOWN:
	case NETDEV_UNREGISTER:
		nexthop_flush_dev(dev);
		break;
	case NETDEV_CHANGE:
		/* flush when the device loses running/lower-up state */
		if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
			nexthop_flush_dev(dev);
		break;
	case NETDEV_CHANGEMTU:
		info_ext = ptr;
		nexthop_sync_mtu(dev, info_ext->ext.mtu);
		rt_cache_flush(dev_net(dev));
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block nh_netdev_notifier = {
	.notifier_call = nh_netdev_event,
};

/* Replay all existing nexthops to a newly registering notifier so it
 * starts with a complete view. Caller holds RTNL.
 */
static int nexthops_dump(struct net *net, struct notifier_block *nb,
			 struct netlink_ext_ack *extack)
{
	struct rb_root *root = &net->nexthop.rb_root;
	struct rb_node *node;
	int err = 0;

	for (node = rb_first(root); node; node = rb_next(node)) {
		struct nexthop *nh;

		nh = rb_entry(node, struct nexthop, rb_node);
		err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
					    extack);
		if (err)
			break;
	}

	return err;
}

int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
			      struct netlink_ext_ack *extack)
{
	int err;

	/* dump under RTNL before registering so no update is missed */
	rtnl_lock();
	err = nexthops_dump(net, nb, extack);
	if (err)
		goto unlock;
	err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
					       nb);
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_nexthop_notifier);

int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
						  nb);
}
EXPORT_SYMBOL(unregister_nexthop_notifier);

/* Update a nexthop's hardware offload/trap flags; called by drivers */
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap)
{
	struct nexthop *nexthop;

	rcu_read_lock();

	nexthop = nexthop_find_by_id(net, id);
	if (!nexthop)
		goto out;

	nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
	if (offload)
		nexthop->nh_flags |= RTNH_F_OFFLOAD;
	if (trap)
		nexthop->nh_flags |= RTNH_F_TRAP;

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL(nexthop_set_hw_flags);

static void __net_exit nexthop_net_exit(struct net *net)
{
	rtnl_lock();
	flush_all_nexthops(net);
	rtnl_unlock();
	kfree(net->nexthop.devhash);
}

static int __net_init nexthop_net_init(struct net *net)
{
	size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;

	net->nexthop.rb_root = RB_ROOT;
	net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
	if (!net->nexthop.devhash)
		return -ENOMEM;
	BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain);

	return 0;
}

static struct pernet_operations nexthop_net_ops = {
	.init = nexthop_net_init,
	.exit = nexthop_net_exit,
};

/* Module init: per-netns state, netdev notifier and rtnetlink handlers */
static int __init nexthop_init(void)
{
	register_pernet_subsys(&nexthop_net_ops);

	register_netdevice_notifier(&nh_netdev_notifier);

	rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
		      rtm_dump_nexthop, 0);

	rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
	rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);

	rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
	rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);

	return 0;
}
subsys_initcall(nexthop_init);