1 // SPDX-License-Identifier: GPL-2.0 2 /* Generic nexthop implementation 3 * 4 * Copyright (c) 2017-19 Cumulus Networks 5 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> 6 */ 7 8 #include <linux/nexthop.h> 9 #include <linux/rtnetlink.h> 10 #include <linux/slab.h> 11 #include <net/arp.h> 12 #include <net/ipv6_stubs.h> 13 #include <net/lwtunnel.h> 14 #include <net/ndisc.h> 15 #include <net/nexthop.h> 16 #include <net/route.h> 17 #include <net/sock.h> 18 19 static void remove_nexthop(struct net *net, struct nexthop *nh, 20 struct nl_info *nlinfo); 21 22 #define NH_DEV_HASHBITS 8 23 #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) 24 25 static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { 26 [NHA_UNSPEC] = { .strict_start_type = NHA_UNSPEC + 1 }, 27 [NHA_ID] = { .type = NLA_U32 }, 28 [NHA_GROUP] = { .type = NLA_BINARY }, 29 [NHA_GROUP_TYPE] = { .type = NLA_U16 }, 30 [NHA_BLACKHOLE] = { .type = NLA_FLAG }, 31 [NHA_OIF] = { .type = NLA_U32 }, 32 [NHA_GATEWAY] = { .type = NLA_BINARY }, 33 [NHA_ENCAP_TYPE] = { .type = NLA_U16 }, 34 [NHA_ENCAP] = { .type = NLA_NESTED }, 35 [NHA_GROUPS] = { .type = NLA_FLAG }, 36 [NHA_MASTER] = { .type = NLA_U32 }, 37 }; 38 39 static unsigned int nh_dev_hashfn(unsigned int val) 40 { 41 unsigned int mask = NH_DEV_HASHSIZE - 1; 42 43 return (val ^ 44 (val >> NH_DEV_HASHBITS) ^ 45 (val >> (NH_DEV_HASHBITS * 2))) & mask; 46 } 47 48 static void nexthop_devhash_add(struct net *net, struct nh_info *nhi) 49 { 50 struct net_device *dev = nhi->fib_nhc.nhc_dev; 51 struct hlist_head *head; 52 unsigned int hash; 53 54 WARN_ON(!dev); 55 56 hash = nh_dev_hashfn(dev->ifindex); 57 head = &net->nexthop.devhash[hash]; 58 hlist_add_head(&nhi->dev_hash, head); 59 } 60 61 static void nexthop_free_mpath(struct nexthop *nh) 62 { 63 struct nh_group *nhg; 64 int i; 65 66 nhg = rcu_dereference_raw(nh->nh_grp); 67 for (i = 0; i < nhg->num_nh; ++i) 68 WARN_ON(nhg->nh_entries[i].nh); 69 70 kfree(nhg); 71 } 72 73 static void nexthop_free_single(struct nexthop *nh) 74 { 75 struct nh_info *nhi; 76 77 nhi = rcu_dereference_raw(nh->nh_info); 78 switch (nhi->family) { 79 case AF_INET: 80 fib_nh_release(nh->net, &nhi->fib_nh); 81 break; 82 case AF_INET6: 83 ipv6_stub->fib6_nh_release(&nhi->fib6_nh); 84 break; 85 } 86 kfree(nhi); 87 } 88 89 void nexthop_free_rcu(struct rcu_head *head) 90 { 91 struct nexthop *nh = container_of(head, struct nexthop, rcu); 92 93 if (nh->is_group) 94 nexthop_free_mpath(nh); 95 else 96 nexthop_free_single(nh); 97 98 kfree(nh); 99 } 100 EXPORT_SYMBOL_GPL(nexthop_free_rcu); 101 102 static struct nexthop *nexthop_alloc(void) 103 { 104 struct nexthop *nh; 105 106 nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL); 107 if (nh) { 108 INIT_LIST_HEAD(&nh->grp_list); 109 } 110 return nh; 111 } 112 113 static struct nh_group *nexthop_grp_alloc(u16 num_nh) 114 { 115 size_t sz = offsetof(struct nexthop, nh_grp) 116 + sizeof(struct nh_group) 117 + sizeof(struct nh_grp_entry) * num_nh; 118 struct nh_group *nhg; 119 120 nhg = kzalloc(sz, GFP_KERNEL); 121 if (nhg) 122 nhg->num_nh = num_nh; 123 124 return nhg; 125 } 126 127 static void nh_base_seq_inc(struct net *net) 128 { 129 while (++net->nexthop.seq == 0) 130 ; 131 } 132 133 /* no reference taken; rcu lock or rtnl must be held */ 134 struct nexthop *nexthop_find_by_id(struct net *net, u32 id) 135 { 136 struct rb_node **pp, *parent = NULL, *next; 137 138 pp = &net->nexthop.rb_root.rb_node; 139 while (1) { 140 struct nexthop *nh; 141 142 next = rcu_dereference_raw(*pp); 143 if (!next) 144 break; 145 parent = next; 146 147 nh = rb_entry(parent, struct nexthop, rb_node); 148 if (id < nh->id) 149 pp = &next->rb_left; 150 else if (id > nh->id) 151 pp = &next->rb_right; 152 else 153 return nh; 154 } 155 return NULL; 156 } 157 EXPORT_SYMBOL_GPL(nexthop_find_by_id); 158 159 /* used for auto id allocation; called with rtnl held */ 160 static u32 nh_find_unused_id(struct net *net) 161 { 162 u32 id_start = net->nexthop.last_id_allocated; 163 164 while (1) { 165 net->nexthop.last_id_allocated++; 166 if (net->nexthop.last_id_allocated == id_start) 167 break; 168 169 if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated)) 170 return net->nexthop.last_id_allocated; 171 } 172 return 0; 173 } 174 175 static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) 176 { 177 struct nexthop_grp *p; 178 size_t len = nhg->num_nh * sizeof(*p); 179 struct nlattr *nla; 180 u16 group_type = 0; 181 int i; 182 183 if (nhg->mpath) 184 group_type = NEXTHOP_GRP_TYPE_MPATH; 185 186 if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type)) 187 goto nla_put_failure; 188 189 nla = nla_reserve(skb, NHA_GROUP, len); 190 if (!nla) 191 goto nla_put_failure; 192 193 p = nla_data(nla); 194 for (i = 0; i < nhg->num_nh; ++i) { 195 p->id = nhg->nh_entries[i].nh->id; 196 p->weight = nhg->nh_entries[i].weight - 1; 197 p += 1; 198 } 199 200 return 0; 201 202 nla_put_failure: 203 return -EMSGSIZE; 204 } 205 206 static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, 207 int event, u32 portid, u32 seq, unsigned int nlflags) 208 { 209 struct fib6_nh *fib6_nh; 210 struct fib_nh *fib_nh; 211 struct nlmsghdr *nlh; 212 struct nh_info *nhi; 213 struct nhmsg *nhm; 214 215 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags); 216 if (!nlh) 217 return -EMSGSIZE; 218 219 nhm = nlmsg_data(nlh); 220 nhm->nh_family = AF_UNSPEC; 221 nhm->nh_flags = nh->nh_flags; 222 nhm->nh_protocol = nh->protocol; 223 nhm->nh_scope = 0; 224 nhm->resvd = 0; 225 226 if (nla_put_u32(skb, NHA_ID, nh->id)) 227 goto nla_put_failure; 228 229 if (nh->is_group) { 230 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 231 232 if (nla_put_nh_group(skb, nhg)) 233 goto nla_put_failure; 234 goto out; 235 } 236 237 nhi = rtnl_dereference(nh->nh_info); 238 nhm->nh_family = nhi->family; 239 if (nhi->reject_nh) { 240 if (nla_put_flag(skb, NHA_BLACKHOLE)) 241 goto nla_put_failure; 242 goto out; 243 } else { 244 const struct net_device *dev; 245 246 dev = nhi->fib_nhc.nhc_dev; 247 if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex)) 248 goto nla_put_failure; 249 } 250 251 nhm->nh_scope = nhi->fib_nhc.nhc_scope; 252 switch (nhi->family) { 253 case AF_INET: 254 fib_nh = &nhi->fib_nh; 255 if (fib_nh->fib_nh_gw_family && 256 nla_put_u32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4)) 257 goto nla_put_failure; 258 break; 259 260 case AF_INET6: 261 fib6_nh = &nhi->fib6_nh; 262 if (fib6_nh->fib_nh_gw_family && 263 nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6)) 264 goto nla_put_failure; 265 break; 266 } 267 268 if (nhi->fib_nhc.nhc_lwtstate && 269 lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate, 270 NHA_ENCAP, NHA_ENCAP_TYPE) < 0) 271 goto nla_put_failure; 272 273 out: 274 nlmsg_end(skb, nlh); 275 return 0; 276 277 nla_put_failure: 278 return -EMSGSIZE; 279 } 280 281 static size_t nh_nlmsg_size_grp(struct nexthop *nh) 282 { 283 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 284 size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh; 285 286 return nla_total_size(sz) + 287 nla_total_size(2); /* NHA_GROUP_TYPE */ 288 } 289 290 static size_t nh_nlmsg_size_single(struct nexthop *nh) 291 { 292 struct nh_info *nhi = rtnl_dereference(nh->nh_info); 293 size_t sz; 294 295 /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE 296 * are mutually exclusive 297 */ 298 sz = nla_total_size(4); /* NHA_OIF */ 299 300 switch (nhi->family) { 301 case AF_INET: 302 if (nhi->fib_nh.fib_nh_gw_family) 303 sz += nla_total_size(4); /* NHA_GATEWAY */ 304 break; 305 306 case AF_INET6: 307 /* NHA_GATEWAY */ 308 if (nhi->fib6_nh.fib_nh_gw_family) 309 sz += nla_total_size(sizeof(const struct in6_addr)); 310 break; 311 } 312 313 if (nhi->fib_nhc.nhc_lwtstate) { 314 sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate); 315 sz += nla_total_size(2); /* NHA_ENCAP_TYPE */ 316 } 317 318 return sz; 319 } 320 321 static size_t nh_nlmsg_size(struct nexthop *nh) 322 { 323 size_t sz = nla_total_size(4); /* NHA_ID */ 324 325 if (nh->is_group) 326 sz += nh_nlmsg_size_grp(nh); 327 else 328 sz += nh_nlmsg_size_single(nh); 329 330 return sz; 331 } 332 333 static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) 334 { 335 unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0; 336 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 337 struct sk_buff *skb; 338 int err = -ENOBUFS; 339 340 skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any()); 341 if (!skb) 342 goto errout; 343 344 err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags); 345 if (err < 0) { 346 /* -EMSGSIZE implies BUG in nh_nlmsg_size() */ 347 WARN_ON(err == -EMSGSIZE); 348 kfree_skb(skb); 349 goto errout; 350 } 351 352 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP, 353 info->nlh, gfp_any()); 354 return; 355 errout: 356 if (err < 0) 357 rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err); 358 } 359 360 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, 361 struct netlink_ext_ack *extack) 362 { 363 if (nh->is_group) { 364 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 365 366 /* nested multipath (group within a group) is not 367 * supported 368 */ 369 if (nhg->mpath) { 370 NL_SET_ERR_MSG(extack, 371 "Multipath group can not be a nexthop within a group"); 372 return false; 373 } 374 } else { 375 struct nh_info *nhi = rtnl_dereference(nh->nh_info); 376 377 if (nhi->reject_nh && npaths > 1) { 378 NL_SET_ERR_MSG(extack, 379 "Blackhole nexthop can not be used in a group with more than 1 path"); 380 return false; 381 } 382 } 383 384 return true; 385 } 386 387 static int nh_check_attr_group(struct net *net, struct nlattr *tb[], 388 struct netlink_ext_ack *extack) 389 { 390 unsigned int len = nla_len(tb[NHA_GROUP]); 391 struct nexthop_grp *nhg; 392 unsigned int i, j; 393 394 if (len & (sizeof(struct nexthop_grp) - 1)) { 395 NL_SET_ERR_MSG(extack, 396 "Invalid length for nexthop group attribute"); 397 return -EINVAL; 398 } 399 400 /* convert len to number of nexthop ids */ 401 len /= sizeof(*nhg); 402 403 nhg = nla_data(tb[NHA_GROUP]); 404 for (i = 0; i < len; ++i) { 405 if (nhg[i].resvd1 || nhg[i].resvd2) { 406 NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0"); 407 return -EINVAL; 408 } 409 if (nhg[i].weight > 254) { 410 NL_SET_ERR_MSG(extack, "Invalid value for weight"); 411 return -EINVAL; 412 } 413 for (j = i + 1; j < len; ++j) { 414 if (nhg[i].id == nhg[j].id) { 415 NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group"); 416 return -EINVAL; 417 } 418 } 419 } 420 421 nhg = nla_data(tb[NHA_GROUP]); 422 for (i = 0; i < len; ++i) { 423 struct nexthop *nh; 424 425 nh = nexthop_find_by_id(net, nhg[i].id); 426 if (!nh) { 427 NL_SET_ERR_MSG(extack, "Invalid nexthop id"); 428 return -EINVAL; 429 } 430 if (!valid_group_nh(nh, len, extack)) 431 return -EINVAL; 432 } 433 for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { 434 if (!tb[i]) 435 continue; 436 437 NL_SET_ERR_MSG(extack, 438 "No other attributes can be set in nexthop groups"); 439 return -EINVAL; 440 } 441 442 return 0; 443 } 444 445 static bool ipv6_good_nh(const struct fib6_nh *nh) 446 { 447 int state = NUD_REACHABLE; 448 struct neighbour *n; 449 450 rcu_read_lock_bh(); 451 452 n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); 453 if (n) 454 state = n->nud_state; 455 456 rcu_read_unlock_bh(); 457 458 return !!(state & NUD_VALID); 459 } 460 461 static bool ipv4_good_nh(const struct fib_nh *nh) 462 { 463 int state = NUD_REACHABLE; 464 struct neighbour *n; 465 466 rcu_read_lock_bh(); 467 468 n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, 469 (__force u32)nh->fib_nh_gw4); 470 if (n) 471 state = n->nud_state; 472 473 rcu_read_unlock_bh(); 474 475 return !!(state & NUD_VALID); 476 } 477 478 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) 479 { 480 struct nexthop *rc = NULL; 481 struct nh_group *nhg; 482 int i; 483 484 if (!nh->is_group) 485 return nh; 486 487 nhg = rcu_dereference(nh->nh_grp); 488 for (i = 0; i < nhg->num_nh; ++i) { 489 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 490 struct nh_info *nhi; 491 492 if (hash > atomic_read(&nhge->upper_bound)) 493 continue; 494 495 /* nexthops always check if it is good and does 496 * not rely on a sysctl for this behavior 497 */ 498 nhi = rcu_dereference(nhge->nh->nh_info); 499 switch (nhi->family) { 500 case AF_INET: 501 if (ipv4_good_nh(&nhi->fib_nh)) 502 return nhge->nh; 503 break; 504 case AF_INET6: 505 if (ipv6_good_nh(&nhi->fib6_nh)) 506 return nhge->nh; 507 break; 508 } 509 510 if (!rc) 511 rc = nhge->nh; 512 } 513 514 return rc; 515 } 516 EXPORT_SYMBOL_GPL(nexthop_select_path); 517 518 static void nh_group_rebalance(struct nh_group *nhg) 519 { 520 int total = 0; 521 int w = 0; 522 int i; 523 524 for (i = 0; i < nhg->num_nh; ++i) 525 total += nhg->nh_entries[i].weight; 526 527 for (i = 0; i < nhg->num_nh; ++i) { 528 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 529 int upper_bound; 530 531 w += nhge->weight; 532 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; 533 atomic_set(&nhge->upper_bound, upper_bound); 534 } 535 } 536 537 static void remove_nh_grp_entry(struct nh_grp_entry *nhge, 538 struct nh_group *nhg, 539 struct nl_info *nlinfo) 540 { 541 struct nexthop *nh = nhge->nh; 542 struct nh_grp_entry *nhges; 543 bool found = false; 544 int i; 545 546 WARN_ON(!nh); 547 548 nhges = nhg->nh_entries; 549 for (i = 0; i < nhg->num_nh; ++i) { 550 if (found) { 551 nhges[i-1].nh = nhges[i].nh; 552 nhges[i-1].weight = nhges[i].weight; 553 list_del(&nhges[i].nh_list); 554 list_add(&nhges[i-1].nh_list, &nhges[i-1].nh->grp_list); 555 } else if (nhg->nh_entries[i].nh == nh) { 556 found = true; 557 } 558 } 559 560 if (WARN_ON(!found)) 561 return; 562 563 nhg->num_nh--; 564 nhg->nh_entries[nhg->num_nh].nh = NULL; 565 566 nh_group_rebalance(nhg); 567 568 nexthop_put(nh); 569 570 if (nlinfo) 571 nexthop_notify(RTM_NEWNEXTHOP, nhge->nh_parent, nlinfo); 572 } 573 574 static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, 575 struct nl_info *nlinfo) 576 { 577 struct nh_grp_entry *nhge, *tmp; 578 579 list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) { 580 struct nh_group *nhg; 581 582 list_del(&nhge->nh_list); 583 nhg = rtnl_dereference(nhge->nh_parent->nh_grp); 584 remove_nh_grp_entry(nhge, nhg, nlinfo); 585 586 /* if this group has no more entries then remove it */ 587 if (!nhg->num_nh) 588 remove_nexthop(net, nhge->nh_parent, nlinfo); 589 } 590 } 591 592 static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) 593 { 594 struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); 595 int i, num_nh = nhg->num_nh; 596 597 for (i = 0; i < num_nh; ++i) { 598 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 599 600 if (WARN_ON(!nhge->nh)) 601 continue; 602 603 list_del(&nhge->nh_list); 604 nexthop_put(nhge->nh); 605 nhge->nh = NULL; 606 nhg->num_nh--; 607 } 608 } 609 610 static void __remove_nexthop(struct net *net, struct nexthop *nh, 611 struct nl_info *nlinfo) 612 { 613 if (nh->is_group) { 614 remove_nexthop_group(nh, nlinfo); 615 } else { 616 struct nh_info *nhi; 617 618 nhi = rtnl_dereference(nh->nh_info); 619 if (nhi->fib_nhc.nhc_dev) 620 hlist_del(&nhi->dev_hash); 621 622 remove_nexthop_from_groups(net, nh, nlinfo); 623 } 624 } 625 626 static void remove_nexthop(struct net *net, struct nexthop *nh, 627 struct nl_info *nlinfo) 628 { 629 /* remove from the tree */ 630 rb_erase(&nh->rb_node, &net->nexthop.rb_root); 631 632 if (nlinfo) 633 nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo); 634 635 __remove_nexthop(net, nh, nlinfo); 636 nh_base_seq_inc(net); 637 638 nexthop_put(nh); 639 } 640 641 static int replace_nexthop(struct net *net, struct nexthop *old, 642 struct nexthop *new, struct netlink_ext_ack *extack) 643 { 644 return -EEXIST; 645 } 646 647 /* called with rtnl_lock held */ 648 static int insert_nexthop(struct net *net, struct nexthop *new_nh, 649 struct nh_config *cfg, struct netlink_ext_ack *extack) 650 { 651 struct rb_node **pp, *parent = NULL, *next; 652 struct rb_root *root = &net->nexthop.rb_root; 653 bool replace = !!(cfg->nlflags & NLM_F_REPLACE); 654 bool create = !!(cfg->nlflags & NLM_F_CREATE); 655 u32 new_id = new_nh->id; 656 int rc = -EEXIST; 657 658 pp = &root->rb_node; 659 while (1) { 660 struct nexthop *nh; 661 662 next = rtnl_dereference(*pp); 663 if (!next) 664 break; 665 666 parent = next; 667 668 nh = rb_entry(parent, struct nexthop, rb_node); 669 if (new_id < nh->id) { 670 pp = &next->rb_left; 671 } else if (new_id > nh->id) { 672 pp = &next->rb_right; 673 } else if (replace) { 674 rc = replace_nexthop(net, nh, new_nh, extack); 675 if (!rc) 676 new_nh = nh; /* send notification with old nh */ 677 goto out; 678 } else { 679 /* id already exists and not a replace */ 680 goto out; 681 } 682 } 683 684 if (replace && !create) { 685 NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists"); 686 rc = -ENOENT; 687 goto out; 688 } 689 690 rb_link_node_rcu(&new_nh->rb_node, parent, pp); 691 rb_insert_color(&new_nh->rb_node, root); 692 rc = 0; 693 out: 694 if (!rc) { 695 nh_base_seq_inc(net); 696 nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); 697 } 698 699 return rc; 700 } 701 702 /* rtnl */ 703 /* remove all nexthops tied to a device being deleted */ 704 static void nexthop_flush_dev(struct net_device *dev) 705 { 706 unsigned int hash = nh_dev_hashfn(dev->ifindex); 707 struct net *net = dev_net(dev); 708 struct hlist_head *head = &net->nexthop.devhash[hash]; 709 struct hlist_node *n; 710 struct nh_info *nhi; 711 712 hlist_for_each_entry_safe(nhi, n, head, dev_hash) { 713 if (nhi->fib_nhc.nhc_dev != dev) 714 continue; 715 716 remove_nexthop(net, nhi->nh_parent, NULL); 717 } 718 } 719 720 /* rtnl; called when net namespace is deleted */ 721 static void flush_all_nexthops(struct net *net) 722 { 723 struct rb_root *root = &net->nexthop.rb_root; 724 struct rb_node *node; 725 struct nexthop *nh; 726 727 while ((node = rb_first(root))) { 728 nh = rb_entry(node, struct nexthop, rb_node); 729 remove_nexthop(net, nh, NULL); 730 cond_resched(); 731 } 732 } 733 734 static struct nexthop *nexthop_create_group(struct net *net, 735 struct nh_config *cfg) 736 { 737 struct nlattr *grps_attr = cfg->nh_grp; 738 struct nexthop_grp *entry = nla_data(grps_attr); 739 struct nh_group *nhg; 740 struct nexthop *nh; 741 int i; 742 743 nh = nexthop_alloc(); 744 if (!nh) 745 return ERR_PTR(-ENOMEM); 746 747 nh->is_group = 1; 748 749 nhg = nexthop_grp_alloc(nla_len(grps_attr) / sizeof(*entry)); 750 if (!nhg) { 751 kfree(nh); 752 return ERR_PTR(-ENOMEM); 753 } 754 755 for (i = 0; i < nhg->num_nh; ++i) { 756 struct nexthop *nhe; 757 struct nh_info *nhi; 758 759 nhe = nexthop_find_by_id(net, entry[i].id); 760 if (!nexthop_get(nhe)) 761 goto out_no_nh; 762 763 nhi = rtnl_dereference(nhe->nh_info); 764 if (nhi->family == AF_INET) 765 nhg->has_v4 = true; 766 767 nhg->nh_entries[i].nh = nhe; 768 nhg->nh_entries[i].weight = entry[i].weight + 1; 769 list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list); 770 nhg->nh_entries[i].nh_parent = nh; 771 } 772 773 if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { 774 nhg->mpath = 1; 775 nh_group_rebalance(nhg); 776 } 777 778 rcu_assign_pointer(nh->nh_grp, nhg); 779 780 return nh; 781 782 out_no_nh: 783 for (; i >= 0; --i) 784 nexthop_put(nhg->nh_entries[i].nh); 785 786 kfree(nhg); 787 kfree(nh); 788 789 return ERR_PTR(-ENOENT); 790 } 791 792 static int nh_create_ipv4(struct net *net, struct nexthop *nh, 793 struct nh_info *nhi, struct nh_config *cfg, 794 struct netlink_ext_ack *extack) 795 { 796 struct fib_nh *fib_nh = &nhi->fib_nh; 797 struct fib_config fib_cfg = { 798 .fc_oif = cfg->nh_ifindex, 799 .fc_gw4 = cfg->gw.ipv4, 800 .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0, 801 .fc_flags = cfg->nh_flags, 802 .fc_encap = cfg->nh_encap, 803 .fc_encap_type = cfg->nh_encap_type, 804 }; 805 u32 tb_id = l3mdev_fib_table(cfg->dev); 806 int err = -EINVAL; 807 808 err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); 809 if (err) { 810 fib_nh_release(net, fib_nh); 811 goto out; 812 } 813 814 /* sets nh_dev if successful */ 815 err = fib_check_nh(net, fib_nh, tb_id, 0, extack); 816 if (!err) { 817 nh->nh_flags = fib_nh->fib_nh_flags; 818 fib_info_update_nh_saddr(net, fib_nh, fib_nh->fib_nh_scope); 819 } else { 820 fib_nh_release(net, fib_nh); 821 } 822 out: 823 return err; 824 } 825 826 static int nh_create_ipv6(struct net *net, struct nexthop *nh, 827 struct nh_info *nhi, struct nh_config *cfg, 828 struct netlink_ext_ack *extack) 829 { 830 struct fib6_nh *fib6_nh = &nhi->fib6_nh; 831 struct fib6_config fib6_cfg = { 832 .fc_table = l3mdev_fib_table(cfg->dev), 833 .fc_ifindex = cfg->nh_ifindex, 834 .fc_gateway = cfg->gw.ipv6, 835 .fc_flags = cfg->nh_flags, 836 .fc_encap = cfg->nh_encap, 837 .fc_encap_type = cfg->nh_encap_type, 838 }; 839 int err; 840 841 if (!ipv6_addr_any(&cfg->gw.ipv6)) 842 fib6_cfg.fc_flags |= RTF_GATEWAY; 843 844 /* sets nh_dev if successful */ 845 err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, 846 extack); 847 if (err) 848 ipv6_stub->fib6_nh_release(fib6_nh); 849 else 850 nh->nh_flags = fib6_nh->fib_nh_flags; 851 852 return err; 853 } 854 855 static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, 856 struct netlink_ext_ack *extack) 857 { 858 struct nh_info *nhi; 859 struct nexthop *nh; 860 int err = 0; 861 862 nh = nexthop_alloc(); 863 if (!nh) 864 return ERR_PTR(-ENOMEM); 865 866 nhi = kzalloc(sizeof(*nhi), GFP_KERNEL); 867 if (!nhi) { 868 kfree(nh); 869 return ERR_PTR(-ENOMEM); 870 } 871 872 nh->nh_flags = cfg->nh_flags; 873 nh->net = net; 874 875 nhi->nh_parent = nh; 876 nhi->family = cfg->nh_family; 877 nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; 878 879 if (cfg->nh_blackhole) { 880 nhi->reject_nh = 1; 881 cfg->nh_ifindex = net->loopback_dev->ifindex; 882 } 883 884 switch (cfg->nh_family) { 885 case AF_INET: 886 err = nh_create_ipv4(net, nh, nhi, cfg, extack); 887 break; 888 case AF_INET6: 889 err = nh_create_ipv6(net, nh, nhi, cfg, extack); 890 break; 891 } 892 893 if (err) { 894 kfree(nhi); 895 kfree(nh); 896 return ERR_PTR(err); 897 } 898 899 /* add the entry to the device based hash */ 900 nexthop_devhash_add(net, nhi); 901 902 rcu_assign_pointer(nh->nh_info, nhi); 903 904 return nh; 905 } 906 907 /* called with rtnl lock held */ 908 static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg, 909 struct netlink_ext_ack *extack) 910 { 911 struct nexthop *nh; 912 int err; 913 914 if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) { 915 NL_SET_ERR_MSG(extack, "Replace requires nexthop id"); 916 return ERR_PTR(-EINVAL); 917 } 918 919 if (!cfg->nh_id) { 920 cfg->nh_id = nh_find_unused_id(net); 921 if (!cfg->nh_id) { 922 NL_SET_ERR_MSG(extack, "No unused id"); 923 return ERR_PTR(-EINVAL); 924 } 925 } 926 927 if (cfg->nh_grp) 928 nh = nexthop_create_group(net, cfg); 929 else 930 nh = nexthop_create(net, cfg, extack); 931 932 if (IS_ERR(nh)) 933 return nh; 934 935 refcount_set(&nh->refcnt, 1); 936 nh->id = cfg->nh_id; 937 nh->protocol = cfg->nh_protocol; 938 nh->net = net; 939 940 err = insert_nexthop(net, nh, cfg, extack); 941 if (err) { 942 __remove_nexthop(net, nh, NULL); 943 nexthop_put(nh); 944 nh = ERR_PTR(err); 945 } 946 947 return nh; 948 } 949 950 static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, 951 struct nlmsghdr *nlh, struct nh_config *cfg, 952 struct netlink_ext_ack *extack) 953 { 954 struct nhmsg *nhm = nlmsg_data(nlh); 955 struct nlattr *tb[NHA_MAX + 1]; 956 int err; 957 958 err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy, 959 extack); 960 if (err < 0) 961 return err; 962 963 err = -EINVAL; 964 if (nhm->resvd || nhm->nh_scope) { 965 NL_SET_ERR_MSG(extack, "Invalid values in ancillary header"); 966 goto out; 967 } 968 if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) { 969 NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header"); 970 goto out; 971 } 972 973 switch (nhm->nh_family) { 974 case AF_INET: 975 case AF_INET6: 976 break; 977 case AF_UNSPEC: 978 if (tb[NHA_GROUP]) 979 break; 980 /* fallthrough */ 981 default: 982 NL_SET_ERR_MSG(extack, "Invalid address family"); 983 goto out; 984 } 985 986 if (tb[NHA_GROUPS] || tb[NHA_MASTER]) { 987 NL_SET_ERR_MSG(extack, "Invalid attributes in request"); 988 goto out; 989 } 990 991 memset(cfg, 0, sizeof(*cfg)); 992 cfg->nlflags = nlh->nlmsg_flags; 993 cfg->nlinfo.portid = NETLINK_CB(skb).portid; 994 cfg->nlinfo.nlh = nlh; 995 cfg->nlinfo.nl_net = net; 996 997 cfg->nh_family = nhm->nh_family; 998 cfg->nh_protocol = nhm->nh_protocol; 999 cfg->nh_flags = nhm->nh_flags; 1000 1001 if (tb[NHA_ID]) 1002 cfg->nh_id = nla_get_u32(tb[NHA_ID]); 1003 1004 if (tb[NHA_GROUP]) { 1005 if (nhm->nh_family != AF_UNSPEC) { 1006 NL_SET_ERR_MSG(extack, "Invalid family for group"); 1007 goto out; 1008 } 1009 cfg->nh_grp = tb[NHA_GROUP]; 1010 1011 cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH; 1012 if (tb[NHA_GROUP_TYPE]) 1013 cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]); 1014 1015 if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) { 1016 NL_SET_ERR_MSG(extack, "Invalid group type"); 1017 goto out; 1018 } 1019 err = nh_check_attr_group(net, tb, extack); 1020 1021 /* no other attributes should be set */ 1022 goto out; 1023 } 1024 1025 if (tb[NHA_BLACKHOLE]) { 1026 if (tb[NHA_GATEWAY] || tb[NHA_OIF] || 1027 tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { 1028 NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif"); 1029 goto out; 1030 } 1031 1032 cfg->nh_blackhole = 1; 1033 err = 0; 1034 goto out; 1035 } 1036 1037 if (!tb[NHA_OIF]) { 1038 NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops"); 1039 goto out; 1040 } 1041 1042 cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); 1043 if (cfg->nh_ifindex) 1044 cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); 1045 1046 if (!cfg->dev) { 1047 NL_SET_ERR_MSG(extack, "Invalid device index"); 1048 goto out; 1049 } else if (!(cfg->dev->flags & IFF_UP)) { 1050 NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 1051 err = -ENETDOWN; 1052 goto out; 1053 } else if (!netif_carrier_ok(cfg->dev)) { 1054 NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); 1055 err = -ENETDOWN; 1056 goto out; 1057 } 1058 1059 err = -EINVAL; 1060 if (tb[NHA_GATEWAY]) { 1061 struct nlattr *gwa = tb[NHA_GATEWAY]; 1062 1063 switch (cfg->nh_family) { 1064 case AF_INET: 1065 if (nla_len(gwa) != sizeof(u32)) { 1066 NL_SET_ERR_MSG(extack, "Invalid gateway"); 1067 goto out; 1068 } 1069 cfg->gw.ipv4 = nla_get_be32(gwa); 1070 break; 1071 case AF_INET6: 1072 if (nla_len(gwa) != sizeof(struct in6_addr)) { 1073 NL_SET_ERR_MSG(extack, "Invalid gateway"); 1074 goto out; 1075 } 1076 cfg->gw.ipv6 = nla_get_in6_addr(gwa); 1077 break; 1078 default: 1079 NL_SET_ERR_MSG(extack, 1080 "Unknown address family for gateway"); 1081 goto out; 1082 } 1083 } else { 1084 /* device only nexthop (no gateway) */ 1085 if (cfg->nh_flags & RTNH_F_ONLINK) { 1086 NL_SET_ERR_MSG(extack, 1087 "ONLINK flag can not be set for nexthop without a gateway"); 1088 goto out; 1089 } 1090 } 1091 1092 if (tb[NHA_ENCAP]) { 1093 cfg->nh_encap = tb[NHA_ENCAP]; 1094 1095 if (!tb[NHA_ENCAP_TYPE]) { 1096 NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing"); 1097 goto out; 1098 } 1099 1100 cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]); 1101 err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack); 1102 if (err < 0) 1103 goto out; 1104 1105 } else if (tb[NHA_ENCAP_TYPE]) { 1106 NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing"); 1107 goto out; 1108 } 1109 1110 1111 err = 0; 1112 out: 1113 return err; 1114 } 1115 1116 /* rtnl */ 1117 static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, 1118 struct netlink_ext_ack *extack) 1119 { 1120 struct net *net = sock_net(skb->sk); 1121 struct nh_config cfg; 1122 struct nexthop *nh; 1123 int err; 1124 1125 err = rtm_to_nh_config(net, skb, nlh, &cfg, extack); 1126 if (!err) { 1127 nh = nexthop_add(net, &cfg, extack); 1128 if (IS_ERR(nh)) 1129 err = PTR_ERR(nh); 1130 } 1131 1132 return err; 1133 } 1134 1135 static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id, 1136 struct netlink_ext_ack *extack) 1137 { 1138 struct nhmsg *nhm = nlmsg_data(nlh); 1139 struct nlattr *tb[NHA_MAX + 1]; 1140 int err, i; 1141 1142 err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy, 1143 extack); 1144 if (err < 0) 1145 return err; 1146 1147 err = -EINVAL; 1148 for (i = 0; i < __NHA_MAX; ++i) { 1149 if (!tb[i]) 1150 continue; 1151 1152 switch (i) { 1153 case NHA_ID: 1154 break; 1155 default: 1156 NL_SET_ERR_MSG_ATTR(extack, tb[i], 1157 "Unexpected attribute in request"); 1158 goto out; 1159 } 1160 } 1161 if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { 1162 NL_SET_ERR_MSG(extack, "Invalid values in header"); 1163 goto out; 1164 } 1165 1166 if (!tb[NHA_ID]) { 1167 NL_SET_ERR_MSG(extack, "Nexthop id is missing"); 1168 goto out; 1169 } 1170 1171 *id = nla_get_u32(tb[NHA_ID]); 1172 if (!(*id)) 1173 NL_SET_ERR_MSG(extack, "Invalid nexthop id"); 1174 else 1175 err = 0; 1176 out: 1177 return err; 1178 } 1179 1180 /* rtnl */ 1181 static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, 1182 struct netlink_ext_ack *extack) 1183 { 1184 struct net *net = sock_net(skb->sk); 1185 struct nl_info nlinfo = { 1186 .nlh = nlh, 1187 .nl_net = net, 1188 .portid = NETLINK_CB(skb).portid, 1189 }; 1190 struct nexthop *nh; 1191 int err; 1192 u32 id; 1193 1194 err = nh_valid_get_del_req(nlh, &id, extack); 1195 if (err) 1196 return err; 1197 1198 nh = nexthop_find_by_id(net, id); 1199 if (!nh) 1200 return -ENOENT; 1201 1202 remove_nexthop(net, nh, &nlinfo); 1203 1204 return 0; 1205 } 1206 1207 /* rtnl */ 1208 static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, 1209 struct netlink_ext_ack *extack) 1210 { 1211 struct net *net = sock_net(in_skb->sk); 1212 struct sk_buff *skb = NULL; 1213 struct nexthop *nh; 1214 int err; 1215 u32 id; 1216 1217 err = nh_valid_get_del_req(nlh, &id, extack); 1218 if (err) 1219 return err; 1220 1221 err = -ENOBUFS; 1222 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1223 if (!skb) 1224 goto out; 1225 1226 err = -ENOENT; 1227 nh = nexthop_find_by_id(net, id); 1228 if (!nh) 1229 goto errout_free; 1230 1231 err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid, 1232 nlh->nlmsg_seq, 0); 1233 if (err < 0) { 1234 WARN_ON(err == -EMSGSIZE); 1235 goto errout_free; 1236 } 1237 1238 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 1239 out: 1240 return err; 1241 errout_free: 1242 kfree_skb(skb); 1243 goto out; 1244 } 1245 1246 static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, 1247 bool group_filter, u8 family) 1248 { 1249 const struct net_device *dev; 1250 const struct nh_info *nhi; 1251 1252 if (group_filter && !nh->is_group) 1253 return true; 1254 1255 if (!dev_idx && !master_idx && !family) 1256 return false; 1257 1258 if (nh->is_group) 1259 return true; 1260 1261 nhi = rtnl_dereference(nh->nh_info); 1262 if (family && nhi->family != family) 1263 return true; 1264 1265 dev = nhi->fib_nhc.nhc_dev; 1266 if (dev_idx && (!dev || dev->ifindex != dev_idx)) 1267 return true; 1268 1269 if (master_idx) { 1270 struct net_device *master; 1271 1272 if (!dev) 1273 return true; 1274 1275 master = netdev_master_upper_dev_get((struct net_device *)dev); 1276 if (!master || master->ifindex != master_idx) 1277 return true; 1278 } 1279 1280 return false; 1281 } 1282 1283 static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, 1284 int *master_idx, bool *group_filter, 1285 struct netlink_callback *cb) 1286 { 1287 struct netlink_ext_ack *extack = cb->extack; 1288 struct nlattr *tb[NHA_MAX + 1]; 1289 struct nhmsg *nhm; 1290 int err, i; 1291 u32 idx; 1292 1293 err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy, 1294 NULL); 1295 if (err < 0) 1296 return err; 1297 1298 for (i = 0; i <= NHA_MAX; ++i) { 1299 if (!tb[i]) 1300 continue; 1301 1302 switch (i) { 1303 case NHA_OIF: 1304 idx = nla_get_u32(tb[i]); 1305 if (idx > INT_MAX) { 1306 NL_SET_ERR_MSG(extack, "Invalid device index"); 1307 return -EINVAL; 1308 } 1309 *dev_idx = idx; 1310 break; 1311 case NHA_MASTER: 1312 idx = nla_get_u32(tb[i]); 1313 if (idx > INT_MAX) { 1314 NL_SET_ERR_MSG(extack, "Invalid master device index"); 1315 return -EINVAL; 1316 } 1317 *master_idx = idx; 1318 break; 1319 case NHA_GROUPS: 1320 *group_filter = true; 1321 break; 1322 default: 1323 NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); 1324 return -EINVAL; 1325 } 1326 } 1327 1328 nhm = nlmsg_data(nlh); 1329 if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { 1330 NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request"); 1331 return -EINVAL; 1332 } 1333 1334 return 0; 1335 } 1336 1337 /* rtnl */ 1338 static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) 1339 { 1340 struct nhmsg *nhm = nlmsg_data(cb->nlh); 1341 int dev_filter_idx = 0, master_idx = 0; 1342 struct net *net = sock_net(skb->sk); 1343 struct rb_root *root = &net->nexthop.rb_root; 1344 bool group_filter = false; 1345 struct rb_node *node; 1346 int idx = 0, s_idx; 1347 int err; 1348 1349 err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx, 1350 &group_filter, cb); 1351 if (err < 0) 1352 return err; 1353 1354 s_idx = cb->args[0]; 1355 for (node = rb_first(root); node; node = rb_next(node)) { 1356 struct nexthop *nh; 1357 1358 if (idx < s_idx) 1359 goto cont; 1360 1361 nh = rb_entry(node, struct nexthop, rb_node); 1362 if (nh_dump_filtered(nh, dev_filter_idx, master_idx, 1363 group_filter, nhm->nh_family)) 1364 goto cont; 1365 1366 err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, 1367 NETLINK_CB(cb->skb).portid, 1368 cb->nlh->nlmsg_seq, NLM_F_MULTI); 1369 if (err < 0) { 1370 if (likely(skb->len)) 1371 goto out; 1372 1373 goto out_err; 1374 } 1375 cont: 1376 idx++; 1377 } 1378 1379 out: 1380 err = skb->len; 1381 out_err: 1382 cb->args[0] = idx; 1383 cb->seq = net->nexthop.seq; 1384 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1385 1386 return err; 1387 } 1388 1389 static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu) 1390 { 1391 unsigned int hash = nh_dev_hashfn(dev->ifindex); 1392 struct net *net = dev_net(dev); 1393 struct hlist_head *head = &net->nexthop.devhash[hash]; 1394 struct hlist_node *n; 1395 struct nh_info *nhi; 1396 1397 hlist_for_each_entry_safe(nhi, n, head, dev_hash) { 1398 if (nhi->fib_nhc.nhc_dev == dev) { 1399 if (nhi->family == AF_INET) 1400 fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu, 1401 orig_mtu); 1402 } 1403 } 1404 } 1405 1406 /* rtnl */ 1407 static int nh_netdev_event(struct notifier_block *this, 1408 unsigned long event, void *ptr) 1409 { 1410 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1411 struct netdev_notifier_info_ext *info_ext; 1412 1413 switch (event) { 1414 case NETDEV_DOWN: 1415 case NETDEV_UNREGISTER: 1416 nexthop_flush_dev(dev); 1417 break; 1418 case NETDEV_CHANGE: 1419 if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP))) 1420 nexthop_flush_dev(dev); 1421 break; 1422 case NETDEV_CHANGEMTU: 1423 info_ext = ptr; 1424 nexthop_sync_mtu(dev, info_ext->ext.mtu); 1425 rt_cache_flush(dev_net(dev)); 1426 break; 1427 } 1428 return NOTIFY_DONE; 1429 } 1430 1431 static struct notifier_block nh_netdev_notifier = { 1432 .notifier_call = nh_netdev_event, 1433 }; 1434 1435 static void __net_exit nexthop_net_exit(struct net *net) 1436 { 1437 rtnl_lock(); 1438 flush_all_nexthops(net); 1439 rtnl_unlock(); 1440 kfree(net->nexthop.devhash); 1441 } 1442 1443 static int __net_init nexthop_net_init(struct net *net) 1444 { 1445 size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE; 1446 1447 net->nexthop.rb_root = RB_ROOT; 1448 net->nexthop.devhash = kzalloc(sz, GFP_KERNEL); 1449 if (!net->nexthop.devhash) 1450 return -ENOMEM; 1451 1452 return 0; 1453 } 1454 1455 static struct pernet_operations nexthop_net_ops = { 1456 .init = nexthop_net_init, 1457 .exit = nexthop_net_exit, 1458 }; 1459 1460 static int __init nexthop_init(void) 1461 { 1462 register_pernet_subsys(&nexthop_net_ops); 1463 1464 register_netdevice_notifier(&nh_netdev_notifier); 1465 1466 rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); 1467 rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0); 1468 rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop, 1469 rtm_dump_nexthop, 0); 1470 1471 rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); 1472 rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); 1473 1474 rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); 1475 rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); 1476 1477 return 0; 1478 } 1479 subsys_initcall(nexthop_init); 1480