// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/netlink.h>
#include <linux/hash.h>
#include <linux/nospec.h>

#include <net/arp.h>
#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/nexthop.h>
#include <net/netlink.h>
#include <net/rtnh.h>
#include <net/lwtunnel.h>
#include <net/fib_notifier.h>
#include <net/addrconf.h>

#include "fib_lookup.h"

static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;

/* for_nexthops and change_nexthops are only used when a nexthop object
 * is not set in a fib_info. The logic within can reference fib_nh.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh;				\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < fib_info_num_path((fi));				\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh *nexthop_nh;				\
	for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	     nhsel < fib_info_num_path((fi));				\
	     nexthop_nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope that gcc will optimize this to get rid of the dummy loop */

#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh = (fi)->fib_nh;		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }


const struct fib_prop fib_props[RTN_MAX + 1] = {
	[RTN_UNSPEC] = {
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_UNICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_LOCAL] = {
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},
	[RTN_BROADCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_ANYCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_MULTICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_BLACKHOLE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_UNREACHABLE] = {
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_PROHIBIT] = {
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_THROW] = {
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_NAT] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_XRESOLVE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
};

static void rt_fibinfo_free(struct rtable __rcu **rtp)
{
	struct rtable *rt = rcu_dereference_protected(*rtp, 1);

	if (!rt)
		return;

	/* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
	 * because we waited an RCU grace period before calling
	 * free_fib_info_rcu()
	 */

	dst_dev_put(&rt->dst);
	dst_release_immediate(&rt->dst);
}

static void free_nh_exceptions(struct fib_nh_common *nhc)
{
	struct fnhe_hash_bucket *hash;
	int i;

	hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
	if (!hash)
		return;
	for (i = 0; i < FNHE_HASH_SIZE; i++) {
		struct fib_nh_exception *fnhe;

		fnhe = rcu_dereference_protected(hash[i].chain, 1);
		while (fnhe) {
			struct fib_nh_exception *next;

			next = rcu_dereference_protected(fnhe->fnhe_next, 1);

			rt_fibinfo_free(&fnhe->fnhe_rth_input);
			rt_fibinfo_free(&fnhe->fnhe_rth_output);

			kfree(fnhe);

			fnhe = next;
		}
	}
	kfree(hash);
}

static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
{
	int cpu;

	if (!rtp)
		return;

	for_each_possible_cpu(cpu) {
		struct rtable *rt;

		rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
		if (rt) {
			dst_dev_put(&rt->dst);
			dst_release_immediate(&rt->dst);
		}
	}
	free_percpu(rtp);
}

void fib_nh_common_release(struct fib_nh_common *nhc)
{
	netdev_put(nhc->nhc_dev, &nhc->nhc_dev_tracker);
	lwtstate_put(nhc->nhc_lwtstate);
	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
	rt_fibinfo_free(&nhc->nhc_rth_input);
	free_nh_exceptions(nhc);
}
EXPORT_SYMBOL_GPL(fib_nh_common_release);

void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (fib_nh->nh_tclassid)
		atomic_dec(&net->ipv4.fib_num_tclassid_users);
#endif
	fib_nh_common_release(&fib_nh->nh_common);
}

/* Release a nexthop info record */
static void free_fib_info_rcu(struct rcu_head *head)
{
	struct fib_info *fi = container_of(head, struct fib_info, rcu);

	if (fi->nh) {
		nexthop_put(fi->nh);
	} else {
		change_nexthops(fi) {
			fib_nh_release(fi->fib_net, nexthop_nh);
		} endfor_nexthops(fi);
	}

	ip_fib_metrics_put(fi->fib_metrics);

	kfree(fi);
}

void free_fib_info(struct fib_info *fi)
{
	if (fi->fib_dead == 0) {
		pr_warn("Freeing alive fib_info %p\n", fi);
		return;
	}

	call_rcu_hurry(&fi->rcu, free_fib_info_rcu);
}
EXPORT_SYMBOL_GPL(free_fib_info);

void fib_release_info(struct fib_info *fi)
{
	ASSERT_RTNL();
	if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
		hlist_del(&fi->fib_hash);

		fib_info_cnt--;

		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
		if (fi->nh) {
			list_del(&fi->nh_list);
		} else {
			change_nexthops(fi) {
				if (!nexthop_nh->fib_nh_dev)
					continue;
				hlist_del_rcu(&nexthop_nh->nh_hash);
			} endfor_nexthops(fi)
		}
		/* Paired with READ_ONCE() from fib_table_lookup() */
		WRITE_ONCE(fi->fib_dead, 1);
		fib_info_put(fi);
	}
}

static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
{
	const struct fib_nh *onh;

	if (fi->nh || ofi->nh)
		return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1;

	if (ofi->fib_nhs == 0)
		return 0;

	for_nexthops(fi) {
		onh = fib_info_nh(ofi, nhsel);

		if (nh->fib_nh_oif != onh->fib_nh_oif ||
		    nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
		    nh->fib_nh_scope != onh->fib_nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		    nh->fib_nh_weight != onh->fib_nh_weight ||
#endif
#ifdef CONFIG_IP_ROUTE_CLASSID
		    nh->nh_tclassid != onh->nh_tclassid ||
#endif
		    lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) ||
		    ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK))
			return -1;

		if (nh->fib_nh_gw_family == AF_INET &&
		    nh->fib_nh_gw4 != onh->fib_nh_gw4)
			return -1;

		if (nh->fib_nh_gw_family == AF_INET6 &&
		    ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
			return -1;
	} endfor_nexthops(fi);
	return 0;
}

static struct hlist_head *fib_nh_head(struct net_device *dev)
{
	return &dev->fib_nh_head;
}

static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
				      u32 prefsrc, u32 priority)
{
	unsigned int val = init_val;

	val ^= (protocol << 8) | scope;
	val ^= prefsrc;
	val ^= priority;

	return val;
}

static unsigned int fib_info_hashfn_result(const struct net *net,
					   unsigned int val)
{
	return hash_32(val ^ net_hash_mix(net), fib_info_hash_bits);
}

static inline unsigned int fib_info_hashfn(struct fib_info *fi)
{
	unsigned int val;

	val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol,
				fi->fib_scope, (__force u32)fi->fib_prefsrc,
				fi->fib_priority);

	if (fi->nh) {
		val ^= fi->nh->id;
	} else {
		for_nexthops(fi) {
			val ^= nh->fib_nh_oif;
		} endfor_nexthops(fi)
	}

	return fib_info_hashfn_result(fi->fib_net, val);
}

/* no metrics, only nexthop id */
static struct fib_info *fib_find_info_nh(struct net *net,
					 const struct fib_config *cfg)
{
	struct hlist_head *head;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn_1(cfg->fc_nh_id,
				 cfg->fc_protocol, cfg->fc_scope,
				 (__force u32)cfg->fc_prefsrc,
				 cfg->fc_priority);
	hash = fib_info_hashfn_result(net, hash);
	head = &fib_info_hash[hash];

	hlist_for_each_entry(fi, head, fib_hash) {
		if (!net_eq(fi->fib_net, net))
			continue;
		if (!fi->nh || fi->nh->id != cfg->fc_nh_id)
			continue;
		if (cfg->fc_protocol == fi->fib_protocol &&
		    cfg->fc_scope == fi->fib_scope &&
		    cfg->fc_prefsrc == fi->fib_prefsrc &&
		    cfg->fc_priority == fi->fib_priority &&
		    cfg->fc_type == fi->fib_type &&
		    cfg->fc_table == fi->fib_tb_id &&
		    !((cfg->fc_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK))
			return fi;
	}

	return NULL;
}

static struct fib_info *fib_find_info(struct fib_info *nfi)
{
	struct hlist_head *head;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn(nfi);
	head = &fib_info_hash[hash];

	hlist_for_each_entry(fi, head, fib_hash) {
		if (!net_eq(fi->fib_net, nfi->fib_net))
			continue;
		if (fi->fib_nhs != nfi->fib_nhs)
			continue;
		if (nfi->fib_protocol == fi->fib_protocol &&
		    nfi->fib_scope == fi->fib_scope &&
		    nfi->fib_prefsrc == fi->fib_prefsrc &&
		    nfi->fib_priority == fi->fib_priority &&
		    nfi->fib_type == fi->fib_type &&
		    nfi->fib_tb_id == fi->fib_tb_id &&
		    memcmp(nfi->fib_metrics, fi->fib_metrics,
			   sizeof(u32) * RTAX_MAX) == 0 &&
		    !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
		    nh_comp(fi, nfi) == 0)
			return fi;
	}

	return NULL;
}
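/* Illustration (addresses are examples only): fib_find_info() is what lets
 * many prefixes share one fib_info.  With two routes
 * "10.0.0.0/24 via 192.0.2.1 dev eth0" and "10.0.1.0/24 via 192.0.2.1 dev eth0",
 * the second fib_create_info() call builds a candidate fib_info, should find
 * the first one here (same protocol/scope/priority/metrics and
 * nh_comp() == 0), bumps its fib_treeref and frees the duplicate, so both
 * fib aliases end up pointing at a single shared object.
 */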
/* Check that the gateway is already configured.
 * Used only by redirect accept routine, under rcu_read_lock();
 */
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
	struct hlist_head *head;
	struct fib_nh *nh;

	head = fib_nh_head(dev);

	hlist_for_each_entry_rcu(nh, head, nh_hash) {
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		if (nh->fib_nh_gw4 == gw &&
		    !(nh->fib_nh_flags & RTNH_F_DEAD)) {
			return 0;
		}
	}

	return -1;
}

size_t fib_nlmsg_size(struct fib_info *fi)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
			 + nla_total_size(4) /* RTA_TABLE */
			 + nla_total_size(4) /* RTA_DST */
			 + nla_total_size(4) /* RTA_PRIORITY */
			 + nla_total_size(4) /* RTA_PREFSRC */
			 + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
	unsigned int nhs = fib_info_num_path(fi);

	/* space for nested metrics */
	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));

	if (fi->nh)
		payload += nla_total_size(4); /* RTA_NH_ID */

	if (nhs) {
		size_t nh_encapsize = 0;
		/* Also handles the special case nhs == 1 */

		/* each nexthop is packed in an attribute */
		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
		unsigned int i;

		/* may contain flow and gateway attribute */
		nhsize += 2 * nla_total_size(4);

		/* grab encap info */
		for (i = 0; i < fib_info_num_path(fi); i++) {
			struct fib_nh_common *nhc = fib_info_nhc(fi, i);

			if (nhc->nhc_lwtstate) {
				/* RTA_ENCAP_TYPE */
				nh_encapsize += lwtunnel_get_encap_size(
						nhc->nhc_lwtstate);
				/* RTA_ENCAP */
				nh_encapsize += nla_total_size(2);
			}
		}

		/* all nexthops are packed in a nested attribute */
		payload += nla_total_size((nhs * nhsize) + nh_encapsize);

	}

	return payload;
}

void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
	       int dst_len, u32 tb_id, const struct nl_info *info,
	       unsigned int nlm_flags)
{
	struct fib_rt_info fri;
	struct sk_buff *skb;
	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;

	skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
	if (!skb)
		goto errout;

	fri.fi = fa->fa_info;
	fri.tb_id = tb_id;
	fri.dst = key;
	fri.dst_len = dst_len;
	fri.dscp = fa->fa_dscp;
	fri.type = fa->fa_type;
	fri.offload = READ_ONCE(fa->offload);
	fri.trap = READ_ONCE(fa->trap);
	fri.offload_failed = READ_ONCE(fa->offload_failed);
	err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
		    info->nlh, GFP_KERNEL);
	return;
errout:
	rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
}

static int fib_detect_death(struct fib_info *fi, int order,
			    struct fib_info **last_resort, int *last_idx,
			    int dflt)
{
	const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
	struct neighbour *n;
	int state = NUD_NONE;

	if (likely(nhc->nhc_gw_family == AF_INET))
		n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev);
	else if (nhc->nhc_gw_family == AF_INET6)
		n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6,
				 nhc->nhc_dev);
	else
		n = NULL;

	if (n) {
		state = READ_ONCE(n->nud_state);
		neigh_release(n);
	} else {
		return 0;
	}
	if (state == NUD_REACHABLE)
		return 0;
	if ((state & NUD_VALID) && order != dflt)
		return 0;
	if ((state & NUD_VALID) ||
	    (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
		*last_resort = fi;
		*last_idx = order;
	}
	return 1;
}
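/* Roughly, fib_detect_death() is the "is this default gateway still alive?"
 * test used by fib_select_default() further below: a gateway whose neighbour
 * entry is REACHABLE (or otherwise still VALID) keeps the current default,
 * while one whose ARP/ND state has decayed lets a later default route, or the
 * remembered last_resort, be chosen instead.
 */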
int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc,
		       struct nlattr *encap, u16 encap_type,
		       void *cfg, gfp_t gfp_flags,
		       struct netlink_ext_ack *extack)
{
	int err;

	nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *,
						    gfp_flags);
	if (!nhc->nhc_pcpu_rth_output)
		return -ENOMEM;

	if (encap) {
		struct lwtunnel_state *lwtstate;

		if (encap_type == LWTUNNEL_ENCAP_NONE) {
			NL_SET_ERR_MSG(extack, "LWT encap type not specified");
			err = -EINVAL;
			goto lwt_failure;
		}
		err = lwtunnel_build_state(net, encap_type, encap,
					   nhc->nhc_family, cfg, &lwtstate,
					   extack);
		if (err)
			goto lwt_failure;

		nhc->nhc_lwtstate = lwtstate_get(lwtstate);
	}

	return 0;

lwt_failure:
	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
	nhc->nhc_pcpu_rth_output = NULL;
	return err;
}
EXPORT_SYMBOL_GPL(fib_nh_common_init);

int fib_nh_init(struct net *net, struct fib_nh *nh,
		struct fib_config *cfg, int nh_weight,
		struct netlink_ext_ack *extack)
{
	int err;

	nh->fib_nh_family = AF_INET;

	err = fib_nh_common_init(net, &nh->nh_common, cfg->fc_encap,
				 cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
	if (err)
		return err;

	nh->fib_nh_oif = cfg->fc_oif;
	nh->fib_nh_gw_family = cfg->fc_gw_family;
	if (cfg->fc_gw_family == AF_INET)
		nh->fib_nh_gw4 = cfg->fc_gw4;
	else if (cfg->fc_gw_family == AF_INET6)
		nh->fib_nh_gw6 = cfg->fc_gw6;

	nh->fib_nh_flags = cfg->fc_flags;

#ifdef CONFIG_IP_ROUTE_CLASSID
	nh->nh_tclassid = cfg->fc_flow;
	if (nh->nh_tclassid)
		atomic_inc(&net->ipv4.fib_num_tclassid_users);
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	nh->fib_nh_weight = nh_weight;
#endif
	return 0;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
			      struct netlink_ext_ack *extack)
{
	int nhs = 0;

	while (rtnh_ok(rtnh, remaining)) {
		nhs++;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0) {
		NL_SET_ERR_MSG(extack,
			       "Invalid nexthop configuration - extra data after nexthops");
		nhs = 0;
	}

	return nhs;
}

static int fib_gw_from_attr(__be32 *gw, struct nlattr *nla,
			    struct netlink_ext_ack *extack)
{
	if (nla_len(nla) < sizeof(*gw)) {
		NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_GATEWAY");
		return -EINVAL;
	}

	*gw = nla_get_in_addr(nla);

	return 0;
}

/* only called when fib_nh is integrated into fib_info */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
		       int remaining, struct fib_config *cfg,
		       struct netlink_ext_ack *extack)
{
	struct net *net = fi->fib_net;
	struct fib_config fib_cfg;
	struct fib_nh *nh;
	int ret;

	change_nexthops(fi) {
		int attrlen;

		memset(&fib_cfg, 0, sizeof(fib_cfg));

		if (!rtnh_ok(rtnh, remaining)) {
			NL_SET_ERR_MSG(extack,
				       "Invalid nexthop configuration - extra data after nexthop");
			return -EINVAL;
		}

		if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
			NL_SET_ERR_MSG(extack,
				       "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
			return -EINVAL;
		}

		fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
		fib_cfg.fc_oif = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			nlav = nla_find(attrs, attrlen, RTA_VIA);
			if (nla && nlav) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop configuration can not contain both GATEWAY and VIA");
				return -EINVAL;
			}
			if (nla) {
				ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla,
						       extack);
				if (ret)
					goto errout;

				if (fib_cfg.fc_gw4)
					fib_cfg.fc_gw_family = AF_INET;
			} else if (nlav) {
				ret = fib_gw_from_via(&fib_cfg, nlav, extack);
				if (ret)
					goto errout;
			}

			nla = nla_find(attrs, attrlen, RTA_FLOW);
			if (nla) {
				if (nla_len(nla) < sizeof(u32)) {
					NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW");
					return -EINVAL;
				}
				fib_cfg.fc_flow = nla_get_u32(nla);
			}

			fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			/* RTA_ENCAP_TYPE length checked in
			 * lwtunnel_valid_encap_type_attr
			 */
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				fib_cfg.fc_encap_type = nla_get_u16(nla);
		}

		ret = fib_nh_init(net, nexthop_nh, &fib_cfg,
				  rtnh->rtnh_hops + 1, extack);
		if (ret)
			goto errout;

		rtnh = rtnh_next(rtnh, &remaining);
	} endfor_nexthops(fi);

	ret = -EINVAL;
	nh = fib_info_nh(fi, 0);
	if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) {
		NL_SET_ERR_MSG(extack,
			       "Nexthop device index does not match RTA_OIF");
		goto errout;
	}
	if (cfg->fc_gw_family) {
		if (cfg->fc_gw_family != nh->fib_nh_gw_family ||
		    (cfg->fc_gw_family == AF_INET &&
		     nh->fib_nh_gw4 != cfg->fc_gw4) ||
		    (cfg->fc_gw_family == AF_INET6 &&
		     ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
			goto errout;
		}
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) {
		NL_SET_ERR_MSG(extack,
			       "Nexthop class id does not match RTA_FLOW");
		goto errout;
	}
#endif
	ret = 0;
errout:
	return ret;
}

/* only called when fib_nh is integrated into fib_info */
static void fib_rebalance(struct fib_info *fi)
{
	int total;
	int w;

	if (fib_info_num_path(fi) < 2)
		return;

	total = 0;
	for_nexthops(fi) {
		if (nh->fib_nh_flags & RTNH_F_DEAD)
			continue;

		if (ip_ignore_linkdown(nh->fib_nh_dev) &&
		    nh->fib_nh_flags & RTNH_F_LINKDOWN)
			continue;

		total += nh->fib_nh_weight;
	} endfor_nexthops(fi);

	w = 0;
	change_nexthops(fi) {
		int upper_bound;

		if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) {
			upper_bound = -1;
		} else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) &&
			   nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
			upper_bound = -1;
		} else {
			w += nexthop_nh->fib_nh_weight;
			upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
							    total) - 1;
		}

		atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound);
	} endfor_nexthops(fi);
}
#else /* CONFIG_IP_ROUTE_MULTIPATH */

static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
		       int remaining, struct fib_config *cfg,
		       struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel");

	return -EINVAL;
}

#define fib_rebalance(fi) do { } while (0)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */
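/* A rough worked example of the weight math in fib_rebalance() above,
 * assuming two live nexthops with weights 1 and 2 (total = 3):
 *
 *	nexthop 0: w = 1, upper_bound = round(1 * 2^31 / 3) - 1 = 0x2aaaaaaa
 *	nexthop 1: w = 3, upper_bound = round(3 * 2^31 / 3) - 1 = 0x7fffffff
 *
 * fib_select_multipath() then takes a 31-bit flow hash and picks the first
 * nexthop whose upper bound is >= the hash, so nexthop 1 receives roughly
 * twice as many flows as nexthop 0.
 */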
static int fib_encap_match(struct net *net, u16 encap_type,
			   struct nlattr *encap,
			   const struct fib_nh *nh,
			   const struct fib_config *cfg,
			   struct netlink_ext_ack *extack)
{
	struct lwtunnel_state *lwtstate;
	int ret, result = 0;

	if (encap_type == LWTUNNEL_ENCAP_NONE)
		return 0;

	ret = lwtunnel_build_state(net, encap_type, encap, AF_INET,
				   cfg, &lwtstate, extack);
	if (!ret) {
		result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws);
		lwtstate_free(lwtstate);
	}

	return result;
}

int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
		 struct netlink_ext_ack *extack)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	struct rtnexthop *rtnh;
	int remaining;
#endif

	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
		return 1;

	if (cfg->fc_nh_id) {
		if (fi->nh && cfg->fc_nh_id == fi->nh->id)
			return 0;
		return 1;
	}

	if (fi->nh) {
		if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp)
			return 1;
		return 0;
	}

	if (cfg->fc_oif || cfg->fc_gw_family) {
		struct fib_nh *nh;

		nh = fib_info_nh(fi, 0);
		if (cfg->fc_encap) {
			if (fib_encap_match(net, cfg->fc_encap_type,
					    cfg->fc_encap, nh, cfg, extack))
				return 1;
		}
#ifdef CONFIG_IP_ROUTE_CLASSID
		if (cfg->fc_flow &&
		    cfg->fc_flow != nh->nh_tclassid)
			return 1;
#endif
		if ((cfg->fc_oif && cfg->fc_oif != nh->fib_nh_oif) ||
		    (cfg->fc_gw_family &&
		     cfg->fc_gw_family != nh->fib_nh_gw_family))
			return 1;

		if (cfg->fc_gw_family == AF_INET &&
		    cfg->fc_gw4 != nh->fib_nh_gw4)
			return 1;

		if (cfg->fc_gw_family == AF_INET6 &&
		    ipv6_addr_cmp(&cfg->fc_gw6, &nh->fib_nh_gw6))
			return 1;

		return 0;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (!cfg->fc_mp)
		return 0;

	rtnh = cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	for_nexthops(fi) {
		int attrlen;

		if (!rtnh_ok(rtnh, remaining))
			return -EINVAL;

		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif)
			return 1;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
			int err;

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			nlav = nla_find(attrs, attrlen, RTA_VIA);
			if (nla && nlav) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop configuration can not contain both GATEWAY and VIA");
				return -EINVAL;
			}

			if (nla) {
				__be32 gw;

				err = fib_gw_from_attr(&gw, nla, extack);
				if (err)
					return err;

				if (nh->fib_nh_gw_family != AF_INET ||
				    gw != nh->fib_nh_gw4)
					return 1;
			} else if (nlav) {
				struct fib_config cfg2;

				err = fib_gw_from_via(&cfg2, nlav, extack);
				if (err)
					return err;

				switch (nh->fib_nh_gw_family) {
				case AF_INET:
					if (cfg2.fc_gw_family != AF_INET ||
					    cfg2.fc_gw4 != nh->fib_nh_gw4)
						return 1;
					break;
				case AF_INET6:
					if (cfg2.fc_gw_family != AF_INET6 ||
					    ipv6_addr_cmp(&cfg2.fc_gw6,
							  &nh->fib_nh_gw6))
						return 1;
					break;
				}
			}

#ifdef CONFIG_IP_ROUTE_CLASSID
			nla = nla_find(attrs, attrlen, RTA_FLOW);
			if (nla) {
				if (nla_len(nla) < sizeof(u32)) {
					NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW");
					return -EINVAL;
				}
				if (nla_get_u32(nla) != nh->nh_tclassid)
					return 1;
			}
#endif
		}

		rtnh = rtnh_next(rtnh, &remaining);
	} endfor_nexthops(fi);
#endif
	return 0;
}

bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
{
	struct nlattr *nla;
	int remaining;

	if (!cfg->fc_mx)
		return true;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 fi_val, val;

		if (!type)
			continue;
		if (type > RTAX_MAX)
			return false;

		type = array_index_nospec(type, RTAX_MAX + 1);
		if (type == RTAX_CC_ALGO) {
			char tmp[TCP_CA_NAME_MAX];
			bool ecn_ca = false;

			nla_strscpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
		} else {
			if (nla_len(nla) != sizeof(u32))
				return false;
			val = nla_get_u32(nla);
		}

		fi_val = fi->fib_metrics->metrics[type - 1];
		if (type == RTAX_FEATURES)
			fi_val &= ~DST_FEATURE_ECN_CA;

		if (fi_val != val)
			return false;
	}

	return true;
}

static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh,
			      u32 table, struct netlink_ext_ack *extack)
{
	struct fib6_config cfg = {
		.fc_table = table,
		.fc_flags = nh->fib_nh_flags | RTF_GATEWAY,
		.fc_ifindex = nh->fib_nh_oif,
		.fc_gateway = nh->fib_nh_gw6,
	};
	struct fib6_nh fib6_nh = {};
	int err;

	err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack);
	if (!err) {
		nh->fib_nh_dev = fib6_nh.fib_nh_dev;
		netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker,
			    GFP_KERNEL);
		nh->fib_nh_oif = nh->fib_nh_dev->ifindex;
		nh->fib_nh_scope = RT_SCOPE_LINK;

		ipv6_stub->fib6_nh_release(&fib6_nh);
	}

	return err;
}

/*
 * Picture
 * -------
 *
 * The semantics of nexthops are very messy for historical reasons.
 * We have to take into account that:
 * a) the gateway can actually be a local interface address,
 *    so that the gatewayed route is direct.
 * b) the gateway must be an on-link address, possibly
 *    described not by an ifaddr, but also by a direct route.
 * c) if both gateway and interface are specified, they should not
 *    contradict each other.
 * d) if we use tunnel routes, the gateway may not be on-link.
 *
 * Attempting to reconcile all of these (alas, self-contradictory)
 * conditions results in pretty ugly and hairy code with obscure logic.
 *
 * I chose to generalize it instead, so that the size
 * of the code does not increase in practice, but it becomes
 * much more general.
 * Every prefix is assigned a "scope" value: "host" is a local address,
 * "link" is a direct route,
 * [ ... "site" ... "interior" ... ]
 * and "universe" is a true gateway route with global meaning.
 *
 * Every prefix refers to a set of "nexthop"s (gw, oif),
 * where gw must have narrower scope. This recursion stops
 * when gw has LOCAL scope or if "nexthop" is declared ONLINK,
 * which means that gw is forced to be on link.
 *
 * The code is still hairy, but now it is apparently logically
 * consistent and very flexible. E.g. as a by-product it allows
 * independent exterior and interior routing processes to coexist
 * in peace.
 *
 * Normally it looks like the following.
 *
 *	{universe prefix}  -> (gw, oif) [scope link]
 *		  |
 *		  |-> {link prefix} -> (gw, oif) [scope local]
 *					|
 *					|-> {local prefix} (terminal node)
 */
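/* A concrete (illustrative) instance of the recursion described above:
 *
 *	default via 203.0.113.1 dev eth0	[scope universe]
 * resolves its gateway against
 *	203.0.113.0/24 dev eth0			[scope link]
 * which in turn terminates at the local interface address
 *	203.0.113.10 on eth0			[scope host]
 *
 * fib_check_nh_v4_gw() below performs exactly this narrower-scope lookup
 * (or short-circuits it when RTNH_F_ONLINK is set).
 */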
static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
			      u8 scope, struct netlink_ext_ack *extack)
{
	struct net_device *dev;
	struct fib_result res;
	int err = 0;

	if (nh->fib_nh_flags & RTNH_F_ONLINK) {
		unsigned int addr_type;

		if (scope >= RT_SCOPE_LINK) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
			return -EINVAL;
		}
		dev = __dev_get_by_index(net, nh->fib_nh_oif);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
			return -ENODEV;
		}
		if (!(dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
			return -ENETDOWN;
		}
		addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4);
		if (addr_type != RTN_UNICAST) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
			return -EINVAL;
		}
		if (!netif_carrier_ok(dev))
			nh->fib_nh_flags |= RTNH_F_LINKDOWN;
		nh->fib_nh_dev = dev;
		netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
		nh->fib_nh_scope = RT_SCOPE_LINK;
		return 0;
	}
	rcu_read_lock();
	{
		struct fib_table *tbl = NULL;
		struct flowi4 fl4 = {
			.daddr = nh->fib_nh_gw4,
			.flowi4_scope = scope + 1,
			.flowi4_oif = nh->fib_nh_oif,
			.flowi4_iif = LOOPBACK_IFINDEX,
		};

		/* It is not necessary, but requires a bit of thinking */
		if (fl4.flowi4_scope < RT_SCOPE_LINK)
			fl4.flowi4_scope = RT_SCOPE_LINK;

		if (table && table != RT_TABLE_MAIN)
			tbl = fib_get_table(net, table);

		if (tbl)
			err = fib_table_lookup(tbl, &fl4, &res,
					       FIB_LOOKUP_IGNORE_LINKSTATE |
					       FIB_LOOKUP_NOREF);

		/* on error or if no table given do full lookup. This
		 * is needed for example when nexthops are in the local
		 * table rather than the given table
		 */
		if (!tbl || err) {
			err = fib_lookup(net, &fl4, &res,
					 FIB_LOOKUP_IGNORE_LINKSTATE);
		}

		if (err) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
			goto out;
		}
	}

	err = -EINVAL;
	if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
		NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
		goto out;
	}
	nh->fib_nh_scope = res.scope;
	nh->fib_nh_oif = FIB_RES_OIF(res);
	nh->fib_nh_dev = dev = FIB_RES_DEV(res);
	if (!dev) {
		NL_SET_ERR_MSG(extack,
			       "No egress device for nexthop gateway");
		goto out;
	}
	netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
	if (!netif_carrier_ok(dev))
		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
	err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
out:
	rcu_read_unlock();
	return err;
}

static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
			      struct netlink_ext_ack *extack)
{
	struct in_device *in_dev;
	int err;

	if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
		NL_SET_ERR_MSG(extack,
			       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
		return -EINVAL;
	}

	rcu_read_lock();

	err = -ENODEV;
	in_dev = inetdev_by_index(net, nh->fib_nh_oif);
	if (!in_dev)
		goto out;
	err = -ENETDOWN;
	if (!(in_dev->dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
		goto out;
	}

	nh->fib_nh_dev = in_dev->dev;
	netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
	nh->fib_nh_scope = RT_SCOPE_HOST;
	if (!netif_carrier_ok(nh->fib_nh_dev))
		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
	err = 0;
out:
	rcu_read_unlock();
	return err;
}

int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
		 struct netlink_ext_ack *extack)
{
	int err;

	if (nh->fib_nh_gw_family == AF_INET)
		err = fib_check_nh_v4_gw(net, nh, table, scope, extack);
	else if (nh->fib_nh_gw_family == AF_INET6)
		err = fib_check_nh_v6_gw(net, nh, table, extack);
	else
		err = fib_check_nh_nongw(net, nh, extack);

	return err;
}

static struct hlist_head *
fib_info_laddrhash_bucket(const struct net *net, __be32 val)
{
	u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
			   fib_info_hash_bits);

	return &fib_info_laddrhash[slot];
}

static void fib_info_hash_move(struct hlist_head *new_info_hash,
			       struct hlist_head *new_laddrhash,
			       unsigned int new_size)
{
	struct hlist_head *old_info_hash, *old_laddrhash;
	unsigned int old_size = fib_info_hash_size;
	unsigned int i;

	ASSERT_RTNL();
	old_info_hash = fib_info_hash;
	old_laddrhash = fib_info_laddrhash;
	fib_info_hash_size = new_size;
	fib_info_hash_bits = ilog2(new_size);

	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &fib_info_hash[i];
		struct hlist_node *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, n, head, fib_hash) {
			struct hlist_head *dest;
			unsigned int new_hash;

			new_hash = fib_info_hashfn(fi);
			dest = &new_info_hash[new_hash];
			hlist_add_head(&fi->fib_hash, dest);
		}
	}
	fib_info_hash = new_info_hash;

	fib_info_laddrhash = new_laddrhash;
	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &old_laddrhash[i];
		struct hlist_node *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
			struct hlist_head *ldest;

			ldest = fib_info_laddrhash_bucket(fi->fib_net,
							  fi->fib_prefsrc);
			hlist_add_head(&fi->fib_lhash, ldest);
		}
	}

	kvfree(old_info_hash);
	kvfree(old_laddrhash);
}

__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
				 unsigned char scope)
{
	struct fib_nh *nh;
	__be32 saddr;

	if (nhc->nhc_family != AF_INET)
		return inet_select_addr(nhc->nhc_dev, 0, scope);

	nh = container_of(nhc, struct fib_nh, nh_common);
	saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);

	WRITE_ONCE(nh->nh_saddr, saddr);
	WRITE_ONCE(nh->nh_saddr_genid, atomic_read(&net->ipv4.dev_addr_genid));

	return saddr;
}

__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
{
	struct fib_nh_common *nhc = res->nhc;

	if (res->fi->fib_prefsrc)
		return res->fi->fib_prefsrc;

	if (nhc->nhc_family == AF_INET) {
		struct fib_nh *nh;

		nh = container_of(nhc, struct fib_nh, nh_common);
		if (READ_ONCE(nh->nh_saddr_genid) ==
		    atomic_read(&net->ipv4.dev_addr_genid))
			return READ_ONCE(nh->nh_saddr);
	}

	return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
}
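/* fib_result_prefsrc() above illustrates a small caching pattern: the
 * preferred source address computed by inet_select_addr() is remembered in
 * nh->nh_saddr together with the per-netns dev_addr_genid at the time of the
 * computation.  As long as the generation id still matches, the cached value
 * is returned without re-walking the device's address list; an address
 * add/remove is expected to bump the genid and force a recomputation.
 */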
static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
{
	if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
	    fib_prefsrc != cfg->fc_dst) {
		u32 tb_id = cfg->fc_table;
		int rc;

		if (tb_id == RT_TABLE_MAIN)
			tb_id = RT_TABLE_LOCAL;

		rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
					  fib_prefsrc, tb_id);

		if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
			rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
						  fib_prefsrc, RT_TABLE_LOCAL);
		}

		if (rc != RTN_LOCAL)
			return false;
	}
	return true;
}

struct fib_info *fib_create_info(struct fib_config *cfg,
				 struct netlink_ext_ack *extack)
{
	int err;
	struct fib_info *fi = NULL;
	struct nexthop *nh = NULL;
	struct fib_info *ofi;
	int nhs = 1;
	struct net *net = cfg->fc_nlinfo.nl_net;

	ASSERT_RTNL();
	if (cfg->fc_type > RTN_MAX)
		goto err_inval;

	/* Fast check to catch the most weird cases */
	if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
		NL_SET_ERR_MSG(extack, "Invalid scope");
		goto err_inval;
	}

	if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
		NL_SET_ERR_MSG(extack,
			       "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
		goto err_inval;
	}

	if (cfg->fc_nh_id) {
		if (!cfg->fc_mx) {
			fi = fib_find_info_nh(net, cfg);
			if (fi) {
				refcount_inc(&fi->fib_treeref);
				return fi;
			}
		}

		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
		if (!nh) {
			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
			goto err_inval;
		}
		nhs = 0;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (cfg->fc_mp) {
		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
		if (nhs == 0)
			goto err_inval;
	}
#endif

	err = -ENOBUFS;

	if (fib_info_cnt >= fib_info_hash_size) {
		unsigned int new_size = fib_info_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
		size_t bytes;

		if (!new_size)
			new_size = 16;
		bytes = (size_t)new_size * sizeof(struct hlist_head *);
		new_info_hash = kvzalloc(bytes, GFP_KERNEL);
		new_laddrhash = kvzalloc(bytes, GFP_KERNEL);
		if (!new_info_hash || !new_laddrhash) {
			kvfree(new_info_hash);
			kvfree(new_laddrhash);
		} else {
			fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
		}
		if (!fib_info_hash_size)
			goto failure;
	}

	fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
	if (!fi)
		goto failure;
	fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack);
	if (IS_ERR(fi->fib_metrics)) {
		err = PTR_ERR(fi->fib_metrics);
		kfree(fi);
		return ERR_PTR(err);
	}

	fi->fib_net = net;
	fi->fib_protocol = cfg->fc_protocol;
	fi->fib_scope = cfg->fc_scope;
	fi->fib_flags = cfg->fc_flags;
	fi->fib_priority = cfg->fc_priority;
	fi->fib_prefsrc = cfg->fc_prefsrc;
	fi->fib_type = cfg->fc_type;
	fi->fib_tb_id = cfg->fc_table;

	fi->fib_nhs = nhs;
	if (nh) {
		if (!nexthop_get(nh)) {
			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
			err = -EINVAL;
		} else {
			err = 0;
			fi->nh = nh;
		}
	} else {
		change_nexthops(fi) {
			nexthop_nh->nh_parent = fi;
		} endfor_nexthops(fi)

		if (cfg->fc_mp)
			err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg,
					  extack);
		else
			err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
	}

	if (err != 0)
		goto failure;

	if (fib_props[cfg->fc_type].error) {
		if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
			NL_SET_ERR_MSG(extack,
				       "Gateway, device and multipath can not be specified for this route type");
			goto err_inval;
		}
		goto link_it;
	} else {
		switch (cfg->fc_type) {
		case RTN_UNICAST:
		case RTN_LOCAL:
		case RTN_BROADCAST:
		case RTN_ANYCAST:
		case RTN_MULTICAST:
			break;
		default:
			NL_SET_ERR_MSG(extack, "Invalid route type");
			goto err_inval;
		}
	}

	if (cfg->fc_scope > RT_SCOPE_HOST) {
		NL_SET_ERR_MSG(extack, "Invalid scope");
		goto err_inval;
	}

	if (fi->nh) {
		err = fib_check_nexthop(fi->nh, cfg->fc_scope, extack);
		if (err)
			goto failure;
	} else if (cfg->fc_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* Local address is added. */
		if (nhs != 1) {
			NL_SET_ERR_MSG(extack,
				       "Route with host scope can not have multiple nexthops");
			goto err_inval;
		}
		if (nh->fib_nh_gw_family) {
			NL_SET_ERR_MSG(extack,
				       "Route with host scope can not have a gateway");
			goto err_inval;
		}
		nh->fib_nh_scope = RT_SCOPE_NOWHERE;
		nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif);
		err = -ENODEV;
		if (!nh->fib_nh_dev)
			goto failure;
		netdev_tracker_alloc(nh->fib_nh_dev, &nh->fib_nh_dev_tracker,
				     GFP_KERNEL);
	} else {
		int linkdown = 0;

		change_nexthops(fi) {
			err = fib_check_nh(cfg->fc_nlinfo.nl_net, nexthop_nh,
					   cfg->fc_table, cfg->fc_scope,
					   extack);
			if (err != 0)
				goto failure;
			if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN)
				linkdown++;
		} endfor_nexthops(fi)
		if (linkdown == fi->fib_nhs)
			fi->fib_flags |= RTNH_F_LINKDOWN;
	}

	if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
		NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
		goto err_inval;
	}

	if (!fi->nh) {
		change_nexthops(fi) {
			fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common,
						  fi->fib_scope);
			if (nexthop_nh->fib_nh_gw_family == AF_INET6)
				fi->fib_nh_is_v6 = true;
		} endfor_nexthops(fi)

		fib_rebalance(fi);
	}

link_it:
	ofi = fib_find_info(fi);
	if (ofi) {
		/* fib_table_lookup() should not see @fi yet. */
		fi->fib_dead = 1;
		free_fib_info(fi);
		refcount_inc(&ofi->fib_treeref);
		return ofi;
	}

	refcount_set(&fi->fib_treeref, 1);
	refcount_set(&fi->fib_clntref, 1);

	fib_info_cnt++;
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc);
		hlist_add_head(&fi->fib_lhash, head);
	}
	if (fi->nh) {
		list_add(&fi->nh_list, &nh->fi_list);
	} else {
		change_nexthops(fi) {
			struct hlist_head *head;

			if (!nexthop_nh->fib_nh_dev)
				continue;
			head = fib_nh_head(nexthop_nh->fib_nh_dev);
			hlist_add_head_rcu(&nexthop_nh->nh_hash, head);
		} endfor_nexthops(fi)
	}
	return fi;

err_inval:
	err = -EINVAL;

failure:
	if (fi) {
		/* fib_table_lookup() should not see @fi yet. */
		fi->fib_dead = 1;
		free_fib_info(fi);
	}

	return ERR_PTR(err);
}
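/* Minimal caller sketch for fib_create_info() (illustrative only; the real
 * caller lives in the fib_table insertion code):
 *
 *	struct fib_info *fi = fib_create_info(cfg, extack);
 *
 *	if (IS_ERR(fi))
 *		return PTR_ERR(fi);	// -EINVAL, -ENOBUFS, -ENODEV, ...
 *	...
 *	fib_release_info(fi);		// drops fib_treeref, frees via RCU
 *
 * A successful return is either a brand-new fib_info with fib_treeref == 1
 * or an existing one whose fib_treeref was bumped, so every success must be
 * balanced by fib_release_info().
 */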
int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
		     u8 rt_family, unsigned char *flags, bool skip_oif)
{
	if (nhc->nhc_flags & RTNH_F_DEAD)
		*flags |= RTNH_F_DEAD;

	if (nhc->nhc_flags & RTNH_F_LINKDOWN) {
		*flags |= RTNH_F_LINKDOWN;

		rcu_read_lock();
		switch (nhc->nhc_family) {
		case AF_INET:
			if (ip_ignore_linkdown(nhc->nhc_dev))
				*flags |= RTNH_F_DEAD;
			break;
		case AF_INET6:
			if (ip6_ignore_linkdown(nhc->nhc_dev))
				*flags |= RTNH_F_DEAD;
			break;
		}
		rcu_read_unlock();
	}

	switch (nhc->nhc_gw_family) {
	case AF_INET:
		if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
			goto nla_put_failure;
		break;
	case AF_INET6:
		/* if gateway family does not match nexthop family
		 * gateway is encoded as RTA_VIA
		 */
		if (rt_family != nhc->nhc_gw_family) {
			int alen = sizeof(struct in6_addr);
			struct nlattr *nla;
			struct rtvia *via;

			nla = nla_reserve(skb, RTA_VIA, alen + 2);
			if (!nla)
				goto nla_put_failure;

			via = nla_data(nla);
			via->rtvia_family = AF_INET6;
			memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen);
		} else if (nla_put_in6_addr(skb, RTA_GATEWAY,
					    &nhc->nhc_gw.ipv6) < 0) {
			goto nla_put_failure;
		}
		break;
	}

	*flags |= (nhc->nhc_flags &
		   (RTNH_F_ONLINK | RTNH_F_OFFLOAD | RTNH_F_TRAP));

	if (!skip_oif && nhc->nhc_dev &&
	    nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex))
		goto nla_put_failure;

	if (nhc->nhc_lwtstate &&
	    lwtunnel_fill_encap(skb, nhc->nhc_lwtstate,
				RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(fib_nexthop_info);

#if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
		    int nh_weight, u8 rt_family, u32 nh_tclassid)
{
	const struct net_device *dev = nhc->nhc_dev;
	struct rtnexthop *rtnh;
	unsigned char flags = 0;

	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
	if (!rtnh)
		goto nla_put_failure;

	rtnh->rtnh_hops = nh_weight - 1;
	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;

	if (fib_nexthop_info(skb, nhc, rt_family, &flags, true) < 0)
		goto nla_put_failure;

	rtnh->rtnh_flags = flags;

	if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid))
		goto nla_put_failure;

	/* length of rtnetlink header + attributes */
	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(fib_add_nexthop);
#endif

#ifdef CONFIG_IP_ROUTE_MULTIPATH
static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
{
	struct nlattr *mp;

	mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
	if (!mp)
		goto nla_put_failure;

	if (unlikely(fi->nh)) {
		if (nexthop_mpath_fill_node(skb, fi->nh, AF_INET) < 0)
			goto nla_put_failure;
		goto mp_end;
	}

	for_nexthops(fi) {
		u32 nh_tclassid = 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
		nh_tclassid = nh->nh_tclassid;
#endif
		if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
				    AF_INET, nh_tclassid) < 0)
			goto nla_put_failure;
	} endfor_nexthops(fi);

mp_end:
	nla_nest_end(skb, mp);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
#else
static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
{
	return 0;
}
#endif

int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
		  const struct fib_rt_info *fri, unsigned int flags)
{
	unsigned int nhs = fib_info_num_path(fri->fi);
	struct fib_info *fi = fri->fi;
	u32 tb_id = fri->tb_id;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = fri->dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp);
	if (tb_id < 256)
		rtm->rtm_table = tb_id;
	else
		rtm->rtm_table = RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, tb_id))
		goto nla_put_failure;
	rtm->rtm_type = fri->type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = fi->fib_scope;
	rtm->rtm_protocol = fi->fib_protocol;

	if (rtm->rtm_dst_len &&
	    nla_put_in_addr(skb, RTA_DST, fri->dst))
		goto nla_put_failure;
	if (fi->fib_priority &&
	    nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
		goto nla_put_failure;
	if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
		goto nla_put_failure;

	if (fi->fib_prefsrc &&
	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
		goto nla_put_failure;

	if (fi->nh) {
		if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id))
			goto nla_put_failure;
		if (nexthop_is_blackhole(fi->nh))
			rtm->rtm_type = RTN_BLACKHOLE;
		if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode))
			goto offload;
	}

	if (nhs == 1) {
		const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
		unsigned char flags = 0;

		if (fib_nexthop_info(skb, nhc, AF_INET, &flags, false) < 0)
			goto nla_put_failure;

		rtm->rtm_flags = flags;
#ifdef CONFIG_IP_ROUTE_CLASSID
		if (nhc->nhc_family == AF_INET) {
			struct fib_nh *nh;

			nh = container_of(nhc, struct fib_nh, nh_common);
			if (nh->nh_tclassid &&
			    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
				goto nla_put_failure;
		}
#endif
	} else {
		if (fib_add_multipath(skb, fi) < 0)
			goto nla_put_failure;
	}

offload:
	if (fri->offload)
		rtm->rtm_flags |= RTM_F_OFFLOAD;
	if (fri->trap)
		rtm->rtm_flags |= RTM_F_TRAP;
	if (fri->offload_failed)
		rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/*
 * Update FIB if:
 * - local address disappeared -> we must delete all the entries
 *   referring to it.
 * - device went down -> we must shutdown all nexthops going via it.
 */
int fib_sync_down_addr(struct net_device *dev, __be32 local)
{
	int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
	struct net *net = dev_net(dev);
	struct hlist_head *head;
	struct fib_info *fi;
	int ret = 0;

	if (!fib_info_laddrhash || local == 0)
		return 0;

	head = fib_info_laddrhash_bucket(net, local);
	hlist_for_each_entry(fi, head, fib_lhash) {
		if (!net_eq(fi->fib_net, net) ||
		    fi->fib_tb_id != tb_id)
			continue;
		if (fi->fib_prefsrc == local) {
			fi->fib_flags |= RTNH_F_DEAD;
			fi->pfsrc_removed = true;
			ret++;
		}
	}
	return ret;
}

static int call_fib_nh_notifiers(struct fib_nh *nh,
				 enum fib_event_type event_type)
{
	bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev);
	struct fib_nh_notifier_info info = {
		.fib_nh = nh,
	};

	switch (event_type) {
	case FIB_EVENT_NH_ADD:
		if (nh->fib_nh_flags & RTNH_F_DEAD)
			break;
		if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN)
			break;
		return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type,
					   &info.info);
	case FIB_EVENT_NH_DEL:
		if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) ||
		    (nh->fib_nh_flags & RTNH_F_DEAD))
			return call_fib4_notifiers(dev_net(nh->fib_nh_dev),
						   event_type, &info.info);
		break;
	default:
		break;
	}

	return NOTIFY_DONE;
}

/* Update the PMTU of exceptions when:
 * - the new MTU of the first hop becomes smaller than the PMTU
 * - the old MTU was the same as the PMTU, and it limited discovery of
 *   larger MTUs on the path. With that limit raised, we can now
 *   discover larger MTUs
 * A special case is locked exceptions, for which the PMTU is smaller
 * than the minimal accepted PMTU:
 * - if the new MTU is greater than the PMTU, don't make any change
 * - otherwise, unlock and set PMTU
 */
void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
{
	struct fnhe_hash_bucket *bucket;
	int i;

	bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
	if (!bucket)
		return;

	for (i = 0; i < FNHE_HASH_SIZE; i++) {
		struct fib_nh_exception *fnhe;

		for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
		     fnhe;
		     fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
			if (fnhe->fnhe_mtu_locked) {
				if (new <= fnhe->fnhe_pmtu) {
					fnhe->fnhe_pmtu = new;
					fnhe->fnhe_mtu_locked = false;
				}
			} else if (new < fnhe->fnhe_pmtu ||
				   orig == fnhe->fnhe_pmtu) {
				fnhe->fnhe_pmtu = new;
			}
		}
	}
}
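/* Worked example for fib_nhc_update_mtu() (numbers are illustrative only):
 * an exception caches fnhe_pmtu = 1400 and the device MTU drops from 1500 to
 * 1300: new < fnhe_pmtu, so the PMTU is lowered to 1300.  If instead the MTU
 * is raised from 1500 to 9000 and fnhe_pmtu == 1500 (orig == fnhe_pmtu), the
 * PMTU follows to 9000 so that path MTU discovery can probe again.  A locked
 * exception only ever has its PMTU lowered, and is unlocked in the process.
 */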
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
	struct hlist_head *head = fib_nh_head(dev);
	struct fib_nh *nh;

	hlist_for_each_entry(nh, head, nh_hash) {
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
	}
}

/* Event              force Flags           Description
 * NETDEV_CHANGE      0     LINKDOWN        Carrier OFF, not for scope host
 * NETDEV_DOWN        0     LINKDOWN|DEAD   Link down, not for scope host
 * NETDEV_DOWN        1     LINKDOWN|DEAD   Last address removed
 * NETDEV_UNREGISTER  1     LINKDOWN|DEAD   Device removed
 *
 * only used when fib_nh is built into fib_info
 */
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
	struct hlist_head *head = fib_nh_head(dev);
	struct fib_info *prev_fi = NULL;
	int scope = RT_SCOPE_NOWHERE;
	struct fib_nh *nh;
	int ret = 0;

	if (force)
		scope = -1;

	hlist_for_each_entry(nh, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int dead;

		BUG_ON(!fi->fib_nhs);
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		if (fi == prev_fi)
			continue;
		prev_fi = fi;
		dead = 0;
		change_nexthops(fi) {
			if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD)
				dead++;
			else if (nexthop_nh->fib_nh_dev == dev &&
				 nexthop_nh->fib_nh_scope != scope) {
				switch (event) {
				case NETDEV_DOWN:
				case NETDEV_UNREGISTER:
					nexthop_nh->fib_nh_flags |= RTNH_F_DEAD;
					fallthrough;
				case NETDEV_CHANGE:
					nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
					break;
				}
				call_fib_nh_notifiers(nexthop_nh,
						      FIB_EVENT_NH_DEL);
				dead++;
			}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
			if (event == NETDEV_UNREGISTER &&
			    nexthop_nh->fib_nh_dev == dev) {
				dead = fi->fib_nhs;
				break;
			}
#endif
		} endfor_nexthops(fi)
		if (dead == fi->fib_nhs) {
			switch (event) {
			case NETDEV_DOWN:
			case NETDEV_UNREGISTER:
				fi->fib_flags |= RTNH_F_DEAD;
				fallthrough;
			case NETDEV_CHANGE:
				fi->fib_flags |= RTNH_F_LINKDOWN;
				break;
			}
			ret++;
		}

		fib_rebalance(fi);
	}

	return ret;
}
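/* Illustration of the table above: when an interface merely loses carrier,
 * NETDEV_CHANGE (force == 0) only marks the nexthops using that device
 * LINKDOWN, so they may still be used while the per-device
 * ignore_routes_with_linkdown setting is off; NETDEV_DOWN and
 * NETDEV_UNREGISTER additionally mark them DEAD, and once every nexthop of a
 * fib_info is dead the whole route is flagged and fib_rebalance() stops
 * hashing traffic to the dead paths.
 */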
/* Must be invoked inside of an RCU protected region. */
static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
{
	struct fib_info *fi = NULL, *last_resort = NULL;
	struct hlist_head *fa_head = res->fa_head;
	struct fib_table *tb = res->table;
	u8 slen = 32 - res->prefixlen;
	int order = -1, last_idx = -1;
	struct fib_alias *fa, *fa1 = NULL;
	u32 last_prio = res->fi->fib_priority;
	dscp_t last_dscp = 0;

	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
		struct fib_info *next_fi = fa->fa_info;
		struct fib_nh_common *nhc;

		if (fa->fa_slen != slen)
			continue;
		if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
			continue;
		if (fa->tb_id != tb->tb_id)
			continue;
		if (next_fi->fib_priority > last_prio &&
		    fa->fa_dscp == last_dscp) {
			if (last_dscp)
				continue;
			break;
		}
		if (next_fi->fib_flags & RTNH_F_DEAD)
			continue;
		last_dscp = fa->fa_dscp;
		last_prio = next_fi->fib_priority;

		if (next_fi->fib_scope != res->scope ||
		    fa->fa_type != RTN_UNICAST)
			continue;

		nhc = fib_info_nhc(next_fi, 0);
		if (!nhc->nhc_gw_family || nhc->nhc_scope != RT_SCOPE_LINK)
			continue;

		fib_alias_accessed(fa);

		if (!fi) {
			if (next_fi != res->fi)
				break;
			fa1 = fa;
		} else if (!fib_detect_death(fi, order, &last_resort,
					     &last_idx, fa1->fa_default)) {
			fib_result_assign(res, fi);
			fa1->fa_default = order;
			goto out;
		}
		fi = next_fi;
		order++;
	}

	if (order <= 0 || !fi) {
		if (fa1)
			fa1->fa_default = -1;
		goto out;
	}

	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
			      fa1->fa_default)) {
		fib_result_assign(res, fi);
		fa1->fa_default = order;
		goto out;
	}

	if (last_idx >= 0)
		fib_result_assign(res, last_resort);
	fa1->fa_default = last_idx;
out:
	return;
}

/*
 * Dead device goes up. We wake up dead nexthops.
 * It makes sense only for multipath routes.
 *
 * only used when fib_nh is built into fib_info
 */
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
	struct fib_info *prev_fi;
	struct hlist_head *head;
	struct fib_nh *nh;
	int ret;

	if (!(dev->flags & IFF_UP))
		return 0;

	if (nh_flags & RTNH_F_DEAD) {
		unsigned int flags = dev_get_flags(dev);

		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
			nh_flags |= RTNH_F_LINKDOWN;
	}

	prev_fi = NULL;
	head = fib_nh_head(dev);
	ret = 0;

	hlist_for_each_entry(nh, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive;

		BUG_ON(!fi->fib_nhs);
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		if (fi == prev_fi)
			continue;

		prev_fi = fi;
		alive = 0;
		change_nexthops(fi) {
			if (!(nexthop_nh->fib_nh_flags & nh_flags)) {
				alive++;
				continue;
			}
			if (!nexthop_nh->fib_nh_dev ||
			    !(nexthop_nh->fib_nh_dev->flags & IFF_UP))
				continue;
			if (nexthop_nh->fib_nh_dev != dev ||
			    !__in_dev_get_rtnl(dev))
				continue;
			alive++;
			nexthop_nh->fib_nh_flags &= ~nh_flags;
			call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~nh_flags;
			ret++;
		}

		fib_rebalance(fi);
	}

	return ret;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
static bool fib_good_nh(const struct fib_nh *nh)
{
	int state = NUD_REACHABLE;

	if (nh->fib_nh_scope == RT_SCOPE_LINK) {
		struct neighbour *n;

		rcu_read_lock();

		if (likely(nh->fib_nh_gw_family == AF_INET))
			n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
						      (__force u32)nh->fib_nh_gw4);
		else if (nh->fib_nh_gw_family == AF_INET6)
			n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
							   &nh->fib_nh_gw6);
		else
			n = NULL;
		if (n)
			state = READ_ONCE(n->nud_state);

		rcu_read_unlock();
	}

	return !!(state & NUD_VALID);
}

void fib_select_multipath(struct fib_result *res, int hash)
{
	struct fib_info *fi = res->fi;
	struct net *net = fi->fib_net;
	bool first = false;

	if (unlikely(res->fi->nh)) {
		nexthop_path_fib_result(res, hash);
		return;
	}

	change_nexthops(fi) {
		if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
			if (!fib_good_nh(nexthop_nh))
				continue;
			if (!first) {
				res->nh_sel = nhsel;
				res->nhc = &nexthop_nh->nh_common;
				first = true;
			}
		}

		if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound))
			continue;

		res->nh_sel = nhsel;
		res->nhc = &nexthop_nh->nh_common;
		return;
	} endfor_nexthops(fi);
}
#endif
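/* Tying this back to the fib_rebalance() example: with upper bounds
 * 0x2aaaaaaa and 0x7fffffff, a flow hash of 0x12345678 selects nexthop 0 and
 * a hash of 0x40000000 selects nexthop 1.  When fib_multipath_use_neigh is
 * enabled, a nexthop whose neighbour entry is not NUD_VALID is skipped, and
 * the first "good" nexthop is kept as a fallback in case every remaining
 * candidate fails the neighbour check.
 */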
void fib_select_path(struct net *net, struct fib_result *res,
		     struct flowi4 *fl4, const struct sk_buff *skb)
{
	if (fl4->flowi4_oif)
		goto check_saddr;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (fib_info_num_path(res->fi) > 1) {
		int h = fib_multipath_hash(net, fl4, skb, NULL);

		fib_select_multipath(res, h);
	}
	else
#endif
	if (!res->prefixlen &&
	    res->table->tb_num_default > 1 &&
	    res->type == RTN_UNICAST)
		fib_select_default(fl4, res);

check_saddr:
	if (!fl4->saddr) {
		struct net_device *l3mdev;

		l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev);

		if (!l3mdev ||
		    l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev)
			fl4->saddr = fib_result_prefsrc(net, res);
		else
			fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
	}
}