1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * INET An implementation of the TCP/IP protocol suite for the LINUX 4 * operating system. INET is implemented using the BSD Socket 5 * interface as the means of communication with the user level. 6 * 7 * IPv4 Forwarding Information Base: semantics. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 */ 11 12 #include <linux/uaccess.h> 13 #include <linux/bitops.h> 14 #include <linux/types.h> 15 #include <linux/kernel.h> 16 #include <linux/jiffies.h> 17 #include <linux/mm.h> 18 #include <linux/string.h> 19 #include <linux/socket.h> 20 #include <linux/sockios.h> 21 #include <linux/errno.h> 22 #include <linux/in.h> 23 #include <linux/inet.h> 24 #include <linux/inetdevice.h> 25 #include <linux/netdevice.h> 26 #include <linux/if_arp.h> 27 #include <linux/proc_fs.h> 28 #include <linux/skbuff.h> 29 #include <linux/init.h> 30 #include <linux/slab.h> 31 #include <linux/netlink.h> 32 #include <linux/hash.h> 33 34 #include <net/arp.h> 35 #include <net/inet_dscp.h> 36 #include <net/ip.h> 37 #include <net/protocol.h> 38 #include <net/route.h> 39 #include <net/tcp.h> 40 #include <net/sock.h> 41 #include <net/ip_fib.h> 42 #include <net/ip6_fib.h> 43 #include <net/nexthop.h> 44 #include <net/netlink.h> 45 #include <net/rtnh.h> 46 #include <net/lwtunnel.h> 47 #include <net/fib_notifier.h> 48 #include <net/addrconf.h> 49 50 #include "fib_lookup.h" 51 52 static DEFINE_SPINLOCK(fib_info_lock); 53 static struct hlist_head *fib_info_hash; 54 static struct hlist_head *fib_info_laddrhash; 55 static unsigned int fib_info_hash_size; 56 static unsigned int fib_info_hash_bits; 57 static unsigned int fib_info_cnt; 58 59 #define DEVINDEX_HASHBITS 8 60 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 61 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 62 63 /* for_nexthops and change_nexthops only used when nexthop object 64 * is not set in a fib_info. The logic within can reference fib_nh. 65 */ 66 #ifdef CONFIG_IP_ROUTE_MULTIPATH 67 68 #define for_nexthops(fi) { \ 69 int nhsel; const struct fib_nh *nh; \ 70 for (nhsel = 0, nh = (fi)->fib_nh; \ 71 nhsel < fib_info_num_path((fi)); \ 72 nh++, nhsel++) 73 74 #define change_nexthops(fi) { \ 75 int nhsel; struct fib_nh *nexthop_nh; \ 76 for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 77 nhsel < fib_info_num_path((fi)); \ 78 nexthop_nh++, nhsel++) 79 80 #else /* CONFIG_IP_ROUTE_MULTIPATH */ 81 82 /* Hope, that gcc will optimize it to get rid of dummy loop */ 83 84 #define for_nexthops(fi) { \ 85 int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \ 86 for (nhsel = 0; nhsel < 1; nhsel++) 87 88 #define change_nexthops(fi) { \ 89 int nhsel; \ 90 struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 91 for (nhsel = 0; nhsel < 1; nhsel++) 92 93 #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 94 95 #define endfor_nexthops(fi) } 96 97 98 const struct fib_prop fib_props[RTN_MAX + 1] = { 99 [RTN_UNSPEC] = { 100 .error = 0, 101 .scope = RT_SCOPE_NOWHERE, 102 }, 103 [RTN_UNICAST] = { 104 .error = 0, 105 .scope = RT_SCOPE_UNIVERSE, 106 }, 107 [RTN_LOCAL] = { 108 .error = 0, 109 .scope = RT_SCOPE_HOST, 110 }, 111 [RTN_BROADCAST] = { 112 .error = 0, 113 .scope = RT_SCOPE_LINK, 114 }, 115 [RTN_ANYCAST] = { 116 .error = 0, 117 .scope = RT_SCOPE_LINK, 118 }, 119 [RTN_MULTICAST] = { 120 .error = 0, 121 .scope = RT_SCOPE_UNIVERSE, 122 }, 123 [RTN_BLACKHOLE] = { 124 .error = -EINVAL, 125 .scope = RT_SCOPE_UNIVERSE, 126 }, 127 [RTN_UNREACHABLE] = { 128 .error = -EHOSTUNREACH, 129 .scope = RT_SCOPE_UNIVERSE, 130 }, 131 [RTN_PROHIBIT] = { 132 .error = -EACCES, 133 .scope = RT_SCOPE_UNIVERSE, 134 }, 135 [RTN_THROW] = { 136 .error = -EAGAIN, 137 .scope = RT_SCOPE_UNIVERSE, 138 }, 139 [RTN_NAT] = { 140 .error = -EINVAL, 141 .scope = RT_SCOPE_NOWHERE, 142 }, 143 [RTN_XRESOLVE] = { 144 .error = -EINVAL, 145 .scope = RT_SCOPE_NOWHERE, 146 }, 147 }; 148 149 static void rt_fibinfo_free(struct rtable __rcu **rtp) 150 { 151 struct rtable *rt = rcu_dereference_protected(*rtp, 1); 152 153 if (!rt) 154 return; 155 156 /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); 157 * because we waited an RCU grace period before calling 158 * free_fib_info_rcu() 159 */ 160 161 dst_dev_put(&rt->dst); 162 dst_release_immediate(&rt->dst); 163 } 164 165 static void free_nh_exceptions(struct fib_nh_common *nhc) 166 { 167 struct fnhe_hash_bucket *hash; 168 int i; 169 170 hash = rcu_dereference_protected(nhc->nhc_exceptions, 1); 171 if (!hash) 172 return; 173 for (i = 0; i < FNHE_HASH_SIZE; i++) { 174 struct fib_nh_exception *fnhe; 175 176 fnhe = rcu_dereference_protected(hash[i].chain, 1); 177 while (fnhe) { 178 struct fib_nh_exception *next; 179 180 next = rcu_dereference_protected(fnhe->fnhe_next, 1); 181 182 rt_fibinfo_free(&fnhe->fnhe_rth_input); 183 rt_fibinfo_free(&fnhe->fnhe_rth_output); 184 185 kfree(fnhe); 186 187 fnhe = next; 188 } 189 } 190 kfree(hash); 191 } 192 193 static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) 194 { 195 int cpu; 196 197 if (!rtp) 198 return; 199 200 for_each_possible_cpu(cpu) { 201 struct rtable *rt; 202 203 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); 204 if (rt) { 205 dst_dev_put(&rt->dst); 206 dst_release_immediate(&rt->dst); 207 } 208 } 209 free_percpu(rtp); 210 } 211 212 void fib_nh_common_release(struct fib_nh_common *nhc) 213 { 214 netdev_put(nhc->nhc_dev, &nhc->nhc_dev_tracker); 215 lwtstate_put(nhc->nhc_lwtstate); 216 rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); 217 rt_fibinfo_free(&nhc->nhc_rth_input); 218 free_nh_exceptions(nhc); 219 } 220 EXPORT_SYMBOL_GPL(fib_nh_common_release); 221 222 void fib_nh_release(struct net *net, struct fib_nh *fib_nh) 223 { 224 #ifdef CONFIG_IP_ROUTE_CLASSID 225 if (fib_nh->nh_tclassid) 226 atomic_dec(&net->ipv4.fib_num_tclassid_users); 227 #endif 228 fib_nh_common_release(&fib_nh->nh_common); 229 } 230 231 /* Release a nexthop info record */ 232 static void free_fib_info_rcu(struct rcu_head *head) 233 { 234 struct fib_info *fi = container_of(head, struct fib_info, rcu); 235 236 if (fi->nh) { 237 nexthop_put(fi->nh); 238 } else { 239 change_nexthops(fi) { 240 fib_nh_release(fi->fib_net, nexthop_nh); 241 } endfor_nexthops(fi); 242 } 243 244 ip_fib_metrics_put(fi->fib_metrics); 245 246 kfree(fi); 247 } 248 249 void free_fib_info(struct fib_info *fi) 250 { 251 if (fi->fib_dead == 0) { 252 pr_warn("Freeing alive fib_info %p\n", fi); 253 return; 254 } 255 256 call_rcu(&fi->rcu, free_fib_info_rcu); 257 } 258 EXPORT_SYMBOL_GPL(free_fib_info); 259 260 void fib_release_info(struct fib_info *fi) 261 { 262 spin_lock_bh(&fib_info_lock); 263 if (fi && refcount_dec_and_test(&fi->fib_treeref)) { 264 hlist_del(&fi->fib_hash); 265 266 /* Paired with READ_ONCE() in fib_create_info(). */ 267 WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1); 268 269 if (fi->fib_prefsrc) 270 hlist_del(&fi->fib_lhash); 271 if (fi->nh) { 272 list_del(&fi->nh_list); 273 } else { 274 change_nexthops(fi) { 275 if (!nexthop_nh->fib_nh_dev) 276 continue; 277 hlist_del(&nexthop_nh->nh_hash); 278 } endfor_nexthops(fi) 279 } 280 fi->fib_dead = 1; 281 fib_info_put(fi); 282 } 283 spin_unlock_bh(&fib_info_lock); 284 } 285 286 static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) 287 { 288 const struct fib_nh *onh; 289 290 if (fi->nh || ofi->nh) 291 return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1; 292 293 if (ofi->fib_nhs == 0) 294 return 0; 295 296 for_nexthops(fi) { 297 onh = fib_info_nh(ofi, nhsel); 298 299 if (nh->fib_nh_oif != onh->fib_nh_oif || 300 nh->fib_nh_gw_family != onh->fib_nh_gw_family || 301 nh->fib_nh_scope != onh->fib_nh_scope || 302 #ifdef CONFIG_IP_ROUTE_MULTIPATH 303 nh->fib_nh_weight != onh->fib_nh_weight || 304 #endif 305 #ifdef CONFIG_IP_ROUTE_CLASSID 306 nh->nh_tclassid != onh->nh_tclassid || 307 #endif 308 lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) || 309 ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK)) 310 return -1; 311 312 if (nh->fib_nh_gw_family == AF_INET && 313 nh->fib_nh_gw4 != onh->fib_nh_gw4) 314 return -1; 315 316 if (nh->fib_nh_gw_family == AF_INET6 && 317 ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6)) 318 return -1; 319 } endfor_nexthops(fi); 320 return 0; 321 } 322 323 static inline unsigned int fib_devindex_hashfn(unsigned int val) 324 { 325 return hash_32(val, DEVINDEX_HASHBITS); 326 } 327 328 static struct hlist_head * 329 fib_info_devhash_bucket(const struct net_device *dev) 330 { 331 u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; 332 333 return &fib_info_devhash[fib_devindex_hashfn(val)]; 334 } 335 336 static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, 337 u32 prefsrc, u32 priority) 338 { 339 unsigned int val = init_val; 340 341 val ^= (protocol << 8) | scope; 342 val ^= prefsrc; 343 val ^= priority; 344 345 return val; 346 } 347 348 static unsigned int fib_info_hashfn_result(unsigned int val) 349 { 350 unsigned int mask = (fib_info_hash_size - 1); 351 352 return (val ^ (val >> 7) ^ (val >> 12)) & mask; 353 } 354 355 static inline unsigned int fib_info_hashfn(struct fib_info *fi) 356 { 357 unsigned int val; 358 359 val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol, 360 fi->fib_scope, (__force u32)fi->fib_prefsrc, 361 fi->fib_priority); 362 363 if (fi->nh) { 364 val ^= fib_devindex_hashfn(fi->nh->id); 365 } else { 366 for_nexthops(fi) { 367 val ^= fib_devindex_hashfn(nh->fib_nh_oif); 368 } endfor_nexthops(fi) 369 } 370 371 return fib_info_hashfn_result(val); 372 } 373 374 /* no metrics, only nexthop id */ 375 static struct fib_info *fib_find_info_nh(struct net *net, 376 const struct fib_config *cfg) 377 { 378 struct hlist_head *head; 379 struct fib_info *fi; 380 unsigned int hash; 381 382 hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id), 383 cfg->fc_protocol, cfg->fc_scope, 384 (__force u32)cfg->fc_prefsrc, 385 cfg->fc_priority); 386 hash = fib_info_hashfn_result(hash); 387 head = &fib_info_hash[hash]; 388 389 hlist_for_each_entry(fi, head, fib_hash) { 390 if (!net_eq(fi->fib_net, net)) 391 continue; 392 if (!fi->nh || fi->nh->id != cfg->fc_nh_id) 393 continue; 394 if (cfg->fc_protocol == fi->fib_protocol && 395 cfg->fc_scope == fi->fib_scope && 396 cfg->fc_prefsrc == fi->fib_prefsrc && 397 cfg->fc_priority == fi->fib_priority && 398 cfg->fc_type == fi->fib_type && 399 cfg->fc_table == fi->fib_tb_id && 400 !((cfg->fc_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK)) 401 return fi; 402 } 403 404 return NULL; 405 } 406 407 static struct fib_info *fib_find_info(struct fib_info *nfi) 408 { 409 struct hlist_head *head; 410 struct fib_info *fi; 411 unsigned int hash; 412 413 hash = fib_info_hashfn(nfi); 414 head = &fib_info_hash[hash]; 415 416 hlist_for_each_entry(fi, head, fib_hash) { 417 if (!net_eq(fi->fib_net, nfi->fib_net)) 418 continue; 419 if (fi->fib_nhs != nfi->fib_nhs) 420 continue; 421 if (nfi->fib_protocol == fi->fib_protocol && 422 nfi->fib_scope == fi->fib_scope && 423 nfi->fib_prefsrc == fi->fib_prefsrc && 424 nfi->fib_priority == fi->fib_priority && 425 nfi->fib_type == fi->fib_type && 426 nfi->fib_tb_id == fi->fib_tb_id && 427 memcmp(nfi->fib_metrics, fi->fib_metrics, 428 sizeof(u32) * RTAX_MAX) == 0 && 429 !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && 430 nh_comp(fi, nfi) == 0) 431 return fi; 432 } 433 434 return NULL; 435 } 436 437 /* Check, that the gateway is already configured. 438 * Used only by redirect accept routine. 439 */ 440 int ip_fib_check_default(__be32 gw, struct net_device *dev) 441 { 442 struct hlist_head *head; 443 struct fib_nh *nh; 444 445 spin_lock(&fib_info_lock); 446 447 head = fib_info_devhash_bucket(dev); 448 449 hlist_for_each_entry(nh, head, nh_hash) { 450 if (nh->fib_nh_dev == dev && 451 nh->fib_nh_gw4 == gw && 452 !(nh->fib_nh_flags & RTNH_F_DEAD)) { 453 spin_unlock(&fib_info_lock); 454 return 0; 455 } 456 } 457 458 spin_unlock(&fib_info_lock); 459 460 return -1; 461 } 462 463 size_t fib_nlmsg_size(struct fib_info *fi) 464 { 465 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 466 + nla_total_size(4) /* RTA_TABLE */ 467 + nla_total_size(4) /* RTA_DST */ 468 + nla_total_size(4) /* RTA_PRIORITY */ 469 + nla_total_size(4) /* RTA_PREFSRC */ 470 + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ 471 unsigned int nhs = fib_info_num_path(fi); 472 473 /* space for nested metrics */ 474 payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 475 476 if (fi->nh) 477 payload += nla_total_size(4); /* RTA_NH_ID */ 478 479 if (nhs) { 480 size_t nh_encapsize = 0; 481 /* Also handles the special case nhs == 1 */ 482 483 /* each nexthop is packed in an attribute */ 484 size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 485 unsigned int i; 486 487 /* may contain flow and gateway attribute */ 488 nhsize += 2 * nla_total_size(4); 489 490 /* grab encap info */ 491 for (i = 0; i < fib_info_num_path(fi); i++) { 492 struct fib_nh_common *nhc = fib_info_nhc(fi, i); 493 494 if (nhc->nhc_lwtstate) { 495 /* RTA_ENCAP_TYPE */ 496 nh_encapsize += lwtunnel_get_encap_size( 497 nhc->nhc_lwtstate); 498 /* RTA_ENCAP */ 499 nh_encapsize += nla_total_size(2); 500 } 501 } 502 503 /* all nexthops are packed in a nested attribute */ 504 payload += nla_total_size((nhs * nhsize) + nh_encapsize); 505 506 } 507 508 return payload; 509 } 510 511 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 512 int dst_len, u32 tb_id, const struct nl_info *info, 513 unsigned int nlm_flags) 514 { 515 struct fib_rt_info fri; 516 struct sk_buff *skb; 517 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 518 int err = -ENOBUFS; 519 520 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 521 if (!skb) 522 goto errout; 523 524 fri.fi = fa->fa_info; 525 fri.tb_id = tb_id; 526 fri.dst = key; 527 fri.dst_len = dst_len; 528 fri.dscp = fa->fa_dscp; 529 fri.type = fa->fa_type; 530 fri.offload = READ_ONCE(fa->offload); 531 fri.trap = READ_ONCE(fa->trap); 532 fri.offload_failed = READ_ONCE(fa->offload_failed); 533 err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags); 534 if (err < 0) { 535 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 536 WARN_ON(err == -EMSGSIZE); 537 kfree_skb(skb); 538 goto errout; 539 } 540 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, 541 info->nlh, GFP_KERNEL); 542 return; 543 errout: 544 if (err < 0) 545 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 546 } 547 548 static int fib_detect_death(struct fib_info *fi, int order, 549 struct fib_info **last_resort, int *last_idx, 550 int dflt) 551 { 552 const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); 553 struct neighbour *n; 554 int state = NUD_NONE; 555 556 if (likely(nhc->nhc_gw_family == AF_INET)) 557 n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev); 558 else if (nhc->nhc_gw_family == AF_INET6) 559 n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6, 560 nhc->nhc_dev); 561 else 562 n = NULL; 563 564 if (n) { 565 state = n->nud_state; 566 neigh_release(n); 567 } else { 568 return 0; 569 } 570 if (state == NUD_REACHABLE) 571 return 0; 572 if ((state & NUD_VALID) && order != dflt) 573 return 0; 574 if ((state & NUD_VALID) || 575 (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) { 576 *last_resort = fi; 577 *last_idx = order; 578 } 579 return 1; 580 } 581 582 int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, 583 struct nlattr *encap, u16 encap_type, 584 void *cfg, gfp_t gfp_flags, 585 struct netlink_ext_ack *extack) 586 { 587 int err; 588 589 nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, 590 gfp_flags); 591 if (!nhc->nhc_pcpu_rth_output) 592 return -ENOMEM; 593 594 if (encap) { 595 struct lwtunnel_state *lwtstate; 596 597 if (encap_type == LWTUNNEL_ENCAP_NONE) { 598 NL_SET_ERR_MSG(extack, "LWT encap type not specified"); 599 err = -EINVAL; 600 goto lwt_failure; 601 } 602 err = lwtunnel_build_state(net, encap_type, encap, 603 nhc->nhc_family, cfg, &lwtstate, 604 extack); 605 if (err) 606 goto lwt_failure; 607 608 nhc->nhc_lwtstate = lwtstate_get(lwtstate); 609 } 610 611 return 0; 612 613 lwt_failure: 614 rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); 615 nhc->nhc_pcpu_rth_output = NULL; 616 return err; 617 } 618 EXPORT_SYMBOL_GPL(fib_nh_common_init); 619 620 int fib_nh_init(struct net *net, struct fib_nh *nh, 621 struct fib_config *cfg, int nh_weight, 622 struct netlink_ext_ack *extack) 623 { 624 int err; 625 626 nh->fib_nh_family = AF_INET; 627 628 err = fib_nh_common_init(net, &nh->nh_common, cfg->fc_encap, 629 cfg->fc_encap_type, cfg, GFP_KERNEL, extack); 630 if (err) 631 return err; 632 633 nh->fib_nh_oif = cfg->fc_oif; 634 nh->fib_nh_gw_family = cfg->fc_gw_family; 635 if (cfg->fc_gw_family == AF_INET) 636 nh->fib_nh_gw4 = cfg->fc_gw4; 637 else if (cfg->fc_gw_family == AF_INET6) 638 nh->fib_nh_gw6 = cfg->fc_gw6; 639 640 nh->fib_nh_flags = cfg->fc_flags; 641 642 #ifdef CONFIG_IP_ROUTE_CLASSID 643 nh->nh_tclassid = cfg->fc_flow; 644 if (nh->nh_tclassid) 645 atomic_inc(&net->ipv4.fib_num_tclassid_users); 646 #endif 647 #ifdef CONFIG_IP_ROUTE_MULTIPATH 648 nh->fib_nh_weight = nh_weight; 649 #endif 650 return 0; 651 } 652 653 #ifdef CONFIG_IP_ROUTE_MULTIPATH 654 655 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, 656 struct netlink_ext_ack *extack) 657 { 658 int nhs = 0; 659 660 while (rtnh_ok(rtnh, remaining)) { 661 nhs++; 662 rtnh = rtnh_next(rtnh, &remaining); 663 } 664 665 /* leftover implies invalid nexthop configuration, discard it */ 666 if (remaining > 0) { 667 NL_SET_ERR_MSG(extack, 668 "Invalid nexthop configuration - extra data after nexthops"); 669 nhs = 0; 670 } 671 672 return nhs; 673 } 674 675 static int fib_gw_from_attr(__be32 *gw, struct nlattr *nla, 676 struct netlink_ext_ack *extack) 677 { 678 if (nla_len(nla) < sizeof(*gw)) { 679 NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_GATEWAY"); 680 return -EINVAL; 681 } 682 683 *gw = nla_get_in_addr(nla); 684 685 return 0; 686 } 687 688 /* only called when fib_nh is integrated into fib_info */ 689 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 690 int remaining, struct fib_config *cfg, 691 struct netlink_ext_ack *extack) 692 { 693 struct net *net = fi->fib_net; 694 struct fib_config fib_cfg; 695 struct fib_nh *nh; 696 int ret; 697 698 change_nexthops(fi) { 699 int attrlen; 700 701 memset(&fib_cfg, 0, sizeof(fib_cfg)); 702 703 if (!rtnh_ok(rtnh, remaining)) { 704 NL_SET_ERR_MSG(extack, 705 "Invalid nexthop configuration - extra data after nexthop"); 706 return -EINVAL; 707 } 708 709 if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { 710 NL_SET_ERR_MSG(extack, 711 "Invalid flags for nexthop - can not contain DEAD or LINKDOWN"); 712 return -EINVAL; 713 } 714 715 fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 716 fib_cfg.fc_oif = rtnh->rtnh_ifindex; 717 718 attrlen = rtnh_attrlen(rtnh); 719 if (attrlen > 0) { 720 struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); 721 722 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 723 nlav = nla_find(attrs, attrlen, RTA_VIA); 724 if (nla && nlav) { 725 NL_SET_ERR_MSG(extack, 726 "Nexthop configuration can not contain both GATEWAY and VIA"); 727 return -EINVAL; 728 } 729 if (nla) { 730 ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla, 731 extack); 732 if (ret) 733 goto errout; 734 735 if (fib_cfg.fc_gw4) 736 fib_cfg.fc_gw_family = AF_INET; 737 } else if (nlav) { 738 ret = fib_gw_from_via(&fib_cfg, nlav, extack); 739 if (ret) 740 goto errout; 741 } 742 743 nla = nla_find(attrs, attrlen, RTA_FLOW); 744 if (nla) { 745 if (nla_len(nla) < sizeof(u32)) { 746 NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); 747 return -EINVAL; 748 } 749 fib_cfg.fc_flow = nla_get_u32(nla); 750 } 751 752 fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); 753 /* RTA_ENCAP_TYPE length checked in 754 * lwtunnel_valid_encap_type_attr 755 */ 756 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 757 if (nla) 758 fib_cfg.fc_encap_type = nla_get_u16(nla); 759 } 760 761 ret = fib_nh_init(net, nexthop_nh, &fib_cfg, 762 rtnh->rtnh_hops + 1, extack); 763 if (ret) 764 goto errout; 765 766 rtnh = rtnh_next(rtnh, &remaining); 767 } endfor_nexthops(fi); 768 769 ret = -EINVAL; 770 nh = fib_info_nh(fi, 0); 771 if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) { 772 NL_SET_ERR_MSG(extack, 773 "Nexthop device index does not match RTA_OIF"); 774 goto errout; 775 } 776 if (cfg->fc_gw_family) { 777 if (cfg->fc_gw_family != nh->fib_nh_gw_family || 778 (cfg->fc_gw_family == AF_INET && 779 nh->fib_nh_gw4 != cfg->fc_gw4) || 780 (cfg->fc_gw_family == AF_INET6 && 781 ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) { 782 NL_SET_ERR_MSG(extack, 783 "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA"); 784 goto errout; 785 } 786 } 787 #ifdef CONFIG_IP_ROUTE_CLASSID 788 if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) { 789 NL_SET_ERR_MSG(extack, 790 "Nexthop class id does not match RTA_FLOW"); 791 goto errout; 792 } 793 #endif 794 ret = 0; 795 errout: 796 return ret; 797 } 798 799 /* only called when fib_nh is integrated into fib_info */ 800 static void fib_rebalance(struct fib_info *fi) 801 { 802 int total; 803 int w; 804 805 if (fib_info_num_path(fi) < 2) 806 return; 807 808 total = 0; 809 for_nexthops(fi) { 810 if (nh->fib_nh_flags & RTNH_F_DEAD) 811 continue; 812 813 if (ip_ignore_linkdown(nh->fib_nh_dev) && 814 nh->fib_nh_flags & RTNH_F_LINKDOWN) 815 continue; 816 817 total += nh->fib_nh_weight; 818 } endfor_nexthops(fi); 819 820 w = 0; 821 change_nexthops(fi) { 822 int upper_bound; 823 824 if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) { 825 upper_bound = -1; 826 } else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) && 827 nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) { 828 upper_bound = -1; 829 } else { 830 w += nexthop_nh->fib_nh_weight; 831 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, 832 total) - 1; 833 } 834 835 atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound); 836 } endfor_nexthops(fi); 837 } 838 #else /* CONFIG_IP_ROUTE_MULTIPATH */ 839 840 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 841 int remaining, struct fib_config *cfg, 842 struct netlink_ext_ack *extack) 843 { 844 NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel"); 845 846 return -EINVAL; 847 } 848 849 #define fib_rebalance(fi) do { } while (0) 850 851 #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 852 853 static int fib_encap_match(struct net *net, u16 encap_type, 854 struct nlattr *encap, 855 const struct fib_nh *nh, 856 const struct fib_config *cfg, 857 struct netlink_ext_ack *extack) 858 { 859 struct lwtunnel_state *lwtstate; 860 int ret, result = 0; 861 862 if (encap_type == LWTUNNEL_ENCAP_NONE) 863 return 0; 864 865 ret = lwtunnel_build_state(net, encap_type, encap, AF_INET, 866 cfg, &lwtstate, extack); 867 if (!ret) { 868 result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws); 869 lwtstate_free(lwtstate); 870 } 871 872 return result; 873 } 874 875 int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, 876 struct netlink_ext_ack *extack) 877 { 878 #ifdef CONFIG_IP_ROUTE_MULTIPATH 879 struct rtnexthop *rtnh; 880 int remaining; 881 #endif 882 883 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 884 return 1; 885 886 if (cfg->fc_nh_id) { 887 if (fi->nh && cfg->fc_nh_id == fi->nh->id) 888 return 0; 889 return 1; 890 } 891 892 if (fi->nh) { 893 if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) 894 return 1; 895 return 0; 896 } 897 898 if (cfg->fc_oif || cfg->fc_gw_family) { 899 struct fib_nh *nh; 900 901 nh = fib_info_nh(fi, 0); 902 if (cfg->fc_encap) { 903 if (fib_encap_match(net, cfg->fc_encap_type, 904 cfg->fc_encap, nh, cfg, extack)) 905 return 1; 906 } 907 #ifdef CONFIG_IP_ROUTE_CLASSID 908 if (cfg->fc_flow && 909 cfg->fc_flow != nh->nh_tclassid) 910 return 1; 911 #endif 912 if ((cfg->fc_oif && cfg->fc_oif != nh->fib_nh_oif) || 913 (cfg->fc_gw_family && 914 cfg->fc_gw_family != nh->fib_nh_gw_family)) 915 return 1; 916 917 if (cfg->fc_gw_family == AF_INET && 918 cfg->fc_gw4 != nh->fib_nh_gw4) 919 return 1; 920 921 if (cfg->fc_gw_family == AF_INET6 && 922 ipv6_addr_cmp(&cfg->fc_gw6, &nh->fib_nh_gw6)) 923 return 1; 924 925 return 0; 926 } 927 928 #ifdef CONFIG_IP_ROUTE_MULTIPATH 929 if (!cfg->fc_mp) 930 return 0; 931 932 rtnh = cfg->fc_mp; 933 remaining = cfg->fc_mp_len; 934 935 for_nexthops(fi) { 936 int attrlen; 937 938 if (!rtnh_ok(rtnh, remaining)) 939 return -EINVAL; 940 941 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif) 942 return 1; 943 944 attrlen = rtnh_attrlen(rtnh); 945 if (attrlen > 0) { 946 struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); 947 int err; 948 949 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 950 nlav = nla_find(attrs, attrlen, RTA_VIA); 951 if (nla && nlav) { 952 NL_SET_ERR_MSG(extack, 953 "Nexthop configuration can not contain both GATEWAY and VIA"); 954 return -EINVAL; 955 } 956 957 if (nla) { 958 __be32 gw; 959 960 err = fib_gw_from_attr(&gw, nla, extack); 961 if (err) 962 return err; 963 964 if (nh->fib_nh_gw_family != AF_INET || 965 gw != nh->fib_nh_gw4) 966 return 1; 967 } else if (nlav) { 968 struct fib_config cfg2; 969 970 err = fib_gw_from_via(&cfg2, nlav, extack); 971 if (err) 972 return err; 973 974 switch (nh->fib_nh_gw_family) { 975 case AF_INET: 976 if (cfg2.fc_gw_family != AF_INET || 977 cfg2.fc_gw4 != nh->fib_nh_gw4) 978 return 1; 979 break; 980 case AF_INET6: 981 if (cfg2.fc_gw_family != AF_INET6 || 982 ipv6_addr_cmp(&cfg2.fc_gw6, 983 &nh->fib_nh_gw6)) 984 return 1; 985 break; 986 } 987 } 988 989 #ifdef CONFIG_IP_ROUTE_CLASSID 990 nla = nla_find(attrs, attrlen, RTA_FLOW); 991 if (nla) { 992 if (nla_len(nla) < sizeof(u32)) { 993 NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); 994 return -EINVAL; 995 } 996 if (nla_get_u32(nla) != nh->nh_tclassid) 997 return 1; 998 } 999 #endif 1000 } 1001 1002 rtnh = rtnh_next(rtnh, &remaining); 1003 } endfor_nexthops(fi); 1004 #endif 1005 return 0; 1006 } 1007 1008 bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) 1009 { 1010 struct nlattr *nla; 1011 int remaining; 1012 1013 if (!cfg->fc_mx) 1014 return true; 1015 1016 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1017 int type = nla_type(nla); 1018 u32 fi_val, val; 1019 1020 if (!type) 1021 continue; 1022 if (type > RTAX_MAX) 1023 return false; 1024 1025 if (type == RTAX_CC_ALGO) { 1026 char tmp[TCP_CA_NAME_MAX]; 1027 bool ecn_ca = false; 1028 1029 nla_strscpy(tmp, nla, sizeof(tmp)); 1030 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); 1031 } else { 1032 if (nla_len(nla) != sizeof(u32)) 1033 return false; 1034 val = nla_get_u32(nla); 1035 } 1036 1037 fi_val = fi->fib_metrics->metrics[type - 1]; 1038 if (type == RTAX_FEATURES) 1039 fi_val &= ~DST_FEATURE_ECN_CA; 1040 1041 if (fi_val != val) 1042 return false; 1043 } 1044 1045 return true; 1046 } 1047 1048 static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, 1049 u32 table, struct netlink_ext_ack *extack) 1050 { 1051 struct fib6_config cfg = { 1052 .fc_table = table, 1053 .fc_flags = nh->fib_nh_flags | RTF_GATEWAY, 1054 .fc_ifindex = nh->fib_nh_oif, 1055 .fc_gateway = nh->fib_nh_gw6, 1056 }; 1057 struct fib6_nh fib6_nh = {}; 1058 int err; 1059 1060 err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); 1061 if (!err) { 1062 nh->fib_nh_dev = fib6_nh.fib_nh_dev; 1063 netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, 1064 GFP_KERNEL); 1065 nh->fib_nh_oif = nh->fib_nh_dev->ifindex; 1066 nh->fib_nh_scope = RT_SCOPE_LINK; 1067 1068 ipv6_stub->fib6_nh_release(&fib6_nh); 1069 } 1070 1071 return err; 1072 } 1073 1074 /* 1075 * Picture 1076 * ------- 1077 * 1078 * Semantics of nexthop is very messy by historical reasons. 1079 * We have to take into account, that: 1080 * a) gateway can be actually local interface address, 1081 * so that gatewayed route is direct. 1082 * b) gateway must be on-link address, possibly 1083 * described not by an ifaddr, but also by a direct route. 1084 * c) If both gateway and interface are specified, they should not 1085 * contradict. 1086 * d) If we use tunnel routes, gateway could be not on-link. 1087 * 1088 * Attempt to reconcile all of these (alas, self-contradictory) conditions 1089 * results in pretty ugly and hairy code with obscure logic. 1090 * 1091 * I chose to generalized it instead, so that the size 1092 * of code does not increase practically, but it becomes 1093 * much more general. 1094 * Every prefix is assigned a "scope" value: "host" is local address, 1095 * "link" is direct route, 1096 * [ ... "site" ... "interior" ... ] 1097 * and "universe" is true gateway route with global meaning. 1098 * 1099 * Every prefix refers to a set of "nexthop"s (gw, oif), 1100 * where gw must have narrower scope. This recursion stops 1101 * when gw has LOCAL scope or if "nexthop" is declared ONLINK, 1102 * which means that gw is forced to be on link. 1103 * 1104 * Code is still hairy, but now it is apparently logically 1105 * consistent and very flexible. F.e. as by-product it allows 1106 * to co-exists in peace independent exterior and interior 1107 * routing processes. 1108 * 1109 * Normally it looks as following. 1110 * 1111 * {universe prefix} -> (gw, oif) [scope link] 1112 * | 1113 * |-> {link prefix} -> (gw, oif) [scope local] 1114 * | 1115 * |-> {local prefix} (terminal node) 1116 */ 1117 static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, 1118 u8 scope, struct netlink_ext_ack *extack) 1119 { 1120 struct net_device *dev; 1121 struct fib_result res; 1122 int err = 0; 1123 1124 if (nh->fib_nh_flags & RTNH_F_ONLINK) { 1125 unsigned int addr_type; 1126 1127 if (scope >= RT_SCOPE_LINK) { 1128 NL_SET_ERR_MSG(extack, "Nexthop has invalid scope"); 1129 return -EINVAL; 1130 } 1131 dev = __dev_get_by_index(net, nh->fib_nh_oif); 1132 if (!dev) { 1133 NL_SET_ERR_MSG(extack, "Nexthop device required for onlink"); 1134 return -ENODEV; 1135 } 1136 if (!(dev->flags & IFF_UP)) { 1137 NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 1138 return -ENETDOWN; 1139 } 1140 addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4); 1141 if (addr_type != RTN_UNICAST) { 1142 NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); 1143 return -EINVAL; 1144 } 1145 if (!netif_carrier_ok(dev)) 1146 nh->fib_nh_flags |= RTNH_F_LINKDOWN; 1147 nh->fib_nh_dev = dev; 1148 netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); 1149 nh->fib_nh_scope = RT_SCOPE_LINK; 1150 return 0; 1151 } 1152 rcu_read_lock(); 1153 { 1154 struct fib_table *tbl = NULL; 1155 struct flowi4 fl4 = { 1156 .daddr = nh->fib_nh_gw4, 1157 .flowi4_scope = scope + 1, 1158 .flowi4_oif = nh->fib_nh_oif, 1159 .flowi4_iif = LOOPBACK_IFINDEX, 1160 }; 1161 1162 /* It is not necessary, but requires a bit of thinking */ 1163 if (fl4.flowi4_scope < RT_SCOPE_LINK) 1164 fl4.flowi4_scope = RT_SCOPE_LINK; 1165 1166 if (table && table != RT_TABLE_MAIN) 1167 tbl = fib_get_table(net, table); 1168 1169 if (tbl) 1170 err = fib_table_lookup(tbl, &fl4, &res, 1171 FIB_LOOKUP_IGNORE_LINKSTATE | 1172 FIB_LOOKUP_NOREF); 1173 1174 /* on error or if no table given do full lookup. This 1175 * is needed for example when nexthops are in the local 1176 * table rather than the given table 1177 */ 1178 if (!tbl || err) { 1179 err = fib_lookup(net, &fl4, &res, 1180 FIB_LOOKUP_IGNORE_LINKSTATE); 1181 } 1182 1183 if (err) { 1184 NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); 1185 goto out; 1186 } 1187 } 1188 1189 err = -EINVAL; 1190 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) { 1191 NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); 1192 goto out; 1193 } 1194 nh->fib_nh_scope = res.scope; 1195 nh->fib_nh_oif = FIB_RES_OIF(res); 1196 nh->fib_nh_dev = dev = FIB_RES_DEV(res); 1197 if (!dev) { 1198 NL_SET_ERR_MSG(extack, 1199 "No egress device for nexthop gateway"); 1200 goto out; 1201 } 1202 netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); 1203 if (!netif_carrier_ok(dev)) 1204 nh->fib_nh_flags |= RTNH_F_LINKDOWN; 1205 err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; 1206 out: 1207 rcu_read_unlock(); 1208 return err; 1209 } 1210 1211 static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, 1212 struct netlink_ext_ack *extack) 1213 { 1214 struct in_device *in_dev; 1215 int err; 1216 1217 if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) { 1218 NL_SET_ERR_MSG(extack, 1219 "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set"); 1220 return -EINVAL; 1221 } 1222 1223 rcu_read_lock(); 1224 1225 err = -ENODEV; 1226 in_dev = inetdev_by_index(net, nh->fib_nh_oif); 1227 if (!in_dev) 1228 goto out; 1229 err = -ENETDOWN; 1230 if (!(in_dev->dev->flags & IFF_UP)) { 1231 NL_SET_ERR_MSG(extack, "Device for nexthop is not up"); 1232 goto out; 1233 } 1234 1235 nh->fib_nh_dev = in_dev->dev; 1236 netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); 1237 nh->fib_nh_scope = RT_SCOPE_HOST; 1238 if (!netif_carrier_ok(nh->fib_nh_dev)) 1239 nh->fib_nh_flags |= RTNH_F_LINKDOWN; 1240 err = 0; 1241 out: 1242 rcu_read_unlock(); 1243 return err; 1244 } 1245 1246 int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, 1247 struct netlink_ext_ack *extack) 1248 { 1249 int err; 1250 1251 if (nh->fib_nh_gw_family == AF_INET) 1252 err = fib_check_nh_v4_gw(net, nh, table, scope, extack); 1253 else if (nh->fib_nh_gw_family == AF_INET6) 1254 err = fib_check_nh_v6_gw(net, nh, table, extack); 1255 else 1256 err = fib_check_nh_nongw(net, nh, extack); 1257 1258 return err; 1259 } 1260 1261 static struct hlist_head * 1262 fib_info_laddrhash_bucket(const struct net *net, __be32 val) 1263 { 1264 u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val, 1265 fib_info_hash_bits); 1266 1267 return &fib_info_laddrhash[slot]; 1268 } 1269 1270 static void fib_info_hash_move(struct hlist_head *new_info_hash, 1271 struct hlist_head *new_laddrhash, 1272 unsigned int new_size) 1273 { 1274 struct hlist_head *old_info_hash, *old_laddrhash; 1275 unsigned int old_size = fib_info_hash_size; 1276 unsigned int i; 1277 1278 spin_lock_bh(&fib_info_lock); 1279 old_info_hash = fib_info_hash; 1280 old_laddrhash = fib_info_laddrhash; 1281 fib_info_hash_size = new_size; 1282 fib_info_hash_bits = ilog2(new_size); 1283 1284 for (i = 0; i < old_size; i++) { 1285 struct hlist_head *head = &fib_info_hash[i]; 1286 struct hlist_node *n; 1287 struct fib_info *fi; 1288 1289 hlist_for_each_entry_safe(fi, n, head, fib_hash) { 1290 struct hlist_head *dest; 1291 unsigned int new_hash; 1292 1293 new_hash = fib_info_hashfn(fi); 1294 dest = &new_info_hash[new_hash]; 1295 hlist_add_head(&fi->fib_hash, dest); 1296 } 1297 } 1298 fib_info_hash = new_info_hash; 1299 1300 fib_info_laddrhash = new_laddrhash; 1301 for (i = 0; i < old_size; i++) { 1302 struct hlist_head *lhead = &old_laddrhash[i]; 1303 struct hlist_node *n; 1304 struct fib_info *fi; 1305 1306 hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { 1307 struct hlist_head *ldest; 1308 1309 ldest = fib_info_laddrhash_bucket(fi->fib_net, 1310 fi->fib_prefsrc); 1311 hlist_add_head(&fi->fib_lhash, ldest); 1312 } 1313 } 1314 1315 spin_unlock_bh(&fib_info_lock); 1316 1317 kvfree(old_info_hash); 1318 kvfree(old_laddrhash); 1319 } 1320 1321 __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, 1322 unsigned char scope) 1323 { 1324 struct fib_nh *nh; 1325 1326 if (nhc->nhc_family != AF_INET) 1327 return inet_select_addr(nhc->nhc_dev, 0, scope); 1328 1329 nh = container_of(nhc, struct fib_nh, nh_common); 1330 nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope); 1331 nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); 1332 1333 return nh->nh_saddr; 1334 } 1335 1336 __be32 fib_result_prefsrc(struct net *net, struct fib_result *res) 1337 { 1338 struct fib_nh_common *nhc = res->nhc; 1339 1340 if (res->fi->fib_prefsrc) 1341 return res->fi->fib_prefsrc; 1342 1343 if (nhc->nhc_family == AF_INET) { 1344 struct fib_nh *nh; 1345 1346 nh = container_of(nhc, struct fib_nh, nh_common); 1347 if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid)) 1348 return nh->nh_saddr; 1349 } 1350 1351 return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope); 1352 } 1353 1354 static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) 1355 { 1356 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 1357 fib_prefsrc != cfg->fc_dst) { 1358 u32 tb_id = cfg->fc_table; 1359 int rc; 1360 1361 if (tb_id == RT_TABLE_MAIN) 1362 tb_id = RT_TABLE_LOCAL; 1363 1364 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, 1365 fib_prefsrc, tb_id); 1366 1367 if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) { 1368 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, 1369 fib_prefsrc, RT_TABLE_LOCAL); 1370 } 1371 1372 if (rc != RTN_LOCAL) 1373 return false; 1374 } 1375 return true; 1376 } 1377 1378 struct fib_info *fib_create_info(struct fib_config *cfg, 1379 struct netlink_ext_ack *extack) 1380 { 1381 int err; 1382 struct fib_info *fi = NULL; 1383 struct nexthop *nh = NULL; 1384 struct fib_info *ofi; 1385 int nhs = 1; 1386 struct net *net = cfg->fc_nlinfo.nl_net; 1387 1388 if (cfg->fc_type > RTN_MAX) 1389 goto err_inval; 1390 1391 /* Fast check to catch the most weird cases */ 1392 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) { 1393 NL_SET_ERR_MSG(extack, "Invalid scope"); 1394 goto err_inval; 1395 } 1396 1397 if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { 1398 NL_SET_ERR_MSG(extack, 1399 "Invalid rtm_flags - can not contain DEAD or LINKDOWN"); 1400 goto err_inval; 1401 } 1402 1403 if (cfg->fc_nh_id) { 1404 if (!cfg->fc_mx) { 1405 fi = fib_find_info_nh(net, cfg); 1406 if (fi) { 1407 refcount_inc(&fi->fib_treeref); 1408 return fi; 1409 } 1410 } 1411 1412 nh = nexthop_find_by_id(net, cfg->fc_nh_id); 1413 if (!nh) { 1414 NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); 1415 goto err_inval; 1416 } 1417 nhs = 0; 1418 } 1419 1420 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1421 if (cfg->fc_mp) { 1422 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); 1423 if (nhs == 0) 1424 goto err_inval; 1425 } 1426 #endif 1427 1428 err = -ENOBUFS; 1429 1430 /* Paired with WRITE_ONCE() in fib_release_info() */ 1431 if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) { 1432 unsigned int new_size = fib_info_hash_size << 1; 1433 struct hlist_head *new_info_hash; 1434 struct hlist_head *new_laddrhash; 1435 size_t bytes; 1436 1437 if (!new_size) 1438 new_size = 16; 1439 bytes = (size_t)new_size * sizeof(struct hlist_head *); 1440 new_info_hash = kvzalloc(bytes, GFP_KERNEL); 1441 new_laddrhash = kvzalloc(bytes, GFP_KERNEL); 1442 if (!new_info_hash || !new_laddrhash) { 1443 kvfree(new_info_hash); 1444 kvfree(new_laddrhash); 1445 } else { 1446 fib_info_hash_move(new_info_hash, new_laddrhash, new_size); 1447 } 1448 if (!fib_info_hash_size) 1449 goto failure; 1450 } 1451 1452 fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); 1453 if (!fi) 1454 goto failure; 1455 fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, 1456 cfg->fc_mx_len, extack); 1457 if (IS_ERR(fi->fib_metrics)) { 1458 err = PTR_ERR(fi->fib_metrics); 1459 kfree(fi); 1460 return ERR_PTR(err); 1461 } 1462 1463 fi->fib_net = net; 1464 fi->fib_protocol = cfg->fc_protocol; 1465 fi->fib_scope = cfg->fc_scope; 1466 fi->fib_flags = cfg->fc_flags; 1467 fi->fib_priority = cfg->fc_priority; 1468 fi->fib_prefsrc = cfg->fc_prefsrc; 1469 fi->fib_type = cfg->fc_type; 1470 fi->fib_tb_id = cfg->fc_table; 1471 1472 fi->fib_nhs = nhs; 1473 if (nh) { 1474 if (!nexthop_get(nh)) { 1475 NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); 1476 err = -EINVAL; 1477 } else { 1478 err = 0; 1479 fi->nh = nh; 1480 } 1481 } else { 1482 change_nexthops(fi) { 1483 nexthop_nh->nh_parent = fi; 1484 } endfor_nexthops(fi) 1485 1486 if (cfg->fc_mp) 1487 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, 1488 extack); 1489 else 1490 err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack); 1491 } 1492 1493 if (err != 0) 1494 goto failure; 1495 1496 if (fib_props[cfg->fc_type].error) { 1497 if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) { 1498 NL_SET_ERR_MSG(extack, 1499 "Gateway, device and multipath can not be specified for this route type"); 1500 goto err_inval; 1501 } 1502 goto link_it; 1503 } else { 1504 switch (cfg->fc_type) { 1505 case RTN_UNICAST: 1506 case RTN_LOCAL: 1507 case RTN_BROADCAST: 1508 case RTN_ANYCAST: 1509 case RTN_MULTICAST: 1510 break; 1511 default: 1512 NL_SET_ERR_MSG(extack, "Invalid route type"); 1513 goto err_inval; 1514 } 1515 } 1516 1517 if (cfg->fc_scope > RT_SCOPE_HOST) { 1518 NL_SET_ERR_MSG(extack, "Invalid scope"); 1519 goto err_inval; 1520 } 1521 1522 if (fi->nh) { 1523 err = fib_check_nexthop(fi->nh, cfg->fc_scope, extack); 1524 if (err) 1525 goto failure; 1526 } else if (cfg->fc_scope == RT_SCOPE_HOST) { 1527 struct fib_nh *nh = fi->fib_nh; 1528 1529 /* Local address is added. */ 1530 if (nhs != 1) { 1531 NL_SET_ERR_MSG(extack, 1532 "Route with host scope can not have multiple nexthops"); 1533 goto err_inval; 1534 } 1535 if (nh->fib_nh_gw_family) { 1536 NL_SET_ERR_MSG(extack, 1537 "Route with host scope can not have a gateway"); 1538 goto err_inval; 1539 } 1540 nh->fib_nh_scope = RT_SCOPE_NOWHERE; 1541 nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif); 1542 err = -ENODEV; 1543 if (!nh->fib_nh_dev) 1544 goto failure; 1545 netdev_tracker_alloc(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, 1546 GFP_KERNEL); 1547 } else { 1548 int linkdown = 0; 1549 1550 change_nexthops(fi) { 1551 err = fib_check_nh(cfg->fc_nlinfo.nl_net, nexthop_nh, 1552 cfg->fc_table, cfg->fc_scope, 1553 extack); 1554 if (err != 0) 1555 goto failure; 1556 if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) 1557 linkdown++; 1558 } endfor_nexthops(fi) 1559 if (linkdown == fi->fib_nhs) 1560 fi->fib_flags |= RTNH_F_LINKDOWN; 1561 } 1562 1563 if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) { 1564 NL_SET_ERR_MSG(extack, "Invalid prefsrc address"); 1565 goto err_inval; 1566 } 1567 1568 if (!fi->nh) { 1569 change_nexthops(fi) { 1570 fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common, 1571 fi->fib_scope); 1572 if (nexthop_nh->fib_nh_gw_family == AF_INET6) 1573 fi->fib_nh_is_v6 = true; 1574 } endfor_nexthops(fi) 1575 1576 fib_rebalance(fi); 1577 } 1578 1579 link_it: 1580 ofi = fib_find_info(fi); 1581 if (ofi) { 1582 fi->fib_dead = 1; 1583 free_fib_info(fi); 1584 refcount_inc(&ofi->fib_treeref); 1585 return ofi; 1586 } 1587 1588 refcount_set(&fi->fib_treeref, 1); 1589 refcount_set(&fi->fib_clntref, 1); 1590 spin_lock_bh(&fib_info_lock); 1591 fib_info_cnt++; 1592 hlist_add_head(&fi->fib_hash, 1593 &fib_info_hash[fib_info_hashfn(fi)]); 1594 if (fi->fib_prefsrc) { 1595 struct hlist_head *head; 1596 1597 head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc); 1598 hlist_add_head(&fi->fib_lhash, head); 1599 } 1600 if (fi->nh) { 1601 list_add(&fi->nh_list, &nh->fi_list); 1602 } else { 1603 change_nexthops(fi) { 1604 struct hlist_head *head; 1605 1606 if (!nexthop_nh->fib_nh_dev) 1607 continue; 1608 head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); 1609 hlist_add_head(&nexthop_nh->nh_hash, head); 1610 } endfor_nexthops(fi) 1611 } 1612 spin_unlock_bh(&fib_info_lock); 1613 return fi; 1614 1615 err_inval: 1616 err = -EINVAL; 1617 1618 failure: 1619 if (fi) { 1620 fi->fib_dead = 1; 1621 free_fib_info(fi); 1622 } 1623 1624 return ERR_PTR(err); 1625 } 1626 1627 int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, 1628 u8 rt_family, unsigned char *flags, bool skip_oif) 1629 { 1630 if (nhc->nhc_flags & RTNH_F_DEAD) 1631 *flags |= RTNH_F_DEAD; 1632 1633 if (nhc->nhc_flags & RTNH_F_LINKDOWN) { 1634 *flags |= RTNH_F_LINKDOWN; 1635 1636 rcu_read_lock(); 1637 switch (nhc->nhc_family) { 1638 case AF_INET: 1639 if (ip_ignore_linkdown(nhc->nhc_dev)) 1640 *flags |= RTNH_F_DEAD; 1641 break; 1642 case AF_INET6: 1643 if (ip6_ignore_linkdown(nhc->nhc_dev)) 1644 *flags |= RTNH_F_DEAD; 1645 break; 1646 } 1647 rcu_read_unlock(); 1648 } 1649 1650 switch (nhc->nhc_gw_family) { 1651 case AF_INET: 1652 if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4)) 1653 goto nla_put_failure; 1654 break; 1655 case AF_INET6: 1656 /* if gateway family does not match nexthop family 1657 * gateway is encoded as RTA_VIA 1658 */ 1659 if (rt_family != nhc->nhc_gw_family) { 1660 int alen = sizeof(struct in6_addr); 1661 struct nlattr *nla; 1662 struct rtvia *via; 1663 1664 nla = nla_reserve(skb, RTA_VIA, alen + 2); 1665 if (!nla) 1666 goto nla_put_failure; 1667 1668 via = nla_data(nla); 1669 via->rtvia_family = AF_INET6; 1670 memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen); 1671 } else if (nla_put_in6_addr(skb, RTA_GATEWAY, 1672 &nhc->nhc_gw.ipv6) < 0) { 1673 goto nla_put_failure; 1674 } 1675 break; 1676 } 1677 1678 *flags |= (nhc->nhc_flags & 1679 (RTNH_F_ONLINK | RTNH_F_OFFLOAD | RTNH_F_TRAP)); 1680 1681 if (!skip_oif && nhc->nhc_dev && 1682 nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex)) 1683 goto nla_put_failure; 1684 1685 if (nhc->nhc_lwtstate && 1686 lwtunnel_fill_encap(skb, nhc->nhc_lwtstate, 1687 RTA_ENCAP, RTA_ENCAP_TYPE) < 0) 1688 goto nla_put_failure; 1689 1690 return 0; 1691 1692 nla_put_failure: 1693 return -EMSGSIZE; 1694 } 1695 EXPORT_SYMBOL_GPL(fib_nexthop_info); 1696 1697 #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) 1698 int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, 1699 int nh_weight, u8 rt_family, u32 nh_tclassid) 1700 { 1701 const struct net_device *dev = nhc->nhc_dev; 1702 struct rtnexthop *rtnh; 1703 unsigned char flags = 0; 1704 1705 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 1706 if (!rtnh) 1707 goto nla_put_failure; 1708 1709 rtnh->rtnh_hops = nh_weight - 1; 1710 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; 1711 1712 if (fib_nexthop_info(skb, nhc, rt_family, &flags, true) < 0) 1713 goto nla_put_failure; 1714 1715 rtnh->rtnh_flags = flags; 1716 1717 if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid)) 1718 goto nla_put_failure; 1719 1720 /* length of rtnetlink header + attributes */ 1721 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; 1722 1723 return 0; 1724 1725 nla_put_failure: 1726 return -EMSGSIZE; 1727 } 1728 EXPORT_SYMBOL_GPL(fib_add_nexthop); 1729 #endif 1730 1731 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1732 static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) 1733 { 1734 struct nlattr *mp; 1735 1736 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); 1737 if (!mp) 1738 goto nla_put_failure; 1739 1740 if (unlikely(fi->nh)) { 1741 if (nexthop_mpath_fill_node(skb, fi->nh, AF_INET) < 0) 1742 goto nla_put_failure; 1743 goto mp_end; 1744 } 1745 1746 for_nexthops(fi) { 1747 u32 nh_tclassid = 0; 1748 #ifdef CONFIG_IP_ROUTE_CLASSID 1749 nh_tclassid = nh->nh_tclassid; 1750 #endif 1751 if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, 1752 AF_INET, nh_tclassid) < 0) 1753 goto nla_put_failure; 1754 } endfor_nexthops(fi); 1755 1756 mp_end: 1757 nla_nest_end(skb, mp); 1758 1759 return 0; 1760 1761 nla_put_failure: 1762 return -EMSGSIZE; 1763 } 1764 #else 1765 static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) 1766 { 1767 return 0; 1768 } 1769 #endif 1770 1771 int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, 1772 const struct fib_rt_info *fri, unsigned int flags) 1773 { 1774 unsigned int nhs = fib_info_num_path(fri->fi); 1775 struct fib_info *fi = fri->fi; 1776 u32 tb_id = fri->tb_id; 1777 struct nlmsghdr *nlh; 1778 struct rtmsg *rtm; 1779 1780 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); 1781 if (!nlh) 1782 return -EMSGSIZE; 1783 1784 rtm = nlmsg_data(nlh); 1785 rtm->rtm_family = AF_INET; 1786 rtm->rtm_dst_len = fri->dst_len; 1787 rtm->rtm_src_len = 0; 1788 rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp); 1789 if (tb_id < 256) 1790 rtm->rtm_table = tb_id; 1791 else 1792 rtm->rtm_table = RT_TABLE_COMPAT; 1793 if (nla_put_u32(skb, RTA_TABLE, tb_id)) 1794 goto nla_put_failure; 1795 rtm->rtm_type = fri->type; 1796 rtm->rtm_flags = fi->fib_flags; 1797 rtm->rtm_scope = fi->fib_scope; 1798 rtm->rtm_protocol = fi->fib_protocol; 1799 1800 if (rtm->rtm_dst_len && 1801 nla_put_in_addr(skb, RTA_DST, fri->dst)) 1802 goto nla_put_failure; 1803 if (fi->fib_priority && 1804 nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) 1805 goto nla_put_failure; 1806 if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) 1807 goto nla_put_failure; 1808 1809 if (fi->fib_prefsrc && 1810 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) 1811 goto nla_put_failure; 1812 1813 if (fi->nh) { 1814 if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id)) 1815 goto nla_put_failure; 1816 if (nexthop_is_blackhole(fi->nh)) 1817 rtm->rtm_type = RTN_BLACKHOLE; 1818 if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode)) 1819 goto offload; 1820 } 1821 1822 if (nhs == 1) { 1823 const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); 1824 unsigned char flags = 0; 1825 1826 if (fib_nexthop_info(skb, nhc, AF_INET, &flags, false) < 0) 1827 goto nla_put_failure; 1828 1829 rtm->rtm_flags = flags; 1830 #ifdef CONFIG_IP_ROUTE_CLASSID 1831 if (nhc->nhc_family == AF_INET) { 1832 struct fib_nh *nh; 1833 1834 nh = container_of(nhc, struct fib_nh, nh_common); 1835 if (nh->nh_tclassid && 1836 nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) 1837 goto nla_put_failure; 1838 } 1839 #endif 1840 } else { 1841 if (fib_add_multipath(skb, fi) < 0) 1842 goto nla_put_failure; 1843 } 1844 1845 offload: 1846 if (fri->offload) 1847 rtm->rtm_flags |= RTM_F_OFFLOAD; 1848 if (fri->trap) 1849 rtm->rtm_flags |= RTM_F_TRAP; 1850 if (fri->offload_failed) 1851 rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED; 1852 1853 nlmsg_end(skb, nlh); 1854 return 0; 1855 1856 nla_put_failure: 1857 nlmsg_cancel(skb, nlh); 1858 return -EMSGSIZE; 1859 } 1860 1861 /* 1862 * Update FIB if: 1863 * - local address disappeared -> we must delete all the entries 1864 * referring to it. 1865 * - device went down -> we must shutdown all nexthops going via it. 1866 */ 1867 int fib_sync_down_addr(struct net_device *dev, __be32 local) 1868 { 1869 int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; 1870 struct net *net = dev_net(dev); 1871 struct hlist_head *head; 1872 struct fib_info *fi; 1873 int ret = 0; 1874 1875 if (!fib_info_laddrhash || local == 0) 1876 return 0; 1877 1878 head = fib_info_laddrhash_bucket(net, local); 1879 hlist_for_each_entry(fi, head, fib_lhash) { 1880 if (!net_eq(fi->fib_net, net) || 1881 fi->fib_tb_id != tb_id) 1882 continue; 1883 if (fi->fib_prefsrc == local) { 1884 fi->fib_flags |= RTNH_F_DEAD; 1885 ret++; 1886 } 1887 } 1888 return ret; 1889 } 1890 1891 static int call_fib_nh_notifiers(struct fib_nh *nh, 1892 enum fib_event_type event_type) 1893 { 1894 bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev); 1895 struct fib_nh_notifier_info info = { 1896 .fib_nh = nh, 1897 }; 1898 1899 switch (event_type) { 1900 case FIB_EVENT_NH_ADD: 1901 if (nh->fib_nh_flags & RTNH_F_DEAD) 1902 break; 1903 if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) 1904 break; 1905 return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, 1906 &info.info); 1907 case FIB_EVENT_NH_DEL: 1908 if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) || 1909 (nh->fib_nh_flags & RTNH_F_DEAD)) 1910 return call_fib4_notifiers(dev_net(nh->fib_nh_dev), 1911 event_type, &info.info); 1912 break; 1913 default: 1914 break; 1915 } 1916 1917 return NOTIFY_DONE; 1918 } 1919 1920 /* Update the PMTU of exceptions when: 1921 * - the new MTU of the first hop becomes smaller than the PMTU 1922 * - the old MTU was the same as the PMTU, and it limited discovery of 1923 * larger MTUs on the path. With that limit raised, we can now 1924 * discover larger MTUs 1925 * A special case is locked exceptions, for which the PMTU is smaller 1926 * than the minimal accepted PMTU: 1927 * - if the new MTU is greater than the PMTU, don't make any change 1928 * - otherwise, unlock and set PMTU 1929 */ 1930 void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) 1931 { 1932 struct fnhe_hash_bucket *bucket; 1933 int i; 1934 1935 bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1); 1936 if (!bucket) 1937 return; 1938 1939 for (i = 0; i < FNHE_HASH_SIZE; i++) { 1940 struct fib_nh_exception *fnhe; 1941 1942 for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); 1943 fnhe; 1944 fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { 1945 if (fnhe->fnhe_mtu_locked) { 1946 if (new <= fnhe->fnhe_pmtu) { 1947 fnhe->fnhe_pmtu = new; 1948 fnhe->fnhe_mtu_locked = false; 1949 } 1950 } else if (new < fnhe->fnhe_pmtu || 1951 orig == fnhe->fnhe_pmtu) { 1952 fnhe->fnhe_pmtu = new; 1953 } 1954 } 1955 } 1956 } 1957 1958 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) 1959 { 1960 struct hlist_head *head = fib_info_devhash_bucket(dev); 1961 struct fib_nh *nh; 1962 1963 hlist_for_each_entry(nh, head, nh_hash) { 1964 if (nh->fib_nh_dev == dev) 1965 fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); 1966 } 1967 } 1968 1969 /* Event force Flags Description 1970 * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host 1971 * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host 1972 * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed 1973 * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed 1974 * 1975 * only used when fib_nh is built into fib_info 1976 */ 1977 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) 1978 { 1979 struct hlist_head *head = fib_info_devhash_bucket(dev); 1980 struct fib_info *prev_fi = NULL; 1981 int scope = RT_SCOPE_NOWHERE; 1982 struct fib_nh *nh; 1983 int ret = 0; 1984 1985 if (force) 1986 scope = -1; 1987 1988 hlist_for_each_entry(nh, head, nh_hash) { 1989 struct fib_info *fi = nh->nh_parent; 1990 int dead; 1991 1992 BUG_ON(!fi->fib_nhs); 1993 if (nh->fib_nh_dev != dev || fi == prev_fi) 1994 continue; 1995 prev_fi = fi; 1996 dead = 0; 1997 change_nexthops(fi) { 1998 if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) 1999 dead++; 2000 else if (nexthop_nh->fib_nh_dev == dev && 2001 nexthop_nh->fib_nh_scope != scope) { 2002 switch (event) { 2003 case NETDEV_DOWN: 2004 case NETDEV_UNREGISTER: 2005 nexthop_nh->fib_nh_flags |= RTNH_F_DEAD; 2006 fallthrough; 2007 case NETDEV_CHANGE: 2008 nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN; 2009 break; 2010 } 2011 call_fib_nh_notifiers(nexthop_nh, 2012 FIB_EVENT_NH_DEL); 2013 dead++; 2014 } 2015 #ifdef CONFIG_IP_ROUTE_MULTIPATH 2016 if (event == NETDEV_UNREGISTER && 2017 nexthop_nh->fib_nh_dev == dev) { 2018 dead = fi->fib_nhs; 2019 break; 2020 } 2021 #endif 2022 } endfor_nexthops(fi) 2023 if (dead == fi->fib_nhs) { 2024 switch (event) { 2025 case NETDEV_DOWN: 2026 case NETDEV_UNREGISTER: 2027 fi->fib_flags |= RTNH_F_DEAD; 2028 fallthrough; 2029 case NETDEV_CHANGE: 2030 fi->fib_flags |= RTNH_F_LINKDOWN; 2031 break; 2032 } 2033 ret++; 2034 } 2035 2036 fib_rebalance(fi); 2037 } 2038 2039 return ret; 2040 } 2041 2042 /* Must be invoked inside of an RCU protected region. */ 2043 static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) 2044 { 2045 struct fib_info *fi = NULL, *last_resort = NULL; 2046 struct hlist_head *fa_head = res->fa_head; 2047 struct fib_table *tb = res->table; 2048 u8 slen = 32 - res->prefixlen; 2049 int order = -1, last_idx = -1; 2050 struct fib_alias *fa, *fa1 = NULL; 2051 u32 last_prio = res->fi->fib_priority; 2052 dscp_t last_dscp = 0; 2053 2054 hlist_for_each_entry_rcu(fa, fa_head, fa_list) { 2055 struct fib_info *next_fi = fa->fa_info; 2056 struct fib_nh_common *nhc; 2057 2058 if (fa->fa_slen != slen) 2059 continue; 2060 if (fa->fa_dscp && 2061 fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos)) 2062 continue; 2063 if (fa->tb_id != tb->tb_id) 2064 continue; 2065 if (next_fi->fib_priority > last_prio && 2066 fa->fa_dscp == last_dscp) { 2067 if (last_dscp) 2068 continue; 2069 break; 2070 } 2071 if (next_fi->fib_flags & RTNH_F_DEAD) 2072 continue; 2073 last_dscp = fa->fa_dscp; 2074 last_prio = next_fi->fib_priority; 2075 2076 if (next_fi->fib_scope != res->scope || 2077 fa->fa_type != RTN_UNICAST) 2078 continue; 2079 2080 nhc = fib_info_nhc(next_fi, 0); 2081 if (!nhc->nhc_gw_family || nhc->nhc_scope != RT_SCOPE_LINK) 2082 continue; 2083 2084 fib_alias_accessed(fa); 2085 2086 if (!fi) { 2087 if (next_fi != res->fi) 2088 break; 2089 fa1 = fa; 2090 } else if (!fib_detect_death(fi, order, &last_resort, 2091 &last_idx, fa1->fa_default)) { 2092 fib_result_assign(res, fi); 2093 fa1->fa_default = order; 2094 goto out; 2095 } 2096 fi = next_fi; 2097 order++; 2098 } 2099 2100 if (order <= 0 || !fi) { 2101 if (fa1) 2102 fa1->fa_default = -1; 2103 goto out; 2104 } 2105 2106 if (!fib_detect_death(fi, order, &last_resort, &last_idx, 2107 fa1->fa_default)) { 2108 fib_result_assign(res, fi); 2109 fa1->fa_default = order; 2110 goto out; 2111 } 2112 2113 if (last_idx >= 0) 2114 fib_result_assign(res, last_resort); 2115 fa1->fa_default = last_idx; 2116 out: 2117 return; 2118 } 2119 2120 /* 2121 * Dead device goes up. We wake up dead nexthops. 2122 * It takes sense only on multipath routes. 2123 * 2124 * only used when fib_nh is built into fib_info 2125 */ 2126 int fib_sync_up(struct net_device *dev, unsigned char nh_flags) 2127 { 2128 struct fib_info *prev_fi; 2129 struct hlist_head *head; 2130 struct fib_nh *nh; 2131 int ret; 2132 2133 if (!(dev->flags & IFF_UP)) 2134 return 0; 2135 2136 if (nh_flags & RTNH_F_DEAD) { 2137 unsigned int flags = dev_get_flags(dev); 2138 2139 if (flags & (IFF_RUNNING | IFF_LOWER_UP)) 2140 nh_flags |= RTNH_F_LINKDOWN; 2141 } 2142 2143 prev_fi = NULL; 2144 head = fib_info_devhash_bucket(dev); 2145 ret = 0; 2146 2147 hlist_for_each_entry(nh, head, nh_hash) { 2148 struct fib_info *fi = nh->nh_parent; 2149 int alive; 2150 2151 BUG_ON(!fi->fib_nhs); 2152 if (nh->fib_nh_dev != dev || fi == prev_fi) 2153 continue; 2154 2155 prev_fi = fi; 2156 alive = 0; 2157 change_nexthops(fi) { 2158 if (!(nexthop_nh->fib_nh_flags & nh_flags)) { 2159 alive++; 2160 continue; 2161 } 2162 if (!nexthop_nh->fib_nh_dev || 2163 !(nexthop_nh->fib_nh_dev->flags & IFF_UP)) 2164 continue; 2165 if (nexthop_nh->fib_nh_dev != dev || 2166 !__in_dev_get_rtnl(dev)) 2167 continue; 2168 alive++; 2169 nexthop_nh->fib_nh_flags &= ~nh_flags; 2170 call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); 2171 } endfor_nexthops(fi) 2172 2173 if (alive > 0) { 2174 fi->fib_flags &= ~nh_flags; 2175 ret++; 2176 } 2177 2178 fib_rebalance(fi); 2179 } 2180 2181 return ret; 2182 } 2183 2184 #ifdef CONFIG_IP_ROUTE_MULTIPATH 2185 static bool fib_good_nh(const struct fib_nh *nh) 2186 { 2187 int state = NUD_REACHABLE; 2188 2189 if (nh->fib_nh_scope == RT_SCOPE_LINK) { 2190 struct neighbour *n; 2191 2192 rcu_read_lock_bh(); 2193 2194 if (likely(nh->fib_nh_gw_family == AF_INET)) 2195 n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, 2196 (__force u32)nh->fib_nh_gw4); 2197 else if (nh->fib_nh_gw_family == AF_INET6) 2198 n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, 2199 &nh->fib_nh_gw6); 2200 else 2201 n = NULL; 2202 if (n) 2203 state = n->nud_state; 2204 2205 rcu_read_unlock_bh(); 2206 } 2207 2208 return !!(state & NUD_VALID); 2209 } 2210 2211 void fib_select_multipath(struct fib_result *res, int hash) 2212 { 2213 struct fib_info *fi = res->fi; 2214 struct net *net = fi->fib_net; 2215 bool first = false; 2216 2217 if (unlikely(res->fi->nh)) { 2218 nexthop_path_fib_result(res, hash); 2219 return; 2220 } 2221 2222 change_nexthops(fi) { 2223 if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { 2224 if (!fib_good_nh(nexthop_nh)) 2225 continue; 2226 if (!first) { 2227 res->nh_sel = nhsel; 2228 res->nhc = &nexthop_nh->nh_common; 2229 first = true; 2230 } 2231 } 2232 2233 if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound)) 2234 continue; 2235 2236 res->nh_sel = nhsel; 2237 res->nhc = &nexthop_nh->nh_common; 2238 return; 2239 } endfor_nexthops(fi); 2240 } 2241 #endif 2242 2243 void fib_select_path(struct net *net, struct fib_result *res, 2244 struct flowi4 *fl4, const struct sk_buff *skb) 2245 { 2246 if (fl4->flowi4_oif) 2247 goto check_saddr; 2248 2249 #ifdef CONFIG_IP_ROUTE_MULTIPATH 2250 if (fib_info_num_path(res->fi) > 1) { 2251 int h = fib_multipath_hash(net, fl4, skb, NULL); 2252 2253 fib_select_multipath(res, h); 2254 } 2255 else 2256 #endif 2257 if (!res->prefixlen && 2258 res->table->tb_num_default > 1 && 2259 res->type == RTN_UNICAST) 2260 fib_select_default(fl4, res); 2261 2262 check_saddr: 2263 if (!fl4->saddr) 2264 fl4->saddr = fib_result_prefsrc(net, res); 2265 } 2266