/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <trace/events/fib6.h>

#include <linux/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
static void rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct rt6_info *rt);
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
					   struct in6_addr *daddr,
					   struct in6_addr *saddr);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif

struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);

void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;
		struct net *net = dev_net(rt->dst.dev);

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}

static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(&rt->from->dst);
}

static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (rt->rt6i_flags & RTF_PCPU)
		return rt6_pcpu_cow_metrics(rt);
	else if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);
}

static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}
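
/* Editorial note (derived from the two helpers above, not authoritative
 * documentation): for a gatewayed route such as "2001:db8::/32 via fe80::1
 * dev eth0" (addresses invented), choose_neigh_daddr() returns the gateway
 * fe80::1 and the neighbour entry is keyed on it; for an on-link route the
 * key falls back to the packet's destination address, or to the
 * caller-supplied daddr when no skb is available.
 */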

static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	struct net_device *dev = dst->dev;
	struct rt6_info *rt = (struct rt6_info *)dst;

	daddr = choose_neigh_daddr(rt, NULL, daddr);
	if (!daddr)
		return;
	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
		return;
	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
		return;
	__ipv6_confirm_neigh(dev, daddr);
}

static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.default_advmss		= ip6_default_advmss,
	.mtu			= ip6_mtu,
	.cow_metrics		= ipv6_cow_metrics,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.redirect		= rt6_do_redirect,
	.local_out		= __ip6_local_out,
	.neigh_lookup		= ip6_neigh_lookup,
	.confirm_neigh		= ip6_confirm_neigh,
};

static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.mtu			= ip6_blackhole_mtu,
	.default_advmss		= ip6_default_advmss,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.redirect		= ip6_rt_blackhole_redirect,
	.cow_metrics		= dst_cow_metrics_generic,
	.neigh_lookup		= ip6_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}

/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					1, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
	}

	return rt;
}

struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (!rt->rt6i_pcpu) {
			dst_release_immediate(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct rt6_exception_bucket *bucket;
	struct rt6_info *from = rt->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
	if (bucket) {
		rt->rt6i_exception_bucket = NULL;
		kfree(bucket);
	}

	rt->from = NULL;
	dst_release(&from->dst);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

static bool __rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES)
		return time_after(jiffies, rt->dst.expires);
	else
		return false;
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->from) {
		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
			rt6_check_expired(rt->from);
	}
	return false;
}

static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;

	/* We might have already computed the hash for ICMPv6 errors. In such
	 * case it will always be non-zero. Otherwise now is the time to do it.
	 */
	if (!fl6->mp_hash)
		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);

	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
		return match;

	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
				 rt6i_siblings) {
		if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
			continue;
		if (rt6_score_route(sibling, oif, strict) < 0)
			break;
		match = sibling;
		break;
	}

	return match;
}
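
/* Worked example for the selection above (weights invented, sketch only):
 * with two siblings of weight 1 and 3, fib6 precomputes rt6i_nh_upper_bound
 * at roughly 1/4 and 4/4 of the 31-bit mp_hash space.  A flow whose hash
 * lands at or below the first bound takes the first nexthop, anything above
 * it falls through to the sibling, so packets of one flow consistently map
 * to the same nexthop.
 */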

/*
 *	Route lookup. rcu_read_lock() should be held.
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
		return rt;

	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
		struct net_device *dev = sprt->dst.dev;

		if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
			continue;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}

	return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}

static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
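
/* Sketch of the scoring below (derived from the code, not a spec): a
 * device match contributes 2 (exact oif) or 1 (loopback with matching
 * idev), and with CONFIG_IPV6_ROUTER_PREF the decoded RA preference is
 * shifted into bits 2-3.  So, illustratively, an exact-oif match on a
 * high-preference router scores 2 | (3 << 2) = 14 and beats any
 * medium-preference candidate.
 */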

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (rt->rt6i_nh_flags & RTNH_F_DEAD)
		goto out;

	if (idev->cnf.ignore_routes_with_linkdown &&
	    rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *leaf,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = leaf; rt && rt != rr_head;
	     rt = rcu_dereference(rt->rt6_next)) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
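
/* Editorial summary of find_rr_leaf() (derived from the loops above): the
 * leaf list is scanned starting at rr_head and wraps once through the
 * entries of equal metric; entries with a worse (higher) metric are only
 * visited via 'cont' when nothing at the primary metric matched.  This is
 * what keeps the round-robin pointer in rt6_select() cheap to advance.
 */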

static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
				   int oif, int strict)
{
	struct rt6_info *leaf = rcu_dereference(fn->leaf);
	struct rt6_info *match, *rt0;
	bool do_rr = false;
	int key_plen;

	if (!leaf || leaf == net->ipv6.ip6_null_entry)
		return net->ipv6.ip6_null_entry;

	rt0 = rcu_dereference(fn->rr_ptr);
	if (!rt0)
		rt0 = leaf;

	/* Double check to make sure fn is not an intermediate node
	 * and fn->leaf does not point to its child's leaf
	 * (this might happen if all routes under fn are deleted from
	 * the tree and fib6_repair_tree() is called on the node.)
	 */
	key_plen = rt0->rt6i_dst.plen;
#ifdef CONFIG_IPV6_SUBTREES
	if (rt0->rt6i_src.plen)
		key_plen = rt0->rt6i_src.plen;
#endif
	if (fn->fn_bit != key_plen)
		return net->ipv6.ip6_null_entry;

	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rcu_dereference(rt0->rt6_next);

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = leaf;

		if (next != rt0) {
			spin_lock_bh(&leaf->rt6i_table->tb6_lock);
			/* make sure next is not being deleted from the tree */
			if (next->rt6i_node)
				rcu_assign_pointer(fn->rr_ptr, next);
			spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
		}
	}

	return match ? match : net->ipv6.ip6_null_entry;
}

static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}

#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif

static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn, *sn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = rcu_dereference(fn->parent);
		sn = FIB6_SUBTREE(pn);
		if (sn && sn != fn)
			fn = fib6_lookup(sn, NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
			  bool null_fallback)
{
	struct rt6_info *rt = *prt;

	if (dst_hold_safe(&rt->dst))
		return true;
	if (null_fallback) {
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
	} else {
		rt = NULL;
	}
	*prt = rt;
	return false;
}

static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct rt6_info *rt, *rt_cache;
	struct fib6_node *fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		flags &= ~RT6_LOOKUP_F_IFACE;

	rcu_read_lock();
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = rcu_dereference(fn->leaf);
	if (!rt) {
		rt = net->ipv6.ip6_null_entry;
	} else {
		rt = rt6_device_match(net, rt, &fl6->saddr,
				      fl6->flowi6_oif, flags);
		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
			rt = rt6_multipath_select(rt, fl6,
						  fl6->flowi6_oif, flags);
	}
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	/* Search through the exception table */
	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
	if (rt_cache)
		rt = rt_cache;

	if (ip6_hold_safe(net, &rt, true))
		dst_use_noref(&rt->dst, jiffies);

	rcu_read_unlock();

	trace_fib6_table_lookup(net, rt, table, fl6);

	return rt;
}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);
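
/* Hedged usage sketch (caller code, not part of this file): a caller
 * resolving a destination would use rt6_lookup() roughly like this:
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL,
 *					 skb->dev->ifindex, 0);
 *	if (rt) {
 *		... use rt->dst ...
 *		ip6_rt_put(rt);		// rt6_lookup returns a held reference
 *	}
 */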

/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * Caller must hold a dst reference before calling it.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc,
			struct netlink_ext_ack *extack)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	spin_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
	spin_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	/* Hold dst to account for the reference from the fib6 tree */
	dst_hold(&rt->dst);
	return __ip6_ins_rt(rt, &info, &mxc, NULL);
}

/* called with rcu_lock held */
static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
{
	struct net_device *dev = rt->dst.dev;

	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
		/* for copies of local routes, dst->dev needs to be the
		 * device if it is a master device, the master device if
		 * device is enslaved, and the loopback as the default
		 */
		if (netif_is_l3_slave(dev) &&
		    !rt6_need_strict(&rt->rt6i_dst.addr))
			dev = l3mdev_master_dev_rcu(dev);
		else if (!netif_is_l3_master(dev))
			dev = dev_net(dev)->loopback_dev;
		/* last case is netif_is_l3_master(dev) is true in which
		 * case we want dev returned to be dev
		 */
	}

	return dev;
}

static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct net_device *dev;
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = ort->from;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(ort);
	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
	rcu_read_unlock();
	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}

static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct net_device *dev;
	struct rt6_info *pcpu_rt;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(rt);
	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
	rcu_read_unlock();
	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}

/* It should be called with rcu_read_lock() acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
		rt6_dst_from_metrics_check(pcpu_rt);

	return pcpu_rt;
}
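
/* Editorial note on the function below: the per-cpu slot is claimed with
 * cmpxchg(p, NULL, pcpu_rt) rather than a plain store, and BUG_ON(prev)
 * documents the invariant that the slot was empty.  Callers run under
 * local_bh_disable(), so nothing else can have installed a copy for this
 * CPU in the meantime.
 */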

static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	dst_hold(&pcpu_rt->dst);
	p = this_cpu_ptr(rt->rt6i_pcpu);
	prev = cmpxchg(p, NULL, pcpu_rt);
	BUG_ON(prev);

	rt6_dst_from_metrics_check(pcpu_rt);
	return pcpu_rt;
}

/* exception hash table implementation
 */
static DEFINE_SPINLOCK(rt6_exception_lock);

/* Remove rt6_ex from hash table and free the memory
 * Caller must hold rt6_exception_lock
 */
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
				 struct rt6_exception *rt6_ex)
{
	struct net *net;

	if (!bucket || !rt6_ex)
		return;

	net = dev_net(rt6_ex->rt6i->dst.dev);
	rt6_ex->rt6i->rt6i_node = NULL;
	hlist_del_rcu(&rt6_ex->hlist);
	rt6_release(rt6_ex->rt6i);
	kfree_rcu(rt6_ex, rcu);
	WARN_ON_ONCE(!bucket->depth);
	bucket->depth--;
	net->ipv6.rt6_stats->fib_rt_cache--;
}

/* Remove oldest rt6_ex in bucket and free the memory
 * Caller must hold rt6_exception_lock
 */
static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
{
	struct rt6_exception *rt6_ex, *oldest = NULL;

	if (!bucket)
		return;

	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
			oldest = rt6_ex;
	}
	rt6_remove_exception(bucket, oldest);
}

static u32 rt6_exception_hash(const struct in6_addr *dst,
			      const struct in6_addr *src)
{
	static u32 seed __read_mostly;
	u32 val;

	net_get_random_once(&seed, sizeof(seed));
	val = jhash(dst, sizeof(*dst), seed);

#ifdef CONFIG_IPV6_SUBTREES
	if (src)
		val = jhash(src, sizeof(*src), val);
#endif
	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
}

/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair
 * Caller must hold rt6_exception_lock
 */
static struct rt6_exception *
__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
			      const struct in6_addr *daddr,
			      const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}

/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair
 * Caller must hold rcu_read_lock()
 */
static struct rt6_exception *
__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
			 const struct in6_addr *daddr,
			 const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}
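
/* Illustrative example (addresses invented): a PMTU exception learned for
 * 2001:db8::1 against a /32 route hashes only the destination, so
 * rt6_exception_hash(&daddr, NULL) selects one of the
 * FIB6_EXCEPTION_BUCKET_SIZE chains; only with CONFIG_IPV6_SUBTREES and a
 * source-routed parent does the saddr get folded into the same jhash.
 */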

static int rt6_insert_exception(struct rt6_info *nrt,
				struct rt6_info *ort)
{
	struct net *net = dev_net(ort->dst.dev);
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	int err = 0;

	/* ort can't be a cache or pcpu route */
	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = ort->from;
	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));

	spin_lock_bh(&rt6_exception_lock);

	if (ort->exception_bucket_flushed) {
		err = -EINVAL;
		goto out;
	}

	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));
	if (!bucket) {
		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
				 GFP_ATOMIC);
		if (!bucket) {
			err = -ENOMEM;
			goto out;
		}
		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
	}

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates ort is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (ort->rt6i_src.plen)
		src_key = &nrt->rt6i_src.addr;
#endif

	/* Update rt6i_prefsrc as it could be changed
	 * in rt6_remove_prefsrc()
	 */
	nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
	/* rt6_mtu_change() might lower mtu on ort.
	 * Only insert this exception route if its mtu
	 * is less than ort's mtu value.
	 */
	if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
		err = -EINVAL;
		goto out;
	}

	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex)
		rt6_remove_exception(bucket, rt6_ex);

	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
	if (!rt6_ex) {
		err = -ENOMEM;
		goto out;
	}
	rt6_ex->rt6i = nrt;
	rt6_ex->stamp = jiffies;
	atomic_inc(&nrt->rt6i_ref);
	nrt->rt6i_node = ort->rt6i_node;
	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
	bucket->depth++;
	net->ipv6.rt6_stats->fib_rt_cache++;

	if (bucket->depth > FIB6_MAX_DEPTH)
		rt6_exception_remove_oldest(bucket);

out:
	spin_unlock_bh(&rt6_exception_lock);

	/* Update fn->fn_sernum to invalidate all cached dst */
	if (!err) {
		spin_lock_bh(&ort->rt6i_table->tb6_lock);
		fib6_update_sernum(ort);
		spin_unlock_bh(&ort->rt6i_table->tb6_lock);
		fib6_force_start_gc(net);
	}

	return err;
}

void rt6_flush_exceptions(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	spin_lock_bh(&rt6_exception_lock);
	/* Prevent rt6_insert_exception() from recreating the bucket list */
	rt->exception_bucket_flushed = 1;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));
	if (!bucket)
		goto out;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
			rt6_remove_exception(bucket, rt6_ex);
		WARN_ON_ONCE(bucket->depth);
		bucket++;
	}

out:
	spin_unlock_bh(&rt6_exception_lock);
}

/* Find cached rt in the hash table inside passed in rt
 * Caller has to hold rcu_read_lock()
 */
static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
					   struct in6_addr *daddr,
					   struct in6_addr *saddr)
{
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	struct rt6_info *res = NULL;

	bucket = rcu_dereference(rt->rt6i_exception_bucket);

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates rt is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (rt->rt6i_src.plen)
		src_key = saddr;
#endif
	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);

	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
		res = rt6_ex->rt6i;

	return res;
}

/* Remove the passed in cached rt from the hash table that contains it */
int rt6_remove_exception_rt(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_info *from = rt->from;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	int err;

	if (!from ||
	    !(rt->rt6i_flags & RTF_CACHE))
		return -EINVAL;

	if (!rcu_access_pointer(from->rt6i_exception_bucket))
		return -ENOENT;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));
#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates 'from' is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (from->rt6i_src.plen)
		src_key = &rt->rt6i_src.addr;
#endif
	rt6_ex = __rt6_find_exception_spinlock(&bucket,
					       &rt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex) {
		rt6_remove_exception(bucket, rt6_ex);
		err = 0;
	} else {
		err = -ENOENT;
	}

	spin_unlock_bh(&rt6_exception_lock);
	return err;
}

/* Find rt6_ex which contains the passed in rt cache and
 * refresh its stamp
 */
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_info *from = rt->from;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;

	if (!from ||
	    !(rt->rt6i_flags & RTF_CACHE))
		return;

	rcu_read_lock();
	bucket = rcu_dereference(from->rt6i_exception_bucket);

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates 'from' is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (from->rt6i_src.plen)
		src_key = &rt->rt6i_src.addr;
#endif
	rt6_ex = __rt6_find_exception_rcu(&bucket,
					  &rt->rt6i_dst.addr,
					  src_key);
	if (rt6_ex)
		rt6_ex->stamp = jiffies;

	rcu_read_unlock();
}

static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	int i;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
			}
			bucket++;
		}
	}
}
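
/* Worked example for the rule below (numbers invented): lowering a link
 * MTU from 1500 to 1280 may update any exception whose PMTU is >= 1280,
 * since the link is now the path floor.  On a raise, only exceptions whose
 * PMTU equals the local MTU are updated; a PMTU of e.g. 1400 learned from
 * a remote router stays put for PMTU discovery to refresh.
 */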

static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
					 struct rt6_info *rt, int mtu)
{
	/* If the new MTU is lower than the route PMTU, this new MTU will be the
	 * lowest MTU in the path: always allow updating the route PMTU to
	 * reflect PMTU decreases.
	 *
	 * If the new MTU is higher, and the route PMTU is equal to the local
	 * MTU, this means the old MTU is the lowest in the path, so allow
	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
	 * handle this.
	 */

	if (dst_mtu(&rt->dst) >= mtu)
		return true;

	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
		return true;

	return false;
}

static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
				       struct rt6_info *rt, int mtu)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	int i;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));

	if (!bucket)
		return;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
			struct rt6_info *entry = rt6_ex->rt6i;

			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
			 * route), the metrics of its rt->dst.from have already
			 * been updated.
			 */
			if (entry->rt6i_pmtu &&
			    rt6_mtu_change_route_allowed(idev, entry, mtu))
				entry->rt6i_pmtu = mtu;
		}
		bucket++;
	}
}

#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
					struct in6_addr *gateway)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				     lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				struct rt6_info *entry = rt6_ex->rt6i;

				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
				    RTF_CACHE_GATEWAY &&
				    ipv6_addr_equal(gateway,
						    &entry->rt6i_gateway)) {
					rt6_remove_exception(bucket, rt6_ex);
				}
			}
			bucket++;
		}
	}

	spin_unlock_bh(&rt6_exception_lock);
}

static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
				      struct rt6_exception *rt6_ex,
				      struct fib6_gc_args *gc_args,
				      unsigned long now)
{
	struct rt6_info *rt = rt6_ex->rt6i;

	/* we are pruning and obsoleting aged-out and non-gateway exceptions
	 * even if others still have references to them, so that on next
	 * dst_check() such references can be dropped.
	 * EXPIRES exceptions - e.g. pmtu-generated ones - are pruned when
	 * expired, independently of their aging, as per RFC 8201 section 4
	 */
	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
			RT6_TRACE("aging clone %p\n", rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	} else if (time_after(jiffies, rt->dst.expires)) {
		RT6_TRACE("purging expired route %p\n", rt);
		rt6_remove_exception(bucket, rt6_ex);
		return;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		struct neighbour *neigh;
		__u8 neigh_flags = 0;

		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
		if (neigh)
			neigh_flags = neigh->flags;

		if (!(neigh_flags & NTF_ROUTER)) {
			RT6_TRACE("purging route %p via non-router but gateway\n",
				  rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	}

	gc_args->more++;
}

void rt6_age_exceptions(struct rt6_info *rt,
			struct fib6_gc_args *gc_args,
			unsigned long now)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	rcu_read_lock_bh();
	spin_lock(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				rt6_age_examine_exception(bucket, rt6_ex,
							  gc_args, now);
			}
			bucket++;
		}
	}
	spin_unlock(&rt6_exception_lock);
	rcu_read_unlock_bh();
}
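
/* Editorial overview of the function below (summary, not a spec): depending
 * on the matched entry, ip6_pol_route() hands back one of four dsts - a
 * held RTF_CACHE exception, a one-off uncached clone for the
 * FLOWI_FLAG_KNOWN_NH case, a per-cpu copy of the fib6 entry, or the held
 * null entry when the lookup fails.  Every path returns with a reference
 * the caller must release.
 */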

struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt, *rt_cache;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	rcu_read_lock();

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(net, fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}

	/* Search through the exception table */
	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
	if (rt_cache)
		rt = rt_cache;

	if (rt == net->ipv6.ip6_null_entry) {
		rcu_read_unlock();
		dst_hold(&rt->dst);
		trace_fib6_table_lookup(net, rt, table, fl6);
		return rt;
	} else if (rt->rt6i_flags & RTF_CACHE) {
		if (ip6_hold_safe(net, &rt, true)) {
			dst_use_noref(&rt->dst, jiffies);
			rt6_dst_from_metrics_check(rt);
		}
		rcu_read_unlock();
		trace_fib6_table_lookup(net, rt, table, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		if (ip6_hold_safe(net, &rt, true)) {
			dst_use_noref(&rt->dst, jiffies);
		} else {
			rcu_read_unlock();
			uncached_rt = rt;
			goto uncached_rt_out;
		}
		rcu_read_unlock();

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt) {
			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
			 * No need for another dst_hold()
			 */
			rt6_uncached_list_add(uncached_rt);
			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
		} else {
			uncached_rt = net->ipv6.ip6_null_entry;
			dst_hold(&uncached_rt->dst);
		}

uncached_rt_out:
		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		dst_use_noref(&rt->dst, jiffies);
		local_bh_disable();
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (!pcpu_rt) {
			/* atomic_inc_not_zero() is needed when using rcu */
			if (atomic_inc_not_zero(&rt->rt6i_ref)) {
				/* No dst_hold() on rt is needed because grabbing
				 * rt->rt6i_ref makes sure rt can't be released.
				 */
				pcpu_rt = rt6_make_pcpu_route(rt);
				rt6_release(rt);
			} else {
				/* rt is already removed from tree */
				pcpu_rt = net->ipv6.ip6_null_entry;
				dst_hold(&pcpu_rt->dst);
			}
		}
		local_bh_enable();
		rcu_read_unlock();
		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
		return pcpu_rt;
	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

struct dst_entry *ip6_route_input_lookup(struct net *net,
					 struct net_device *dev,
					 struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);

static void ip6_multipath_l3_keys(const struct sk_buff *skb,
				  struct flow_keys *keys)
{
	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
	const struct ipv6hdr *key_iph = outer_iph;
	const struct ipv6hdr *inner_iph;
	const struct icmp6hdr *icmph;
	struct ipv6hdr _inner_iph;

	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
		goto out;

	icmph = icmp6_hdr(skb);
	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
	    icmph->icmp6_type != ICMPV6_PARAMPROB)
		goto out;

	inner_iph = skb_header_pointer(skb,
				       skb_transport_offset(skb) + sizeof(*icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	key_iph = inner_iph;
out:
	memset(keys, 0, sizeof(*keys));
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys->addrs.v6addrs.src = key_iph->saddr;
	keys->addrs.v6addrs.dst = key_iph->daddr;
	keys->tags.flow_label = ip6_flowinfo(key_iph);
	keys->basic.ip_proto = key_iph->nexthdr;
}

/* if skb is set it will be used and fl6 can be NULL */
u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
{
	struct flow_keys hash_keys;

	if (skb) {
		ip6_multipath_l3_keys(skb, &hash_keys);
		return flow_hash_from_keys(&hash_keys) >> 1;
	}

	return get_hash_from_flowi6(fl6) >> 1;
}

void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	bool any_src;

	if (rt6_need_strict(&fl6->daddr)) {
		struct dst_entry *dst;

		dst = l3mdev_link_scope_lookup(net, fl6);
		if (dst)
			return dst;
	}

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
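
/* Hedged usage sketch (caller code, not part of this file): output-path
 * users typically reach ip6_route_output_flags() through the
 * ip6_route_output() wrapper, roughly:
 *
 *	struct dst_entry *dst = ip6_route_output(net, sk, &fl6);
 *	if (dst->error) {
 *		dst_release(dst);
 *		return -EHOSTUNREACH;	// illustrative error choice
 *	}
 */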

struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct net_device *loopback_dev = net->loopback_dev;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
		       DST_OBSOLETE_DEAD, 0);
	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);

		rt->rt6i_idev = in6_dev_get(loopback_dev);
		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}

/*
 *	Destination cache support functions
 */

static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	if (rt->from &&
	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
		dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
}

static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	u32 rt_cookie = 0;

	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}

static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (!__rt6_check_expired(rt) &&
	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	    rt6_check(rt->from, cookie))
		return &rt->dst;
	else
		return NULL;
}

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	if (rt->rt6i_flags & RTF_PCPU ||
	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (dst_hold_safe(&rt->dst))
				ip6_del_rt(rt);
		} else {
			struct fib6_node *fn;

			rcu_read_lock();
			fn = rcu_dereference(rt->rt6i_node);
			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
				fn->fn_sernum = -1;
			rcu_read_unlock();
		}
	}
}

static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}

static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
	return !(rt->rt6i_flags & RTF_CACHE) &&
	       (rt->rt6i_flags & RTF_PCPU ||
		rcu_access_pointer(rt->rt6i_node));
}
2094 return; 2095 2096 if (!rt6_cache_allowed_for_pmtu(rt6)) { 2097 rt6_do_update_pmtu(rt6, mtu); 2098 /* update rt6_ex->stamp for cache */ 2099 if (rt6->rt6i_flags & RTF_CACHE) 2100 rt6_update_exception_stamp_rt(rt6); 2101 } else if (daddr) { 2102 struct rt6_info *nrt6; 2103 2104 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); 2105 if (nrt6) { 2106 rt6_do_update_pmtu(nrt6, mtu); 2107 if (rt6_insert_exception(nrt6, rt6)) 2108 dst_release_immediate(&nrt6->dst); 2109 } 2110 } 2111 } 2112 2113 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 2114 struct sk_buff *skb, u32 mtu) 2115 { 2116 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); 2117 } 2118 2119 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, 2120 int oif, u32 mark, kuid_t uid) 2121 { 2122 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 2123 struct dst_entry *dst; 2124 struct flowi6 fl6; 2125 2126 memset(&fl6, 0, sizeof(fl6)); 2127 fl6.flowi6_oif = oif; 2128 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark); 2129 fl6.daddr = iph->daddr; 2130 fl6.saddr = iph->saddr; 2131 fl6.flowlabel = ip6_flowinfo(iph); 2132 fl6.flowi6_uid = uid; 2133 2134 dst = ip6_route_output(net, NULL, &fl6); 2135 if (!dst->error) 2136 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); 2137 dst_release(dst); 2138 } 2139 EXPORT_SYMBOL_GPL(ip6_update_pmtu); 2140 2141 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) 2142 { 2143 struct dst_entry *dst; 2144 2145 ip6_update_pmtu(skb, sock_net(sk), mtu, 2146 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); 2147 2148 dst = __sk_dst_get(sk); 2149 if (!dst || !dst->obsolete || 2150 dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) 2151 return; 2152 2153 bh_lock_sock(sk); 2154 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) 2155 ip6_datagram_dst_update(sk, false); 2156 bh_unlock_sock(sk); 2157 } 2158 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); 2159 2160 /* Handle redirects */ 2161 struct ip6rd_flowi { 2162 struct flowi6 fl6; 2163 struct in6_addr gateway; 2164 }; 2165 2166 static struct rt6_info *__ip6_route_redirect(struct net *net, 2167 struct fib6_table *table, 2168 struct flowi6 *fl6, 2169 int flags) 2170 { 2171 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; 2172 struct rt6_info *rt, *rt_cache; 2173 struct fib6_node *fn; 2174 2175 /* Get the "current" route for this destination and 2176 * check if the redirect has come from appropriate router. 2177 * 2178 * RFC 4861 specifies that redirects should only be 2179 * accepted if they come from the nexthop to the target. 2180 * Due to the way the routes are chosen, this notion 2181 * is a bit fuzzy and one might need to check all possible 2182 * routes. 2183 */ 2184 2185 rcu_read_lock(); 2186 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 2187 restart: 2188 for_each_fib6_node_rt_rcu(fn) { 2189 if (rt->rt6i_nh_flags & RTNH_F_DEAD) 2190 continue; 2191 if (rt6_check_expired(rt)) 2192 continue; 2193 if (rt->dst.error) 2194 break; 2195 if (!(rt->rt6i_flags & RTF_GATEWAY)) 2196 continue; 2197 if (fl6->flowi6_oif != rt->dst.dev->ifindex) 2198 continue; 2199 /* rt_cache's gateway might be different from its 'parent' 2200 * in the case of an ip redirect. 2201 * So we keep searching in the exception table if the gateway 2202 * is different. 
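	 *
	 * Illustrative example (hypothetical addresses): a parent route
	 * via gateway fe80::1 may own a cached exception whose
	 * rt6i_gateway was rewritten to fe80::2 by an earlier redirect;
	 * a redirect arriving from fe80::2 must then match the cached
	 * entry rather than the parent route.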
2203 */ 2204 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) { 2205 rt_cache = rt6_find_cached_rt(rt, 2206 &fl6->daddr, 2207 &fl6->saddr); 2208 if (rt_cache && 2209 ipv6_addr_equal(&rdfl->gateway, 2210 &rt_cache->rt6i_gateway)) { 2211 rt = rt_cache; 2212 break; 2213 } 2214 continue; 2215 } 2216 break; 2217 } 2218 2219 if (!rt) 2220 rt = net->ipv6.ip6_null_entry; 2221 else if (rt->dst.error) { 2222 rt = net->ipv6.ip6_null_entry; 2223 goto out; 2224 } 2225 2226 if (rt == net->ipv6.ip6_null_entry) { 2227 fn = fib6_backtrack(fn, &fl6->saddr); 2228 if (fn) 2229 goto restart; 2230 } 2231 2232 out: 2233 ip6_hold_safe(net, &rt, true); 2234 2235 rcu_read_unlock(); 2236 2237 trace_fib6_table_lookup(net, rt, table, fl6); 2238 return rt; 2239 }; 2240 2241 static struct dst_entry *ip6_route_redirect(struct net *net, 2242 const struct flowi6 *fl6, 2243 const struct in6_addr *gateway) 2244 { 2245 int flags = RT6_LOOKUP_F_HAS_SADDR; 2246 struct ip6rd_flowi rdfl; 2247 2248 rdfl.fl6 = *fl6; 2249 rdfl.gateway = *gateway; 2250 2251 return fib6_rule_lookup(net, &rdfl.fl6, 2252 flags, __ip6_route_redirect); 2253 } 2254 2255 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, 2256 kuid_t uid) 2257 { 2258 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 2259 struct dst_entry *dst; 2260 struct flowi6 fl6; 2261 2262 memset(&fl6, 0, sizeof(fl6)); 2263 fl6.flowi6_iif = LOOPBACK_IFINDEX; 2264 fl6.flowi6_oif = oif; 2265 fl6.flowi6_mark = mark; 2266 fl6.daddr = iph->daddr; 2267 fl6.saddr = iph->saddr; 2268 fl6.flowlabel = ip6_flowinfo(iph); 2269 fl6.flowi6_uid = uid; 2270 2271 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); 2272 rt6_do_redirect(dst, NULL, skb); 2273 dst_release(dst); 2274 } 2275 EXPORT_SYMBOL_GPL(ip6_redirect); 2276 2277 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, 2278 u32 mark) 2279 { 2280 const struct ipv6hdr *iph = ipv6_hdr(skb); 2281 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); 2282 struct dst_entry *dst; 2283 struct flowi6 fl6; 2284 2285 memset(&fl6, 0, sizeof(fl6)); 2286 fl6.flowi6_iif = LOOPBACK_IFINDEX; 2287 fl6.flowi6_oif = oif; 2288 fl6.flowi6_mark = mark; 2289 fl6.daddr = msg->dest; 2290 fl6.saddr = iph->daddr; 2291 fl6.flowi6_uid = sock_net_uid(net, NULL); 2292 2293 dst = ip6_route_redirect(net, &fl6, &iph->saddr); 2294 rt6_do_redirect(dst, NULL, skb); 2295 dst_release(dst); 2296 } 2297 2298 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) 2299 { 2300 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, 2301 sk->sk_uid); 2302 } 2303 EXPORT_SYMBOL_GPL(ip6_sk_redirect); 2304 2305 static unsigned int ip6_default_advmss(const struct dst_entry *dst) 2306 { 2307 struct net_device *dev = dst->dev; 2308 unsigned int mtu = dst_mtu(dst); 2309 struct net *net = dev_net(dev); 2310 2311 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 2312 2313 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 2314 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 2315 2316 /* 2317 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 2318 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
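	 * (Worked example: with IPV6_MAXPLEN = 65535 and a 20-byte TCP
	 * header, any mtu above 65515 cannot describe a real payload
	 * limit, so it is clamped to IPV6_MAXPLEN below.)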
2319 * IPV6_MAXPLEN is also valid and means: "any MSS, 2320 * rely only on pmtu discovery" 2321 */ 2322 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 2323 mtu = IPV6_MAXPLEN; 2324 return mtu; 2325 } 2326 2327 static unsigned int ip6_mtu(const struct dst_entry *dst) 2328 { 2329 const struct rt6_info *rt = (const struct rt6_info *)dst; 2330 unsigned int mtu = rt->rt6i_pmtu; 2331 struct inet6_dev *idev; 2332 2333 if (mtu) 2334 goto out; 2335 2336 mtu = dst_metric_raw(dst, RTAX_MTU); 2337 if (mtu) 2338 goto out; 2339 2340 mtu = IPV6_MIN_MTU; 2341 2342 rcu_read_lock(); 2343 idev = __in6_dev_get(dst->dev); 2344 if (idev) 2345 mtu = idev->cnf.mtu6; 2346 rcu_read_unlock(); 2347 2348 out: 2349 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); 2350 2351 return mtu - lwtunnel_headroom(dst->lwtstate, mtu); 2352 } 2353 2354 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 2355 struct flowi6 *fl6) 2356 { 2357 struct dst_entry *dst; 2358 struct rt6_info *rt; 2359 struct inet6_dev *idev = in6_dev_get(dev); 2360 struct net *net = dev_net(dev); 2361 2362 if (unlikely(!idev)) 2363 return ERR_PTR(-ENODEV); 2364 2365 rt = ip6_dst_alloc(net, dev, 0); 2366 if (unlikely(!rt)) { 2367 in6_dev_put(idev); 2368 dst = ERR_PTR(-ENOMEM); 2369 goto out; 2370 } 2371 2372 rt->dst.flags |= DST_HOST; 2373 rt->dst.input = ip6_input; 2374 rt->dst.output = ip6_output; 2375 rt->rt6i_gateway = fl6->daddr; 2376 rt->rt6i_dst.addr = fl6->daddr; 2377 rt->rt6i_dst.plen = 128; 2378 rt->rt6i_idev = idev; 2379 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); 2380 2381 /* Add this dst into uncached_list so that rt6_disable_ip() can 2382 * do proper release of the net_device 2383 */ 2384 rt6_uncached_list_add(rt); 2385 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); 2386 2387 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); 2388 2389 out: 2390 return dst; 2391 } 2392 2393 static int ip6_dst_gc(struct dst_ops *ops) 2394 { 2395 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 2396 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 2397 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 2398 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 2399 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 2400 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 2401 int entries; 2402 2403 entries = dst_entries_get_fast(ops); 2404 if (time_after(rt_last_gc + rt_min_interval, jiffies) && 2405 entries <= rt_max_size) 2406 goto out; 2407 2408 net->ipv6.ip6_rt_gc_expire++; 2409 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true); 2410 entries = dst_entries_get_slow(ops); 2411 if (entries < ops->gc_thresh) 2412 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 2413 out: 2414 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 2415 return entries > rt_max_size; 2416 } 2417 2418 static int ip6_convert_metrics(struct mx6_config *mxc, 2419 const struct fib6_config *cfg) 2420 { 2421 struct net *net = cfg->fc_nlinfo.nl_net; 2422 bool ecn_ca = false; 2423 struct nlattr *nla; 2424 int remaining; 2425 u32 *mp; 2426 2427 if (!cfg->fc_mx) 2428 return 0; 2429 2430 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 2431 if (unlikely(!mp)) 2432 return -ENOMEM; 2433 2434 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 2435 int type = nla_type(nla); 2436 u32 val; 2437 2438 if (!type) 2439 continue; 2440 if (unlikely(type > RTAX_MAX)) 2441 goto err; 2442 2443 if (type == RTAX_CC_ALGO) { 2444 char tmp[TCP_CA_NAME_MAX]; 2445 2446 nla_strlcpy(tmp, nla, sizeof(tmp)); 2447 val = 
tcp_ca_get_key_by_name(net, tmp, &ecn_ca); 2448 if (val == TCP_CA_UNSPEC) 2449 goto err; 2450 } else { 2451 val = nla_get_u32(nla); 2452 } 2453 if (type == RTAX_HOPLIMIT && val > 255) 2454 val = 255; 2455 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) 2456 goto err; 2457 2458 mp[type - 1] = val; 2459 __set_bit(type - 1, mxc->mx_valid); 2460 } 2461 2462 if (ecn_ca) { 2463 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid); 2464 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; 2465 } 2466 2467 mxc->mx = mp; 2468 return 0; 2469 err: 2470 kfree(mp); 2471 return -EINVAL; 2472 } 2473 2474 static struct rt6_info *ip6_nh_lookup_table(struct net *net, 2475 struct fib6_config *cfg, 2476 const struct in6_addr *gw_addr, 2477 u32 tbid, int flags) 2478 { 2479 struct flowi6 fl6 = { 2480 .flowi6_oif = cfg->fc_ifindex, 2481 .daddr = *gw_addr, 2482 .saddr = cfg->fc_prefsrc, 2483 }; 2484 struct fib6_table *table; 2485 struct rt6_info *rt; 2486 2487 table = fib6_get_table(net, tbid); 2488 if (!table) 2489 return NULL; 2490 2491 if (!ipv6_addr_any(&cfg->fc_prefsrc)) 2492 flags |= RT6_LOOKUP_F_HAS_SADDR; 2493 2494 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE; 2495 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags); 2496 2497 /* if table lookup failed, fall back to full lookup */ 2498 if (rt == net->ipv6.ip6_null_entry) { 2499 ip6_rt_put(rt); 2500 rt = NULL; 2501 } 2502 2503 return rt; 2504 } 2505 2506 static int ip6_route_check_nh_onlink(struct net *net, 2507 struct fib6_config *cfg, 2508 struct net_device *dev, 2509 struct netlink_ext_ack *extack) 2510 { 2511 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; 2512 const struct in6_addr *gw_addr = &cfg->fc_gateway; 2513 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT; 2514 struct rt6_info *grt; 2515 int err; 2516 2517 err = 0; 2518 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); 2519 if (grt) { 2520 if (!grt->dst.error && 2521 (grt->rt6i_flags & flags || dev != grt->dst.dev)) { 2522 NL_SET_ERR_MSG(extack, 2523 "Nexthop has invalid gateway or device mismatch"); 2524 err = -EINVAL; 2525 } 2526 2527 ip6_rt_put(grt); 2528 } 2529 2530 return err; 2531 } 2532 2533 static int ip6_route_check_nh(struct net *net, 2534 struct fib6_config *cfg, 2535 struct net_device **_dev, 2536 struct inet6_dev **idev) 2537 { 2538 const struct in6_addr *gw_addr = &cfg->fc_gateway; 2539 struct net_device *dev = _dev ? 
*_dev : NULL; 2540 struct rt6_info *grt = NULL; 2541 int err = -EHOSTUNREACH; 2542 2543 if (cfg->fc_table) { 2544 int flags = RT6_LOOKUP_F_IFACE; 2545 2546 grt = ip6_nh_lookup_table(net, cfg, gw_addr, 2547 cfg->fc_table, flags); 2548 if (grt) { 2549 if (grt->rt6i_flags & RTF_GATEWAY || 2550 (dev && dev != grt->dst.dev)) { 2551 ip6_rt_put(grt); 2552 grt = NULL; 2553 } 2554 } 2555 } 2556 2557 if (!grt) 2558 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 2559 2560 if (!grt) 2561 goto out; 2562 2563 if (dev) { 2564 if (dev != grt->dst.dev) { 2565 ip6_rt_put(grt); 2566 goto out; 2567 } 2568 } else { 2569 *_dev = dev = grt->dst.dev; 2570 *idev = grt->rt6i_idev; 2571 dev_hold(dev); 2572 in6_dev_hold(grt->rt6i_idev); 2573 } 2574 2575 if (!(grt->rt6i_flags & RTF_GATEWAY)) 2576 err = 0; 2577 2578 ip6_rt_put(grt); 2579 2580 out: 2581 return err; 2582 } 2583 2584 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, 2585 struct netlink_ext_ack *extack) 2586 { 2587 struct net *net = cfg->fc_nlinfo.nl_net; 2588 struct rt6_info *rt = NULL; 2589 struct net_device *dev = NULL; 2590 struct inet6_dev *idev = NULL; 2591 struct fib6_table *table; 2592 int addr_type; 2593 int err = -EINVAL; 2594 2595 /* RTF_PCPU is an internal flag; can not be set by userspace */ 2596 if (cfg->fc_flags & RTF_PCPU) { 2597 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); 2598 goto out; 2599 } 2600 2601 /* RTF_CACHE is an internal flag; can not be set by userspace */ 2602 if (cfg->fc_flags & RTF_CACHE) { 2603 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); 2604 goto out; 2605 } 2606 2607 if (cfg->fc_dst_len > 128) { 2608 NL_SET_ERR_MSG(extack, "Invalid prefix length"); 2609 goto out; 2610 } 2611 if (cfg->fc_src_len > 128) { 2612 NL_SET_ERR_MSG(extack, "Invalid source address length"); 2613 goto out; 2614 } 2615 #ifndef CONFIG_IPV6_SUBTREES 2616 if (cfg->fc_src_len) { 2617 NL_SET_ERR_MSG(extack, 2618 "Specifying source address requires IPV6_SUBTREES to be enabled"); 2619 goto out; 2620 } 2621 #endif 2622 if (cfg->fc_ifindex) { 2623 err = -ENODEV; 2624 dev = dev_get_by_index(net, cfg->fc_ifindex); 2625 if (!dev) 2626 goto out; 2627 idev = in6_dev_get(dev); 2628 if (!idev) 2629 goto out; 2630 } 2631 2632 if (cfg->fc_metric == 0) 2633 cfg->fc_metric = IP6_RT_PRIO_USER; 2634 2635 if (cfg->fc_flags & RTNH_F_ONLINK) { 2636 if (!dev) { 2637 NL_SET_ERR_MSG(extack, 2638 "Nexthop device required for onlink"); 2639 err = -ENODEV; 2640 goto out; 2641 } 2642 2643 if (!(dev->flags & IFF_UP)) { 2644 NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 2645 err = -ENETDOWN; 2646 goto out; 2647 } 2648 } 2649 2650 err = -ENOBUFS; 2651 if (cfg->fc_nlinfo.nlh && 2652 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { 2653 table = fib6_get_table(net, cfg->fc_table); 2654 if (!table) { 2655 pr_warn("NLM_F_CREATE should be specified when creating new route\n"); 2656 table = fib6_new_table(net, cfg->fc_table); 2657 } 2658 } else { 2659 table = fib6_new_table(net, cfg->fc_table); 2660 } 2661 2662 if (!table) 2663 goto out; 2664 2665 rt = ip6_dst_alloc(net, NULL, 2666 (cfg->fc_flags & RTF_ADDRCONF) ? 
0 : DST_NOCOUNT); 2667 2668 if (!rt) { 2669 err = -ENOMEM; 2670 goto out; 2671 } 2672 2673 if (cfg->fc_flags & RTF_EXPIRES) 2674 rt6_set_expires(rt, jiffies + 2675 clock_t_to_jiffies(cfg->fc_expires)); 2676 else 2677 rt6_clean_expires(rt); 2678 2679 if (cfg->fc_protocol == RTPROT_UNSPEC) 2680 cfg->fc_protocol = RTPROT_BOOT; 2681 rt->rt6i_protocol = cfg->fc_protocol; 2682 2683 addr_type = ipv6_addr_type(&cfg->fc_dst); 2684 2685 if (addr_type & IPV6_ADDR_MULTICAST) 2686 rt->dst.input = ip6_mc_input; 2687 else if (cfg->fc_flags & RTF_LOCAL) 2688 rt->dst.input = ip6_input; 2689 else 2690 rt->dst.input = ip6_forward; 2691 2692 rt->dst.output = ip6_output; 2693 2694 if (cfg->fc_encap) { 2695 struct lwtunnel_state *lwtstate; 2696 2697 err = lwtunnel_build_state(cfg->fc_encap_type, 2698 cfg->fc_encap, AF_INET6, cfg, 2699 &lwtstate, extack); 2700 if (err) 2701 goto out; 2702 rt->dst.lwtstate = lwtstate_get(lwtstate); 2703 if (lwtunnel_output_redirect(rt->dst.lwtstate)) { 2704 rt->dst.lwtstate->orig_output = rt->dst.output; 2705 rt->dst.output = lwtunnel_output; 2706 } 2707 if (lwtunnel_input_redirect(rt->dst.lwtstate)) { 2708 rt->dst.lwtstate->orig_input = rt->dst.input; 2709 rt->dst.input = lwtunnel_input; 2710 } 2711 } 2712 2713 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 2714 rt->rt6i_dst.plen = cfg->fc_dst_len; 2715 if (rt->rt6i_dst.plen == 128) 2716 rt->dst.flags |= DST_HOST; 2717 2718 #ifdef CONFIG_IPV6_SUBTREES 2719 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 2720 rt->rt6i_src.plen = cfg->fc_src_len; 2721 #endif 2722 2723 rt->rt6i_metric = cfg->fc_metric; 2724 rt->rt6i_nh_weight = 1; 2725 2726 /* We cannot add true routes via loopback here, 2727 they would result in kernel looping; promote them to reject routes 2728 */ 2729 if ((cfg->fc_flags & RTF_REJECT) || 2730 (dev && (dev->flags & IFF_LOOPBACK) && 2731 !(addr_type & IPV6_ADDR_LOOPBACK) && 2732 !(cfg->fc_flags & RTF_LOCAL))) { 2733 /* hold loopback dev/idev if we haven't done so. */ 2734 if (dev != net->loopback_dev) { 2735 if (dev) { 2736 dev_put(dev); 2737 in6_dev_put(idev); 2738 } 2739 dev = net->loopback_dev; 2740 dev_hold(dev); 2741 idev = in6_dev_get(dev); 2742 if (!idev) { 2743 err = -ENODEV; 2744 goto out; 2745 } 2746 } 2747 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 2748 switch (cfg->fc_type) { 2749 case RTN_BLACKHOLE: 2750 rt->dst.error = -EINVAL; 2751 rt->dst.output = dst_discard_out; 2752 rt->dst.input = dst_discard; 2753 break; 2754 case RTN_PROHIBIT: 2755 rt->dst.error = -EACCES; 2756 rt->dst.output = ip6_pkt_prohibit_out; 2757 rt->dst.input = ip6_pkt_prohibit; 2758 break; 2759 case RTN_THROW: 2760 case RTN_UNREACHABLE: 2761 default: 2762 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN 2763 : (cfg->fc_type == RTN_UNREACHABLE) 2764 ? -EHOSTUNREACH : -ENETUNREACH; 2765 rt->dst.output = ip6_pkt_discard_out; 2766 rt->dst.input = ip6_pkt_discard; 2767 break; 2768 } 2769 goto install_route; 2770 } 2771 2772 if (cfg->fc_flags & RTF_GATEWAY) { 2773 const struct in6_addr *gw_addr; 2774 int gwa_type; 2775 2776 gw_addr = &cfg->fc_gateway; 2777 gwa_type = ipv6_addr_type(gw_addr); 2778 2779 /* if gw_addr is local we will fail to detect this in case 2780 * address is still TENTATIVE (DAD in progress). rt6_lookup() 2781 * will return already-added prefix route via interface that 2782 * prefix route was assigned to, which might be non-loopback. 2783 */ 2784 err = -EINVAL; 2785 if (ipv6_chk_addr_and_flags(net, gw_addr, 2786 gwa_type & IPV6_ADDR_LINKLOCAL ? 
				    dev : NULL, 0, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}
		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			/* IPv6 strictly forbids using non-link-local
			   addresses as nexthop addresses; otherwise the
			   router will not be able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			   We allow IPv4-mapped nexthops to support
			   RFC 4798-type addressing.
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED))) {
				NL_SET_ERR_MSG(extack,
					       "Invalid gateway address");
				goto out;
			}

			if (cfg->fc_flags & RTNH_F_ONLINK) {
				err = ip6_route_check_nh_onlink(net, cfg, dev,
								extack);
			} else {
				err = ip6_route_check_nh(net, cfg, &dev, &idev);
			}
			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Egress device not specified");
			goto out;
		} else if (dev->flags & IFF_LOOPBACK) {
			NL_SET_ERR_MSG(extack,
				       "Egress device can not be loopback device for this route");
			goto out;
		}
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!(dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
		err = -ENETDOWN;
		goto out;
	}

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
	    !netif_carrier_ok(dev))
		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
	rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_release_immediate(&rt->dst);

	return ERR_PTR(err);
}

int ip6_route_add(struct fib6_config *cfg,
		  struct netlink_ext_ack *extack)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt;
	int err;

	rt = ip6_route_info_create(cfg, extack);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto out;
	}

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);

	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_release_immediate(&rt->dst);

	return err;
}

static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	spin_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	spin_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}

int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt,
&info); 2933 } 2934 2935 static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) 2936 { 2937 struct nl_info *info = &cfg->fc_nlinfo; 2938 struct net *net = info->nl_net; 2939 struct sk_buff *skb = NULL; 2940 struct fib6_table *table; 2941 int err = -ENOENT; 2942 2943 if (rt == net->ipv6.ip6_null_entry) 2944 goto out_put; 2945 table = rt->rt6i_table; 2946 spin_lock_bh(&table->tb6_lock); 2947 2948 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { 2949 struct rt6_info *sibling, *next_sibling; 2950 2951 /* prefer to send a single notification with all hops */ 2952 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); 2953 if (skb) { 2954 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 2955 2956 if (rt6_fill_node(net, skb, rt, 2957 NULL, NULL, 0, RTM_DELROUTE, 2958 info->portid, seq, 0) < 0) { 2959 kfree_skb(skb); 2960 skb = NULL; 2961 } else 2962 info->skip_notify = 1; 2963 } 2964 2965 list_for_each_entry_safe(sibling, next_sibling, 2966 &rt->rt6i_siblings, 2967 rt6i_siblings) { 2968 err = fib6_del(sibling, info); 2969 if (err) 2970 goto out_unlock; 2971 } 2972 } 2973 2974 err = fib6_del(rt, info); 2975 out_unlock: 2976 spin_unlock_bh(&table->tb6_lock); 2977 out_put: 2978 ip6_rt_put(rt); 2979 2980 if (skb) { 2981 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 2982 info->nlh, gfp_any()); 2983 } 2984 return err; 2985 } 2986 2987 static int ip6_route_del(struct fib6_config *cfg, 2988 struct netlink_ext_ack *extack) 2989 { 2990 struct rt6_info *rt, *rt_cache; 2991 struct fib6_table *table; 2992 struct fib6_node *fn; 2993 int err = -ESRCH; 2994 2995 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 2996 if (!table) { 2997 NL_SET_ERR_MSG(extack, "FIB table does not exist"); 2998 return err; 2999 } 3000 3001 rcu_read_lock(); 3002 3003 fn = fib6_locate(&table->tb6_root, 3004 &cfg->fc_dst, cfg->fc_dst_len, 3005 &cfg->fc_src, cfg->fc_src_len, 3006 !(cfg->fc_flags & RTF_CACHE)); 3007 3008 if (fn) { 3009 for_each_fib6_node_rt_rcu(fn) { 3010 if (cfg->fc_flags & RTF_CACHE) { 3011 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, 3012 &cfg->fc_src); 3013 if (!rt_cache) 3014 continue; 3015 rt = rt_cache; 3016 } 3017 if (cfg->fc_ifindex && 3018 (!rt->dst.dev || 3019 rt->dst.dev->ifindex != cfg->fc_ifindex)) 3020 continue; 3021 if (cfg->fc_flags & RTF_GATEWAY && 3022 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 3023 continue; 3024 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 3025 continue; 3026 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) 3027 continue; 3028 if (!dst_hold_safe(&rt->dst)) 3029 break; 3030 rcu_read_unlock(); 3031 3032 /* if gateway was specified only delete the one hop */ 3033 if (cfg->fc_flags & RTF_GATEWAY) 3034 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 3035 3036 return __ip6_del_rt_siblings(rt, cfg); 3037 } 3038 } 3039 rcu_read_unlock(); 3040 3041 return err; 3042 } 3043 3044 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 3045 { 3046 struct netevent_redirect netevent; 3047 struct rt6_info *rt, *nrt = NULL; 3048 struct ndisc_options ndopts; 3049 struct inet6_dev *in6_dev; 3050 struct neighbour *neigh; 3051 struct rd_msg *msg; 3052 int optlen, on_link; 3053 u8 *lladdr; 3054 3055 optlen = skb_tail_pointer(skb) - skb_transport_header(skb); 3056 optlen -= sizeof(*msg); 3057 3058 if (optlen < 0) { 3059 net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); 3060 return; 3061 } 3062 3063 msg = (struct rd_msg *)icmp6_hdr(skb); 3064 3065 if (ipv6_addr_is_multicast(&msg->dest)) { 
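		/* RFC 4861, section 8.1: the ICMP Destination Address of a
		 * Redirect must not be a multicast address; drop it here.
		 */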
3066 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); 3067 return; 3068 } 3069 3070 on_link = 0; 3071 if (ipv6_addr_equal(&msg->dest, &msg->target)) { 3072 on_link = 1; 3073 } else if (ipv6_addr_type(&msg->target) != 3074 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { 3075 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); 3076 return; 3077 } 3078 3079 in6_dev = __in6_dev_get(skb->dev); 3080 if (!in6_dev) 3081 return; 3082 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) 3083 return; 3084 3085 /* RFC2461 8.1: 3086 * The IP source address of the Redirect MUST be the same as the current 3087 * first-hop router for the specified ICMP Destination Address. 3088 */ 3089 3090 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) { 3091 net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); 3092 return; 3093 } 3094 3095 lladdr = NULL; 3096 if (ndopts.nd_opts_tgt_lladdr) { 3097 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, 3098 skb->dev); 3099 if (!lladdr) { 3100 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); 3101 return; 3102 } 3103 } 3104 3105 rt = (struct rt6_info *) dst; 3106 if (rt->rt6i_flags & RTF_REJECT) { 3107 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); 3108 return; 3109 } 3110 3111 /* Redirect received -> path was valid. 3112 * Look, redirects are sent only in response to data packets, 3113 * so that this nexthop apparently is reachable. --ANK 3114 */ 3115 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr); 3116 3117 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); 3118 if (!neigh) 3119 return; 3120 3121 /* 3122 * We have finally decided to accept it. 3123 */ 3124 3125 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, 3126 NEIGH_UPDATE_F_WEAK_OVERRIDE| 3127 NEIGH_UPDATE_F_OVERRIDE| 3128 (on_link ? 
		     0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
			  NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	/* No need to remove rt from the exception table if rt is
	 * a cached route because rt6_insert_exception() will
	 * take care of it
	 */
	if (rt6_insert_exception(nrt, rt)) {
		dst_release_immediate(&nrt->dst);
		goto out;
	}

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

out:
	neigh_release(neigh);
}

/*
 *	Misc support functions
 */

static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->from = from;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ?
		    : RT6_TABLE_INFO;
	int ifindex = dev->ifindex;
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	rcu_read_lock();
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
	if (!fn)
		goto out;

	for_each_fib6_node_rt_rcu(fn) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		ip6_hold_safe(NULL, &rt, false);
		break;
	}
out:
	rcu_read_unlock();
	return rt;
}

static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_protocol = RTPROT_RA,
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg, NULL);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
}
#endif

struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), tb_id);
	if (!table)
		return NULL;

	rcu_read_lock();
	for_each_fib6_node_rt_rcu(&table->tb6_root) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		ip6_hold_safe(NULL, &rt, false);
	rcu_read_unlock();
	return rt;
}

struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = l3mdev_fib_table(dev) ?
: RT6_TABLE_DFLT, 3295 .fc_metric = IP6_RT_PRIO_USER, 3296 .fc_ifindex = dev->ifindex, 3297 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 3298 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 3299 .fc_protocol = RTPROT_RA, 3300 .fc_nlinfo.portid = 0, 3301 .fc_nlinfo.nlh = NULL, 3302 .fc_nlinfo.nl_net = dev_net(dev), 3303 }; 3304 3305 cfg.fc_gateway = *gwaddr; 3306 3307 if (!ip6_route_add(&cfg, NULL)) { 3308 struct fib6_table *table; 3309 3310 table = fib6_get_table(dev_net(dev), cfg.fc_table); 3311 if (table) 3312 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; 3313 } 3314 3315 return rt6_get_dflt_router(gwaddr, dev); 3316 } 3317 3318 static void __rt6_purge_dflt_routers(struct fib6_table *table) 3319 { 3320 struct rt6_info *rt; 3321 3322 restart: 3323 rcu_read_lock(); 3324 for_each_fib6_node_rt_rcu(&table->tb6_root) { 3325 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && 3326 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { 3327 if (dst_hold_safe(&rt->dst)) { 3328 rcu_read_unlock(); 3329 ip6_del_rt(rt); 3330 } else { 3331 rcu_read_unlock(); 3332 } 3333 goto restart; 3334 } 3335 } 3336 rcu_read_unlock(); 3337 3338 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; 3339 } 3340 3341 void rt6_purge_dflt_routers(struct net *net) 3342 { 3343 struct fib6_table *table; 3344 struct hlist_head *head; 3345 unsigned int h; 3346 3347 rcu_read_lock(); 3348 3349 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 3350 head = &net->ipv6.fib_table_hash[h]; 3351 hlist_for_each_entry_rcu(table, head, tb6_hlist) { 3352 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) 3353 __rt6_purge_dflt_routers(table); 3354 } 3355 } 3356 3357 rcu_read_unlock(); 3358 } 3359 3360 static void rtmsg_to_fib6_config(struct net *net, 3361 struct in6_rtmsg *rtmsg, 3362 struct fib6_config *cfg) 3363 { 3364 memset(cfg, 0, sizeof(*cfg)); 3365 3366 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? 
3367 : RT6_TABLE_MAIN; 3368 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 3369 cfg->fc_metric = rtmsg->rtmsg_metric; 3370 cfg->fc_expires = rtmsg->rtmsg_info; 3371 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 3372 cfg->fc_src_len = rtmsg->rtmsg_src_len; 3373 cfg->fc_flags = rtmsg->rtmsg_flags; 3374 3375 cfg->fc_nlinfo.nl_net = net; 3376 3377 cfg->fc_dst = rtmsg->rtmsg_dst; 3378 cfg->fc_src = rtmsg->rtmsg_src; 3379 cfg->fc_gateway = rtmsg->rtmsg_gateway; 3380 } 3381 3382 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 3383 { 3384 struct fib6_config cfg; 3385 struct in6_rtmsg rtmsg; 3386 int err; 3387 3388 switch (cmd) { 3389 case SIOCADDRT: /* Add a route */ 3390 case SIOCDELRT: /* Delete a route */ 3391 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 3392 return -EPERM; 3393 err = copy_from_user(&rtmsg, arg, 3394 sizeof(struct in6_rtmsg)); 3395 if (err) 3396 return -EFAULT; 3397 3398 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 3399 3400 rtnl_lock(); 3401 switch (cmd) { 3402 case SIOCADDRT: 3403 err = ip6_route_add(&cfg, NULL); 3404 break; 3405 case SIOCDELRT: 3406 err = ip6_route_del(&cfg, NULL); 3407 break; 3408 default: 3409 err = -EINVAL; 3410 } 3411 rtnl_unlock(); 3412 3413 return err; 3414 } 3415 3416 return -EINVAL; 3417 } 3418 3419 /* 3420 * Drop the packet on the floor 3421 */ 3422 3423 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 3424 { 3425 int type; 3426 struct dst_entry *dst = skb_dst(skb); 3427 switch (ipstats_mib_noroutes) { 3428 case IPSTATS_MIB_INNOROUTES: 3429 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 3430 if (type == IPV6_ADDR_ANY) { 3431 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 3432 IPSTATS_MIB_INADDRERRORS); 3433 break; 3434 } 3435 /* FALLTHROUGH */ 3436 case IPSTATS_MIB_OUTNOROUTES: 3437 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 3438 ipstats_mib_noroutes); 3439 break; 3440 } 3441 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 3442 kfree_skb(skb); 3443 return 0; 3444 } 3445 3446 static int ip6_pkt_discard(struct sk_buff *skb) 3447 { 3448 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 3449 } 3450 3451 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) 3452 { 3453 skb->dev = skb_dst(skb)->dev; 3454 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 3455 } 3456 3457 static int ip6_pkt_prohibit(struct sk_buff *skb) 3458 { 3459 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 3460 } 3461 3462 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb) 3463 { 3464 skb->dev = skb_dst(skb)->dev; 3465 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 3466 } 3467 3468 /* 3469 * Allocate a dst for local (unicast / anycast) address. 
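 *	(Callers in the addrconf/anycast code allocate this route when a
 *	local or anycast address is added and are expected to insert it
 *	into the local table themselves.)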
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	u32 tb_id;
	struct net *net = dev_net(idev->dev);
	struct net_device *dev = idev->dev;
	struct rt6_info *rt;

	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_protocol = RTPROT_KERNEL;
	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);

	return rt;
}

/* Remove a deleted IP address from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};

static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

	if (((void *)rt->dst.dev == dev || !dev) &&
	    rt != net->ipv6.ip6_null_entry &&
	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
		spin_lock_bh(&rt6_exception_lock);
		/* remove prefsrc entry */
		rt->rt6i_prefsrc.plen = 0;
		/* need to update cache as well */
		rt6_exceptions_remove_prefsrc(rt);
		spin_unlock_bh(&rt6_exception_lock);
	}
	return 0;
}

void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}

#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)

/* Remove routers and update dst entries when a gateway turns into a host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
	struct in6_addr *gateway = (struct in6_addr *)arg;

	if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
	    ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
		return -1;
	}

	/* Further clean up cached routes in exception table.
	 * This is needed because a cached route may have a different
	 * gateway than its 'parent' in the case of an ip redirect.
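	 * (For instance, a default route via router A may carry a cached
	 * exception redirected to router B; when B turns into a host,
	 * only the exception entry still references it.)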
3560 */ 3561 rt6_exceptions_clean_tohost(rt, gateway); 3562 3563 return 0; 3564 } 3565 3566 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) 3567 { 3568 fib6_clean_all(net, fib6_clean_tohost, gateway); 3569 } 3570 3571 struct arg_netdev_event { 3572 const struct net_device *dev; 3573 union { 3574 unsigned int nh_flags; 3575 unsigned long event; 3576 }; 3577 }; 3578 3579 static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt) 3580 { 3581 struct rt6_info *iter; 3582 struct fib6_node *fn; 3583 3584 fn = rcu_dereference_protected(rt->rt6i_node, 3585 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 3586 iter = rcu_dereference_protected(fn->leaf, 3587 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 3588 while (iter) { 3589 if (iter->rt6i_metric == rt->rt6i_metric && 3590 rt6_qualify_for_ecmp(iter)) 3591 return iter; 3592 iter = rcu_dereference_protected(iter->rt6_next, 3593 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 3594 } 3595 3596 return NULL; 3597 } 3598 3599 static bool rt6_is_dead(const struct rt6_info *rt) 3600 { 3601 if (rt->rt6i_nh_flags & RTNH_F_DEAD || 3602 (rt->rt6i_nh_flags & RTNH_F_LINKDOWN && 3603 rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) 3604 return true; 3605 3606 return false; 3607 } 3608 3609 static int rt6_multipath_total_weight(const struct rt6_info *rt) 3610 { 3611 struct rt6_info *iter; 3612 int total = 0; 3613 3614 if (!rt6_is_dead(rt)) 3615 total += rt->rt6i_nh_weight; 3616 3617 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) { 3618 if (!rt6_is_dead(iter)) 3619 total += iter->rt6i_nh_weight; 3620 } 3621 3622 return total; 3623 } 3624 3625 static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total) 3626 { 3627 int upper_bound = -1; 3628 3629 if (!rt6_is_dead(rt)) { 3630 *weight += rt->rt6i_nh_weight; 3631 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, 3632 total) - 1; 3633 } 3634 atomic_set(&rt->rt6i_nh_upper_bound, upper_bound); 3635 } 3636 3637 static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) 3638 { 3639 struct rt6_info *iter; 3640 int weight = 0; 3641 3642 rt6_upper_bound_set(rt, &weight, total); 3643 3644 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) 3645 rt6_upper_bound_set(iter, &weight, total); 3646 } 3647 3648 void rt6_multipath_rebalance(struct rt6_info *rt) 3649 { 3650 struct rt6_info *first; 3651 int total; 3652 3653 /* In case the entire multipath route was marked for flushing, 3654 * then there is no need to rebalance upon the removal of every 3655 * sibling route. 3656 */ 3657 if (!rt->rt6i_nsiblings || rt->should_flush) 3658 return; 3659 3660 /* During lookup routes are evaluated in order, so we need to 3661 * make sure upper bounds are assigned from the first sibling 3662 * onwards. 
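	 *
	 * Worked example: two siblings with rt6i_nh_weight 1 each give
	 * total = 2, so the first sibling is assigned the upper bound
	 * (1 << 31) / 2 - 1 = 0x3fffffff and the second
	 * (2 << 31) / 2 - 1 = 0x7fffffff; the 31-bit multipath hash is
	 * then compared against these bounds to select a nexthop.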
3663 */ 3664 first = rt6_multipath_first_sibling(rt); 3665 if (WARN_ON_ONCE(!first)) 3666 return; 3667 3668 total = rt6_multipath_total_weight(first); 3669 rt6_multipath_upper_bound_set(first, total); 3670 } 3671 3672 static int fib6_ifup(struct rt6_info *rt, void *p_arg) 3673 { 3674 const struct arg_netdev_event *arg = p_arg; 3675 const struct net *net = dev_net(arg->dev); 3676 3677 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { 3678 rt->rt6i_nh_flags &= ~arg->nh_flags; 3679 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); 3680 rt6_multipath_rebalance(rt); 3681 } 3682 3683 return 0; 3684 } 3685 3686 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) 3687 { 3688 struct arg_netdev_event arg = { 3689 .dev = dev, 3690 { 3691 .nh_flags = nh_flags, 3692 }, 3693 }; 3694 3695 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev)) 3696 arg.nh_flags |= RTNH_F_LINKDOWN; 3697 3698 fib6_clean_all(dev_net(dev), fib6_ifup, &arg); 3699 } 3700 3701 static bool rt6_multipath_uses_dev(const struct rt6_info *rt, 3702 const struct net_device *dev) 3703 { 3704 struct rt6_info *iter; 3705 3706 if (rt->dst.dev == dev) 3707 return true; 3708 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) 3709 if (iter->dst.dev == dev) 3710 return true; 3711 3712 return false; 3713 } 3714 3715 static void rt6_multipath_flush(struct rt6_info *rt) 3716 { 3717 struct rt6_info *iter; 3718 3719 rt->should_flush = 1; 3720 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) 3721 iter->should_flush = 1; 3722 } 3723 3724 static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt, 3725 const struct net_device *down_dev) 3726 { 3727 struct rt6_info *iter; 3728 unsigned int dead = 0; 3729 3730 if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) 3731 dead++; 3732 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) 3733 if (iter->dst.dev == down_dev || 3734 iter->rt6i_nh_flags & RTNH_F_DEAD) 3735 dead++; 3736 3737 return dead; 3738 } 3739 3740 static void rt6_multipath_nh_flags_set(struct rt6_info *rt, 3741 const struct net_device *dev, 3742 unsigned int nh_flags) 3743 { 3744 struct rt6_info *iter; 3745 3746 if (rt->dst.dev == dev) 3747 rt->rt6i_nh_flags |= nh_flags; 3748 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) 3749 if (iter->dst.dev == dev) 3750 iter->rt6i_nh_flags |= nh_flags; 3751 } 3752 3753 /* called with write lock held for table with rt */ 3754 static int fib6_ifdown(struct rt6_info *rt, void *p_arg) 3755 { 3756 const struct arg_netdev_event *arg = p_arg; 3757 const struct net_device *dev = arg->dev; 3758 const struct net *net = dev_net(dev); 3759 3760 if (rt == net->ipv6.ip6_null_entry) 3761 return 0; 3762 3763 switch (arg->event) { 3764 case NETDEV_UNREGISTER: 3765 return rt->dst.dev == dev ? -1 : 0; 3766 case NETDEV_DOWN: 3767 if (rt->should_flush) 3768 return -1; 3769 if (!rt->rt6i_nsiblings) 3770 return rt->dst.dev == dev ? 
			-1 : 0;
		if (rt6_multipath_uses_dev(rt, dev)) {
			unsigned int count;

			count = rt6_multipath_dead_count(rt, dev);
			if (rt->rt6i_nsiblings + 1 == count) {
				rt6_multipath_flush(rt);
				return -1;
			}
			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
						   RTNH_F_LINKDOWN);
			fib6_update_sernum(rt);
			rt6_multipath_rebalance(rt);
		}
		return -2;
	case NETDEV_CHANGE:
		if (rt->dst.dev != dev ||
		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
			break;
		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
		rt6_multipath_rebalance(rt);
		break;
	}

	return 0;
}

void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
{
	struct arg_netdev_event arg = {
		.dev = dev,
		{
			.event = event,
		},
	};

	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
}

void rt6_disable_ip(struct net_device *dev, unsigned long event)
{
	rt6_sync_down_dev(dev, event);
	rt6_uncached_list_flush_dev(dev_net(dev), dev);
	neigh_ifdown(&nd_tbl, dev);
}

struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned int mtu;
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6, PMTU discovery is not optional, so an RTAX_MTU
	   lock cannot disable it. We still use this lock to block
	   changes caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* An administrative MTU increase cannot be discovered by IPv6
	   PMTU discovery, so the PMTU must also be raised here.
	   RFC 1981 does not cover administrative MTU increases, which
	   makes updating it here a MUST (e.g. when switching to a
jumbo frame) 3840 */ 3841 if (rt->dst.dev == arg->dev && 3842 !dst_metric_locked(&rt->dst, RTAX_MTU)) { 3843 spin_lock_bh(&rt6_exception_lock); 3844 if (dst_metric_raw(&rt->dst, RTAX_MTU) && 3845 rt6_mtu_change_route_allowed(idev, rt, arg->mtu)) 3846 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 3847 rt6_exceptions_update_pmtu(idev, rt, arg->mtu); 3848 spin_unlock_bh(&rt6_exception_lock); 3849 } 3850 return 0; 3851 } 3852 3853 void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 3854 { 3855 struct rt6_mtu_change_arg arg = { 3856 .dev = dev, 3857 .mtu = mtu, 3858 }; 3859 3860 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); 3861 } 3862 3863 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 3864 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 3865 [RTA_OIF] = { .type = NLA_U32 }, 3866 [RTA_IIF] = { .type = NLA_U32 }, 3867 [RTA_PRIORITY] = { .type = NLA_U32 }, 3868 [RTA_METRICS] = { .type = NLA_NESTED }, 3869 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 3870 [RTA_PREF] = { .type = NLA_U8 }, 3871 [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, 3872 [RTA_ENCAP] = { .type = NLA_NESTED }, 3873 [RTA_EXPIRES] = { .type = NLA_U32 }, 3874 [RTA_UID] = { .type = NLA_U32 }, 3875 [RTA_MARK] = { .type = NLA_U32 }, 3876 }; 3877 3878 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 3879 struct fib6_config *cfg, 3880 struct netlink_ext_ack *extack) 3881 { 3882 struct rtmsg *rtm; 3883 struct nlattr *tb[RTA_MAX+1]; 3884 unsigned int pref; 3885 int err; 3886 3887 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, 3888 NULL); 3889 if (err < 0) 3890 goto errout; 3891 3892 err = -EINVAL; 3893 rtm = nlmsg_data(nlh); 3894 memset(cfg, 0, sizeof(*cfg)); 3895 3896 cfg->fc_table = rtm->rtm_table; 3897 cfg->fc_dst_len = rtm->rtm_dst_len; 3898 cfg->fc_src_len = rtm->rtm_src_len; 3899 cfg->fc_flags = RTF_UP; 3900 cfg->fc_protocol = rtm->rtm_protocol; 3901 cfg->fc_type = rtm->rtm_type; 3902 3903 if (rtm->rtm_type == RTN_UNREACHABLE || 3904 rtm->rtm_type == RTN_BLACKHOLE || 3905 rtm->rtm_type == RTN_PROHIBIT || 3906 rtm->rtm_type == RTN_THROW) 3907 cfg->fc_flags |= RTF_REJECT; 3908 3909 if (rtm->rtm_type == RTN_LOCAL) 3910 cfg->fc_flags |= RTF_LOCAL; 3911 3912 if (rtm->rtm_flags & RTM_F_CLONED) 3913 cfg->fc_flags |= RTF_CACHE; 3914 3915 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); 3916 3917 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; 3918 cfg->fc_nlinfo.nlh = nlh; 3919 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 3920 3921 if (tb[RTA_GATEWAY]) { 3922 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); 3923 cfg->fc_flags |= RTF_GATEWAY; 3924 } 3925 3926 if (tb[RTA_DST]) { 3927 int plen = (rtm->rtm_dst_len + 7) >> 3; 3928 3929 if (nla_len(tb[RTA_DST]) < plen) 3930 goto errout; 3931 3932 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 3933 } 3934 3935 if (tb[RTA_SRC]) { 3936 int plen = (rtm->rtm_src_len + 7) >> 3; 3937 3938 if (nla_len(tb[RTA_SRC]) < plen) 3939 goto errout; 3940 3941 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 3942 } 3943 3944 if (tb[RTA_PREFSRC]) 3945 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); 3946 3947 if (tb[RTA_OIF]) 3948 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 3949 3950 if (tb[RTA_PRIORITY]) 3951 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 3952 3953 if (tb[RTA_METRICS]) { 3954 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 3955 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 3956 } 3957 3958 if (tb[RTA_TABLE]) 3959 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 3960 3961 if (tb[RTA_MULTIPATH]) { 3962 cfg->fc_mp = 
nla_data(tb[RTA_MULTIPATH]); 3963 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); 3964 3965 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, 3966 cfg->fc_mp_len, extack); 3967 if (err < 0) 3968 goto errout; 3969 } 3970 3971 if (tb[RTA_PREF]) { 3972 pref = nla_get_u8(tb[RTA_PREF]); 3973 if (pref != ICMPV6_ROUTER_PREF_LOW && 3974 pref != ICMPV6_ROUTER_PREF_HIGH) 3975 pref = ICMPV6_ROUTER_PREF_MEDIUM; 3976 cfg->fc_flags |= RTF_PREF(pref); 3977 } 3978 3979 if (tb[RTA_ENCAP]) 3980 cfg->fc_encap = tb[RTA_ENCAP]; 3981 3982 if (tb[RTA_ENCAP_TYPE]) { 3983 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); 3984 3985 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack); 3986 if (err < 0) 3987 goto errout; 3988 } 3989 3990 if (tb[RTA_EXPIRES]) { 3991 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); 3992 3993 if (addrconf_finite_timeout(timeout)) { 3994 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ); 3995 cfg->fc_flags |= RTF_EXPIRES; 3996 } 3997 } 3998 3999 err = 0; 4000 errout: 4001 return err; 4002 } 4003 4004 struct rt6_nh { 4005 struct rt6_info *rt6_info; 4006 struct fib6_config r_cfg; 4007 struct mx6_config mxc; 4008 struct list_head next; 4009 }; 4010 4011 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) 4012 { 4013 struct rt6_nh *nh; 4014 4015 list_for_each_entry(nh, rt6_nh_list, next) { 4016 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n", 4017 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, 4018 nh->r_cfg.fc_ifindex); 4019 } 4020 } 4021 4022 static int ip6_route_info_append(struct list_head *rt6_nh_list, 4023 struct rt6_info *rt, struct fib6_config *r_cfg) 4024 { 4025 struct rt6_nh *nh; 4026 int err = -EEXIST; 4027 4028 list_for_each_entry(nh, rt6_nh_list, next) { 4029 /* check if rt6_info already exists */ 4030 if (rt6_duplicate_nexthop(nh->rt6_info, rt)) 4031 return err; 4032 } 4033 4034 nh = kzalloc(sizeof(*nh), GFP_KERNEL); 4035 if (!nh) 4036 return -ENOMEM; 4037 nh->rt6_info = rt; 4038 err = ip6_convert_metrics(&nh->mxc, r_cfg); 4039 if (err) { 4040 kfree(nh); 4041 return err; 4042 } 4043 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); 4044 list_add_tail(&nh->next, rt6_nh_list); 4045 4046 return 0; 4047 } 4048 4049 static void ip6_route_mpath_notify(struct rt6_info *rt, 4050 struct rt6_info *rt_last, 4051 struct nl_info *info, 4052 __u16 nlflags) 4053 { 4054 /* if this is an APPEND route, then rt points to the first route 4055 * inserted and rt_last points to last route inserted. Userspace 4056 * wants a consistent dump of the route which starts at the first 4057 * nexthop. 
Since sibling routes are always added at the end of 4058 * the list, find the first sibling of the last route appended 4059 */ 4060 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) { 4061 rt = list_first_entry(&rt_last->rt6i_siblings, 4062 struct rt6_info, 4063 rt6i_siblings); 4064 } 4065 4066 if (rt) 4067 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); 4068 } 4069 4070 static int ip6_route_multipath_add(struct fib6_config *cfg, 4071 struct netlink_ext_ack *extack) 4072 { 4073 struct rt6_info *rt_notif = NULL, *rt_last = NULL; 4074 struct nl_info *info = &cfg->fc_nlinfo; 4075 struct fib6_config r_cfg; 4076 struct rtnexthop *rtnh; 4077 struct rt6_info *rt; 4078 struct rt6_nh *err_nh; 4079 struct rt6_nh *nh, *nh_safe; 4080 __u16 nlflags; 4081 int remaining; 4082 int attrlen; 4083 int err = 1; 4084 int nhn = 0; 4085 int replace = (cfg->fc_nlinfo.nlh && 4086 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); 4087 LIST_HEAD(rt6_nh_list); 4088 4089 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE; 4090 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND) 4091 nlflags |= NLM_F_APPEND; 4092 4093 remaining = cfg->fc_mp_len; 4094 rtnh = (struct rtnexthop *)cfg->fc_mp; 4095 4096 /* Parse a Multipath Entry and build a list (rt6_nh_list) of 4097 * rt6_info structs per nexthop 4098 */ 4099 while (rtnh_ok(rtnh, remaining)) { 4100 memcpy(&r_cfg, cfg, sizeof(*cfg)); 4101 if (rtnh->rtnh_ifindex) 4102 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 4103 4104 attrlen = rtnh_attrlen(rtnh); 4105 if (attrlen > 0) { 4106 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 4107 4108 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 4109 if (nla) { 4110 r_cfg.fc_gateway = nla_get_in6_addr(nla); 4111 r_cfg.fc_flags |= RTF_GATEWAY; 4112 } 4113 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); 4114 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 4115 if (nla) 4116 r_cfg.fc_encap_type = nla_get_u16(nla); 4117 } 4118 4119 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); 4120 rt = ip6_route_info_create(&r_cfg, extack); 4121 if (IS_ERR(rt)) { 4122 err = PTR_ERR(rt); 4123 rt = NULL; 4124 goto cleanup; 4125 } 4126 4127 rt->rt6i_nh_weight = rtnh->rtnh_hops + 1; 4128 4129 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); 4130 if (err) { 4131 dst_release_immediate(&rt->dst); 4132 goto cleanup; 4133 } 4134 4135 rtnh = rtnh_next(rtnh, &remaining); 4136 } 4137 4138 /* for add and replace send one notification with all nexthops. 4139 * Skip the notification in fib6_add_rt2node and send one with 4140 * the full route when done 4141 */ 4142 info->skip_notify = 1; 4143 4144 err_nh = NULL; 4145 list_for_each_entry(nh, &rt6_nh_list, next) { 4146 rt_last = nh->rt6_info; 4147 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack); 4148 /* save reference to first route for notification */ 4149 if (!rt_notif && !err) 4150 rt_notif = nh->rt6_info; 4151 4152 /* nh->rt6_info is used or freed at this point, reset to NULL*/ 4153 nh->rt6_info = NULL; 4154 if (err) { 4155 if (replace && nhn) 4156 ip6_print_replace_route_err(&rt6_nh_list); 4157 err_nh = nh; 4158 goto add_errout; 4159 } 4160 4161 /* Because each route is added like a single route we remove 4162 * these flags after the first nexthop: if there is a collision, 4163 * we have already failed to add the first nexthop: 4164 * fib6_add_rt2node() has rejected it; when replacing, old 4165 * nexthops have been replaced by first new, the rest should 4166 * be added to it. 
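		 * (e.g. with NLM_F_REPLACE and nexthops A and B: inserting
		 * A replaces the old route, and clearing the flag lets B
		 * be appended as A's sibling instead of replacing A again.)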
4167 */ 4168 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | 4169 NLM_F_REPLACE); 4170 nhn++; 4171 } 4172 4173 /* success ... tell user about new route */ 4174 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); 4175 goto cleanup; 4176 4177 add_errout: 4178 /* send notification for routes that were added so that 4179 * the delete notifications sent by ip6_route_del are 4180 * coherent 4181 */ 4182 if (rt_notif) 4183 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); 4184 4185 /* Delete routes that were already added */ 4186 list_for_each_entry(nh, &rt6_nh_list, next) { 4187 if (err_nh == nh) 4188 break; 4189 ip6_route_del(&nh->r_cfg, extack); 4190 } 4191 4192 cleanup: 4193 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { 4194 if (nh->rt6_info) 4195 dst_release_immediate(&nh->rt6_info->dst); 4196 kfree(nh->mxc.mx); 4197 list_del(&nh->next); 4198 kfree(nh); 4199 } 4200 4201 return err; 4202 } 4203 4204 static int ip6_route_multipath_del(struct fib6_config *cfg, 4205 struct netlink_ext_ack *extack) 4206 { 4207 struct fib6_config r_cfg; 4208 struct rtnexthop *rtnh; 4209 int remaining; 4210 int attrlen; 4211 int err = 1, last_err = 0; 4212 4213 remaining = cfg->fc_mp_len; 4214 rtnh = (struct rtnexthop *)cfg->fc_mp; 4215 4216 /* Parse a Multipath Entry */ 4217 while (rtnh_ok(rtnh, remaining)) { 4218 memcpy(&r_cfg, cfg, sizeof(*cfg)); 4219 if (rtnh->rtnh_ifindex) 4220 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 4221 4222 attrlen = rtnh_attrlen(rtnh); 4223 if (attrlen > 0) { 4224 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 4225 4226 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 4227 if (nla) { 4228 nla_memcpy(&r_cfg.fc_gateway, nla, 16); 4229 r_cfg.fc_flags |= RTF_GATEWAY; 4230 } 4231 } 4232 err = ip6_route_del(&r_cfg, extack); 4233 if (err) 4234 last_err = err; 4235 4236 rtnh = rtnh_next(rtnh, &remaining); 4237 } 4238 4239 return last_err; 4240 } 4241 4242 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, 4243 struct netlink_ext_ack *extack) 4244 { 4245 struct fib6_config cfg; 4246 int err; 4247 4248 err = rtm_to_fib6_config(skb, nlh, &cfg, extack); 4249 if (err < 0) 4250 return err; 4251 4252 if (cfg.fc_mp) 4253 return ip6_route_multipath_del(&cfg, extack); 4254 else { 4255 cfg.fc_delete_all_nh = 1; 4256 return ip6_route_del(&cfg, extack); 4257 } 4258 } 4259 4260 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, 4261 struct netlink_ext_ack *extack) 4262 { 4263 struct fib6_config cfg; 4264 int err; 4265 4266 err = rtm_to_fib6_config(skb, nlh, &cfg, extack); 4267 if (err < 0) 4268 return err; 4269 4270 if (cfg.fc_mp) 4271 return ip6_route_multipath_add(&cfg, extack); 4272 else 4273 return ip6_route_add(&cfg, extack); 4274 } 4275 4276 static size_t rt6_nlmsg_size(struct rt6_info *rt) 4277 { 4278 int nexthop_len = 0; 4279 4280 if (rt->rt6i_nsiblings) { 4281 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ 4282 + NLA_ALIGN(sizeof(struct rtnexthop)) 4283 + nla_total_size(16) /* RTA_GATEWAY */ 4284 + lwtunnel_get_encap_size(rt->dst.lwtstate); 4285 4286 nexthop_len *= rt->rt6i_nsiblings; 4287 } 4288 4289 return NLMSG_ALIGN(sizeof(struct rtmsg)) 4290 + nla_total_size(16) /* RTA_SRC */ 4291 + nla_total_size(16) /* RTA_DST */ 4292 + nla_total_size(16) /* RTA_GATEWAY */ 4293 + nla_total_size(16) /* RTA_PREFSRC */ 4294 + nla_total_size(4) /* RTA_TABLE */ 4295 + nla_total_size(4) /* RTA_IIF */ 4296 + nla_total_size(4) /* RTA_OIF */ 4297 + nla_total_size(4) /* RTA_PRIORITY */ 4298 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 4299 + 
nla_total_size(sizeof(struct rta_cacheinfo)) 4300 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ 4301 + nla_total_size(1) /* RTA_PREF */ 4302 + lwtunnel_get_encap_size(rt->dst.lwtstate) 4303 + nexthop_len; 4304 } 4305 4306 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, 4307 unsigned int *flags, bool skip_oif) 4308 { 4309 if (rt->rt6i_nh_flags & RTNH_F_DEAD) 4310 *flags |= RTNH_F_DEAD; 4311 4312 if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) { 4313 *flags |= RTNH_F_LINKDOWN; 4314 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) 4315 *flags |= RTNH_F_DEAD; 4316 } 4317 4318 if (rt->rt6i_flags & RTF_GATEWAY) { 4319 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) 4320 goto nla_put_failure; 4321 } 4322 4323 *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK); 4324 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) 4325 *flags |= RTNH_F_OFFLOAD; 4326 4327 /* not needed for multipath encoding because it has a rtnexthop struct */ 4328 if (!skip_oif && rt->dst.dev && 4329 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 4330 goto nla_put_failure; 4331 4332 if (rt->dst.lwtstate && 4333 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) 4334 goto nla_put_failure; 4335 4336 return 0; 4337 4338 nla_put_failure: 4339 return -EMSGSIZE; 4340 } 4341 4342 /* add a multipath nexthop */ 4343 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) 4344 { 4345 struct rtnexthop *rtnh; 4346 unsigned int flags = 0; 4347 4348 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 4349 if (!rtnh) 4350 goto nla_put_failure; 4351 4352 rtnh->rtnh_hops = rt->rt6i_nh_weight - 1; 4353 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; 4354 4355 if (rt6_nexthop_info(skb, rt, &flags, true) < 0) 4356 goto nla_put_failure; 4357 4358 rtnh->rtnh_flags = flags; 4359 4360 /* length of rtnetlink header + attributes */ 4361 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; 4362 4363 return 0; 4364 4365 nla_put_failure: 4366 return -EMSGSIZE; 4367 } 4368 4369 static int rt6_fill_node(struct net *net, 4370 struct sk_buff *skb, struct rt6_info *rt, 4371 struct in6_addr *dst, struct in6_addr *src, 4372 int iif, int type, u32 portid, u32 seq, 4373 unsigned int flags) 4374 { 4375 u32 metrics[RTAX_MAX]; 4376 struct rtmsg *rtm; 4377 struct nlmsghdr *nlh; 4378 long expires; 4379 u32 table; 4380 4381 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); 4382 if (!nlh) 4383 return -EMSGSIZE; 4384 4385 rtm = nlmsg_data(nlh); 4386 rtm->rtm_family = AF_INET6; 4387 rtm->rtm_dst_len = rt->rt6i_dst.plen; 4388 rtm->rtm_src_len = rt->rt6i_src.plen; 4389 rtm->rtm_tos = 0; 4390 if (rt->rt6i_table) 4391 table = rt->rt6i_table->tb6_id; 4392 else 4393 table = RT6_TABLE_UNSPEC; 4394 rtm->rtm_table = table; 4395 if (nla_put_u32(skb, RTA_TABLE, table)) 4396 goto nla_put_failure; 4397 if (rt->rt6i_flags & RTF_REJECT) { 4398 switch (rt->dst.error) { 4399 case -EINVAL: 4400 rtm->rtm_type = RTN_BLACKHOLE; 4401 break; 4402 case -EACCES: 4403 rtm->rtm_type = RTN_PROHIBIT; 4404 break; 4405 case -EAGAIN: 4406 rtm->rtm_type = RTN_THROW; 4407 break; 4408 default: 4409 rtm->rtm_type = RTN_UNREACHABLE; 4410 break; 4411 } 4412 } 4413 else if (rt->rt6i_flags & RTF_LOCAL) 4414 rtm->rtm_type = RTN_LOCAL; 4415 else if (rt->rt6i_flags & RTF_ANYCAST) 4416 rtm->rtm_type = RTN_ANYCAST; 4417 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 4418 rtm->rtm_type = RTN_LOCAL; 4419 else 4420 rtm->rtm_type = RTN_UNICAST; 4421 rtm->rtm_flags = 0; 4422 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 4423 rtm->rtm_protocol = rt->rt6i_protocol; 4424
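/* A minimal sketch of the message assembled below for a plain
 * unicast route (field names only, not byte-accurate; the exact
 * attribute set depends on the route):
 *
 *   struct nlmsghdr { .nlmsg_type = RTM_NEWROUTE, ... }
 *   struct rtmsg    { .rtm_family = AF_INET6, .rtm_type = RTN_UNICAST, ... }
 *   attributes:     RTA_TABLE, RTA_DST, RTA_METRICS, RTA_PRIORITY,
 *                   RTA_OIF (or RTA_MULTIPATH for sibling routes),
 *                   RTA_CACHEINFO, RTA_PREF
 */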
4425 if (rt->rt6i_flags & RTF_CACHE) 4426 rtm->rtm_flags |= RTM_F_CLONED; 4427 4428 if (dst) { 4429 if (nla_put_in6_addr(skb, RTA_DST, dst)) 4430 goto nla_put_failure; 4431 rtm->rtm_dst_len = 128; 4432 } else if (rtm->rtm_dst_len) 4433 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) 4434 goto nla_put_failure; 4435 #ifdef CONFIG_IPV6_SUBTREES 4436 if (src) { 4437 if (nla_put_in6_addr(skb, RTA_SRC, src)) 4438 goto nla_put_failure; 4439 rtm->rtm_src_len = 128; 4440 } else if (rtm->rtm_src_len && 4441 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) 4442 goto nla_put_failure; 4443 #endif 4444 if (iif) { 4445 #ifdef CONFIG_IPV6_MROUTE 4446 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 4447 int err = ip6mr_get_route(net, skb, rtm, portid); 4448 4449 if (err == 0) 4450 return 0; 4451 if (err < 0) 4452 goto nla_put_failure; 4453 } else 4454 #endif 4455 if (nla_put_u32(skb, RTA_IIF, iif)) 4456 goto nla_put_failure; 4457 } else if (dst) { 4458 struct in6_addr saddr_buf; 4459 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && 4460 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 4461 goto nla_put_failure; 4462 } 4463 4464 if (rt->rt6i_prefsrc.plen) { 4465 struct in6_addr saddr_buf; 4466 saddr_buf = rt->rt6i_prefsrc.addr; 4467 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 4468 goto nla_put_failure; 4469 } 4470 4471 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); 4472 if (rt->rt6i_pmtu) 4473 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; 4474 if (rtnetlink_put_metrics(skb, metrics) < 0) 4475 goto nla_put_failure; 4476 4477 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 4478 goto nla_put_failure; 4479 4480 /* For multipath routes, walk the siblings list and add 4481 * each as a nexthop within RTA_MULTIPATH. 4482 */ 4483 if (rt->rt6i_nsiblings) { 4484 struct rt6_info *sibling, *next_sibling; 4485 struct nlattr *mp; 4486 4487 mp = nla_nest_start(skb, RTA_MULTIPATH); 4488 if (!mp) 4489 goto nla_put_failure; 4490 4491 if (rt6_add_nexthop(skb, rt) < 0) 4492 goto nla_put_failure; 4493 4494 list_for_each_entry_safe(sibling, next_sibling, 4495 &rt->rt6i_siblings, rt6i_siblings) { 4496 if (rt6_add_nexthop(skb, sibling) < 0) 4497 goto nla_put_failure; 4498 } 4499 4500 nla_nest_end(skb, mp); 4501 } else { 4502 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0) 4503 goto nla_put_failure; 4504 } 4505 4506 expires = (rt->rt6i_flags & RTF_EXPIRES) ? 
rt->dst.expires - jiffies : 0; 4507 4508 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 4509 goto nla_put_failure; 4510 4511 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) 4512 goto nla_put_failure; 4513 4514 4515 nlmsg_end(skb, nlh); 4516 return 0; 4517 4518 nla_put_failure: 4519 nlmsg_cancel(skb, nlh); 4520 return -EMSGSIZE; 4521 } 4522 4523 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 4524 { 4525 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 4526 struct net *net = arg->net; 4527 4528 if (rt == net->ipv6.ip6_null_entry) 4529 return 0; 4530 4531 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 4532 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 4533 4534 /* user wants prefix routes only */ 4535 if (rtm->rtm_flags & RTM_F_PREFIX && 4536 !(rt->rt6i_flags & RTF_PREFIX_RT)) { 4537 /* success since this is not a prefix route */ 4538 return 1; 4539 } 4540 } 4541 4542 return rt6_fill_node(net, 4543 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 4544 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, 4545 NLM_F_MULTI); 4546 } 4547 4548 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 4549 struct netlink_ext_ack *extack) 4550 { 4551 struct net *net = sock_net(in_skb->sk); 4552 struct nlattr *tb[RTA_MAX+1]; 4553 int err, iif = 0, oif = 0; 4554 struct dst_entry *dst; 4555 struct rt6_info *rt; 4556 struct sk_buff *skb; 4557 struct rtmsg *rtm; 4558 struct flowi6 fl6; 4559 bool fibmatch; 4560 4561 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, 4562 extack); 4563 if (err < 0) 4564 goto errout; 4565 4566 err = -EINVAL; 4567 memset(&fl6, 0, sizeof(fl6)); 4568 rtm = nlmsg_data(nlh); 4569 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); 4570 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); 4571 4572 if (tb[RTA_SRC]) { 4573 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 4574 goto errout; 4575 4576 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 4577 } 4578 4579 if (tb[RTA_DST]) { 4580 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 4581 goto errout; 4582 4583 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 4584 } 4585 4586 if (tb[RTA_IIF]) 4587 iif = nla_get_u32(tb[RTA_IIF]); 4588 4589 if (tb[RTA_OIF]) 4590 oif = nla_get_u32(tb[RTA_OIF]); 4591 4592 if (tb[RTA_MARK]) 4593 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); 4594 4595 if (tb[RTA_UID]) 4596 fl6.flowi6_uid = make_kuid(current_user_ns(), 4597 nla_get_u32(tb[RTA_UID])); 4598 else 4599 fl6.flowi6_uid = iif ? 
INVALID_UID : current_uid(); 4600 4601 if (iif) { 4602 struct net_device *dev; 4603 int flags = 0; 4604 4605 rcu_read_lock(); 4606 4607 dev = dev_get_by_index_rcu(net, iif); 4608 if (!dev) { 4609 rcu_read_unlock(); 4610 err = -ENODEV; 4611 goto errout; 4612 } 4613 4614 fl6.flowi6_iif = iif; 4615 4616 if (!ipv6_addr_any(&fl6.saddr)) 4617 flags |= RT6_LOOKUP_F_HAS_SADDR; 4618 4619 dst = ip6_route_input_lookup(net, dev, &fl6, flags); 4620 4621 rcu_read_unlock(); 4622 } else { 4623 fl6.flowi6_oif = oif; 4624 4625 dst = ip6_route_output(net, NULL, &fl6); 4626 } 4627 4628 4629 rt = container_of(dst, struct rt6_info, dst); 4630 if (rt->dst.error) { 4631 err = rt->dst.error; 4632 ip6_rt_put(rt); 4633 goto errout; 4634 } 4635 4636 if (rt == net->ipv6.ip6_null_entry) { 4637 err = rt->dst.error; 4638 ip6_rt_put(rt); 4639 goto errout; 4640 } 4641 4642 if (fibmatch && rt->from) { 4643 struct rt6_info *ort = rt->from; 4644 4645 dst_hold(&ort->dst); 4646 ip6_rt_put(rt); 4647 rt = ort; 4648 } 4649 4650 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 4651 if (!skb) { 4652 ip6_rt_put(rt); 4653 err = -ENOBUFS; 4654 goto errout; 4655 } 4656 4657 skb_dst_set(skb, &rt->dst); 4658 if (fibmatch) 4659 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif, 4660 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 4661 nlh->nlmsg_seq, 0); 4662 else 4663 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 4664 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 4665 nlh->nlmsg_seq, 0); 4666 if (err < 0) { 4667 kfree_skb(skb); 4668 goto errout; 4669 } 4670 4671 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 4672 errout: 4673 return err; 4674 } 4675 4676 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, 4677 unsigned int nlm_flags) 4678 { 4679 struct sk_buff *skb; 4680 struct net *net = info->nl_net; 4681 u32 seq; 4682 int err; 4683 4684 err = -ENOBUFS; 4685 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 4686 4687 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); 4688 if (!skb) 4689 goto errout; 4690 4691 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 4692 event, info->portid, seq, nlm_flags); 4693 if (err < 0) { 4694 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 4695 WARN_ON(err == -EMSGSIZE); 4696 kfree_skb(skb); 4697 goto errout; 4698 } 4699 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 4700 info->nlh, gfp_any()); 4701 return; 4702 errout: 4703 if (err < 0) 4704 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 4705 } 4706 4707 static int ip6_route_dev_notify(struct notifier_block *this, 4708 unsigned long event, void *ptr) 4709 { 4710 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 4711 struct net *net = dev_net(dev); 4712 4713 if (!(dev->flags & IFF_LOOPBACK)) 4714 return NOTIFY_OK; 4715 4716 if (event == NETDEV_REGISTER) { 4717 net->ipv6.ip6_null_entry->dst.dev = dev; 4718 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 4719 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 4720 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 4721 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 4722 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 4723 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 4724 #endif 4725 } else if (event == NETDEV_UNREGISTER && 4726 dev->reg_state != NETREG_UNREGISTERED) { 4727 /* NETDEV_UNREGISTER can be fired multiple times by 4728 * netdev_wait_allrefs(). Make sure we only call this once.
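 * (The dev->reg_state check above is what enforces that: the first
 * NETDEV_UNREGISTER arrives while the device is still unregistering,
 * and later replays are skipped once reg_state has reached
 * NETREG_UNREGISTERED.)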
4729 */ 4730 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev); 4731 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 4732 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev); 4733 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev); 4734 #endif 4735 } 4736 4737 return NOTIFY_OK; 4738 } 4739 4740 /* 4741 * /proc 4742 */ 4743 4744 #ifdef CONFIG_PROC_FS 4745 4746 static const struct file_operations ipv6_route_proc_fops = { 4747 .open = ipv6_route_open, 4748 .read = seq_read, 4749 .llseek = seq_lseek, 4750 .release = seq_release_net, 4751 }; 4752 4753 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 4754 { 4755 struct net *net = (struct net *)seq->private; 4756 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 4757 net->ipv6.rt6_stats->fib_nodes, 4758 net->ipv6.rt6_stats->fib_route_nodes, 4759 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc), 4760 net->ipv6.rt6_stats->fib_rt_entries, 4761 net->ipv6.rt6_stats->fib_rt_cache, 4762 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 4763 net->ipv6.rt6_stats->fib_discarded_routes); 4764 4765 return 0; 4766 } 4767 4768 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 4769 { 4770 return single_open_net(inode, file, rt6_stats_seq_show); 4771 } 4772 4773 static const struct file_operations rt6_stats_seq_fops = { 4774 .open = rt6_stats_seq_open, 4775 .read = seq_read, 4776 .llseek = seq_lseek, 4777 .release = single_release_net, 4778 }; 4779 #endif /* CONFIG_PROC_FS */ 4780 4781 #ifdef CONFIG_SYSCTL 4782 4783 static 4784 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, 4785 void __user *buffer, size_t *lenp, loff_t *ppos) 4786 { 4787 struct net *net; 4788 int delay; 4789 if (!write) 4790 return -EINVAL; 4791 4792 net = (struct net *)ctl->extra1; 4793 delay = net->ipv6.sysctl.flush_delay; 4794 proc_dointvec(ctl, write, buffer, lenp, ppos); 4795 fib6_run_gc(delay <= 0 ? 
0 : (unsigned long)delay, net, delay > 0); 4796 return 0; 4797 } 4798 4799 struct ctl_table ipv6_route_table_template[] = { 4800 { 4801 .procname = "flush", 4802 .data = &init_net.ipv6.sysctl.flush_delay, 4803 .maxlen = sizeof(int), 4804 .mode = 0200, 4805 .proc_handler = ipv6_sysctl_rtcache_flush 4806 }, 4807 { 4808 .procname = "gc_thresh", 4809 .data = &ip6_dst_ops_template.gc_thresh, 4810 .maxlen = sizeof(int), 4811 .mode = 0644, 4812 .proc_handler = proc_dointvec, 4813 }, 4814 { 4815 .procname = "max_size", 4816 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 4817 .maxlen = sizeof(int), 4818 .mode = 0644, 4819 .proc_handler = proc_dointvec, 4820 }, 4821 { 4822 .procname = "gc_min_interval", 4823 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 4824 .maxlen = sizeof(int), 4825 .mode = 0644, 4826 .proc_handler = proc_dointvec_jiffies, 4827 }, 4828 { 4829 .procname = "gc_timeout", 4830 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 4831 .maxlen = sizeof(int), 4832 .mode = 0644, 4833 .proc_handler = proc_dointvec_jiffies, 4834 }, 4835 { 4836 .procname = "gc_interval", 4837 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 4838 .maxlen = sizeof(int), 4839 .mode = 0644, 4840 .proc_handler = proc_dointvec_jiffies, 4841 }, 4842 { 4843 .procname = "gc_elasticity", 4844 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 4845 .maxlen = sizeof(int), 4846 .mode = 0644, 4847 .proc_handler = proc_dointvec, 4848 }, 4849 { 4850 .procname = "mtu_expires", 4851 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 4852 .maxlen = sizeof(int), 4853 .mode = 0644, 4854 .proc_handler = proc_dointvec_jiffies, 4855 }, 4856 { 4857 .procname = "min_adv_mss", 4858 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 4859 .maxlen = sizeof(int), 4860 .mode = 0644, 4861 .proc_handler = proc_dointvec, 4862 }, 4863 { 4864 .procname = "gc_min_interval_ms", 4865 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 4866 .maxlen = sizeof(int), 4867 .mode = 0644, 4868 .proc_handler = proc_dointvec_ms_jiffies, 4869 }, 4870 { } 4871 }; 4872 4873 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 4874 { 4875 struct ctl_table *table; 4876 4877 table = kmemdup(ipv6_route_table_template, 4878 sizeof(ipv6_route_table_template), 4879 GFP_KERNEL); 4880 4881 if (table) { 4882 table[0].data = &net->ipv6.sysctl.flush_delay; 4883 table[0].extra1 = net; 4884 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 4885 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 4886 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 4887 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 4888 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 4889 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 4890 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 4891 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 4892 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 4893 4894 /* Don't export sysctls to unprivileged users */ 4895 if (net->user_ns != &init_user_ns) 4896 table[0].procname = NULL; 4897 } 4898 4899 return table; 4900 } 4901 #endif 4902 4903 static int __net_init ip6_route_net_init(struct net *net) 4904 { 4905 int ret = -ENOMEM; 4906 4907 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 4908 sizeof(net->ipv6.ip6_dst_ops)); 4909 4910 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 4911 goto out_ip6_dst_ops; 4912 4913 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 4914 sizeof(*net->ipv6.ip6_null_entry), 4915 GFP_KERNEL); 4916 if (!net->ipv6.ip6_null_entry) 4917 goto 
out_ip6_dst_entries; 4918 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 4919 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 4920 ip6_template_metrics, true); 4921 4922 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 4923 net->ipv6.fib6_has_custom_rules = false; 4924 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 4925 sizeof(*net->ipv6.ip6_prohibit_entry), 4926 GFP_KERNEL); 4927 if (!net->ipv6.ip6_prohibit_entry) 4928 goto out_ip6_null_entry; 4929 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 4930 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 4931 ip6_template_metrics, true); 4932 4933 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 4934 sizeof(*net->ipv6.ip6_blk_hole_entry), 4935 GFP_KERNEL); 4936 if (!net->ipv6.ip6_blk_hole_entry) 4937 goto out_ip6_prohibit_entry; 4938 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 4939 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 4940 ip6_template_metrics, true); 4941 #endif 4942 4943 net->ipv6.sysctl.flush_delay = 0; 4944 net->ipv6.sysctl.ip6_rt_max_size = 4096; 4945 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 4946 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 4947 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 4948 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 4949 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 4950 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 4951 4952 net->ipv6.ip6_rt_gc_expire = 30*HZ; 4953 4954 ret = 0; 4955 out: 4956 return ret; 4957 4958 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 4959 out_ip6_prohibit_entry: 4960 kfree(net->ipv6.ip6_prohibit_entry); 4961 out_ip6_null_entry: 4962 kfree(net->ipv6.ip6_null_entry); 4963 #endif 4964 out_ip6_dst_entries: 4965 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 4966 out_ip6_dst_ops: 4967 goto out; 4968 } 4969 4970 static void __net_exit ip6_route_net_exit(struct net *net) 4971 { 4972 kfree(net->ipv6.ip6_null_entry); 4973 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 4974 kfree(net->ipv6.ip6_prohibit_entry); 4975 kfree(net->ipv6.ip6_blk_hole_entry); 4976 #endif 4977 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 4978 } 4979 4980 static int __net_init ip6_route_net_init_late(struct net *net) 4981 { 4982 #ifdef CONFIG_PROC_FS 4983 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); 4984 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); 4985 #endif 4986 return 0; 4987 } 4988 4989 static void __net_exit ip6_route_net_exit_late(struct net *net) 4990 { 4991 #ifdef CONFIG_PROC_FS 4992 remove_proc_entry("ipv6_route", net->proc_net); 4993 remove_proc_entry("rt6_stats", net->proc_net); 4994 #endif 4995 } 4996 4997 static struct pernet_operations ip6_route_net_ops = { 4998 .init = ip6_route_net_init, 4999 .exit = ip6_route_net_exit, 5000 }; 5001 5002 static int __net_init ipv6_inetpeer_init(struct net *net) 5003 { 5004 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 5005 5006 if (!bp) 5007 return -ENOMEM; 5008 inet_peer_base_init(bp); 5009 net->ipv6.peers = bp; 5010 return 0; 5011 } 5012 5013 static void __net_exit ipv6_inetpeer_exit(struct net *net) 5014 { 5015 struct inet_peer_base *bp = net->ipv6.peers; 5016 5017 net->ipv6.peers = NULL; 5018 inetpeer_invalidate_tree(bp); 5019 kfree(bp); 5020 } 5021 5022 static struct pernet_operations ipv6_inetpeer_ops = { 5023 .init = ipv6_inetpeer_init, 5024 .exit = ipv6_inetpeer_exit, 5025 }; 5026 5027 static struct pernet_operations ip6_route_net_late_ops = { 5028 .init = ip6_route_net_init_late, 5029 .exit = 
ip6_route_net_exit_late, 5030 }; 5031 5032 static struct notifier_block ip6_route_dev_notifier = { 5033 .notifier_call = ip6_route_dev_notify, 5034 .priority = ADDRCONF_NOTIFY_PRIORITY - 10, 5035 }; 5036 5037 void __init ip6_route_init_special_entries(void) 5038 { 5039 /* The loopback device is registered before this code runs, so 5040 * the loopback reference in rt6_info is not taken there; take it 5041 * manually for init_net */ 5042 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 5043 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 5044 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 5045 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 5046 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 5047 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 5048 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 5049 #endif 5050 } 5051 5052 int __init ip6_route_init(void) 5053 { 5054 int ret; 5055 int cpu; 5056 5057 ret = -ENOMEM; 5058 ip6_dst_ops_template.kmem_cachep = 5059 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 5060 SLAB_HWCACHE_ALIGN, NULL); 5061 if (!ip6_dst_ops_template.kmem_cachep) 5062 goto out; 5063 5064 ret = dst_entries_init(&ip6_dst_blackhole_ops); 5065 if (ret) 5066 goto out_kmem_cache; 5067 5068 ret = register_pernet_subsys(&ipv6_inetpeer_ops); 5069 if (ret) 5070 goto out_dst_entries; 5071 5072 ret = register_pernet_subsys(&ip6_route_net_ops); 5073 if (ret) 5074 goto out_register_inetpeer; 5075 5076 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 5077 5078 ret = fib6_init(); 5079 if (ret) 5080 goto out_register_subsys; 5081 5082 ret = xfrm6_init(); 5083 if (ret) 5084 goto out_fib6_init; 5085 5086 ret = fib6_rules_init(); 5087 if (ret) 5088 goto xfrm6_init; 5089 5090 ret = register_pernet_subsys(&ip6_route_net_late_ops); 5091 if (ret) 5092 goto fib6_rules_init; 5093 5094 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE, 5095 inet6_rtm_newroute, NULL, 0); 5096 if (ret < 0) 5097 goto out_register_late_subsys; 5098 5099 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE, 5100 inet6_rtm_delroute, NULL, 0); 5101 if (ret < 0) 5102 goto out_register_late_subsys; 5103 5104 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, 5105 inet6_rtm_getroute, NULL, 5106 RTNL_FLAG_DOIT_UNLOCKED); 5107 if (ret < 0) 5108 goto out_register_late_subsys; 5109 5110 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 5111 if (ret) 5112 goto out_register_late_subsys; 5113 5114 for_each_possible_cpu(cpu) { 5115 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); 5116 5117 INIT_LIST_HEAD(&ul->head); 5118 spin_lock_init(&ul->lock); 5119 } 5120 5121 out: 5122 return ret; 5123 5124 out_register_late_subsys: 5125 rtnl_unregister_all(PF_INET6); 5126 unregister_pernet_subsys(&ip6_route_net_late_ops); 5127 fib6_rules_init: 5128 fib6_rules_cleanup(); 5129 xfrm6_init: 5130 xfrm6_fini(); 5131 out_fib6_init: 5132 fib6_gc_cleanup(); 5133 out_register_subsys: 5134 unregister_pernet_subsys(&ip6_route_net_ops); 5135 out_register_inetpeer: 5136 unregister_pernet_subsys(&ipv6_inetpeer_ops); 5137 out_dst_entries: 5138 dst_entries_destroy(&ip6_dst_blackhole_ops); 5139 out_kmem_cache: 5140 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 5141 goto out; 5142 } 5143 5144 void ip6_route_cleanup(void) 5145 { 5146 unregister_netdevice_notifier(&ip6_route_dev_notifier); 5147
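/* remaining teardown runs in roughly the reverse order of the
 * registrations performed in ip6_route_init() above
 */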
unregister_pernet_subsys(&ip6_route_net_late_ops); 5148 fib6_rules_cleanup(); 5149 xfrm6_fini(); 5150 fib6_gc_cleanup(); 5151 unregister_pernet_subsys(&ipv6_inetpeer_ops); 5152 unregister_pernet_subsys(&ip6_route_net_ops); 5153 dst_entries_destroy(&ip6_dst_blackhole_ops); 5154 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 5155 } 5156
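/* Quick reference (illustrative, assuming standard iproute2 usage):
 * the rtnetlink handlers registered in ip6_route_init() serve
 *
 *   RTM_NEWROUTE: "ip -6 route add/replace/append ..." -> inet6_rtm_newroute()
 *   RTM_DELROUTE: "ip -6 route del ..."                -> inet6_rtm_delroute()
 *   RTM_GETROUTE: "ip -6 route get <addr>"             -> inet6_rtm_getroute()
 */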