/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <trace/events/fib6.h>

#include <linux/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct rt6_info *rt);
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif
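/* Dsts that are not in the fib6 tree (e.g. the ones created by
 * icmp6_dst_alloc()) are kept on a per-cpu uncached list so that
 * rt6_uncached_list_flush_dev() can re-point them at the loopback
 * device when their egress device is unregistered.
 */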
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);

static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}

static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}

static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (rt->rt6i_flags & RTF_PCPU)
		return rt6_pcpu_cow_metrics(rt);
	else if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);
}

static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	struct net_device *dev = dst->dev;
	struct rt6_info *rt = (struct rt6_info *)dst;

	daddr = choose_neigh_daddr(rt, NULL, daddr);
	if (!daddr)
		return;
	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
		return;
	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
		return;
	__ipv6_confirm_neigh(dev, daddr);
}
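/* dst_ops for ordinary IPv6 dsts.  The blackhole variant further down
 * backs ip6_blackhole_route(); its dsts discard all traffic and ignore
 * PMTU updates and redirects.
 */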
static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.default_advmss		= ip6_default_advmss,
	.mtu			= ip6_mtu,
	.cow_metrics		= ipv6_cow_metrics,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.redirect		= rt6_do_redirect,
	.local_out		= __ip6_local_out,
	.neigh_lookup		= ip6_neigh_lookup,
	.confirm_neigh		= ip6_confirm_neigh,
};

static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.mtu			= ip6_blackhole_mtu,
	.default_advmss		= ip6_default_advmss,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.redirect		= ip6_rt_blackhole_redirect,
	.cow_metrics		= dst_cow_metrics_generic,
	.neigh_lookup		= ip6_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}

/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					1, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}
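/* Like __ip6_dst_alloc(), but also allocate and zero the rt6i_pcpu
 * array that caches a per-cpu clone of this route; the half-built dst
 * is released immediately if that allocation fails.
 */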
struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p = NULL;
			}
		} else {
			dst_release_immediate(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

static bool __rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES)
		return time_after(jiffies, rt->dst.expires);
	else
		return false;
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
		       rt6_check_expired((struct rt6_info *)rt->dst.from);
	}
	return false;
}

/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	return get_hash_from_flowi6(fl6) % candidate_count;
}
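/* Pick one of the ECMP siblings of @match based on the flow hash; an
 * index of 0, or a sibling whose score is negative, keeps @match itself.
 */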
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_choosen;

	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route, if route_choosen == 0
	 * (the list of siblings does not include ourself)
	 */
	if (route_choosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_choosen--;
			if (route_choosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}

/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}
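/* Router Reachability Probing (see RFC 4191): probe a gateway whose
 * neighbour entry is not currently valid.  The neighbour solicitation
 * is deferred to a workqueue, and probes of the same router are
 * rate-limited by rtr_probe_interval.
 */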
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
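/* Score a route for default-router selection: rt6_check_dev() supplies
 * the low bits, the decoded RFC 4191 route preference sits above them,
 * and a negative rt6_nud_state is returned as-is when the caller
 * requires a reachable nexthop.
 */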
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}

static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}

#ifdef CONFIG_IPV6_ROUTE_INFO
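/* Process an RFC 4191 Route Information Option from a Router
 * Advertisement: depending on the advertised lifetime, add, refresh or
 * delete the corresponding RTF_ROUTEINFO route for the given prefix.
 */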
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif

static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;
}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
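/* Convenience wrapper around fib6_rule_lookup(): returns the matching
 * entry with a reference held, or NULL if the lookup only produced an
 * error route.
 */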
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * Caller must hold dst before calling it.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc,
			struct netlink_ext_ack *extack)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	/* Hold dst to account for the reference from the fib6 tree */
	dst_hold(&rt->dst);
	return __ip6_ins_rt(rt, &info, &mxc, NULL);
}

static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}

static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}

/* It should be called with read_lock_bh(&tb6_lock) acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
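/* Create and publish a pcpu clone for this cpu.  The table read lock is
 * retaken so rt can not be freed under us; cmpxchg() keeps a clone that
 * another context published first, and if rt has already been unlinked
 * from the tree rt itself is returned so the next dst_check() forces a
 * re-lookup.
 */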
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_release_immediate(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_release_immediate(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
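/* Main route lookup, shared by the input and output paths.  Returns the
 * null entry or an existing RTF_CACHE clone directly, a new uncached
 * RTF_CACHE clone for FLOWI_FLAG_KNOWN_NH flows without a gateway, or
 * else a per-cpu copy of the matched tree entry.
 */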
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}

	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt) {
			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
			 * No need for another dst_hold()
			 */
			rt6_uncached_list_add(uncached_rt);
		} else {
			uncached_rt = net->ipv6.ip6_null_entry;
			dst_hold(&uncached_rt->dst);
		}

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;
	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;
	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

struct dst_entry *ip6_route_input_lookup(struct net *net,
					 struct net_device *dev,
					 struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);

void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	bool any_src;

	if (rt6_need_strict(&fl6->daddr)) {
		struct dst_entry *dst;

		dst = l3mdev_link_scope_lookup(net, fl6);
		if (dst)
			return dst;
	}

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);

struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct net_device *loopback_dev = net->loopback_dev;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
		       DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);

		rt->rt6i_idev = in6_dev_get(loopback_dev);
		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
/*
 *	Destination cache support functions
 */

static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	if (rt->dst.from &&
	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}

static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	u32 rt_cookie = 0;

	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}

static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (!__rt6_check_expired(rt) &&
	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return &rt->dst;
	else
		return NULL;
}

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	if (rt->rt6i_flags & RTF_PCPU ||
	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (dst_hold_safe(&rt->dst))
				ip6_del_rt(rt);
		} else {
			struct fib6_node *fn;

			rcu_read_lock();
			fn = rcu_dereference(rt->rt6i_node);
			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
				fn->fn_sernum = -1;
			rcu_read_unlock();
		}
	}
}

static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}

static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
	return !(rt->rt6i_flags & RTF_CACHE) &&
	       (rt->rt6i_flags & RTF_PCPU ||
		rcu_access_pointer(rt->rt6i_node));
}
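/* Apply a PMTU update to a dst: a dst that may not take a cache clone
 * is modified in place (RTF_MODIFIED plus an expiry timer), while for
 * an entry still backed by the tree a RTF_CACHE clone carrying the new
 * MTU is created and inserted so the update survives later lookups.
 */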
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	const struct in6_addr *daddr, *saddr;
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (iph) {
		daddr = &iph->daddr;
		saddr = &iph->saddr;
	} else if (sk) {
		daddr = &sk->sk_v6_daddr;
		saddr = &inet6_sk(sk)->saddr;
	} else {
		daddr = NULL;
		saddr = NULL;
	}
	dst_confirm_neigh(dst, daddr);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else if (daddr) {
		struct rt6_info *nrt6;

		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
			/* Release the reference taken in
			 * ip6_rt_cache_alloc()
			 */
			dst_release(&nrt6->dst);
		}
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
}

void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark, kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);
	fl6.flowi6_uid = uid;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);

void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);

	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

/* Handle redirects */
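/* Flow wrapper for redirect processing: carries the router that sent
 * the redirect next to the flow so __ip6_route_redirect() can restrict
 * the lookup to routes through that gateway.
 */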
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
	return rt;
}

static struct dst_entry *ip6_route_redirect(struct net *net,
					    const struct flowi6 *fl6,
					    const struct in6_addr *gateway)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip6rd_flowi rdfl;

	rdfl.fl6 = *fl6;
	rdfl.gateway = *gateway;

	return fib6_rule_lookup(net, &rdfl.fl6,
				flags, __ip6_route_redirect);
}

void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
		  kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);
	fl6.flowi6_uid = uid;

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);

void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;
	fl6.flowi6_uid = sock_net_uid(net, NULL);

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}

void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
		     sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
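/* Default advertised MSS metric: the path MTU minus IPv6 and TCP
 * headers, clamped below by the ip6_rt_min_advmss sysctl and above by
 * IPV6_MAXPLEN.
 */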
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}

struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output = ip6_output;
	rt->rt6i_gateway = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Add this dst into uncached_list so that rt6_ifdown() can
	 * do proper release of the net_device
	 */
	rt6_uncached_list_add(rt);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}

static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout >> 1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire >> rt_elasticity;
	return entries > rt_max_size;
}
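/* Translate the RTA_METRICS netlink attribute into an mx6_config: each
 * metric value lands in a kzalloc'd RTAX_MAX array (RTAX_CC_ALGO is
 * converted from an algorithm name to its key), which fib6_add() later
 * applies to the route.
 */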
static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (unlikely(type > RTAX_MAX))
			goto err;

		if (type == RTAX_CC_ALGO) {
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				goto err;
		} else {
			val = nla_get_u32(nla);
		}
		if (type == RTAX_HOPLIMIT && val > 255)
			val = 255;
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			goto err;

		mp[type - 1] = val;
		__set_bit(type - 1, mxc->mx_valid);
	}

	if (ecn_ca) {
		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
	}

	mxc->mx = mp;
	return 0;
err:
	kfree(mp);
	return -EINVAL;
}

static struct rt6_info *ip6_nh_lookup_table(struct net *net,
					    struct fib6_config *cfg,
					    const struct in6_addr *gw_addr)
{
	struct flowi6 fl6 = {
		.flowi6_oif = cfg->fc_ifindex,
		.daddr = *gw_addr,
		.saddr = cfg->fc_prefsrc,
	};
	struct fib6_table *table;
	struct rt6_info *rt;
	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;

	table = fib6_get_table(net, cfg->fc_table);
	if (!table)
		return NULL;

	if (!ipv6_addr_any(&cfg->fc_prefsrc))
		flags |= RT6_LOOKUP_F_HAS_SADDR;

	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);

	/* if table lookup failed, fall back to full lookup */
	if (rt == net->ipv6.ip6_null_entry) {
		ip6_rt_put(rt);
		rt = NULL;
	}

	return rt;
}
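/* Validate a fib6_config and construct the matching rt6_info, not yet
 * linked into any table.  Reject routes, and true routes via loopback
 * that would loop in the kernel, come back bound to the loopback device
 * with the appropriate error dst handlers; on failure an ERR_PTR is
 * returned.
 */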
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
					      struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate, extack);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here; they would
	   result in kernel looping.  Promote them to reject routes.
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}
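	/* For gateway routes the nexthop must be validated: it may not be
	 * one of our own addresses (for link-local gateways the check is
	 * scoped to the egress device), and a non-link-local gateway is
	 * resolved with a recursive route lookup that also determines the
	 * egress device when none was given.
	 */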
	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}
		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using non-link-local
			 * addresses as a nexthop address.
			 * Otherwise, the router would not be able to
			 * send redirects.  That is very good, but in
			 * some (rare!) circumstances (SIT, PtP, NBMA
			 * NOARP links) it is handy to allow some
			 * exceptions. --ANK
			 * We allow IPv4-mapped nexthops to support
			 * RFC4798-type addressing.
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED))) {
				NL_SET_ERR_MSG(extack,
					       "Invalid gateway address");
				goto out;
			}

			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Egress device not specified");
			goto out;
		} else if (dev->flags & IFF_LOOPBACK) {
			NL_SET_ERR_MSG(extack,
				       "Egress device can not be loopback device for this route");
			goto out;
		}
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_release_immediate(&rt->dst);

	return ERR_PTR(err);
}

int ip6_route_add(struct fib6_config *cfg,
		  struct netlink_ext_ack *extack)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt;
	int err;

	rt = ip6_route_info_create(cfg, extack);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto out;
	}

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);

	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_release_immediate(&rt->dst);

	return err;
}
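/* Unlink one route from its table under the table write lock and drop
 * the caller's reference; deleting the null entry is refused with
 * -ENOENT.
 */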
int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt, &info);
}

static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
{
	struct nl_info *info = &cfg->fc_nlinfo;
	struct net *net = info->nl_net;
	struct sk_buff *skb = NULL;
	struct fib6_table *table;
	int err = -ENOENT;

	if (rt == net->ipv6.ip6_null_entry)
		goto out_put;
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
		struct rt6_info *sibling, *next_sibling;

		/* prefer to send a single notification with all hops */
		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
		if (skb) {
			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;

			if (rt6_fill_node(net, skb, rt,
					  NULL, NULL, 0, RTM_DELROUTE,
					  info->portid, seq, 0) < 0) {
				kfree_skb(skb);
				skb = NULL;
			} else
				info->skip_notify = 1;
		}

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings,
					 rt6i_siblings) {
			err = fib6_del(sibling, info);
			if (err)
				goto out_unlock;
		}
	}

	err = fib6_del(rt, info);
out_unlock:
	write_unlock_bh(&table->tb6_lock);
out_put:
	ip6_rt_put(rt);

	if (skb) {
		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
			    info->nlh, gfp_any());
	}
	return err;
}

static int ip6_route_del(struct fib6_config *cfg,
			 struct netlink_ext_ack *extack)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table) {
		NL_SET_ERR_MSG(extack, "FIB table does not exist");
		return err;
	}

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
				continue;
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			/* if gateway was specified only delete the one hop */
			if (cfg->fc_flags & RTF_GATEWAY)
				return __ip6_del_rt(rt, &cfg->fc_nlinfo);

			return __ip6_del_rt_siblings(rt, cfg);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
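
/* Handle an ICMPv6 redirect message (see the RFC 2461 8.1 rule cited
 * below): validate the message and its ND options, confirm the current
 * nexthop, update the neighbour cache for the redirect target, and
 * install an RTF_CACHE clone pointing at the new nexthop.
 */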
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as
	 *	the current first-hop router for the specified ICMP
	 *	Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out_release;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out_release:
	/* Release the reference taken in
	 * ip6_rt_cache_alloc()
	 */
	dst_release(&nrt->dst);

out:
	neigh_release(neigh);
}
/*
 *	Misc support functions
 */

static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
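
/* Routes learned from Route Information options in Router Advertisements
 * are kept in RT6_TABLE_INFO (or the l3mdev table) and marked
 * RTF_ROUTEINFO | RTF_GATEWAY with protocol RTPROT_RA; the helpers below
 * look such a route up and install one.
 */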
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
	int ifindex = dev->ifindex;
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_protocol	= RTPROT_RA,
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg, NULL);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
}
#endif

struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
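
/* Install a default route learned from a Router Advertisement; the route
 * carries RTF_ADDRCONF | RTF_DEFAULT and expires with the RA lifetime
 * (RTF_EXPIRES).
 */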
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_protocol	= RTPROT_RA,
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	if (!ip6_route_add(&cfg, NULL)) {
		struct fib6_table *table;

		table = fib6_get_table(dev_net(dev), cfg.fc_table);
		if (table)
			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
	}

	return rt6_get_dflt_router(gwaddr, dev);
}

static void __rt6_purge_dflt_routers(struct fib6_table *table)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);

	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}

void rt6_purge_dflt_routers(struct net *net)
{
	struct fib6_table *table;
	struct hlist_head *head;
	unsigned int h;

	rcu_read_lock();

	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
		head = &net->ipv6.fib_table_hash[h];
		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
				__rt6_purge_dflt_routers(table);
		}
	}

	rcu_read_unlock();
}
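
/* Legacy ioctl configuration path: translate a struct in6_rtmsg from
 * SIOCADDRT/SIOCDELRT into a fib6_config. Illustrative userspace usage
 * (not from this file):
 *
 *	struct in6_rtmsg rtmsg = { 0 };
 *	inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *	rtmsg.rtmsg_dst_len = 64;
 *	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *	rtmsg.rtmsg_flags = RTF_UP;
 *	ioctl(fd, SIOCADDRT, &rtmsg);	// fd: an AF_INET6 datagram socket
 */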
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
			 : RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	cfg->fc_dst = rtmsg->rtmsg_dst;
	cfg->fc_src = rtmsg->rtmsg_src;
	cfg->fc_gateway = rtmsg->rtmsg_gateway;
}

int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg, NULL);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg, NULL);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
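
/* Handlers for reject routes: ip6_pkt_discard*() answers with an ICMPv6
 * destination-unreachable, code "no route" (ICMPV6_NOROUTE), and
 * ip6_pkt_prohibit*() with code "administratively prohibited"
 * (ICMPV6_ADM_PROHIBITED), each counting the drop in the matching
 * IPSTATS_MIB_*NOROUTES counter.
 */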
/*
 *	Drop the packet on the floor
 */

static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}

static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}

static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	u32 tb_id;
	struct net *net = dev_net(idev->dev);
	struct net_device *dev = net->loopback_dev;
	struct rt6_info *rt;

	/* use the L3 master device as loopback for host routes if the
	 * device is enslaved and the address is not link local or multicast
	 */
	if (!rt6_need_strict(addr))
		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;

	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_protocol = RTPROT_KERNEL;
	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);

	return rt;
}

/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};

static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

	if (((void *)rt->dst.dev == dev || !dev) &&
	    rt != net->ipv6.ip6_null_entry &&
	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
		/* remove prefsrc entry */
		rt->rt6i_prefsrc.plen = 0;
	}
	return 0;
}

void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}

#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
/* Remove routers and update dst entries when a gateway turns into a host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
	struct in6_addr *gateway = (struct in6_addr *)arg;

	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
	    ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
		return -1;
	}
	return 0;
}

void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}

struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};

/* called with write lock held for table with rt */
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->dst.dev == dev || !dev) &&
	    rt != adn->net->ipv6.ip6_null_entry &&
	    (rt->rt6i_nsiblings == 0 ||
	     (dev && netdev_unregistering(dev)) ||
	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
		return -1;

	return 0;
}

void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	if (dev)
		rt6_uncached_list_flush_dev(net, dev);
}
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned int mtu;
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 PMTU discovery is not optional, so locking RTAX_MTU
	 * cannot disable it. We still use this lock to block changes
	 * caused by addrconf/ndisc.
	 */

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For an administrative MTU increase there is no way to discover
	 * an IPv6 PMTU increase, so the PMTU must be updated here.
	 * Since RFC 1981 doesn't cover administrative MTU increases,
	 * updating the PMTU on an increase is a MUST (e.g. jumbo frames).
	 */
	/* If the new MTU is less than the route PMTU, the new MTU will be
	 * the lowest MTU in the path; update the route PMTU to reflect the
	 * decrease. If the new MTU is greater than the route PMTU, and the
	 * old MTU is the lowest MTU in the path, update the route PMTU to
	 * reflect the increase. In that case, if another node's MTU is the
	 * lowest in the path, a PACKET TOO BIG message will trigger PMTU
	 * discovery again.
	 */
	if (rt->dst.dev == arg->dev &&
	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from have already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}

void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}

static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]		= { .type = NLA_U32 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]		= { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
};
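
/* Netlink configuration path: decode an RTM_NEWROUTE/RTM_DELROUTE request
 * into a fib6_config. For example (illustrative), "ip -6 route add
 * 2001:db8::/64 via fe80::1 dev eth0" arrives as an RTM_NEWROUTE message
 * carrying RTA_DST, RTA_GATEWAY and RTA_OIF attributes.
 */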
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg,
			      struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  NULL);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);

		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
						     cfg->fc_mp_len, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_PREF]) {
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE]) {
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}

struct rt6_nh {
	struct rt6_info *rt6_info;
	struct fib6_config r_cfg;
	struct mx6_config mxc;
	struct list_head next;
};

static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
{
	struct rt6_nh *nh;

	list_for_each_entry(nh, rt6_nh_list, next) {
		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
			&nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
			nh->r_cfg.fc_ifindex);
	}
}

static int ip6_route_info_append(struct list_head *rt6_nh_list,
				 struct rt6_info *rt, struct fib6_config *r_cfg)
{
	struct rt6_nh *nh;
	int err = -EEXIST;

	list_for_each_entry(nh, rt6_nh_list, next) {
		/* check if rt6_info already exists */
		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
			return err;
	}

	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
	if (!nh)
		return -ENOMEM;
	nh->rt6_info = rt;
	err = ip6_convert_metrics(&nh->mxc, r_cfg);
	if (err) {
		kfree(nh);
		return err;
	}
	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
	list_add_tail(&nh->next, rt6_nh_list);

	return 0;
}

static void ip6_route_mpath_notify(struct rt6_info *rt,
				   struct rt6_info *rt_last,
				   struct nl_info *info,
				   __u16 nlflags)
{
	/* if this is an APPEND route, then rt points to the first route
	 * inserted and rt_last points to the last route inserted. Userspace
	 * wants a consistent dump of the route which starts at the first
	 * nexthop. Since sibling routes are always added at the end of
	 * the list, find the first sibling of the last route appended
	 */
	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
		rt = list_first_entry(&rt_last->rt6i_siblings,
				      struct rt6_info,
				      rt6i_siblings);
	}

	if (rt)
		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
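
/* Add a multipath route: each RTA_MULTIPATH nexthop is first turned into
 * its own rt6_info, then the routes are inserted one by one; on failure,
 * any nexthops already inserted are rolled back so userspace sees an
 * all-or-nothing result, with one notification for the whole route.
 */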
static int ip6_route_multipath_add(struct fib6_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
	struct nl_info *info = &cfg->fc_nlinfo;
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	__u16 nlflags;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
		nlflags |= NLM_F_APPEND;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg, extack);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			dst_release_immediate(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* for add and replace, send one notification with all nexthops.
	 * Skip the notification in fib6_add_rt2node and send one with
	 * the full route when done
	 */
	info->skip_notify = 1;

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		rt_last = nh->rt6_info;
		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
		/* save reference to first route for notification */
		if (!rt_notif && !err)
			rt_notif = nh->rt6_info;

		/* nh->rt6_info is used or freed at this point, reset to NULL */
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added as if it were a single route,
		 * we remove these flags after the first nexthop: if there is
		 * a collision, we have already failed to add the first
		 * nexthop (fib6_add_rt2node() has rejected it); when
		 * replacing, the old nexthops have been replaced by the first
		 * new one, and the rest should be appended to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}
	/* success ... tell user about new route */
	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
	goto cleanup;

add_errout:
	/* send notification for routes that were added so that
	 * the delete notifications sent by ip6_route_del are
	 * coherent
	 */
	if (rt_notif)
		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);

	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg, extack);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_release_immediate(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}

static int ip6_route_multipath_del(struct fib6_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 1, last_err = 0;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = ip6_route_del(&r_cfg, extack);
		if (err)
			last_err = err;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}

static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
	if (err < 0)
		return err;

	if (cfg.fc_mp)
		return ip6_route_multipath_del(&cfg, extack);
	else {
		cfg.fc_delete_all_nh = 1;
		return ip6_route_del(&cfg, extack);
	}
}

static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
	if (err < 0)
		return err;

	if (cfg.fc_mp)
		return ip6_route_multipath_add(&cfg, extack);
	else
		return ip6_route_add(&cfg, extack);
}
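
/* Upper bound on the netlink message size needed to dump this route;
 * multipath routes add per-sibling RTA_MULTIPATH/rtnexthop overhead.
 */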
static size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	int nexthop_len = 0;

	if (rt->rt6i_nsiblings) {
		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
			    + NLA_ALIGN(sizeof(struct rtnexthop))
			    + nla_total_size(16) /* RTA_GATEWAY */
			    + lwtunnel_get_encap_size(rt->dst.lwtstate);

		nexthop_len *= rt->rt6i_nsiblings;
	}

	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
	       + nexthop_len;
}

static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
			    unsigned int *flags, bool skip_oif)
{
	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
		*flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			*flags |= RTNH_F_DEAD;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	/* not needed for multipath encoding b/c it has a rtnexthop struct */
	if (!skip_oif && rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;

	if (rt->dst.lwtstate &&
	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

/* add multipath next hop */
static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
{
	struct rtnexthop *rtnh;
	unsigned int flags = 0;

	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
	if (!rtnh)
		goto nla_put_failure;

	rtnh->rtnh_hops = 0;
	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;

	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
		goto nla_put_failure;

	rtnh->rtnh_flags = flags;

	/* length of rtnetlink header + attributes */
	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
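
/* Serialize one route into an rtmsg plus attributes; used for dumps,
 * RTM_GETROUTE replies and RTM_NEWROUTE/RTM_DELROUTE notifications. The
 * reject-route error code is mapped back to the route type it was
 * created from.
 */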
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_flags & RTF_ANYCAST)
		rtm->rtm_type = RTN_ANYCAST;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt->rt6i_nsiblings) {
		struct rt6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		if (rt6_add_nexthop(skb, rt) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings, rt6i_siblings) {
			if (rt6_add_nexthop(skb, sibling) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else {
		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
			goto nla_put_failure;
	}
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	struct net *net = arg->net;

	if (rt == net->ipv6.ip6_null_entry)
		return 0;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);

		/* user wants prefix routes only */
		if (rtm->rtm_flags & RTM_F_PREFIX &&
		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	return rt6_fill_node(net,
			     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
			     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
			     NLM_F_MULTI);
}
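
/* RTM_GETROUTE: resolve a route for the given selectors and return it to
 * the caller. With RTM_F_FIB_MATCH set, report the matching FIB entry
 * itself instead of the dst the full lookup would produce.
 */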
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	int err, iif = 0, oif = 0;
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	bool fibmatch;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  extack);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));
	rtm = nlmsg_data(nlh);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (iif) {
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		if (!fibmatch)
			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
	} else {
		fl6.flowi6_oif = oif;

		if (!fibmatch)
			dst = ip6_route_output(net, NULL, &fl6);
	}

	if (fibmatch)
		dst = ip6_route_lookup(net, &fl6, 0);

	rt = container_of(dst, struct rt6_info, dst);
	if (rt->dst.error) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	skb_dst_set(skb, &rt->dst);
	if (fibmatch)
		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	else
		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}

void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->portid, seq, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
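
/* Device notifier: wire the special null/prohibit/blackhole dst entries to
 * the loopback device when it registers, and drop those references again
 * on unregister.
 */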
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	} else if (event == NETDEV_UNREGISTER &&
		   dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER could be fired multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}

static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
#endif	/* CONFIG_PROC_FS */
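
/* Writing to the "flush" sysctl below triggers an immediate garbage
 * collection of cached routes via fib6_run_gc(), e.g. (illustrative):
 *
 *	echo 1 > /proc/sys/net/ipv6/route/flush
 */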
#ifdef CONFIG_SYSCTL

static
int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net;
	int delay;
	if (!write)
		return -EINVAL;

	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	proc_dointvec(ctl, write, buffer, lenp, ppos);
	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
	return 0;
}

struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	= "flush",
		.data		= &init_net.ipv6.sysctl.flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv6_sysctl_rtcache_flush
	},
	{
		.procname	= "gc_thresh",
		.data		= &ip6_dst_ops_template.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_min_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ }
};

struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
#endif
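
/* Per-netns setup: clone the dst_ops template, allocate the special null
 * route (and, with CONFIG_IPV6_MULTIPLE_TABLES, the prohibit and blackhole
 * entries), and seed the routing sysctl defaults.
 */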
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}

static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}

static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}

static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}

static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static struct pernet_operations ipv6_inetpeer_ops = {
	.init = ipv6_inetpeer_init,
	.exit = ipv6_inetpeer_exit,
};

static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};

void __init ip6_route_init_special_entries(void)
{
	/* The loopback device is registered before this code runs, so the
	 * loopback reference in rt6_info is not taken automatically; take
	 * it manually for init_net.
	 */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
}
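
/* Module init: set up the dst kmem cache, register the pernet subsystems,
 * the fib6 core, xfrm6 and policy rules, hook up the rtnetlink handlers
 * and the device notifier, and initialize the per-cpu uncached lists;
 * errors unwind in reverse order.
 */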
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}