/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
static void rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif

struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);

static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}

static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}

static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (rt->rt6i_flags & RTF_PCPU)
		return rt6_pcpu_cow_metrics(rt);
	else if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);
}

static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.default_advmss		= ip6_default_advmss,
	.mtu			= ip6_mtu,
	.cow_metrics		= ipv6_cow_metrics,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.redirect		= rt6_do_redirect,
	.local_out		= __ip6_local_out,
	.neigh_lookup		= ip6_neigh_lookup,
};

static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.mtu			= ip6_blackhole_mtu,
	.default_advmss		= ip6_default_advmss,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.redirect		= ip6_rt_blackhole_redirect,
	.cow_metrics		= dst_cow_metrics_generic,
	.neigh_lookup		= ip6_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}

/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}

static struct rt6_info *ip6_dst_alloc(struct net *net,
				      struct net_device *dev,
				      int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p = NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
	}
	return false;
}

/* Multipath route selection:
 * Hash-based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int val = fl6->flowi6_proto;

	val ^= ipv6_addr_hash(&fl6->daddr);
	val ^= ipv6_addr_hash(&fl6->saddr);

	/* Works only if this is not encapsulated */
	switch (fl6->flowi6_proto) {
	case IPPROTO_UDP:
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
		val ^= (__force u16)fl6->fl6_sport;
		val ^= (__force u16)fl6->fl6_dport;
		break;

	case IPPROTO_ICMPV6:
		val ^= (__force u16)fl6->fl6_icmp_type;
		val ^= (__force u16)fl6->fl6_icmp_code;
		break;
	}
	/* RFC 6438 recommends using the flowlabel */
	val ^= (__force u32)fl6->flowlabel;

	/* Perhaps we need to tune this function? */
	val = val ^ (val >> 7) ^ (val >> 12);
	return val % candidate_count;
}

static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_chosen;

	route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route if route_chosen == 0
	 * (the sibling list does not include ourself)
	 */
	if (route_chosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_chosen--;
			if (route_chosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}
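
/* Worked example (illustrative only, not part of the original file):
 * suppose a destination has three sibling routes, so match->rt6i_nsiblings
 * is 3 and candidate_count is 4.  For a TCP flow the hash mixes
 * flowi6_proto, both address hashes, the ports and the flowlabel, then
 * folds and reduces the result:
 *
 *	val = val ^ (val >> 7) ^ (val >> 12);
 *	return val % 4;		// hypothetical result: 2
 *
 * A result of 0 keeps the original match; a result of 2 makes
 * rt6_multipath_select() walk two entries down the rt6i_siblings list and
 * pick the second sibling.  Because every packet of the flow carries the
 * same header fields, it always hashes to the same nexthop, which pins a
 * flow to one path.
 */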

/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
	dev_put(work->dev);
	kfree(work);
}

static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}
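
/* Illustrative note (not part of the original file): rt6_check_dev()
 * supplies the low two bits of a route's score in rt6_score_route():
 *
 *	matches the requested oif (or no oif given)	-> 2
 *	loopback route owned by the oif's inet6_dev	-> 1
 *	any other device				-> 0
 *
 * With CONFIG_IPV6_ROUTER_PREF the decoded RA preference is OR-ed in at
 * bit 2 and above (m |= pref << 2), so any difference in router
 * preference outweighs the device-match component when two candidate
 * routes are compared.
 */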

static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
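
/* Illustrative scenario (not part of the original file): with three
 * equal-metric default routes A -> B -> C on one fib6_node and
 * fn->rr_ptr == A, a lookup with RT6_LOOKUP_F_REACHABLE that finds no
 * neighbour cache entry at all for A's gateway (and
 * CONFIG_IPV6_ROUTER_PREF disabled) gets RT6_NUD_FAIL_DO_RR back from
 * rt6_check_neigh().  find_match() then records do_rr = true, and
 * rt6_select() advances fn->rr_ptr to B, so the next lookup starts its
 * scan at B.  This is the round-robin behaviour promised in the header
 * comment.
 */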

static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}

#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif

static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is freed.  In any case, if the caller does not hold it,
 * it may be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}

static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}

static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}

/* It should be called with read_lock_bh(&tb6_lock) acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
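
/* Illustrative note (not part of the original file): the function below
 * publishes its freshly allocated clone with
 *
 *	prev = cmpxchg(p, NULL, pcpu_rt);
 *
 * cmpxchg() stores pcpu_rt only if *p was still NULL and always returns
 * the prior value, so when two contexts race on the same per-cpu slot
 * exactly one install wins; the loser sees prev != NULL, frees its own
 * copy and uses the winner's.  That keeps the slot consistent without a
 * lock on the fast path.
 */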

static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}

static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}

	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;
	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		return pcpu_rt;
	}
}

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}

void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
				   struct flowi6 *fl6)
{
	int flags = 0;

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    fl6->flowi6_oif)
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL(ip6_route_output);

struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_sk;

		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
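
/* Illustrative note (the caller sketched here is an assumption, not taken
 * from this file): the blackhole copy keeps the original route's metrics
 * and addresses but discards every packet, so code that must keep a
 * socket's dst pointer valid while the real route becomes temporarily
 * unusable (e.g. while IPsec/xfrm state is still being resolved) can
 * swap it in:
 *
 *	dst = ip6_blackhole_route(net, dst);	// consumes dst_orig
 *	if (!IS_ERR(dst))
 *		sk_dst_set(sk, dst);		// hypothetical caller
 */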

/*
 *	Destination cache support functions
 */

static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	if (rt->dst.from &&
	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}

static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}

static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return &rt->dst;
	else
		return NULL;
}

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}

static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}

static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (rt6->rt6i_flags & RTF_CACHE) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
}

void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);

void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
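
/* Usage sketch (illustrative only, not from this file): an ICMPv6 error
 * handler that receives a Packet Too Big message for a packet it sent
 * would typically forward the advertised MTU here, e.g.:
 *
 *	static void my_err_handler(struct sk_buff *skb, u8 type,
 *				   __be32 info)		// hypothetical
 *	{
 *		if (type == ICMPV6_PKT_TOOBIG)
 *			ip6_update_pmtu(skb, net, info, 0, 0);
 *	}
 *
 * where "info" is the MTU field of the ICMPv6 header in network byte
 * order (hence the ntohl() above); passing mark == 0 falls back to
 * IP6_REPLY_MARK().  Socket-owned paths use ip6_sk_update_pmtu() instead.
 */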

/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from an appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};

static struct dst_entry *ip6_route_redirect(struct net *net,
					    const struct flowi6 *fl6,
					    const struct in6_addr *gateway)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip6rd_flowi rdfl;

	rdfl.fl6 = *fl6;
	rdfl.gateway = *gateway;

	return fib6_rule_lookup(net, &rdfl.fl6,
				flags, __ip6_route_redirect);
}

void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);

void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}

void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
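
/* Illustrative scenario (not part of the original file): host H routes
 * 2001:db8::/32 via router R1 on eth0 and receives a Redirect whose IP
 * source is R2.  __ip6_route_redirect() walks the routes toward the
 * destination and only accepts the message if some non-expired gateway
 * route both leaves through the arrival interface (the flowi6_oif check)
 * and currently uses R2 as its gateway (the rdfl->gateway check).  Since
 * H's next hop for the prefix is R1, the lookup falls through to
 * ip6_null_entry and rt6_do_redirect() drops the spoofed Redirect, as
 * RFC 4861 requires.
 */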

static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
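
/* Worked example (illustrative only, not from this file): on a standard
 * Ethernet link with dst_mtu(dst) == 1500,
 *
 *	advmss = 1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr)
 *	       = 1500 - 40 - 20 = 1440
 *
 * which is the familiar IPv6 TCP MSS for a 1500-byte MTU.  Only when the
 * link MTU approaches 64 KiB does the IPV6_MAXPLEN clamp above apply.
 */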

static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);

struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}

int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}

static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}

static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout >> 1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire >> rt_elasticity;
	return entries > rt_max_size;
}

static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (unlikely(type > RTAX_MAX))
			goto err;

		if (type == RTAX_CC_ALGO) {
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				goto err;
		} else {
			val = nla_get_u32(nla);
		}
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			goto err;

		mp[type - 1] = val;
		__set_bit(type - 1, mxc->mx_valid);
	}

	if (ecn_ca) {
		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
	}

	mxc->mx = mp;
	return 0;
err:
	kfree(mp);
	return -EINVAL;
}
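
/* Illustrative note (not part of the original file): cfg->fc_mx carries
 * the nested RTA_METRICS attributes of a netlink RTM_NEWROUTE request,
 * e.g. the pair produced by "ip -6 route add ... mtu 1400 hoplimit 32".
 * For that request the loop above leaves
 *
 *	mp[RTAX_MTU - 1]      == 1400
 *	mp[RTAX_HOPLIMIT - 1] == 32
 *
 * with the matching bits set in mxc->mx_valid, and the insert path later
 * copies only the slots flagged valid into the route's metrics array.
 */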

int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly prohibits using non-link-local
			   addresses as the nexthop address.
			   Otherwise, the router will not be able to send
			   redirects.  It is very good, but in some (rare!)
			   circumstances (SIT, PtP, NBMA NOARP links) it is
			   handy to allow some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	*rt_ret = rt;

	return 0;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	*rt_ret = NULL;

	return err;
}

int ip6_route_add(struct fib6_config *cfg)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt = NULL;
	int err;

	err = ip6_route_info_create(cfg, &rt);
	if (err)
		goto out;

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);

	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_free(&rt->dst);

	return err;
}
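
/* Usage sketch (illustrative only, not from this file): in-kernel callers
 * add routes by filling a fib6_config by hand, roughly:
 *
 *	struct fib6_config cfg = {
 *		.fc_table	= RT6_TABLE_MAIN,
 *		.fc_metric	= IP6_RT_PRIO_USER,
 *		.fc_ifindex	= dev->ifindex,
 *		.fc_dst_len	= 64,		// hypothetical /64 prefix
 *		.fc_flags	= RTF_UP,
 *		.fc_nlinfo.nl_net = dev_net(dev),
 *	};
 *	cfg.fc_dst = prefix;			// hypothetical in6_addr
 *	err = ip6_route_add(&cfg);
 *
 * ip6_route_info_create() validates the config and builds the rt6_info,
 * and __ip6_ins_rt() links it into the fib6 tree under tb6_lock.
 */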

static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry ||
	    rt->dst.flags & DST_NOCACHE) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}

int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt, &info);
}

static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 2181 NEIGH_UPDATE_F_ISROUTER)) 2182 ); 2183 2184 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); 2185 if (!nrt) 2186 goto out; 2187 2188 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 2189 if (on_link) 2190 nrt->rt6i_flags &= ~RTF_GATEWAY; 2191 2192 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 2193 2194 if (ip6_ins_rt(nrt)) 2195 goto out; 2196 2197 netevent.old = &rt->dst; 2198 netevent.new = &nrt->dst; 2199 netevent.daddr = &msg->dest; 2200 netevent.neigh = neigh; 2201 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 2202 2203 if (rt->rt6i_flags & RTF_CACHE) { 2204 rt = (struct rt6_info *) dst_clone(&rt->dst); 2205 ip6_del_rt(rt); 2206 } 2207 2208 out: 2209 neigh_release(neigh); 2210 } 2211 2212 /* 2213 * Misc support functions 2214 */ 2215 2216 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) 2217 { 2218 BUG_ON(from->dst.from); 2219 2220 rt->rt6i_flags &= ~RTF_EXPIRES; 2221 dst_hold(&from->dst); 2222 rt->dst.from = &from->dst; 2223 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); 2224 } 2225 2226 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) 2227 { 2228 rt->dst.input = ort->dst.input; 2229 rt->dst.output = ort->dst.output; 2230 rt->rt6i_dst = ort->rt6i_dst; 2231 rt->dst.error = ort->dst.error; 2232 rt->rt6i_idev = ort->rt6i_idev; 2233 if (rt->rt6i_idev) 2234 in6_dev_hold(rt->rt6i_idev); 2235 rt->dst.lastuse = jiffies; 2236 rt->rt6i_gateway = ort->rt6i_gateway; 2237 rt->rt6i_flags = ort->rt6i_flags; 2238 rt6_set_from(rt, ort); 2239 rt->rt6i_metric = ort->rt6i_metric; 2240 #ifdef CONFIG_IPV6_SUBTREES 2241 rt->rt6i_src = ort->rt6i_src; 2242 #endif 2243 rt->rt6i_prefsrc = ort->rt6i_prefsrc; 2244 rt->rt6i_table = ort->rt6i_table; 2245 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); 2246 } 2247 2248 #ifdef CONFIG_IPV6_ROUTE_INFO 2249 static struct rt6_info *rt6_get_route_info(struct net *net, 2250 const struct in6_addr *prefix, int prefixlen, 2251 const struct in6_addr *gwaddr, int ifindex) 2252 { 2253 struct fib6_node *fn; 2254 struct rt6_info *rt = NULL; 2255 struct fib6_table *table; 2256 2257 table = fib6_get_table(net, RT6_TABLE_INFO); 2258 if (!table) 2259 return NULL; 2260 2261 read_lock_bh(&table->tb6_lock); 2262 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0); 2263 if (!fn) 2264 goto out; 2265 2266 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 2267 if (rt->dst.dev->ifindex != ifindex) 2268 continue; 2269 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 2270 continue; 2271 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 2272 continue; 2273 dst_hold(&rt->dst); 2274 break; 2275 } 2276 out: 2277 read_unlock_bh(&table->tb6_lock); 2278 return rt; 2279 } 2280 2281 static struct rt6_info *rt6_add_route_info(struct net *net, 2282 const struct in6_addr *prefix, int prefixlen, 2283 const struct in6_addr *gwaddr, int ifindex, 2284 unsigned int pref) 2285 { 2286 struct fib6_config cfg = { 2287 .fc_table = RT6_TABLE_INFO, 2288 .fc_metric = IP6_RT_PRIO_USER, 2289 .fc_ifindex = ifindex, 2290 .fc_dst_len = prefixlen, 2291 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 2292 RTF_UP | RTF_PREF(pref), 2293 .fc_nlinfo.portid = 0, 2294 .fc_nlinfo.nlh = NULL, 2295 .fc_nlinfo.nl_net = net, 2296 }; 2297 2298 cfg.fc_dst = *prefix; 2299 cfg.fc_gateway = *gwaddr; 2300 2301 /* We should treat it as a default route if prefix length is 0. 
*/ 2302 if (!prefixlen) 2303 cfg.fc_flags |= RTF_DEFAULT; 2304 2305 ip6_route_add(&cfg); 2306 2307 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); 2308 } 2309 #endif 2310 2311 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) 2312 { 2313 struct rt6_info *rt; 2314 struct fib6_table *table; 2315 2316 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 2317 if (!table) 2318 return NULL; 2319 2320 read_lock_bh(&table->tb6_lock); 2321 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 2322 if (dev == rt->dst.dev && 2323 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 2324 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 2325 break; 2326 } 2327 if (rt) 2328 dst_hold(&rt->dst); 2329 read_unlock_bh(&table->tb6_lock); 2330 return rt; 2331 } 2332 2333 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, 2334 struct net_device *dev, 2335 unsigned int pref) 2336 { 2337 struct fib6_config cfg = { 2338 .fc_table = RT6_TABLE_DFLT, 2339 .fc_metric = IP6_RT_PRIO_USER, 2340 .fc_ifindex = dev->ifindex, 2341 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 2342 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 2343 .fc_nlinfo.portid = 0, 2344 .fc_nlinfo.nlh = NULL, 2345 .fc_nlinfo.nl_net = dev_net(dev), 2346 }; 2347 2348 cfg.fc_gateway = *gwaddr; 2349 2350 ip6_route_add(&cfg); 2351 2352 return rt6_get_dflt_router(gwaddr, dev); 2353 } 2354 2355 void rt6_purge_dflt_routers(struct net *net) 2356 { 2357 struct rt6_info *rt; 2358 struct fib6_table *table; 2359 2360 /* NOTE: Keep consistent with rt6_get_dflt_router */ 2361 table = fib6_get_table(net, RT6_TABLE_DFLT); 2362 if (!table) 2363 return; 2364 2365 restart: 2366 read_lock_bh(&table->tb6_lock); 2367 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 2368 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && 2369 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { 2370 dst_hold(&rt->dst); 2371 read_unlock_bh(&table->tb6_lock); 2372 ip6_del_rt(rt); 2373 goto restart; 2374 } 2375 } 2376 read_unlock_bh(&table->tb6_lock); 2377 } 2378 2379 static void rtmsg_to_fib6_config(struct net *net, 2380 struct in6_rtmsg *rtmsg, 2381 struct fib6_config *cfg) 2382 { 2383 memset(cfg, 0, sizeof(*cfg)); 2384 2385 cfg->fc_table = RT6_TABLE_MAIN; 2386 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 2387 cfg->fc_metric = rtmsg->rtmsg_metric; 2388 cfg->fc_expires = rtmsg->rtmsg_info; 2389 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 2390 cfg->fc_src_len = rtmsg->rtmsg_src_len; 2391 cfg->fc_flags = rtmsg->rtmsg_flags; 2392 2393 cfg->fc_nlinfo.nl_net = net; 2394 2395 cfg->fc_dst = rtmsg->rtmsg_dst; 2396 cfg->fc_src = rtmsg->rtmsg_src; 2397 cfg->fc_gateway = rtmsg->rtmsg_gateway; 2398 } 2399 2400 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 2401 { 2402 struct fib6_config cfg; 2403 struct in6_rtmsg rtmsg; 2404 int err; 2405 2406 switch (cmd) { 2407 case SIOCADDRT: /* Add a route */ 2408 case SIOCDELRT: /* Delete a route */ 2409 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 2410 return -EPERM; 2411 err = copy_from_user(&rtmsg, arg, 2412 sizeof(struct in6_rtmsg)); 2413 if (err) 2414 return -EFAULT; 2415 2416 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 2417 2418 rtnl_lock(); 2419 switch (cmd) { 2420 case SIOCADDRT: 2421 err = ip6_route_add(&cfg); 2422 break; 2423 case SIOCDELRT: 2424 err = ip6_route_del(&cfg); 2425 break; 2426 default: 2427 err = -EINVAL; 2428 } 2429 rtnl_unlock(); 2430 2431 return err; 2432 } 2433 2434 return -EINVAL; 2435 } 2436 2437 
/* 2438 * Drop the packet on the floor 2439 */ 2440 2441 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 2442 { 2443 int type; 2444 struct dst_entry *dst = skb_dst(skb); 2445 switch (ipstats_mib_noroutes) { 2446 case IPSTATS_MIB_INNOROUTES: 2447 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 2448 if (type == IPV6_ADDR_ANY) { 2449 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2450 IPSTATS_MIB_INADDRERRORS); 2451 break; 2452 } 2453 /* FALLTHROUGH */ 2454 case IPSTATS_MIB_OUTNOROUTES: 2455 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2456 ipstats_mib_noroutes); 2457 break; 2458 } 2459 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 2460 kfree_skb(skb); 2461 return 0; 2462 } 2463 2464 static int ip6_pkt_discard(struct sk_buff *skb) 2465 { 2466 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 2467 } 2468 2469 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) 2470 { 2471 skb->dev = skb_dst(skb)->dev; 2472 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2473 } 2474 2475 static int ip6_pkt_prohibit(struct sk_buff *skb) 2476 { 2477 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2478 } 2479 2480 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) 2481 { 2482 skb->dev = skb_dst(skb)->dev; 2483 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2484 } 2485 2486 /* 2487 * Allocate a dst for local (unicast / anycast) address. 2488 */ 2489 2490 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 2491 const struct in6_addr *addr, 2492 bool anycast) 2493 { 2494 struct net *net = dev_net(idev->dev); 2495 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 2496 DST_NOCOUNT); 2497 if (!rt) 2498 return ERR_PTR(-ENOMEM); 2499 2500 in6_dev_hold(idev); 2501 2502 rt->dst.flags |= DST_HOST; 2503 rt->dst.input = ip6_input; 2504 rt->dst.output = ip6_output; 2505 rt->rt6i_idev = idev; 2506 2507 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2508 if (anycast) 2509 rt->rt6i_flags |= RTF_ANYCAST; 2510 else 2511 rt->rt6i_flags |= RTF_LOCAL; 2512 2513 rt->rt6i_gateway = *addr; 2514 rt->rt6i_dst.addr = *addr; 2515 rt->rt6i_dst.plen = 128; 2516 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2517 rt->dst.flags |= DST_NOCACHE; 2518 2519 atomic_set(&rt->dst.__refcnt, 1); 2520 2521 return rt; 2522 } 2523 2524 int ip6_route_get_saddr(struct net *net, 2525 struct rt6_info *rt, 2526 const struct in6_addr *daddr, 2527 unsigned int prefs, 2528 struct in6_addr *saddr) 2529 { 2530 struct inet6_dev *idev = 2531 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; 2532 int err = 0; 2533 if (rt && rt->rt6i_prefsrc.plen) 2534 *saddr = rt->rt6i_prefsrc.addr; 2535 else 2536 err = ipv6_dev_get_saddr(net, idev ? 
idev->dev : NULL, 2537 daddr, prefs, saddr); 2538 return err; 2539 } 2540 2541 /* remove deleted ip from prefsrc entries */ 2542 struct arg_dev_net_ip { 2543 struct net_device *dev; 2544 struct net *net; 2545 struct in6_addr *addr; 2546 }; 2547 2548 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) 2549 { 2550 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; 2551 struct net *net = ((struct arg_dev_net_ip *)arg)->net; 2552 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; 2553 2554 if (((void *)rt->dst.dev == dev || !dev) && 2555 rt != net->ipv6.ip6_null_entry && 2556 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { 2557 /* remove prefsrc entry */ 2558 rt->rt6i_prefsrc.plen = 0; 2559 } 2560 return 0; 2561 } 2562 2563 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) 2564 { 2565 struct net *net = dev_net(ifp->idev->dev); 2566 struct arg_dev_net_ip adni = { 2567 .dev = ifp->idev->dev, 2568 .net = net, 2569 .addr = &ifp->addr, 2570 }; 2571 fib6_clean_all(net, fib6_remove_prefsrc, &adni); 2572 } 2573 2574 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) 2575 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) 2576 2577 /* Remove routers and update dst entries when gateway turn into host. */ 2578 static int fib6_clean_tohost(struct rt6_info *rt, void *arg) 2579 { 2580 struct in6_addr *gateway = (struct in6_addr *)arg; 2581 2582 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || 2583 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && 2584 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { 2585 return -1; 2586 } 2587 return 0; 2588 } 2589 2590 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) 2591 { 2592 fib6_clean_all(net, fib6_clean_tohost, gateway); 2593 } 2594 2595 struct arg_dev_net { 2596 struct net_device *dev; 2597 struct net *net; 2598 }; 2599 2600 static int fib6_ifdown(struct rt6_info *rt, void *arg) 2601 { 2602 const struct arg_dev_net *adn = arg; 2603 const struct net_device *dev = adn->dev; 2604 2605 if ((rt->dst.dev == dev || !dev) && 2606 rt != adn->net->ipv6.ip6_null_entry) 2607 return -1; 2608 2609 return 0; 2610 } 2611 2612 void rt6_ifdown(struct net *net, struct net_device *dev) 2613 { 2614 struct arg_dev_net adn = { 2615 .dev = dev, 2616 .net = net, 2617 }; 2618 2619 fib6_clean_all(net, fib6_ifdown, &adn); 2620 icmp6_clean_all(fib6_ifdown, &adn); 2621 if (dev) 2622 rt6_uncached_list_flush_dev(net, dev); 2623 } 2624 2625 struct rt6_mtu_change_arg { 2626 struct net_device *dev; 2627 unsigned int mtu; 2628 }; 2629 2630 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 2631 { 2632 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 2633 struct inet6_dev *idev; 2634 2635 /* In IPv6 pmtu discovery is not optional, 2636 so that RTAX_MTU lock cannot disable it. 2637 We still use this lock to block changes 2638 caused by addrconf/ndisc. 2639 */ 2640 2641 idev = __in6_dev_get(arg->dev); 2642 if (!idev) 2643 return 0; 2644 2645 /* For administrative MTU increase, there is no way to discover 2646 IPv6 PMTU increase, so PMTU increase should be updated here. 2647 Since RFC 1981 doesn't include administrative MTU increase 2648 update PMTU increase is a MUST. (i.e. 
jumbo frame) 2649 */ 2650 /* 2651 If new MTU is less than route PMTU, this new MTU will be the 2652 lowest MTU in the path, update the route PMTU to reflect PMTU 2653 decreases; if new MTU is greater than route PMTU, and the 2654 old MTU is the lowest MTU in the path, update the route PMTU 2655 to reflect the increase. In this case if the other nodes' MTU 2656 also have the lowest MTU, TOO BIG MESSAGE will be lead to 2657 PMTU discouvery. 2658 */ 2659 if (rt->dst.dev == arg->dev && 2660 !dst_metric_locked(&rt->dst, RTAX_MTU)) { 2661 if (rt->rt6i_flags & RTF_CACHE) { 2662 /* For RTF_CACHE with rt6i_pmtu == 0 2663 * (i.e. a redirected route), 2664 * the metrics of its rt->dst.from has already 2665 * been updated. 2666 */ 2667 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu) 2668 rt->rt6i_pmtu = arg->mtu; 2669 } else if (dst_mtu(&rt->dst) >= arg->mtu || 2670 (dst_mtu(&rt->dst) < arg->mtu && 2671 dst_mtu(&rt->dst) == idev->cnf.mtu6)) { 2672 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2673 } 2674 } 2675 return 0; 2676 } 2677 2678 void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 2679 { 2680 struct rt6_mtu_change_arg arg = { 2681 .dev = dev, 2682 .mtu = mtu, 2683 }; 2684 2685 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); 2686 } 2687 2688 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2689 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2690 [RTA_OIF] = { .type = NLA_U32 }, 2691 [RTA_IIF] = { .type = NLA_U32 }, 2692 [RTA_PRIORITY] = { .type = NLA_U32 }, 2693 [RTA_METRICS] = { .type = NLA_NESTED }, 2694 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2695 [RTA_PREF] = { .type = NLA_U8 }, 2696 [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, 2697 [RTA_ENCAP] = { .type = NLA_NESTED }, 2698 }; 2699 2700 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2701 struct fib6_config *cfg) 2702 { 2703 struct rtmsg *rtm; 2704 struct nlattr *tb[RTA_MAX+1]; 2705 unsigned int pref; 2706 int err; 2707 2708 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2709 if (err < 0) 2710 goto errout; 2711 2712 err = -EINVAL; 2713 rtm = nlmsg_data(nlh); 2714 memset(cfg, 0, sizeof(*cfg)); 2715 2716 cfg->fc_table = rtm->rtm_table; 2717 cfg->fc_dst_len = rtm->rtm_dst_len; 2718 cfg->fc_src_len = rtm->rtm_src_len; 2719 cfg->fc_flags = RTF_UP; 2720 cfg->fc_protocol = rtm->rtm_protocol; 2721 cfg->fc_type = rtm->rtm_type; 2722 2723 if (rtm->rtm_type == RTN_UNREACHABLE || 2724 rtm->rtm_type == RTN_BLACKHOLE || 2725 rtm->rtm_type == RTN_PROHIBIT || 2726 rtm->rtm_type == RTN_THROW) 2727 cfg->fc_flags |= RTF_REJECT; 2728 2729 if (rtm->rtm_type == RTN_LOCAL) 2730 cfg->fc_flags |= RTF_LOCAL; 2731 2732 if (rtm->rtm_flags & RTM_F_CLONED) 2733 cfg->fc_flags |= RTF_CACHE; 2734 2735 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; 2736 cfg->fc_nlinfo.nlh = nlh; 2737 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2738 2739 if (tb[RTA_GATEWAY]) { 2740 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); 2741 cfg->fc_flags |= RTF_GATEWAY; 2742 } 2743 2744 if (tb[RTA_DST]) { 2745 int plen = (rtm->rtm_dst_len + 7) >> 3; 2746 2747 if (nla_len(tb[RTA_DST]) < plen) 2748 goto errout; 2749 2750 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2751 } 2752 2753 if (tb[RTA_SRC]) { 2754 int plen = (rtm->rtm_src_len + 7) >> 3; 2755 2756 if (nla_len(tb[RTA_SRC]) < plen) 2757 goto errout; 2758 2759 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2760 } 2761 2762 if (tb[RTA_PREFSRC]) 2763 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); 2764 2765 if (tb[RTA_OIF]) 2766 cfg->fc_ifindex = 
nla_get_u32(tb[RTA_OIF]); 2767 2768 if (tb[RTA_PRIORITY]) 2769 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2770 2771 if (tb[RTA_METRICS]) { 2772 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2773 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2774 } 2775 2776 if (tb[RTA_TABLE]) 2777 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2778 2779 if (tb[RTA_MULTIPATH]) { 2780 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); 2781 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); 2782 } 2783 2784 if (tb[RTA_PREF]) { 2785 pref = nla_get_u8(tb[RTA_PREF]); 2786 if (pref != ICMPV6_ROUTER_PREF_LOW && 2787 pref != ICMPV6_ROUTER_PREF_HIGH) 2788 pref = ICMPV6_ROUTER_PREF_MEDIUM; 2789 cfg->fc_flags |= RTF_PREF(pref); 2790 } 2791 2792 if (tb[RTA_ENCAP]) 2793 cfg->fc_encap = tb[RTA_ENCAP]; 2794 2795 if (tb[RTA_ENCAP_TYPE]) 2796 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); 2797 2798 err = 0; 2799 errout: 2800 return err; 2801 } 2802 2803 struct rt6_nh { 2804 struct rt6_info *rt6_info; 2805 struct fib6_config r_cfg; 2806 struct mx6_config mxc; 2807 struct list_head next; 2808 }; 2809 2810 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) 2811 { 2812 struct rt6_nh *nh; 2813 2814 list_for_each_entry(nh, rt6_nh_list, next) { 2815 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n", 2816 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, 2817 nh->r_cfg.fc_ifindex); 2818 } 2819 } 2820 2821 static int ip6_route_info_append(struct list_head *rt6_nh_list, 2822 struct rt6_info *rt, struct fib6_config *r_cfg) 2823 { 2824 struct rt6_nh *nh; 2825 struct rt6_info *rtnh; 2826 int err = -EEXIST; 2827 2828 list_for_each_entry(nh, rt6_nh_list, next) { 2829 /* check if rt6_info already exists */ 2830 rtnh = nh->rt6_info; 2831 2832 if (rtnh->dst.dev == rt->dst.dev && 2833 rtnh->rt6i_idev == rt->rt6i_idev && 2834 ipv6_addr_equal(&rtnh->rt6i_gateway, 2835 &rt->rt6i_gateway)) 2836 return err; 2837 } 2838 2839 nh = kzalloc(sizeof(*nh), GFP_KERNEL); 2840 if (!nh) 2841 return -ENOMEM; 2842 nh->rt6_info = rt; 2843 err = ip6_convert_metrics(&nh->mxc, r_cfg); 2844 if (err) { 2845 kfree(nh); 2846 return err; 2847 } 2848 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); 2849 list_add_tail(&nh->next, rt6_nh_list); 2850 2851 return 0; 2852 } 2853 2854 static int ip6_route_multipath_add(struct fib6_config *cfg) 2855 { 2856 struct fib6_config r_cfg; 2857 struct rtnexthop *rtnh; 2858 struct rt6_info *rt; 2859 struct rt6_nh *err_nh; 2860 struct rt6_nh *nh, *nh_safe; 2861 int remaining; 2862 int attrlen; 2863 int err = 1; 2864 int nhn = 0; 2865 int replace = (cfg->fc_nlinfo.nlh && 2866 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); 2867 LIST_HEAD(rt6_nh_list); 2868 2869 remaining = cfg->fc_mp_len; 2870 rtnh = (struct rtnexthop *)cfg->fc_mp; 2871 2872 /* Parse a Multipath Entry and build a list (rt6_nh_list) of 2873 * rt6_info structs per nexthop 2874 */ 2875 while (rtnh_ok(rtnh, remaining)) { 2876 memcpy(&r_cfg, cfg, sizeof(*cfg)); 2877 if (rtnh->rtnh_ifindex) 2878 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 2879 2880 attrlen = rtnh_attrlen(rtnh); 2881 if (attrlen > 0) { 2882 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 2883 2884 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 2885 if (nla) { 2886 r_cfg.fc_gateway = nla_get_in6_addr(nla); 2887 r_cfg.fc_flags |= RTF_GATEWAY; 2888 } 2889 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); 2890 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 2891 if (nla) 2892 r_cfg.fc_encap_type = nla_get_u16(nla); 2893 } 2894 2895 err = 
ip6_route_info_create(&r_cfg, &rt); 2896 if (err) 2897 goto cleanup; 2898 2899 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); 2900 if (err) { 2901 dst_free(&rt->dst); 2902 goto cleanup; 2903 } 2904 2905 rtnh = rtnh_next(rtnh, &remaining); 2906 } 2907 2908 err_nh = NULL; 2909 list_for_each_entry(nh, &rt6_nh_list, next) { 2910 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); 2911 /* nh->rt6_info is used or freed at this point, reset to NULL*/ 2912 nh->rt6_info = NULL; 2913 if (err) { 2914 if (replace && nhn) 2915 ip6_print_replace_route_err(&rt6_nh_list); 2916 err_nh = nh; 2917 goto add_errout; 2918 } 2919 2920 /* Because each route is added like a single route we remove 2921 * these flags after the first nexthop: if there is a collision, 2922 * we have already failed to add the first nexthop: 2923 * fib6_add_rt2node() has rejected it; when replacing, old 2924 * nexthops have been replaced by first new, the rest should 2925 * be added to it. 2926 */ 2927 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | 2928 NLM_F_REPLACE); 2929 nhn++; 2930 } 2931 2932 goto cleanup; 2933 2934 add_errout: 2935 /* Delete routes that were already added */ 2936 list_for_each_entry(nh, &rt6_nh_list, next) { 2937 if (err_nh == nh) 2938 break; 2939 ip6_route_del(&nh->r_cfg); 2940 } 2941 2942 cleanup: 2943 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { 2944 if (nh->rt6_info) 2945 dst_free(&nh->rt6_info->dst); 2946 kfree(nh->mxc.mx); 2947 list_del(&nh->next); 2948 kfree(nh); 2949 } 2950 2951 return err; 2952 } 2953 2954 static int ip6_route_multipath_del(struct fib6_config *cfg) 2955 { 2956 struct fib6_config r_cfg; 2957 struct rtnexthop *rtnh; 2958 int remaining; 2959 int attrlen; 2960 int err = 1, last_err = 0; 2961 2962 remaining = cfg->fc_mp_len; 2963 rtnh = (struct rtnexthop *)cfg->fc_mp; 2964 2965 /* Parse a Multipath Entry */ 2966 while (rtnh_ok(rtnh, remaining)) { 2967 memcpy(&r_cfg, cfg, sizeof(*cfg)); 2968 if (rtnh->rtnh_ifindex) 2969 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 2970 2971 attrlen = rtnh_attrlen(rtnh); 2972 if (attrlen > 0) { 2973 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 2974 2975 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 2976 if (nla) { 2977 nla_memcpy(&r_cfg.fc_gateway, nla, 16); 2978 r_cfg.fc_flags |= RTF_GATEWAY; 2979 } 2980 } 2981 err = ip6_route_del(&r_cfg); 2982 if (err) 2983 last_err = err; 2984 2985 rtnh = rtnh_next(rtnh, &remaining); 2986 } 2987 2988 return last_err; 2989 } 2990 2991 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) 2992 { 2993 struct fib6_config cfg; 2994 int err; 2995 2996 err = rtm_to_fib6_config(skb, nlh, &cfg); 2997 if (err < 0) 2998 return err; 2999 3000 if (cfg.fc_mp) 3001 return ip6_route_multipath_del(&cfg); 3002 else 3003 return ip6_route_del(&cfg); 3004 } 3005 3006 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) 3007 { 3008 struct fib6_config cfg; 3009 int err; 3010 3011 err = rtm_to_fib6_config(skb, nlh, &cfg); 3012 if (err < 0) 3013 return err; 3014 3015 if (cfg.fc_mp) 3016 return ip6_route_multipath_add(&cfg); 3017 else 3018 return ip6_route_add(&cfg); 3019 } 3020 3021 static inline size_t rt6_nlmsg_size(struct rt6_info *rt) 3022 { 3023 return NLMSG_ALIGN(sizeof(struct rtmsg)) 3024 + nla_total_size(16) /* RTA_SRC */ 3025 + nla_total_size(16) /* RTA_DST */ 3026 + nla_total_size(16) /* RTA_GATEWAY */ 3027 + nla_total_size(16) /* RTA_PREFSRC */ 3028 + nla_total_size(4) /* RTA_TABLE */ 3029 + nla_total_size(4) /* RTA_IIF */ 3030 + nla_total_size(4) /* RTA_OIF */ 3031 + 
nla_total_size(4) /* RTA_PRIORITY */ 3032 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 3033 + nla_total_size(sizeof(struct rta_cacheinfo)) 3034 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ 3035 + nla_total_size(1) /* RTA_PREF */ 3036 + lwtunnel_get_encap_size(rt->dst.lwtstate); 3037 } 3038 3039 static int rt6_fill_node(struct net *net, 3040 struct sk_buff *skb, struct rt6_info *rt, 3041 struct in6_addr *dst, struct in6_addr *src, 3042 int iif, int type, u32 portid, u32 seq, 3043 int prefix, int nowait, unsigned int flags) 3044 { 3045 u32 metrics[RTAX_MAX]; 3046 struct rtmsg *rtm; 3047 struct nlmsghdr *nlh; 3048 long expires; 3049 u32 table; 3050 3051 if (prefix) { /* user wants prefix routes only */ 3052 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 3053 /* success since this is not a prefix route */ 3054 return 1; 3055 } 3056 } 3057 3058 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); 3059 if (!nlh) 3060 return -EMSGSIZE; 3061 3062 rtm = nlmsg_data(nlh); 3063 rtm->rtm_family = AF_INET6; 3064 rtm->rtm_dst_len = rt->rt6i_dst.plen; 3065 rtm->rtm_src_len = rt->rt6i_src.plen; 3066 rtm->rtm_tos = 0; 3067 if (rt->rt6i_table) 3068 table = rt->rt6i_table->tb6_id; 3069 else 3070 table = RT6_TABLE_UNSPEC; 3071 rtm->rtm_table = table; 3072 if (nla_put_u32(skb, RTA_TABLE, table)) 3073 goto nla_put_failure; 3074 if (rt->rt6i_flags & RTF_REJECT) { 3075 switch (rt->dst.error) { 3076 case -EINVAL: 3077 rtm->rtm_type = RTN_BLACKHOLE; 3078 break; 3079 case -EACCES: 3080 rtm->rtm_type = RTN_PROHIBIT; 3081 break; 3082 case -EAGAIN: 3083 rtm->rtm_type = RTN_THROW; 3084 break; 3085 default: 3086 rtm->rtm_type = RTN_UNREACHABLE; 3087 break; 3088 } 3089 } 3090 else if (rt->rt6i_flags & RTF_LOCAL) 3091 rtm->rtm_type = RTN_LOCAL; 3092 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 3093 rtm->rtm_type = RTN_LOCAL; 3094 else 3095 rtm->rtm_type = RTN_UNICAST; 3096 rtm->rtm_flags = 0; 3097 if (!netif_carrier_ok(rt->dst.dev)) { 3098 rtm->rtm_flags |= RTNH_F_LINKDOWN; 3099 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) 3100 rtm->rtm_flags |= RTNH_F_DEAD; 3101 } 3102 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 3103 rtm->rtm_protocol = rt->rt6i_protocol; 3104 if (rt->rt6i_flags & RTF_DYNAMIC) 3105 rtm->rtm_protocol = RTPROT_REDIRECT; 3106 else if (rt->rt6i_flags & RTF_ADDRCONF) { 3107 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) 3108 rtm->rtm_protocol = RTPROT_RA; 3109 else 3110 rtm->rtm_protocol = RTPROT_KERNEL; 3111 } 3112 3113 if (rt->rt6i_flags & RTF_CACHE) 3114 rtm->rtm_flags |= RTM_F_CLONED; 3115 3116 if (dst) { 3117 if (nla_put_in6_addr(skb, RTA_DST, dst)) 3118 goto nla_put_failure; 3119 rtm->rtm_dst_len = 128; 3120 } else if (rtm->rtm_dst_len) 3121 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) 3122 goto nla_put_failure; 3123 #ifdef CONFIG_IPV6_SUBTREES 3124 if (src) { 3125 if (nla_put_in6_addr(skb, RTA_SRC, src)) 3126 goto nla_put_failure; 3127 rtm->rtm_src_len = 128; 3128 } else if (rtm->rtm_src_len && 3129 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) 3130 goto nla_put_failure; 3131 #endif 3132 if (iif) { 3133 #ifdef CONFIG_IPV6_MROUTE 3134 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 3135 int err = ip6mr_get_route(net, skb, rtm, nowait); 3136 if (err <= 0) { 3137 if (!nowait) { 3138 if (err == 0) 3139 return 0; 3140 goto nla_put_failure; 3141 } else { 3142 if (err == -EMSGSIZE) 3143 goto nla_put_failure; 3144 } 3145 } 3146 } else 3147 #endif 3148 if (nla_put_u32(skb, RTA_IIF, iif)) 3149 goto nla_put_failure; 3150 } else if (dst) { 3151 struct in6_addr 
saddr_buf; 3152 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && 3153 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 3154 goto nla_put_failure; 3155 } 3156 3157 if (rt->rt6i_prefsrc.plen) { 3158 struct in6_addr saddr_buf; 3159 saddr_buf = rt->rt6i_prefsrc.addr; 3160 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 3161 goto nla_put_failure; 3162 } 3163 3164 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); 3165 if (rt->rt6i_pmtu) 3166 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; 3167 if (rtnetlink_put_metrics(skb, metrics) < 0) 3168 goto nla_put_failure; 3169 3170 if (rt->rt6i_flags & RTF_GATEWAY) { 3171 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) 3172 goto nla_put_failure; 3173 } 3174 3175 if (rt->dst.dev && 3176 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 3177 goto nla_put_failure; 3178 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 3179 goto nla_put_failure; 3180 3181 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; 3182 3183 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 3184 goto nla_put_failure; 3185 3186 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) 3187 goto nla_put_failure; 3188 3189 lwtunnel_fill_encap(skb, rt->dst.lwtstate); 3190 3191 nlmsg_end(skb, nlh); 3192 return 0; 3193 3194 nla_put_failure: 3195 nlmsg_cancel(skb, nlh); 3196 return -EMSGSIZE; 3197 } 3198 3199 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 3200 { 3201 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 3202 int prefix; 3203 3204 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 3205 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 3206 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 3207 } else 3208 prefix = 0; 3209 3210 return rt6_fill_node(arg->net, 3211 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 3212 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, 3213 prefix, 0, NLM_F_MULTI); 3214 } 3215 3216 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) 3217 { 3218 struct net *net = sock_net(in_skb->sk); 3219 struct nlattr *tb[RTA_MAX+1]; 3220 struct rt6_info *rt; 3221 struct sk_buff *skb; 3222 struct rtmsg *rtm; 3223 struct flowi6 fl6; 3224 int err, iif = 0, oif = 0; 3225 3226 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 3227 if (err < 0) 3228 goto errout; 3229 3230 err = -EINVAL; 3231 memset(&fl6, 0, sizeof(fl6)); 3232 3233 if (tb[RTA_SRC]) { 3234 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 3235 goto errout; 3236 3237 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 3238 } 3239 3240 if (tb[RTA_DST]) { 3241 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 3242 goto errout; 3243 3244 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 3245 } 3246 3247 if (tb[RTA_IIF]) 3248 iif = nla_get_u32(tb[RTA_IIF]); 3249 3250 if (tb[RTA_OIF]) 3251 oif = nla_get_u32(tb[RTA_OIF]); 3252 3253 if (tb[RTA_MARK]) 3254 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); 3255 3256 if (iif) { 3257 struct net_device *dev; 3258 int flags = 0; 3259 3260 dev = __dev_get_by_index(net, iif); 3261 if (!dev) { 3262 err = -ENODEV; 3263 goto errout; 3264 } 3265 3266 fl6.flowi6_iif = iif; 3267 3268 if (!ipv6_addr_any(&fl6.saddr)) 3269 flags |= RT6_LOOKUP_F_HAS_SADDR; 3270 3271 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6, 3272 flags); 3273 } else { 3274 fl6.flowi6_oif = oif; 3275 3276 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); 3277 } 3278 3279 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 3280 if (!skb) { 
3281 ip6_rt_put(rt); 3282 err = -ENOBUFS; 3283 goto errout; 3284 } 3285 3286 /* Reserve room for dummy headers, this skb can pass 3287 through good chunk of routing engine. 3288 */ 3289 skb_reset_mac_header(skb); 3290 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 3291 3292 skb_dst_set(skb, &rt->dst); 3293 3294 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 3295 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 3296 nlh->nlmsg_seq, 0, 0, 0); 3297 if (err < 0) { 3298 kfree_skb(skb); 3299 goto errout; 3300 } 3301 3302 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 3303 errout: 3304 return err; 3305 } 3306 3307 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, 3308 unsigned int nlm_flags) 3309 { 3310 struct sk_buff *skb; 3311 struct net *net = info->nl_net; 3312 u32 seq; 3313 int err; 3314 3315 err = -ENOBUFS; 3316 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 3317 3318 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); 3319 if (!skb) 3320 goto errout; 3321 3322 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 3323 event, info->portid, seq, 0, 0, nlm_flags); 3324 if (err < 0) { 3325 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 3326 WARN_ON(err == -EMSGSIZE); 3327 kfree_skb(skb); 3328 goto errout; 3329 } 3330 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 3331 info->nlh, gfp_any()); 3332 return; 3333 errout: 3334 if (err < 0) 3335 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 3336 } 3337 3338 static int ip6_route_dev_notify(struct notifier_block *this, 3339 unsigned long event, void *ptr) 3340 { 3341 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 3342 struct net *net = dev_net(dev); 3343 3344 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 3345 net->ipv6.ip6_null_entry->dst.dev = dev; 3346 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 3347 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3348 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 3349 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 3350 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 3351 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 3352 #endif 3353 } 3354 3355 return NOTIFY_OK; 3356 } 3357 3358 /* 3359 * /proc 3360 */ 3361 3362 #ifdef CONFIG_PROC_FS 3363 3364 static const struct file_operations ipv6_route_proc_fops = { 3365 .owner = THIS_MODULE, 3366 .open = ipv6_route_open, 3367 .read = seq_read, 3368 .llseek = seq_lseek, 3369 .release = seq_release_net, 3370 }; 3371 3372 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 3373 { 3374 struct net *net = (struct net *)seq->private; 3375 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 3376 net->ipv6.rt6_stats->fib_nodes, 3377 net->ipv6.rt6_stats->fib_route_nodes, 3378 net->ipv6.rt6_stats->fib_rt_alloc, 3379 net->ipv6.rt6_stats->fib_rt_entries, 3380 net->ipv6.rt6_stats->fib_rt_cache, 3381 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 3382 net->ipv6.rt6_stats->fib_discarded_routes); 3383 3384 return 0; 3385 } 3386 3387 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 3388 { 3389 return single_open_net(inode, file, rt6_stats_seq_show); 3390 } 3391 3392 static const struct file_operations rt6_stats_seq_fops = { 3393 .owner = THIS_MODULE, 3394 .open = rt6_stats_seq_open, 3395 .read = seq_read, 3396 .llseek = seq_lseek, 3397 .release = single_release_net, 3398 }; 3399 #endif /* CONFIG_PROC_FS */ 3400 3401 #ifdef CONFIG_SYSCTL 3402 3403 static 3404 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, 3405 void __user *buffer, size_t *lenp, 
loff_t *ppos) 3406 { 3407 struct net *net; 3408 int delay; 3409 if (!write) 3410 return -EINVAL; 3411 3412 net = (struct net *)ctl->extra1; 3413 delay = net->ipv6.sysctl.flush_delay; 3414 proc_dointvec(ctl, write, buffer, lenp, ppos); 3415 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); 3416 return 0; 3417 } 3418 3419 struct ctl_table ipv6_route_table_template[] = { 3420 { 3421 .procname = "flush", 3422 .data = &init_net.ipv6.sysctl.flush_delay, 3423 .maxlen = sizeof(int), 3424 .mode = 0200, 3425 .proc_handler = ipv6_sysctl_rtcache_flush 3426 }, 3427 { 3428 .procname = "gc_thresh", 3429 .data = &ip6_dst_ops_template.gc_thresh, 3430 .maxlen = sizeof(int), 3431 .mode = 0644, 3432 .proc_handler = proc_dointvec, 3433 }, 3434 { 3435 .procname = "max_size", 3436 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 3437 .maxlen = sizeof(int), 3438 .mode = 0644, 3439 .proc_handler = proc_dointvec, 3440 }, 3441 { 3442 .procname = "gc_min_interval", 3443 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 3444 .maxlen = sizeof(int), 3445 .mode = 0644, 3446 .proc_handler = proc_dointvec_jiffies, 3447 }, 3448 { 3449 .procname = "gc_timeout", 3450 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 3451 .maxlen = sizeof(int), 3452 .mode = 0644, 3453 .proc_handler = proc_dointvec_jiffies, 3454 }, 3455 { 3456 .procname = "gc_interval", 3457 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 3458 .maxlen = sizeof(int), 3459 .mode = 0644, 3460 .proc_handler = proc_dointvec_jiffies, 3461 }, 3462 { 3463 .procname = "gc_elasticity", 3464 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 3465 .maxlen = sizeof(int), 3466 .mode = 0644, 3467 .proc_handler = proc_dointvec, 3468 }, 3469 { 3470 .procname = "mtu_expires", 3471 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 3472 .maxlen = sizeof(int), 3473 .mode = 0644, 3474 .proc_handler = proc_dointvec_jiffies, 3475 }, 3476 { 3477 .procname = "min_adv_mss", 3478 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 3479 .maxlen = sizeof(int), 3480 .mode = 0644, 3481 .proc_handler = proc_dointvec, 3482 }, 3483 { 3484 .procname = "gc_min_interval_ms", 3485 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 3486 .maxlen = sizeof(int), 3487 .mode = 0644, 3488 .proc_handler = proc_dointvec_ms_jiffies, 3489 }, 3490 { } 3491 }; 3492 3493 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 3494 { 3495 struct ctl_table *table; 3496 3497 table = kmemdup(ipv6_route_table_template, 3498 sizeof(ipv6_route_table_template), 3499 GFP_KERNEL); 3500 3501 if (table) { 3502 table[0].data = &net->ipv6.sysctl.flush_delay; 3503 table[0].extra1 = net; 3504 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 3505 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 3506 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 3507 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 3508 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 3509 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 3510 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 3511 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 3512 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 3513 3514 /* Don't export sysctls to unprivileged users */ 3515 if (net->user_ns != &init_user_ns) 3516 table[0].procname = NULL; 3517 } 3518 3519 return table; 3520 } 3521 #endif 3522 3523 static int __net_init ip6_route_net_init(struct net *net) 3524 { 3525 int ret = -ENOMEM; 3526 3527 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 3528 sizeof(net->ipv6.ip6_dst_ops)); 3529 3530 
if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 3531 goto out_ip6_dst_ops; 3532 3533 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 3534 sizeof(*net->ipv6.ip6_null_entry), 3535 GFP_KERNEL); 3536 if (!net->ipv6.ip6_null_entry) 3537 goto out_ip6_dst_entries; 3538 net->ipv6.ip6_null_entry->dst.path = 3539 (struct dst_entry *)net->ipv6.ip6_null_entry; 3540 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 3541 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 3542 ip6_template_metrics, true); 3543 3544 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3545 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 3546 sizeof(*net->ipv6.ip6_prohibit_entry), 3547 GFP_KERNEL); 3548 if (!net->ipv6.ip6_prohibit_entry) 3549 goto out_ip6_null_entry; 3550 net->ipv6.ip6_prohibit_entry->dst.path = 3551 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 3552 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 3553 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 3554 ip6_template_metrics, true); 3555 3556 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 3557 sizeof(*net->ipv6.ip6_blk_hole_entry), 3558 GFP_KERNEL); 3559 if (!net->ipv6.ip6_blk_hole_entry) 3560 goto out_ip6_prohibit_entry; 3561 net->ipv6.ip6_blk_hole_entry->dst.path = 3562 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 3563 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 3564 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 3565 ip6_template_metrics, true); 3566 #endif 3567 3568 net->ipv6.sysctl.flush_delay = 0; 3569 net->ipv6.sysctl.ip6_rt_max_size = 4096; 3570 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 3571 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 3572 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 3573 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 3574 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 3575 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 3576 3577 net->ipv6.ip6_rt_gc_expire = 30*HZ; 3578 3579 ret = 0; 3580 out: 3581 return ret; 3582 3583 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3584 out_ip6_prohibit_entry: 3585 kfree(net->ipv6.ip6_prohibit_entry); 3586 out_ip6_null_entry: 3587 kfree(net->ipv6.ip6_null_entry); 3588 #endif 3589 out_ip6_dst_entries: 3590 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 3591 out_ip6_dst_ops: 3592 goto out; 3593 } 3594 3595 static void __net_exit ip6_route_net_exit(struct net *net) 3596 { 3597 kfree(net->ipv6.ip6_null_entry); 3598 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3599 kfree(net->ipv6.ip6_prohibit_entry); 3600 kfree(net->ipv6.ip6_blk_hole_entry); 3601 #endif 3602 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 3603 } 3604 3605 static int __net_init ip6_route_net_init_late(struct net *net) 3606 { 3607 #ifdef CONFIG_PROC_FS 3608 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); 3609 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); 3610 #endif 3611 return 0; 3612 } 3613 3614 static void __net_exit ip6_route_net_exit_late(struct net *net) 3615 { 3616 #ifdef CONFIG_PROC_FS 3617 remove_proc_entry("ipv6_route", net->proc_net); 3618 remove_proc_entry("rt6_stats", net->proc_net); 3619 #endif 3620 } 3621 3622 static struct pernet_operations ip6_route_net_ops = { 3623 .init = ip6_route_net_init, 3624 .exit = ip6_route_net_exit, 3625 }; 3626 3627 static int __net_init ipv6_inetpeer_init(struct net *net) 3628 { 3629 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 3630 3631 if (!bp) 3632 return -ENOMEM; 3633 inet_peer_base_init(bp); 3634 net->ipv6.peers = bp; 3635 return 
0; 3636 } 3637 3638 static void __net_exit ipv6_inetpeer_exit(struct net *net) 3639 { 3640 struct inet_peer_base *bp = net->ipv6.peers; 3641 3642 net->ipv6.peers = NULL; 3643 inetpeer_invalidate_tree(bp); 3644 kfree(bp); 3645 } 3646 3647 static struct pernet_operations ipv6_inetpeer_ops = { 3648 .init = ipv6_inetpeer_init, 3649 .exit = ipv6_inetpeer_exit, 3650 }; 3651 3652 static struct pernet_operations ip6_route_net_late_ops = { 3653 .init = ip6_route_net_init_late, 3654 .exit = ip6_route_net_exit_late, 3655 }; 3656 3657 static struct notifier_block ip6_route_dev_notifier = { 3658 .notifier_call = ip6_route_dev_notify, 3659 .priority = 0, 3660 }; 3661 3662 int __init ip6_route_init(void) 3663 { 3664 int ret; 3665 int cpu; 3666 3667 ret = -ENOMEM; 3668 ip6_dst_ops_template.kmem_cachep = 3669 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 3670 SLAB_HWCACHE_ALIGN, NULL); 3671 if (!ip6_dst_ops_template.kmem_cachep) 3672 goto out; 3673 3674 ret = dst_entries_init(&ip6_dst_blackhole_ops); 3675 if (ret) 3676 goto out_kmem_cache; 3677 3678 ret = register_pernet_subsys(&ipv6_inetpeer_ops); 3679 if (ret) 3680 goto out_dst_entries; 3681 3682 ret = register_pernet_subsys(&ip6_route_net_ops); 3683 if (ret) 3684 goto out_register_inetpeer; 3685 3686 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 3687 3688 /* Registering of the loopback is done before this portion of code, 3689 * the loopback reference in rt6_info will not be taken, do it 3690 * manually for init_net */ 3691 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 3692 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3693 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3694 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 3695 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3696 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 3697 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3698 #endif 3699 ret = fib6_init(); 3700 if (ret) 3701 goto out_register_subsys; 3702 3703 ret = xfrm6_init(); 3704 if (ret) 3705 goto out_fib6_init; 3706 3707 ret = fib6_rules_init(); 3708 if (ret) 3709 goto xfrm6_init; 3710 3711 ret = register_pernet_subsys(&ip6_route_net_late_ops); 3712 if (ret) 3713 goto fib6_rules_init; 3714 3715 ret = -ENOBUFS; 3716 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || 3717 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || 3718 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) 3719 goto out_register_late_subsys; 3720 3721 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 3722 if (ret) 3723 goto out_register_late_subsys; 3724 3725 for_each_possible_cpu(cpu) { 3726 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); 3727 3728 INIT_LIST_HEAD(&ul->head); 3729 spin_lock_init(&ul->lock); 3730 } 3731 3732 out: 3733 return ret; 3734 3735 out_register_late_subsys: 3736 unregister_pernet_subsys(&ip6_route_net_late_ops); 3737 fib6_rules_init: 3738 fib6_rules_cleanup(); 3739 xfrm6_init: 3740 xfrm6_fini(); 3741 out_fib6_init: 3742 fib6_gc_cleanup(); 3743 out_register_subsys: 3744 unregister_pernet_subsys(&ip6_route_net_ops); 3745 out_register_inetpeer: 3746 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3747 out_dst_entries: 3748 dst_entries_destroy(&ip6_dst_blackhole_ops); 3749 out_kmem_cache: 3750 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3751 goto out; 3752 } 3753 
3754 void ip6_route_cleanup(void) 3755 { 3756 unregister_netdevice_notifier(&ip6_route_dev_notifier); 3757 unregister_pernet_subsys(&ip6_route_net_late_ops); 3758 fib6_rules_cleanup(); 3759 xfrm6_fini(); 3760 fib6_gc_cleanup(); 3761 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3762 unregister_pernet_subsys(&ip6_route_net_ops); 3763 dst_entries_destroy(&ip6_dst_blackhole_ops); 3764 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3765 } 3766