/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque	<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
static void rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif

struct uncached_list {
	spinlock_t lock;
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);

static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}

static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}

static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (rt->rt6i_flags & RTF_PCPU)
		return rt6_pcpu_cow_metrics(rt);
	else if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);
}

static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

static struct dst_ops ip6_dst_ops_template = {
	.family = AF_INET6,
	.gc = ip6_dst_gc,
	.gc_thresh = 1024,
	.check = ip6_dst_check,
	.default_advmss = ip6_default_advmss,
	.mtu = ip6_mtu,
	.cow_metrics = ipv6_cow_metrics,
	.destroy = ip6_dst_destroy,
	.ifdown = ip6_dst_ifdown,
	.negative_advice = ip6_negative_advice,
	.link_failure = ip6_link_failure,
	.update_pmtu = ip6_rt_update_pmtu,
	.redirect = rt6_do_redirect,
	.local_out = __ip6_local_out,
	.neigh_lookup = ip6_neigh_lookup,
};

static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family = AF_INET6,
	.destroy = ip6_dst_destroy,
	.check = ip6_dst_check,
	.mtu = ip6_blackhole_mtu,
	.default_advmss = ip6_default_advmss,
	.update_pmtu = ip6_rt_blackhole_update_pmtu,
	.redirect = ip6_rt_blackhole_redirect,
	.cow_metrics = dst_cow_metrics_generic,
	.neigh_lookup = ip6_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -ENETUNREACH,
		.input = ip6_pkt_discard,
		.output = ip6_pkt_discard_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EACCES,
		.input = ip6_pkt_prohibit,
		.output = ip6_pkt_prohibit_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EINVAL,
		.input = dst_discard,
		.output = dst_discard_sk,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}

/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}

static struct rt6_info *ip6_dst_alloc(struct net *net,
				      struct net_device *dev,
				      int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p = NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
	}
	return false;
}

/* Multipath route selection:
 * Hash-based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int val = fl6->flowi6_proto;

	val ^= ipv6_addr_hash(&fl6->daddr);
	val ^= ipv6_addr_hash(&fl6->saddr);

	/* Works only if this is not encapsulated */
	switch (fl6->flowi6_proto) {
	case IPPROTO_UDP:
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
		val ^= (__force u16)fl6->fl6_sport;
		val ^= (__force u16)fl6->fl6_dport;
		break;

	case IPPROTO_ICMPV6:
		val ^= (__force u16)fl6->fl6_icmp_type;
		val ^= (__force u16)fl6->fl6_icmp_code;
		break;
	}
	/* RFC 6438 recommends using the flowlabel */
	val ^= (__force u32)fl6->flowlabel;

	/* Perhaps this function needs tuning? */
	val = val ^ (val >> 7) ^ (val >> 12);
	return val % candidate_count;
}

static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_chosen;

	route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route if route_chosen == 0
	 * (the siblings list does not include ourself)
	 */
	if (route_chosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_chosen--;
			if (route_chosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}

/*
 *	Route lookup. Any table->tb6_lock is implied.
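 *
 *	A note on the helper below: rt6_device_match() walks the fib6
 *	leaf chain.  When an output interface (oif) is given it prefers
 *	the route whose device matches, keeping a loopback route for the
 *	same address only as a weaker, local candidate; without an oif it
 *	falls back to matching the source address via ipv6_chk_addr().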
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
	dev_put(work->dev);
	kfree(work);
}

static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
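	 *
	 * A structural note, inferred from the code rather than any
	 * spec: rt6_probe() runs under rcu_read_lock_bh(), so the
	 * actual neighbour solicitation is handed off to process
	 * context via struct __rt6_probe_work instead of being sent
	 * from atomic context.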
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;

	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}

static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}

#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif

static struct fib6_node *fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;

	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr,
			    int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is freed.  In any case, if the caller does not hold a
 * reference, the route may be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}

static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}

static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}

/* It should be called with read_lock_bh(&tb6_lock) acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}

static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}

static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}

	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
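			 *
			 * Dropping the lock is safe here because we
			 * hold a reference on rt (taken just below)
			 * across the unlock, so it cannot be freed
			 * under us while the pcpu copy is created.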
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		return pcpu_rt;
	}
}

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}

void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
				   struct flowi6 *fl6)
{
	int flags = 0;
	bool any_src;

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL(ip6_route_output);

struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_sk;

		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}

/*
 *	Destination cache support functions
 */

static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	if (rt->dst.from &&
	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}

static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}

static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return &rt->dst;
	else
		return NULL;
}

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}

static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}

static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (rt6->rt6i_flags & RTF_CACHE) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
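			 *
			 * That is, the stale socket dst is not fixed up
			 * in place: the next ip6_dst_check() fails its
			 * cookie test and the socket re-looks up the
			 * route, now finding this RTF_CACHE clone with
			 * the reduced rt6i_pmtu.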
			 */
			ip6_ins_rt(nrt6);
		}
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
}

void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);

void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from an appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
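	 *
	 * Concretely, the loop below skips expired routes, non-gateway
	 * routes, routes on a different interface and routes whose
	 * gateway differs from the redirecting router before it accepts
	 * a match.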
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
}

static struct dst_entry *ip6_route_redirect(struct net *net,
					    const struct flowi6 *fl6,
					    const struct in6_addr *gateway)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip6rd_flowi rdfl;

	rdfl.fl6 = *fl6;
	rdfl.gateway = *gateway;

	return fib6_rule_lookup(net, &rdfl.fl6,
				flags, __ip6_route_redirect);
}

void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);

void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}

void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);

static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	return min_t(unsigned int, mtu, IP6_MAX_MTU);
}

static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);

struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}

int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}

static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}

static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
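	/* ip6_rt_gc_expire is bumped on every forced pass, reset to half
	 * of ip6_rt_gc_timeout once the table drops below gc_thresh, and
	 * decays by a 1/2^elasticity factor at "out" below.
	 */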
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout >> 1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire >> rt_elasticity;
	return entries > rt_max_size;
}

static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (unlikely(type > RTAX_MAX))
			goto err;

		if (type == RTAX_CC_ALGO) {
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				goto err;
		} else {
			val = nla_get_u32(nla);
		}
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			goto err;

		mp[type - 1] = val;
		__set_bit(type - 1, mxc->mx_valid);
	}

	if (ecn_ca) {
		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
	}

	mxc->mx = mp;
	return 0;
err:
	kfree(mp);
	return -EINVAL;
}

int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here; they would
	 * result in kernel looping, so promote them to reject routes.
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using non-link-local
			   addresses as nexthop address.
			   Otherwise, the router will not be able to send
			   redirects.  That is very good, but in some (rare!)
			   circumstances (SIT, PtP, NBMA NOARP links) it is
			   handy to allow some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	*rt_ret = rt;

	return 0;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	*rt_ret = NULL;

	return err;
}

int ip6_route_add(struct fib6_config *cfg)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt = NULL;
	int err;

	err = ip6_route_info_create(cfg, &rt);
	if (err)
		goto out;

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);

	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_free(&rt->dst);

	return err;
}

static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry ||
	    rt->dst.flags & DST_NOCACHE) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}

int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};

	return __ip6_del_rt(rt, &info);
}

static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}

static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 2186 NEIGH_UPDATE_F_ISROUTER)) 2187 ); 2188 2189 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); 2190 if (!nrt) 2191 goto out; 2192 2193 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 2194 if (on_link) 2195 nrt->rt6i_flags &= ~RTF_GATEWAY; 2196 2197 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 2198 2199 if (ip6_ins_rt(nrt)) 2200 goto out; 2201 2202 netevent.old = &rt->dst; 2203 netevent.new = &nrt->dst; 2204 netevent.daddr = &msg->dest; 2205 netevent.neigh = neigh; 2206 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 2207 2208 if (rt->rt6i_flags & RTF_CACHE) { 2209 rt = (struct rt6_info *) dst_clone(&rt->dst); 2210 ip6_del_rt(rt); 2211 } 2212 2213 out: 2214 neigh_release(neigh); 2215 } 2216 2217 /* 2218 * Misc support functions 2219 */ 2220 2221 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) 2222 { 2223 BUG_ON(from->dst.from); 2224 2225 rt->rt6i_flags &= ~RTF_EXPIRES; 2226 dst_hold(&from->dst); 2227 rt->dst.from = &from->dst; 2228 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); 2229 } 2230 2231 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) 2232 { 2233 rt->dst.input = ort->dst.input; 2234 rt->dst.output = ort->dst.output; 2235 rt->rt6i_dst = ort->rt6i_dst; 2236 rt->dst.error = ort->dst.error; 2237 rt->rt6i_idev = ort->rt6i_idev; 2238 if (rt->rt6i_idev) 2239 in6_dev_hold(rt->rt6i_idev); 2240 rt->dst.lastuse = jiffies; 2241 rt->rt6i_gateway = ort->rt6i_gateway; 2242 rt->rt6i_flags = ort->rt6i_flags; 2243 rt6_set_from(rt, ort); 2244 rt->rt6i_metric = ort->rt6i_metric; 2245 #ifdef CONFIG_IPV6_SUBTREES 2246 rt->rt6i_src = ort->rt6i_src; 2247 #endif 2248 rt->rt6i_prefsrc = ort->rt6i_prefsrc; 2249 rt->rt6i_table = ort->rt6i_table; 2250 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); 2251 } 2252 2253 #ifdef CONFIG_IPV6_ROUTE_INFO 2254 static struct rt6_info *rt6_get_route_info(struct net *net, 2255 const struct in6_addr *prefix, int prefixlen, 2256 const struct in6_addr *gwaddr, int ifindex) 2257 { 2258 struct fib6_node *fn; 2259 struct rt6_info *rt = NULL; 2260 struct fib6_table *table; 2261 2262 table = fib6_get_table(net, RT6_TABLE_INFO); 2263 if (!table) 2264 return NULL; 2265 2266 read_lock_bh(&table->tb6_lock); 2267 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0); 2268 if (!fn) 2269 goto out; 2270 2271 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 2272 if (rt->dst.dev->ifindex != ifindex) 2273 continue; 2274 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 2275 continue; 2276 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 2277 continue; 2278 dst_hold(&rt->dst); 2279 break; 2280 } 2281 out: 2282 read_unlock_bh(&table->tb6_lock); 2283 return rt; 2284 } 2285 2286 static struct rt6_info *rt6_add_route_info(struct net *net, 2287 const struct in6_addr *prefix, int prefixlen, 2288 const struct in6_addr *gwaddr, int ifindex, 2289 unsigned int pref) 2290 { 2291 struct fib6_config cfg = { 2292 .fc_table = RT6_TABLE_INFO, 2293 .fc_metric = IP6_RT_PRIO_USER, 2294 .fc_ifindex = ifindex, 2295 .fc_dst_len = prefixlen, 2296 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 2297 RTF_UP | RTF_PREF(pref), 2298 .fc_nlinfo.portid = 0, 2299 .fc_nlinfo.nlh = NULL, 2300 .fc_nlinfo.nl_net = net, 2301 }; 2302 2303 cfg.fc_dst = *prefix; 2304 cfg.fc_gateway = *gwaddr; 2305 2306 /* We should treat it as a default route if prefix length is 0. 
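A zero-length prefix (::/0) matches every destination, which is what RTF_DEFAULT denotes.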
*/ 2307 if (!prefixlen) 2308 cfg.fc_flags |= RTF_DEFAULT; 2309 2310 ip6_route_add(&cfg); 2311 2312 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); 2313 } 2314 #endif 2315 2316 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) 2317 { 2318 struct rt6_info *rt; 2319 struct fib6_table *table; 2320 2321 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 2322 if (!table) 2323 return NULL; 2324 2325 read_lock_bh(&table->tb6_lock); 2326 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 2327 if (dev == rt->dst.dev && 2328 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 2329 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 2330 break; 2331 } 2332 if (rt) 2333 dst_hold(&rt->dst); 2334 read_unlock_bh(&table->tb6_lock); 2335 return rt; 2336 } 2337 2338 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, 2339 struct net_device *dev, 2340 unsigned int pref) 2341 { 2342 struct fib6_config cfg = { 2343 .fc_table = RT6_TABLE_DFLT, 2344 .fc_metric = IP6_RT_PRIO_USER, 2345 .fc_ifindex = dev->ifindex, 2346 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 2347 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 2348 .fc_nlinfo.portid = 0, 2349 .fc_nlinfo.nlh = NULL, 2350 .fc_nlinfo.nl_net = dev_net(dev), 2351 }; 2352 2353 cfg.fc_gateway = *gwaddr; 2354 2355 ip6_route_add(&cfg); 2356 2357 return rt6_get_dflt_router(gwaddr, dev); 2358 } 2359 2360 void rt6_purge_dflt_routers(struct net *net) 2361 { 2362 struct rt6_info *rt; 2363 struct fib6_table *table; 2364 2365 /* NOTE: Keep consistent with rt6_get_dflt_router */ 2366 table = fib6_get_table(net, RT6_TABLE_DFLT); 2367 if (!table) 2368 return; 2369 2370 restart: 2371 read_lock_bh(&table->tb6_lock); 2372 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 2373 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && 2374 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { 2375 dst_hold(&rt->dst); 2376 read_unlock_bh(&table->tb6_lock); 2377 ip6_del_rt(rt); 2378 goto restart; 2379 } 2380 } 2381 read_unlock_bh(&table->tb6_lock); 2382 } 2383 2384 static void rtmsg_to_fib6_config(struct net *net, 2385 struct in6_rtmsg *rtmsg, 2386 struct fib6_config *cfg) 2387 { 2388 memset(cfg, 0, sizeof(*cfg)); 2389 2390 cfg->fc_table = RT6_TABLE_MAIN; 2391 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 2392 cfg->fc_metric = rtmsg->rtmsg_metric; 2393 cfg->fc_expires = rtmsg->rtmsg_info; 2394 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 2395 cfg->fc_src_len = rtmsg->rtmsg_src_len; 2396 cfg->fc_flags = rtmsg->rtmsg_flags; 2397 2398 cfg->fc_nlinfo.nl_net = net; 2399 2400 cfg->fc_dst = rtmsg->rtmsg_dst; 2401 cfg->fc_src = rtmsg->rtmsg_src; 2402 cfg->fc_gateway = rtmsg->rtmsg_gateway; 2403 } 2404 2405 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 2406 { 2407 struct fib6_config cfg; 2408 struct in6_rtmsg rtmsg; 2409 int err; 2410 2411 switch (cmd) { 2412 case SIOCADDRT: /* Add a route */ 2413 case SIOCDELRT: /* Delete a route */ 2414 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 2415 return -EPERM; 2416 err = copy_from_user(&rtmsg, arg, 2417 sizeof(struct in6_rtmsg)); 2418 if (err) 2419 return -EFAULT; 2420 2421 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 2422 2423 rtnl_lock(); 2424 switch (cmd) { 2425 case SIOCADDRT: 2426 err = ip6_route_add(&cfg); 2427 break; 2428 case SIOCDELRT: 2429 err = ip6_route_del(&cfg); 2430 break; 2431 default: 2432 err = -EINVAL; 2433 } 2434 rtnl_unlock(); 2435 2436 return err; 2437 } 2438 2439 return -EINVAL; 2440 } 2441 2442 
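/*
 * Illustrative userspace sketch (not part of this file): the ioctl
 * interface above is what legacy tools such as route(8) use. Names and
 * addresses below are examples only:
 *
 *	struct in6_rtmsg rtmsg = {};
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *	rtmsg.rtmsg_dst_len = 64;
 *	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *	rtmsg.rtmsg_flags = RTF_UP;
 *	ioctl(fd, SIOCADDRT, &rtmsg);	  routed to ip6_route_add()
 *
 * CAP_NET_ADMIN is required, as enforced above.
 */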
/* 2443 * Drop the packet on the floor 2444 */ 2445 2446 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 2447 { 2448 int type; 2449 struct dst_entry *dst = skb_dst(skb); 2450 switch (ipstats_mib_noroutes) { 2451 case IPSTATS_MIB_INNOROUTES: 2452 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 2453 if (type == IPV6_ADDR_ANY) { 2454 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2455 IPSTATS_MIB_INADDRERRORS); 2456 break; 2457 } 2458 /* FALLTHROUGH */ 2459 case IPSTATS_MIB_OUTNOROUTES: 2460 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2461 ipstats_mib_noroutes); 2462 break; 2463 } 2464 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 2465 kfree_skb(skb); 2466 return 0; 2467 } 2468 2469 static int ip6_pkt_discard(struct sk_buff *skb) 2470 { 2471 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 2472 } 2473 2474 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) 2475 { 2476 skb->dev = skb_dst(skb)->dev; 2477 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2478 } 2479 2480 static int ip6_pkt_prohibit(struct sk_buff *skb) 2481 { 2482 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2483 } 2484 2485 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) 2486 { 2487 skb->dev = skb_dst(skb)->dev; 2488 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2489 } 2490 2491 /* 2492 * Allocate a dst for local (unicast / anycast) address. 2493 */ 2494 2495 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 2496 const struct in6_addr *addr, 2497 bool anycast) 2498 { 2499 struct net *net = dev_net(idev->dev); 2500 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 2501 DST_NOCOUNT); 2502 if (!rt) 2503 return ERR_PTR(-ENOMEM); 2504 2505 in6_dev_hold(idev); 2506 2507 rt->dst.flags |= DST_HOST; 2508 rt->dst.input = ip6_input; 2509 rt->dst.output = ip6_output; 2510 rt->rt6i_idev = idev; 2511 2512 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2513 if (anycast) 2514 rt->rt6i_flags |= RTF_ANYCAST; 2515 else 2516 rt->rt6i_flags |= RTF_LOCAL; 2517 2518 rt->rt6i_gateway = *addr; 2519 rt->rt6i_dst.addr = *addr; 2520 rt->rt6i_dst.plen = 128; 2521 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2522 rt->dst.flags |= DST_NOCACHE; 2523 2524 atomic_set(&rt->dst.__refcnt, 1); 2525 2526 return rt; 2527 } 2528 2529 int ip6_route_get_saddr(struct net *net, 2530 struct rt6_info *rt, 2531 const struct in6_addr *daddr, 2532 unsigned int prefs, 2533 struct in6_addr *saddr) 2534 { 2535 struct inet6_dev *idev = 2536 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; 2537 int err = 0; 2538 if (rt && rt->rt6i_prefsrc.plen) 2539 *saddr = rt->rt6i_prefsrc.addr; 2540 else 2541 err = ipv6_dev_get_saddr(net, idev ? 
idev->dev : NULL, 2542 daddr, prefs, saddr); 2543 return err; 2544 } 2545 2546 /* remove a deleted IP from prefsrc entries */ 2547 struct arg_dev_net_ip { 2548 struct net_device *dev; 2549 struct net *net; 2550 struct in6_addr *addr; 2551 }; 2552 2553 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) 2554 { 2555 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; 2556 struct net *net = ((struct arg_dev_net_ip *)arg)->net; 2557 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; 2558 2559 if (((void *)rt->dst.dev == dev || !dev) && 2560 rt != net->ipv6.ip6_null_entry && 2561 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { 2562 /* remove prefsrc entry */ 2563 rt->rt6i_prefsrc.plen = 0; 2564 } 2565 return 0; 2566 } 2567 2568 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) 2569 { 2570 struct net *net = dev_net(ifp->idev->dev); 2571 struct arg_dev_net_ip adni = { 2572 .dev = ifp->idev->dev, 2573 .net = net, 2574 .addr = &ifp->addr, 2575 }; 2576 fib6_clean_all(net, fib6_remove_prefsrc, &adni); 2577 } 2578 2579 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) 2580 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) 2581 2582 /* Remove routers and update dst entries when a gateway turns into a host. */ 2583 static int fib6_clean_tohost(struct rt6_info *rt, void *arg) 2584 { 2585 struct in6_addr *gateway = (struct in6_addr *)arg; 2586 2587 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || 2588 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && 2589 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { 2590 return -1; 2591 } 2592 return 0; 2593 } 2594 2595 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) 2596 { 2597 fib6_clean_all(net, fib6_clean_tohost, gateway); 2598 } 2599 2600 struct arg_dev_net { 2601 struct net_device *dev; 2602 struct net *net; 2603 }; 2604 2605 static int fib6_ifdown(struct rt6_info *rt, void *arg) 2606 { 2607 const struct arg_dev_net *adn = arg; 2608 const struct net_device *dev = adn->dev; 2609 2610 if ((rt->dst.dev == dev || !dev) && 2611 rt != adn->net->ipv6.ip6_null_entry) 2612 return -1; 2613 2614 return 0; 2615 } 2616 2617 void rt6_ifdown(struct net *net, struct net_device *dev) 2618 { 2619 struct arg_dev_net adn = { 2620 .dev = dev, 2621 .net = net, 2622 }; 2623 2624 fib6_clean_all(net, fib6_ifdown, &adn); 2625 icmp6_clean_all(fib6_ifdown, &adn); 2626 if (dev) 2627 rt6_uncached_list_flush_dev(net, dev); 2628 } 2629 2630 struct rt6_mtu_change_arg { 2631 struct net_device *dev; 2632 unsigned int mtu; 2633 }; 2634 2635 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 2636 { 2637 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 2638 struct inet6_dev *idev; 2639 2640 /* In IPv6, PMTU discovery is not optional, 2641 so the RTAX_MTU lock cannot disable it. 2642 We still use this lock to block changes 2643 caused by addrconf/ndisc. 2644 */ 2645 2646 idev = __in6_dev_get(arg->dev); 2647 if (!idev) 2648 return 0; 2649 2650 /* For an administrative MTU increase, there is no way to discover 2651 an IPv6 PMTU increase, so the PMTU increase should be updated here. 2652 Since RFC 1981 doesn't cover administrative MTU increases, 2653 updating the PMTU after an increase is a MUST (e.g. jumbo frames). */ 2654 2655 /* 2656 If the new MTU is less than the route PMTU, the new MTU will be the 2657 lowest MTU in the path; update the route PMTU to reflect the 2658 decrease. If the new MTU is greater than the route PMTU, and the 2659 old MTU is the lowest MTU in the path, update the route PMTU 2660 to reflect the increase. In this case, if the other nodes' MTU 2661 is also the lowest in the path, a Packet Too Big message will 2662 trigger PMTU discovery. 2663 */ 2664 if (rt->dst.dev == arg->dev && 2665 !dst_metric_locked(&rt->dst, RTAX_MTU)) { 2666 if (rt->rt6i_flags & RTF_CACHE) { 2667 /* For RTF_CACHE with rt6i_pmtu == 0 2668 * (i.e. a redirected route), 2669 * the metrics of its rt->dst.from have already 2670 * been updated. 2671 */ 2672 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu) 2673 rt->rt6i_pmtu = arg->mtu; 2674 } else if (dst_mtu(&rt->dst) >= arg->mtu || 2675 (dst_mtu(&rt->dst) < arg->mtu && 2676 dst_mtu(&rt->dst) == idev->cnf.mtu6)) { 2677 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2678 } 2679 } 2680 return 0; 2681 } 2682 2683 void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 2684 { 2685 struct rt6_mtu_change_arg arg = { 2686 .dev = dev, 2687 .mtu = mtu, 2688 }; 2689 2690 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); 2691 } 2692 2693 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2694 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2695 [RTA_OIF] = { .type = NLA_U32 }, 2696 [RTA_IIF] = { .type = NLA_U32 }, 2697 [RTA_PRIORITY] = { .type = NLA_U32 }, 2698 [RTA_METRICS] = { .type = NLA_NESTED }, 2699 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2700 [RTA_PREF] = { .type = NLA_U8 }, 2701 [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, 2702 [RTA_ENCAP] = { .type = NLA_NESTED }, 2703 }; 2704 2705 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2706 struct fib6_config *cfg) 2707 { 2708 struct rtmsg *rtm; 2709 struct nlattr *tb[RTA_MAX+1]; 2710 unsigned int pref; 2711 int err; 2712 2713 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2714 if (err < 0) 2715 goto errout; 2716 2717 err = -EINVAL; 2718 rtm = nlmsg_data(nlh); 2719 memset(cfg, 0, sizeof(*cfg)); 2720 2721 cfg->fc_table = rtm->rtm_table; 2722 cfg->fc_dst_len = rtm->rtm_dst_len; 2723 cfg->fc_src_len = rtm->rtm_src_len; 2724 cfg->fc_flags = RTF_UP; 2725 cfg->fc_protocol = rtm->rtm_protocol; 2726 cfg->fc_type = rtm->rtm_type; 2727 2728 if (rtm->rtm_type == RTN_UNREACHABLE || 2729 rtm->rtm_type == RTN_BLACKHOLE || 2730 rtm->rtm_type == RTN_PROHIBIT || 2731 rtm->rtm_type == RTN_THROW) 2732 cfg->fc_flags |= RTF_REJECT; 2733 2734 if (rtm->rtm_type == RTN_LOCAL) 2735 cfg->fc_flags |= RTF_LOCAL; 2736 2737 if (rtm->rtm_flags & RTM_F_CLONED) 2738 cfg->fc_flags |= RTF_CACHE; 2739 2740 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; 2741 cfg->fc_nlinfo.nlh = nlh; 2742 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2743 2744 if (tb[RTA_GATEWAY]) { 2745 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); 2746 cfg->fc_flags |= RTF_GATEWAY; 2747 } 2748 2749 if (tb[RTA_DST]) { 2750 int plen = (rtm->rtm_dst_len + 7) >> 3; 2751 2752 if (nla_len(tb[RTA_DST]) < plen) 2753 goto errout; 2754 2755 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2756 } 2757 2758 if (tb[RTA_SRC]) { 2759 int plen = (rtm->rtm_src_len + 7) >> 3; 2760 2761 if (nla_len(tb[RTA_SRC]) < plen) 2762 goto errout; 2763 2764 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2765 } 2766 2767 if (tb[RTA_PREFSRC]) 2768 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); 2769 2770 if (tb[RTA_OIF]) 2771 cfg->fc_ifindex =
nla_get_u32(tb[RTA_OIF]); 2772 2773 if (tb[RTA_PRIORITY]) 2774 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2775 2776 if (tb[RTA_METRICS]) { 2777 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2778 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2779 } 2780 2781 if (tb[RTA_TABLE]) 2782 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2783 2784 if (tb[RTA_MULTIPATH]) { 2785 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); 2786 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); 2787 } 2788 2789 if (tb[RTA_PREF]) { 2790 pref = nla_get_u8(tb[RTA_PREF]); 2791 if (pref != ICMPV6_ROUTER_PREF_LOW && 2792 pref != ICMPV6_ROUTER_PREF_HIGH) 2793 pref = ICMPV6_ROUTER_PREF_MEDIUM; 2794 cfg->fc_flags |= RTF_PREF(pref); 2795 } 2796 2797 if (tb[RTA_ENCAP]) 2798 cfg->fc_encap = tb[RTA_ENCAP]; 2799 2800 if (tb[RTA_ENCAP_TYPE]) 2801 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); 2802 2803 err = 0; 2804 errout: 2805 return err; 2806 } 2807 2808 struct rt6_nh { 2809 struct rt6_info *rt6_info; 2810 struct fib6_config r_cfg; 2811 struct mx6_config mxc; 2812 struct list_head next; 2813 }; 2814 2815 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) 2816 { 2817 struct rt6_nh *nh; 2818 2819 list_for_each_entry(nh, rt6_nh_list, next) { 2820 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n", 2821 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, 2822 nh->r_cfg.fc_ifindex); 2823 } 2824 } 2825 2826 static int ip6_route_info_append(struct list_head *rt6_nh_list, 2827 struct rt6_info *rt, struct fib6_config *r_cfg) 2828 { 2829 struct rt6_nh *nh; 2830 struct rt6_info *rtnh; 2831 int err = -EEXIST; 2832 2833 list_for_each_entry(nh, rt6_nh_list, next) { 2834 /* check if rt6_info already exists */ 2835 rtnh = nh->rt6_info; 2836 2837 if (rtnh->dst.dev == rt->dst.dev && 2838 rtnh->rt6i_idev == rt->rt6i_idev && 2839 ipv6_addr_equal(&rtnh->rt6i_gateway, 2840 &rt->rt6i_gateway)) 2841 return err; 2842 } 2843 2844 nh = kzalloc(sizeof(*nh), GFP_KERNEL); 2845 if (!nh) 2846 return -ENOMEM; 2847 nh->rt6_info = rt; 2848 err = ip6_convert_metrics(&nh->mxc, r_cfg); 2849 if (err) { 2850 kfree(nh); 2851 return err; 2852 } 2853 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); 2854 list_add_tail(&nh->next, rt6_nh_list); 2855 2856 return 0; 2857 } 2858 2859 static int ip6_route_multipath_add(struct fib6_config *cfg) 2860 { 2861 struct fib6_config r_cfg; 2862 struct rtnexthop *rtnh; 2863 struct rt6_info *rt; 2864 struct rt6_nh *err_nh; 2865 struct rt6_nh *nh, *nh_safe; 2866 int remaining; 2867 int attrlen; 2868 int err = 1; 2869 int nhn = 0; 2870 int replace = (cfg->fc_nlinfo.nlh && 2871 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); 2872 LIST_HEAD(rt6_nh_list); 2873 2874 remaining = cfg->fc_mp_len; 2875 rtnh = (struct rtnexthop *)cfg->fc_mp; 2876 2877 /* Parse a Multipath Entry and build a list (rt6_nh_list) of 2878 * rt6_info structs per nexthop 2879 */ 2880 while (rtnh_ok(rtnh, remaining)) { 2881 memcpy(&r_cfg, cfg, sizeof(*cfg)); 2882 if (rtnh->rtnh_ifindex) 2883 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 2884 2885 attrlen = rtnh_attrlen(rtnh); 2886 if (attrlen > 0) { 2887 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 2888 2889 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 2890 if (nla) { 2891 r_cfg.fc_gateway = nla_get_in6_addr(nla); 2892 r_cfg.fc_flags |= RTF_GATEWAY; 2893 } 2894 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); 2895 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 2896 if (nla) 2897 r_cfg.fc_encap_type = nla_get_u16(nla); 2898 } 2899 2900 err = 
ip6_route_info_create(&r_cfg, &rt); 2901 if (err) 2902 goto cleanup; 2903 2904 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); 2905 if (err) { 2906 dst_free(&rt->dst); 2907 goto cleanup; 2908 } 2909 2910 rtnh = rtnh_next(rtnh, &remaining); 2911 } 2912 2913 err_nh = NULL; 2914 list_for_each_entry(nh, &rt6_nh_list, next) { 2915 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); 2916 /* nh->rt6_info is used or freed at this point, reset to NULL */ 2917 nh->rt6_info = NULL; 2918 if (err) { 2919 if (replace && nhn) 2920 ip6_print_replace_route_err(&rt6_nh_list); 2921 err_nh = nh; 2922 goto add_errout; 2923 } 2924 2925 /* Because each route is added like a single route, we remove 2926 * these flags after the first nexthop. If there is a collision, 2927 * we have already failed to add the first nexthop: 2928 * fib6_add_rt2node() has rejected it. When replacing, the old 2929 * nexthops have been replaced by the first new one; the rest 2930 * should be added to it. 2931 */ 2932 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | 2933 NLM_F_REPLACE); 2934 nhn++; 2935 } 2936 2937 goto cleanup; 2938 2939 add_errout: 2940 /* Delete routes that were already added */ 2941 list_for_each_entry(nh, &rt6_nh_list, next) { 2942 if (err_nh == nh) 2943 break; 2944 ip6_route_del(&nh->r_cfg); 2945 } 2946 2947 cleanup: 2948 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { 2949 if (nh->rt6_info) 2950 dst_free(&nh->rt6_info->dst); 2951 kfree(nh->mxc.mx); 2952 list_del(&nh->next); 2953 kfree(nh); 2954 } 2955 2956 return err; 2957 } 2958 2959 static int ip6_route_multipath_del(struct fib6_config *cfg) 2960 { 2961 struct fib6_config r_cfg; 2962 struct rtnexthop *rtnh; 2963 int remaining; 2964 int attrlen; 2965 int err = 1, last_err = 0; 2966 2967 remaining = cfg->fc_mp_len; 2968 rtnh = (struct rtnexthop *)cfg->fc_mp; 2969 2970 /* Parse a Multipath Entry */ 2971 while (rtnh_ok(rtnh, remaining)) { 2972 memcpy(&r_cfg, cfg, sizeof(*cfg)); 2973 if (rtnh->rtnh_ifindex) 2974 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 2975 2976 attrlen = rtnh_attrlen(rtnh); 2977 if (attrlen > 0) { 2978 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 2979 2980 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 2981 if (nla) { 2982 nla_memcpy(&r_cfg.fc_gateway, nla, 16); 2983 r_cfg.fc_flags |= RTF_GATEWAY; 2984 } 2985 } 2986 err = ip6_route_del(&r_cfg); 2987 if (err) 2988 last_err = err; 2989 2990 rtnh = rtnh_next(rtnh, &remaining); 2991 } 2992 2993 return last_err; 2994 } 2995 2996 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) 2997 { 2998 struct fib6_config cfg; 2999 int err; 3000 3001 err = rtm_to_fib6_config(skb, nlh, &cfg); 3002 if (err < 0) 3003 return err; 3004 3005 if (cfg.fc_mp) 3006 return ip6_route_multipath_del(&cfg); 3007 else 3008 return ip6_route_del(&cfg); 3009 } 3010 3011 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) 3012 { 3013 struct fib6_config cfg; 3014 int err; 3015 3016 err = rtm_to_fib6_config(skb, nlh, &cfg); 3017 if (err < 0) 3018 return err; 3019 3020 if (cfg.fc_mp) 3021 return ip6_route_multipath_add(&cfg); 3022 else 3023 return ip6_route_add(&cfg); 3024 } 3025 3026 static inline size_t rt6_nlmsg_size(struct rt6_info *rt) 3027 { 3028 return NLMSG_ALIGN(sizeof(struct rtmsg)) 3029 + nla_total_size(16) /* RTA_SRC */ 3030 + nla_total_size(16) /* RTA_DST */ 3031 + nla_total_size(16) /* RTA_GATEWAY */ 3032 + nla_total_size(16) /* RTA_PREFSRC */ 3033 + nla_total_size(4) /* RTA_TABLE */ 3034 + nla_total_size(4) /* RTA_IIF */ 3035 + nla_total_size(4) /* RTA_OIF */ 3036 +
nla_total_size(4) /* RTA_PRIORITY */ 3037 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 3038 + nla_total_size(sizeof(struct rta_cacheinfo)) 3039 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ 3040 + nla_total_size(1) /* RTA_PREF */ 3041 + lwtunnel_get_encap_size(rt->dst.lwtstate); 3042 } 3043 3044 static int rt6_fill_node(struct net *net, 3045 struct sk_buff *skb, struct rt6_info *rt, 3046 struct in6_addr *dst, struct in6_addr *src, 3047 int iif, int type, u32 portid, u32 seq, 3048 int prefix, int nowait, unsigned int flags) 3049 { 3050 u32 metrics[RTAX_MAX]; 3051 struct rtmsg *rtm; 3052 struct nlmsghdr *nlh; 3053 long expires; 3054 u32 table; 3055 3056 if (prefix) { /* user wants prefix routes only */ 3057 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 3058 /* success since this is not a prefix route */ 3059 return 1; 3060 } 3061 } 3062 3063 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); 3064 if (!nlh) 3065 return -EMSGSIZE; 3066 3067 rtm = nlmsg_data(nlh); 3068 rtm->rtm_family = AF_INET6; 3069 rtm->rtm_dst_len = rt->rt6i_dst.plen; 3070 rtm->rtm_src_len = rt->rt6i_src.plen; 3071 rtm->rtm_tos = 0; 3072 if (rt->rt6i_table) 3073 table = rt->rt6i_table->tb6_id; 3074 else 3075 table = RT6_TABLE_UNSPEC; 3076 rtm->rtm_table = table; 3077 if (nla_put_u32(skb, RTA_TABLE, table)) 3078 goto nla_put_failure; 3079 if (rt->rt6i_flags & RTF_REJECT) { 3080 switch (rt->dst.error) { 3081 case -EINVAL: 3082 rtm->rtm_type = RTN_BLACKHOLE; 3083 break; 3084 case -EACCES: 3085 rtm->rtm_type = RTN_PROHIBIT; 3086 break; 3087 case -EAGAIN: 3088 rtm->rtm_type = RTN_THROW; 3089 break; 3090 default: 3091 rtm->rtm_type = RTN_UNREACHABLE; 3092 break; 3093 } 3094 } 3095 else if (rt->rt6i_flags & RTF_LOCAL) 3096 rtm->rtm_type = RTN_LOCAL; 3097 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 3098 rtm->rtm_type = RTN_LOCAL; 3099 else 3100 rtm->rtm_type = RTN_UNICAST; 3101 rtm->rtm_flags = 0; 3102 if (!netif_carrier_ok(rt->dst.dev)) { 3103 rtm->rtm_flags |= RTNH_F_LINKDOWN; 3104 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) 3105 rtm->rtm_flags |= RTNH_F_DEAD; 3106 } 3107 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 3108 rtm->rtm_protocol = rt->rt6i_protocol; 3109 if (rt->rt6i_flags & RTF_DYNAMIC) 3110 rtm->rtm_protocol = RTPROT_REDIRECT; 3111 else if (rt->rt6i_flags & RTF_ADDRCONF) { 3112 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) 3113 rtm->rtm_protocol = RTPROT_RA; 3114 else 3115 rtm->rtm_protocol = RTPROT_KERNEL; 3116 } 3117 3118 if (rt->rt6i_flags & RTF_CACHE) 3119 rtm->rtm_flags |= RTM_F_CLONED; 3120 3121 if (dst) { 3122 if (nla_put_in6_addr(skb, RTA_DST, dst)) 3123 goto nla_put_failure; 3124 rtm->rtm_dst_len = 128; 3125 } else if (rtm->rtm_dst_len) 3126 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) 3127 goto nla_put_failure; 3128 #ifdef CONFIG_IPV6_SUBTREES 3129 if (src) { 3130 if (nla_put_in6_addr(skb, RTA_SRC, src)) 3131 goto nla_put_failure; 3132 rtm->rtm_src_len = 128; 3133 } else if (rtm->rtm_src_len && 3134 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) 3135 goto nla_put_failure; 3136 #endif 3137 if (iif) { 3138 #ifdef CONFIG_IPV6_MROUTE 3139 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 3140 int err = ip6mr_get_route(net, skb, rtm, nowait); 3141 if (err <= 0) { 3142 if (!nowait) { 3143 if (err == 0) 3144 return 0; 3145 goto nla_put_failure; 3146 } else { 3147 if (err == -EMSGSIZE) 3148 goto nla_put_failure; 3149 } 3150 } 3151 } else 3152 #endif 3153 if (nla_put_u32(skb, RTA_IIF, iif)) 3154 goto nla_put_failure; 3155 } else if (dst) { 3156 struct in6_addr 
saddr_buf; 3157 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && 3158 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 3159 goto nla_put_failure; 3160 } 3161 3162 if (rt->rt6i_prefsrc.plen) { 3163 struct in6_addr saddr_buf; 3164 saddr_buf = rt->rt6i_prefsrc.addr; 3165 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 3166 goto nla_put_failure; 3167 } 3168 3169 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); 3170 if (rt->rt6i_pmtu) 3171 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; 3172 if (rtnetlink_put_metrics(skb, metrics) < 0) 3173 goto nla_put_failure; 3174 3175 if (rt->rt6i_flags & RTF_GATEWAY) { 3176 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) 3177 goto nla_put_failure; 3178 } 3179 3180 if (rt->dst.dev && 3181 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 3182 goto nla_put_failure; 3183 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 3184 goto nla_put_failure; 3185 3186 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; 3187 3188 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 3189 goto nla_put_failure; 3190 3191 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) 3192 goto nla_put_failure; 3193 3194 lwtunnel_fill_encap(skb, rt->dst.lwtstate); 3195 3196 nlmsg_end(skb, nlh); 3197 return 0; 3198 3199 nla_put_failure: 3200 nlmsg_cancel(skb, nlh); 3201 return -EMSGSIZE; 3202 } 3203 3204 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 3205 { 3206 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 3207 int prefix; 3208 3209 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 3210 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 3211 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 3212 } else 3213 prefix = 0; 3214 3215 return rt6_fill_node(arg->net, 3216 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 3217 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, 3218 prefix, 0, NLM_F_MULTI); 3219 } 3220 3221 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) 3222 { 3223 struct net *net = sock_net(in_skb->sk); 3224 struct nlattr *tb[RTA_MAX+1]; 3225 struct rt6_info *rt; 3226 struct sk_buff *skb; 3227 struct rtmsg *rtm; 3228 struct flowi6 fl6; 3229 int err, iif = 0, oif = 0; 3230 3231 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 3232 if (err < 0) 3233 goto errout; 3234 3235 err = -EINVAL; 3236 memset(&fl6, 0, sizeof(fl6)); 3237 3238 if (tb[RTA_SRC]) { 3239 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 3240 goto errout; 3241 3242 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 3243 } 3244 3245 if (tb[RTA_DST]) { 3246 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 3247 goto errout; 3248 3249 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 3250 } 3251 3252 if (tb[RTA_IIF]) 3253 iif = nla_get_u32(tb[RTA_IIF]); 3254 3255 if (tb[RTA_OIF]) 3256 oif = nla_get_u32(tb[RTA_OIF]); 3257 3258 if (tb[RTA_MARK]) 3259 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); 3260 3261 if (iif) { 3262 struct net_device *dev; 3263 int flags = 0; 3264 3265 dev = __dev_get_by_index(net, iif); 3266 if (!dev) { 3267 err = -ENODEV; 3268 goto errout; 3269 } 3270 3271 fl6.flowi6_iif = iif; 3272 3273 if (!ipv6_addr_any(&fl6.saddr)) 3274 flags |= RT6_LOOKUP_F_HAS_SADDR; 3275 3276 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6, 3277 flags); 3278 } else { 3279 fl6.flowi6_oif = oif; 3280 3281 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); 3282 } 3283 3284 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 3285 if (!skb) { 
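/* the route lookup above returned a held dst entry; release it on this error path */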
3286 ip6_rt_put(rt); 3287 err = -ENOBUFS; 3288 goto errout; 3289 } 3290 3291 /* Reserve room for dummy headers; this skb can pass 3292 through a good chunk of the routing engine. 3293 */ 3294 skb_reset_mac_header(skb); 3295 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 3296 3297 skb_dst_set(skb, &rt->dst); 3298 3299 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 3300 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 3301 nlh->nlmsg_seq, 0, 0, 0); 3302 if (err < 0) { 3303 kfree_skb(skb); 3304 goto errout; 3305 } 3306 3307 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 3308 errout: 3309 return err; 3310 } 3311 3312 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, 3313 unsigned int nlm_flags) 3314 { 3315 struct sk_buff *skb; 3316 struct net *net = info->nl_net; 3317 u32 seq; 3318 int err; 3319 3320 err = -ENOBUFS; 3321 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 3322 3323 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); 3324 if (!skb) 3325 goto errout; 3326 3327 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 3328 event, info->portid, seq, 0, 0, nlm_flags); 3329 if (err < 0) { 3330 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 3331 WARN_ON(err == -EMSGSIZE); 3332 kfree_skb(skb); 3333 goto errout; 3334 } 3335 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 3336 info->nlh, gfp_any()); 3337 return; 3338 errout: 3339 if (err < 0) 3340 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 3341 } 3342 3343 static int ip6_route_dev_notify(struct notifier_block *this, 3344 unsigned long event, void *ptr) 3345 { 3346 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 3347 struct net *net = dev_net(dev); 3348 3349 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 3350 net->ipv6.ip6_null_entry->dst.dev = dev; 3351 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 3352 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3353 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 3354 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 3355 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 3356 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 3357 #endif 3358 } 3359 3360 return NOTIFY_OK; 3361 } 3362 3363 /* 3364 * /proc 3365 */ 3366 3367 #ifdef CONFIG_PROC_FS 3368 3369 static const struct file_operations ipv6_route_proc_fops = { 3370 .owner = THIS_MODULE, 3371 .open = ipv6_route_open, 3372 .read = seq_read, 3373 .llseek = seq_lseek, 3374 .release = seq_release_net, 3375 }; 3376 3377 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 3378 { 3379 struct net *net = (struct net *)seq->private; 3380 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 3381 net->ipv6.rt6_stats->fib_nodes, 3382 net->ipv6.rt6_stats->fib_route_nodes, 3383 net->ipv6.rt6_stats->fib_rt_alloc, 3384 net->ipv6.rt6_stats->fib_rt_entries, 3385 net->ipv6.rt6_stats->fib_rt_cache, 3386 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 3387 net->ipv6.rt6_stats->fib_discarded_routes); 3388 3389 return 0; 3390 } 3391 3392 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 3393 { 3394 return single_open_net(inode, file, rt6_stats_seq_show); 3395 } 3396 3397 static const struct file_operations rt6_stats_seq_fops = { 3398 .owner = THIS_MODULE, 3399 .open = rt6_stats_seq_open, 3400 .read = seq_read, 3401 .llseek = seq_lseek, 3402 .release = single_release_net, 3403 }; 3404 #endif /* CONFIG_PROC_FS */ 3405 3406 #ifdef CONFIG_SYSCTL 3407 3408 static 3409 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, 3410 void __user *buffer, size_t *lenp,
loff_t *ppos) 3411 { 3412 struct net *net; 3413 int delay, ret; 3414 if (!write) 3415 return -EINVAL; 3416 3417 net = (struct net *)ctl->extra1; 3418 delay = net->ipv6.sysctl.flush_delay; 3419 ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (ret) return ret; /* don't flush if the write itself failed */ 3420 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); 3421 return 0; 3422 } 3423 3424 struct ctl_table ipv6_route_table_template[] = { 3425 { 3426 .procname = "flush", 3427 .data = &init_net.ipv6.sysctl.flush_delay, 3428 .maxlen = sizeof(int), 3429 .mode = 0200, 3430 .proc_handler = ipv6_sysctl_rtcache_flush 3431 }, 3432 { 3433 .procname = "gc_thresh", 3434 .data = &ip6_dst_ops_template.gc_thresh, 3435 .maxlen = sizeof(int), 3436 .mode = 0644, 3437 .proc_handler = proc_dointvec, 3438 }, 3439 { 3440 .procname = "max_size", 3441 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 3442 .maxlen = sizeof(int), 3443 .mode = 0644, 3444 .proc_handler = proc_dointvec, 3445 }, 3446 { 3447 .procname = "gc_min_interval", 3448 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 3449 .maxlen = sizeof(int), 3450 .mode = 0644, 3451 .proc_handler = proc_dointvec_jiffies, 3452 }, 3453 { 3454 .procname = "gc_timeout", 3455 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 3456 .maxlen = sizeof(int), 3457 .mode = 0644, 3458 .proc_handler = proc_dointvec_jiffies, 3459 }, 3460 { 3461 .procname = "gc_interval", 3462 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 3463 .maxlen = sizeof(int), 3464 .mode = 0644, 3465 .proc_handler = proc_dointvec_jiffies, 3466 }, 3467 { 3468 .procname = "gc_elasticity", 3469 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 3470 .maxlen = sizeof(int), 3471 .mode = 0644, 3472 .proc_handler = proc_dointvec, 3473 }, 3474 { 3475 .procname = "mtu_expires", 3476 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 3477 .maxlen = sizeof(int), 3478 .mode = 0644, 3479 .proc_handler = proc_dointvec_jiffies, 3480 }, 3481 { 3482 .procname = "min_adv_mss", 3483 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 3484 .maxlen = sizeof(int), 3485 .mode = 0644, 3486 .proc_handler = proc_dointvec, 3487 }, 3488 { 3489 .procname = "gc_min_interval_ms", 3490 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 3491 .maxlen = sizeof(int), 3492 .mode = 0644, 3493 .proc_handler = proc_dointvec_ms_jiffies, 3494 }, 3495 { } 3496 }; 3497 3498 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 3499 { 3500 struct ctl_table *table; 3501 3502 table = kmemdup(ipv6_route_table_template, 3503 sizeof(ipv6_route_table_template), 3504 GFP_KERNEL); 3505 3506 if (table) { 3507 table[0].data = &net->ipv6.sysctl.flush_delay; 3508 table[0].extra1 = net; 3509 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 3510 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 3511 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 3512 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 3513 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 3514 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 3515 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 3516 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 3517 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 3518 3519 /* Don't export sysctls to unprivileged users */ 3520 if (net->user_ns != &init_user_ns) 3521 table[0].procname = NULL; 3522 } 3523 3524 return table; 3525 } 3526 #endif 3527 3528 static int __net_init ip6_route_net_init(struct net *net) 3529 { 3530 int ret = -ENOMEM; 3531 3532 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 3533 sizeof(net->ipv6.ip6_dst_ops)); 3534 3535
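/* set up the per-netns dst entry counter consulted by the garbage collector */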
if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 3536 goto out_ip6_dst_ops; 3537 3538 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 3539 sizeof(*net->ipv6.ip6_null_entry), 3540 GFP_KERNEL); 3541 if (!net->ipv6.ip6_null_entry) 3542 goto out_ip6_dst_entries; 3543 net->ipv6.ip6_null_entry->dst.path = 3544 (struct dst_entry *)net->ipv6.ip6_null_entry; 3545 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 3546 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 3547 ip6_template_metrics, true); 3548 3549 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3550 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 3551 sizeof(*net->ipv6.ip6_prohibit_entry), 3552 GFP_KERNEL); 3553 if (!net->ipv6.ip6_prohibit_entry) 3554 goto out_ip6_null_entry; 3555 net->ipv6.ip6_prohibit_entry->dst.path = 3556 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 3557 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 3558 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 3559 ip6_template_metrics, true); 3560 3561 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 3562 sizeof(*net->ipv6.ip6_blk_hole_entry), 3563 GFP_KERNEL); 3564 if (!net->ipv6.ip6_blk_hole_entry) 3565 goto out_ip6_prohibit_entry; 3566 net->ipv6.ip6_blk_hole_entry->dst.path = 3567 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 3568 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 3569 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 3570 ip6_template_metrics, true); 3571 #endif 3572 3573 net->ipv6.sysctl.flush_delay = 0; 3574 net->ipv6.sysctl.ip6_rt_max_size = 4096; 3575 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 3576 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 3577 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 3578 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 3579 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 3580 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 3581 3582 net->ipv6.ip6_rt_gc_expire = 30*HZ; 3583 3584 ret = 0; 3585 out: 3586 return ret; 3587 3588 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3589 out_ip6_prohibit_entry: 3590 kfree(net->ipv6.ip6_prohibit_entry); 3591 out_ip6_null_entry: 3592 kfree(net->ipv6.ip6_null_entry); 3593 #endif 3594 out_ip6_dst_entries: 3595 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 3596 out_ip6_dst_ops: 3597 goto out; 3598 } 3599 3600 static void __net_exit ip6_route_net_exit(struct net *net) 3601 { 3602 kfree(net->ipv6.ip6_null_entry); 3603 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3604 kfree(net->ipv6.ip6_prohibit_entry); 3605 kfree(net->ipv6.ip6_blk_hole_entry); 3606 #endif 3607 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 3608 } 3609 3610 static int __net_init ip6_route_net_init_late(struct net *net) 3611 { 3612 #ifdef CONFIG_PROC_FS 3613 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); 3614 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); 3615 #endif 3616 return 0; 3617 } 3618 3619 static void __net_exit ip6_route_net_exit_late(struct net *net) 3620 { 3621 #ifdef CONFIG_PROC_FS 3622 remove_proc_entry("ipv6_route", net->proc_net); 3623 remove_proc_entry("rt6_stats", net->proc_net); 3624 #endif 3625 } 3626 3627 static struct pernet_operations ip6_route_net_ops = { 3628 .init = ip6_route_net_init, 3629 .exit = ip6_route_net_exit, 3630 }; 3631 3632 static int __net_init ipv6_inetpeer_init(struct net *net) 3633 { 3634 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 3635 3636 if (!bp) 3637 return -ENOMEM; 3638 inet_peer_base_init(bp); 3639 net->ipv6.peers = bp; 3640 return 
0; 3641 } 3642 3643 static void __net_exit ipv6_inetpeer_exit(struct net *net) 3644 { 3645 struct inet_peer_base *bp = net->ipv6.peers; 3646 3647 net->ipv6.peers = NULL; 3648 inetpeer_invalidate_tree(bp); 3649 kfree(bp); 3650 } 3651 3652 static struct pernet_operations ipv6_inetpeer_ops = { 3653 .init = ipv6_inetpeer_init, 3654 .exit = ipv6_inetpeer_exit, 3655 }; 3656 3657 static struct pernet_operations ip6_route_net_late_ops = { 3658 .init = ip6_route_net_init_late, 3659 .exit = ip6_route_net_exit_late, 3660 }; 3661 3662 static struct notifier_block ip6_route_dev_notifier = { 3663 .notifier_call = ip6_route_dev_notify, 3664 .priority = 0, 3665 }; 3666 3667 int __init ip6_route_init(void) 3668 { 3669 int ret; 3670 int cpu; 3671 3672 ret = -ENOMEM; 3673 ip6_dst_ops_template.kmem_cachep = 3674 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 3675 SLAB_HWCACHE_ALIGN, NULL); 3676 if (!ip6_dst_ops_template.kmem_cachep) 3677 goto out; 3678 3679 ret = dst_entries_init(&ip6_dst_blackhole_ops); 3680 if (ret) 3681 goto out_kmem_cache; 3682 3683 ret = register_pernet_subsys(&ipv6_inetpeer_ops); 3684 if (ret) 3685 goto out_dst_entries; 3686 3687 ret = register_pernet_subsys(&ip6_route_net_ops); 3688 if (ret) 3689 goto out_register_inetpeer; 3690 3691 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 3692 3693 /* The loopback device is registered before this code runs, 3694 * so the loopback reference in rt6_info has not been taken; take it 3695 * manually for init_net */ 3696 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 3697 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3698 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3699 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 3700 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3701 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 3702 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3703 #endif 3704 ret = fib6_init(); 3705 if (ret) 3706 goto out_register_subsys; 3707 3708 ret = xfrm6_init(); 3709 if (ret) 3710 goto out_fib6_init; 3711 3712 ret = fib6_rules_init(); 3713 if (ret) 3714 goto xfrm6_init; 3715 3716 ret = register_pernet_subsys(&ip6_route_net_late_ops); 3717 if (ret) 3718 goto fib6_rules_init; 3719 3720 ret = -ENOBUFS; 3721 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || 3722 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || 3723 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) 3724 goto out_register_late_subsys; 3725 3726 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 3727 if (ret) 3728 goto out_register_late_subsys; 3729 3730 for_each_possible_cpu(cpu) { 3731 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); 3732 3733 INIT_LIST_HEAD(&ul->head); 3734 spin_lock_init(&ul->lock); 3735 } 3736 3737 out: 3738 return ret; 3739 3740 out_register_late_subsys: 3741 unregister_pernet_subsys(&ip6_route_net_late_ops); 3742 fib6_rules_init: 3743 fib6_rules_cleanup(); 3744 xfrm6_init: 3745 xfrm6_fini(); 3746 out_fib6_init: 3747 fib6_gc_cleanup(); 3748 out_register_subsys: 3749 unregister_pernet_subsys(&ip6_route_net_ops); 3750 out_register_inetpeer: 3751 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3752 out_dst_entries: 3753 dst_entries_destroy(&ip6_dst_blackhole_ops); 3754 out_kmem_cache: 3755 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3756 goto out; 3757 } 3758
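/* Tear down everything ip6_route_init() registered, in reverse order. */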
3759 void ip6_route_cleanup(void) 3760 { 3761 unregister_netdevice_notifier(&ip6_route_dev_notifier); 3762 unregister_pernet_subsys(&ip6_route_net_late_ops); 3763 fib6_rules_cleanup(); 3764 xfrm6_fini(); 3765 fib6_gc_cleanup(); 3766 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3767 unregister_pernet_subsys(&ip6_route_net_ops); 3768 dst_entries_destroy(&ip6_dst_blackhole_ops); 3769 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3770 } 3771
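/*
 * Illustrative note (not part of the original file): the rtnetlink
 * handlers registered in ip6_route_init() are what iproute2 talks to.
 * For example (hypothetical invocation):
 *
 *	ip -6 route add 2001:db8::/64 via fe80::1 dev eth0
 *
 * sends an RTM_NEWROUTE message that rtm_to_fib6_config() parses into a
 * struct fib6_config for inet6_rtm_newroute().
 */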