1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #define pr_fmt(fmt) "IPv6: " fmt 28 29 #include <linux/capability.h> 30 #include <linux/errno.h> 31 #include <linux/export.h> 32 #include <linux/types.h> 33 #include <linux/times.h> 34 #include <linux/socket.h> 35 #include <linux/sockios.h> 36 #include <linux/net.h> 37 #include <linux/route.h> 38 #include <linux/netdevice.h> 39 #include <linux/in6.h> 40 #include <linux/mroute6.h> 41 #include <linux/init.h> 42 #include <linux/if_arp.h> 43 #include <linux/proc_fs.h> 44 #include <linux/seq_file.h> 45 #include <linux/nsproxy.h> 46 #include <linux/slab.h> 47 #include <net/net_namespace.h> 48 #include <net/snmp.h> 49 #include <net/ipv6.h> 50 #include <net/ip6_fib.h> 51 #include <net/ip6_route.h> 52 #include <net/ndisc.h> 53 #include <net/addrconf.h> 54 #include <net/tcp.h> 55 #include <linux/rtnetlink.h> 56 #include <net/dst.h> 57 #include <net/xfrm.h> 58 #include <net/netevent.h> 59 #include <net/netlink.h> 60 61 #include <asm/uaccess.h> 62 63 #ifdef CONFIG_SYSCTL 64 #include <linux/sysctl.h> 65 #endif 66 67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, 68 const struct in6_addr *dest); 69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 70 static unsigned int ip6_default_advmss(const struct dst_entry *dst); 71 static unsigned int ip6_mtu(const struct dst_entry *dst); 72 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 73 static void ip6_dst_destroy(struct dst_entry *); 74 static void ip6_dst_ifdown(struct dst_entry *, 75 struct net_device *dev, int how); 76 static int ip6_dst_gc(struct dst_ops *ops); 77 78 static int ip6_pkt_discard(struct sk_buff *skb); 79 static int ip6_pkt_discard_out(struct sk_buff *skb); 80 static void ip6_link_failure(struct sk_buff *skb); 81 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 82 struct sk_buff *skb, u32 mtu); 83 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, 84 struct sk_buff *skb); 85 86 #ifdef CONFIG_IPV6_ROUTE_INFO 87 static struct rt6_info *rt6_add_route_info(struct net *net, 88 const struct in6_addr *prefix, int prefixlen, 89 const struct in6_addr *gwaddr, int ifindex, 90 unsigned int pref); 91 static struct rt6_info *rt6_get_route_info(struct net *net, 92 const struct in6_addr *prefix, int prefixlen, 93 const struct in6_addr *gwaddr, int ifindex); 94 #endif 95 96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) 97 { 98 struct rt6_info *rt = (struct rt6_info *) dst; 99 struct inet_peer *peer; 100 u32 *p = NULL; 101 102 if (!(rt->dst.flags & DST_HOST)) 103 return NULL; 104 105 peer = rt6_get_peer_create(rt); 106 if (peer) { 107 u32 *old_p = __DST_METRICS_PTR(old); 108 unsigned long prev, new; 109 110 p = peer->metrics; 111 if (inet_metrics_new(peer)) 112 memcpy(p, 
old_p, sizeof(u32) * RTAX_MAX); 113 114 new = (unsigned long) p; 115 prev = cmpxchg(&dst->_metrics, old, new); 116 117 if (prev != old) { 118 p = __DST_METRICS_PTR(prev); 119 if (prev & DST_METRICS_READ_ONLY) 120 p = NULL; 121 } 122 } 123 return p; 124 } 125 126 static inline const void *choose_neigh_daddr(struct rt6_info *rt, 127 struct sk_buff *skb, 128 const void *daddr) 129 { 130 struct in6_addr *p = &rt->rt6i_gateway; 131 132 if (!ipv6_addr_any(p)) 133 return (const void *) p; 134 else if (skb) 135 return &ipv6_hdr(skb)->daddr; 136 return daddr; 137 } 138 139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, 140 struct sk_buff *skb, 141 const void *daddr) 142 { 143 struct rt6_info *rt = (struct rt6_info *) dst; 144 struct neighbour *n; 145 146 daddr = choose_neigh_daddr(rt, skb, daddr); 147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); 148 if (n) 149 return n; 150 return neigh_create(&nd_tbl, daddr, dst->dev); 151 } 152 153 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) 154 { 155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway); 156 if (!n) { 157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev); 158 if (IS_ERR(n)) 159 return PTR_ERR(n); 160 } 161 rt->n = n; 162 163 return 0; 164 } 165 166 static struct dst_ops ip6_dst_ops_template = { 167 .family = AF_INET6, 168 .protocol = cpu_to_be16(ETH_P_IPV6), 169 .gc = ip6_dst_gc, 170 .gc_thresh = 1024, 171 .check = ip6_dst_check, 172 .default_advmss = ip6_default_advmss, 173 .mtu = ip6_mtu, 174 .cow_metrics = ipv6_cow_metrics, 175 .destroy = ip6_dst_destroy, 176 .ifdown = ip6_dst_ifdown, 177 .negative_advice = ip6_negative_advice, 178 .link_failure = ip6_link_failure, 179 .update_pmtu = ip6_rt_update_pmtu, 180 .redirect = rt6_do_redirect, 181 .local_out = __ip6_local_out, 182 .neigh_lookup = ip6_neigh_lookup, 183 }; 184 185 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) 186 { 187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 188 189 return mtu ? 
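/*
 * ipv6_cow_metrics() above publishes a writable, per-peer metrics array
 * into dst->_metrics with cmpxchg(), so concurrent callers converge on a
 * single copy; a loser of the race adopts whatever pointer won, or NULL
 * if that pointer is still marked DST_METRICS_READ_ONLY.  A minimal
 * sketch of the same publish-once idiom (hypothetical helper, not used
 * anywhere in this file):
 */
#if 0   /* illustrative sketch only, not built */
static unsigned long publish_once(unsigned long *slot,
                                  unsigned long old, unsigned long new)
{
        unsigned long prev = cmpxchg(slot, old, new);

        /* exactly one caller installs 'new'; everyone else sees the
         * value that was already there
         */
        return (prev == old) ? new : prev;
}
#endif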
: dst->dev->mtu; 190 } 191 192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, 193 struct sk_buff *skb, u32 mtu) 194 { 195 } 196 197 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, 198 struct sk_buff *skb) 199 { 200 } 201 202 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, 203 unsigned long old) 204 { 205 return NULL; 206 } 207 208 static struct dst_ops ip6_dst_blackhole_ops = { 209 .family = AF_INET6, 210 .protocol = cpu_to_be16(ETH_P_IPV6), 211 .destroy = ip6_dst_destroy, 212 .check = ip6_dst_check, 213 .mtu = ip6_blackhole_mtu, 214 .default_advmss = ip6_default_advmss, 215 .update_pmtu = ip6_rt_blackhole_update_pmtu, 216 .redirect = ip6_rt_blackhole_redirect, 217 .cow_metrics = ip6_rt_blackhole_cow_metrics, 218 .neigh_lookup = ip6_neigh_lookup, 219 }; 220 221 static const u32 ip6_template_metrics[RTAX_MAX] = { 222 [RTAX_HOPLIMIT - 1] = 255, 223 }; 224 225 static struct rt6_info ip6_null_entry_template = { 226 .dst = { 227 .__refcnt = ATOMIC_INIT(1), 228 .__use = 1, 229 .obsolete = DST_OBSOLETE_FORCE_CHK, 230 .error = -ENETUNREACH, 231 .input = ip6_pkt_discard, 232 .output = ip6_pkt_discard_out, 233 }, 234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 235 .rt6i_protocol = RTPROT_KERNEL, 236 .rt6i_metric = ~(u32) 0, 237 .rt6i_ref = ATOMIC_INIT(1), 238 }; 239 240 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 241 242 static int ip6_pkt_prohibit(struct sk_buff *skb); 243 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 244 245 static struct rt6_info ip6_prohibit_entry_template = { 246 .dst = { 247 .__refcnt = ATOMIC_INIT(1), 248 .__use = 1, 249 .obsolete = DST_OBSOLETE_FORCE_CHK, 250 .error = -EACCES, 251 .input = ip6_pkt_prohibit, 252 .output = ip6_pkt_prohibit_out, 253 }, 254 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 255 .rt6i_protocol = RTPROT_KERNEL, 256 .rt6i_metric = ~(u32) 0, 257 .rt6i_ref = ATOMIC_INIT(1), 258 }; 259 260 static struct rt6_info ip6_blk_hole_entry_template = { 261 .dst = { 262 .__refcnt = ATOMIC_INIT(1), 263 .__use = 1, 264 .obsolete = DST_OBSOLETE_FORCE_CHK, 265 .error = -EINVAL, 266 .input = dst_discard, 267 .output = dst_discard, 268 }, 269 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 270 .rt6i_protocol = RTPROT_KERNEL, 271 .rt6i_metric = ~(u32) 0, 272 .rt6i_ref = ATOMIC_INIT(1), 273 }; 274 275 #endif 276 277 /* allocate dst with ip6_dst_ops */ 278 static inline struct rt6_info *ip6_dst_alloc(struct net *net, 279 struct net_device *dev, 280 int flags, 281 struct fib6_table *table) 282 { 283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 284 0, DST_OBSOLETE_FORCE_CHK, flags); 285 286 if (rt) { 287 struct dst_entry *dst = &rt->dst; 288 289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 290 rt6_init_peer(rt, table ? 
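/*
 * In ip6_dst_alloc() above, memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst))
 * clears every rt6_info field that follows the embedded dst_entry, so the
 * rest of this file can rely on zero/NULL defaults.  The rt6_init_peer()
 * call here chooses the inetpeer base: routes owned by a FIB table use
 * that table's tb6_peers base, table-less ones fall back to the
 * per-namespace net->ipv6.peers base.
 */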
&table->tb6_peers : net->ipv6.peers); 291 rt->rt6i_genid = rt_genid(net); 292 } 293 return rt; 294 } 295 296 static void ip6_dst_destroy(struct dst_entry *dst) 297 { 298 struct rt6_info *rt = (struct rt6_info *)dst; 299 struct inet6_dev *idev = rt->rt6i_idev; 300 301 if (rt->n) 302 neigh_release(rt->n); 303 304 if (!(rt->dst.flags & DST_HOST)) 305 dst_destroy_metrics_generic(dst); 306 307 if (idev) { 308 rt->rt6i_idev = NULL; 309 in6_dev_put(idev); 310 } 311 312 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) 313 dst_release(dst->from); 314 315 if (rt6_has_peer(rt)) { 316 struct inet_peer *peer = rt6_peer_ptr(rt); 317 inet_putpeer(peer); 318 } 319 } 320 321 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); 322 323 static u32 rt6_peer_genid(void) 324 { 325 return atomic_read(&__rt6_peer_genid); 326 } 327 328 void rt6_bind_peer(struct rt6_info *rt, int create) 329 { 330 struct inet_peer_base *base; 331 struct inet_peer *peer; 332 333 base = inetpeer_base_ptr(rt->_rt6i_peer); 334 if (!base) 335 return; 336 337 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); 338 if (peer) { 339 if (!rt6_set_peer(rt, peer)) 340 inet_putpeer(peer); 341 else 342 rt->rt6i_peer_genid = rt6_peer_genid(); 343 } 344 } 345 346 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 347 int how) 348 { 349 struct rt6_info *rt = (struct rt6_info *)dst; 350 struct inet6_dev *idev = rt->rt6i_idev; 351 struct net_device *loopback_dev = 352 dev_net(dev)->loopback_dev; 353 354 if (dev != loopback_dev) { 355 if (idev && idev->dev == dev) { 356 struct inet6_dev *loopback_idev = 357 in6_dev_get(loopback_dev); 358 if (loopback_idev) { 359 rt->rt6i_idev = loopback_idev; 360 in6_dev_put(idev); 361 } 362 } 363 if (rt->n && rt->n->dev == dev) { 364 rt->n->dev = loopback_dev; 365 dev_hold(loopback_dev); 366 dev_put(dev); 367 } 368 } 369 } 370 371 static bool rt6_check_expired(const struct rt6_info *rt) 372 { 373 struct rt6_info *ort = NULL; 374 375 if (rt->rt6i_flags & RTF_EXPIRES) { 376 if (time_after(jiffies, rt->dst.expires)) 377 return true; 378 } else if (rt->dst.from) { 379 ort = (struct rt6_info *) rt->dst.from; 380 return (ort->rt6i_flags & RTF_EXPIRES) && 381 time_after(jiffies, ort->dst.expires); 382 } 383 return false; 384 } 385 386 static bool rt6_need_strict(const struct in6_addr *daddr) 387 { 388 return ipv6_addr_type(daddr) & 389 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); 390 } 391 392 /* 393 * Route lookup. Any table->tb6_lock is implied. 
394 */ 395 396 static inline struct rt6_info *rt6_device_match(struct net *net, 397 struct rt6_info *rt, 398 const struct in6_addr *saddr, 399 int oif, 400 int flags) 401 { 402 struct rt6_info *local = NULL; 403 struct rt6_info *sprt; 404 405 if (!oif && ipv6_addr_any(saddr)) 406 goto out; 407 408 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { 409 struct net_device *dev = sprt->dst.dev; 410 411 if (oif) { 412 if (dev->ifindex == oif) 413 return sprt; 414 if (dev->flags & IFF_LOOPBACK) { 415 if (!sprt->rt6i_idev || 416 sprt->rt6i_idev->dev->ifindex != oif) { 417 if (flags & RT6_LOOKUP_F_IFACE && oif) 418 continue; 419 if (local && (!oif || 420 local->rt6i_idev->dev->ifindex == oif)) 421 continue; 422 } 423 local = sprt; 424 } 425 } else { 426 if (ipv6_chk_addr(net, saddr, dev, 427 flags & RT6_LOOKUP_F_IFACE)) 428 return sprt; 429 } 430 } 431 432 if (oif) { 433 if (local) 434 return local; 435 436 if (flags & RT6_LOOKUP_F_IFACE) 437 return net->ipv6.ip6_null_entry; 438 } 439 out: 440 return rt; 441 } 442 443 #ifdef CONFIG_IPV6_ROUTER_PREF 444 static void rt6_probe(struct rt6_info *rt) 445 { 446 struct neighbour *neigh; 447 /* 448 * Okay, this does not seem to be appropriate 449 * for now, however, we need to check if it 450 * is really so; aka Router Reachability Probing. 451 * 452 * Router Reachability Probe MUST be rate-limited 453 * to no more than one per minute. 454 */ 455 rcu_read_lock(); 456 neigh = rt ? rt->n : NULL; 457 if (!neigh || (neigh->nud_state & NUD_VALID)) 458 goto out; 459 read_lock_bh(&neigh->lock); 460 if (!(neigh->nud_state & NUD_VALID) && 461 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 462 struct in6_addr mcaddr; 463 struct in6_addr *target; 464 465 neigh->updated = jiffies; 466 read_unlock_bh(&neigh->lock); 467 468 target = (struct in6_addr *)&neigh->primary_key; 469 addrconf_addr_solict_mult(target, &mcaddr); 470 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); 471 } else { 472 read_unlock_bh(&neigh->lock); 473 } 474 out: 475 rcu_read_unlock(); 476 } 477 #else 478 static inline void rt6_probe(struct rt6_info *rt) 479 { 480 } 481 #endif 482 483 /* 484 * Default Router Selection (RFC 2461 6.3.6) 485 */ 486 static inline int rt6_check_dev(struct rt6_info *rt, int oif) 487 { 488 struct net_device *dev = rt->dst.dev; 489 if (!oif || dev->ifindex == oif) 490 return 2; 491 if ((dev->flags & IFF_LOOPBACK) && 492 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 493 return 1; 494 return 0; 495 } 496 497 static inline int rt6_check_neigh(struct rt6_info *rt) 498 { 499 struct neighbour *neigh; 500 int m; 501 502 rcu_read_lock(); 503 neigh = rt->n; 504 if (rt->rt6i_flags & RTF_NONEXTHOP || 505 !(rt->rt6i_flags & RTF_GATEWAY)) 506 m = 1; 507 else if (neigh) { 508 read_lock_bh(&neigh->lock); 509 if (neigh->nud_state & NUD_VALID) 510 m = 2; 511 #ifdef CONFIG_IPV6_ROUTER_PREF 512 else if (neigh->nud_state & NUD_FAILED) 513 m = 0; 514 #endif 515 else 516 m = 1; 517 read_unlock_bh(&neigh->lock); 518 } else 519 m = 0; 520 rcu_read_unlock(); 521 return m; 522 } 523 524 static int rt6_score_route(struct rt6_info *rt, int oif, 525 int strict) 526 { 527 int m, n; 528 529 m = rt6_check_dev(rt, oif); 530 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 531 return -1; 532 #ifdef CONFIG_IPV6_ROUTER_PREF 533 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 534 #endif 535 n = rt6_check_neigh(rt); 536 if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) 537 return -1; 538 return m; 539 } 540 541 static struct rt6_info *find_match(struct rt6_info 
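/*
 * Note on the scoring above: rt6_score_route() builds its value from two
 * independent parts.  Bits 0-1 come from rt6_check_dev(): 2 for an exact
 * oif match (or no oif constraint), 1 for a loopback device whose idev
 * matches oif, 0 otherwise; a zero here is fatal when RT6_LOOKUP_F_IFACE
 * is set.  With CONFIG_IPV6_ROUTER_PREF the decoded router preference is
 * OR-ed in from bit 2 upwards, so at equal interface match a more
 * preferred router always wins.  rt6_check_neigh() does not contribute to
 * the score at all; it only gates on reachability when
 * RT6_LOOKUP_F_REACHABLE is set.  find_match(), whose declaration
 * continues below, keeps the best non-expired candidate and probes the
 * others.
 */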
*rt, int oif, int strict, 542 int *mpri, struct rt6_info *match) 543 { 544 int m; 545 546 if (rt6_check_expired(rt)) 547 goto out; 548 549 m = rt6_score_route(rt, oif, strict); 550 if (m < 0) 551 goto out; 552 553 if (m > *mpri) { 554 if (strict & RT6_LOOKUP_F_REACHABLE) 555 rt6_probe(match); 556 *mpri = m; 557 match = rt; 558 } else if (strict & RT6_LOOKUP_F_REACHABLE) { 559 rt6_probe(rt); 560 } 561 562 out: 563 return match; 564 } 565 566 static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 567 struct rt6_info *rr_head, 568 u32 metric, int oif, int strict) 569 { 570 struct rt6_info *rt, *match; 571 int mpri = -1; 572 573 match = NULL; 574 for (rt = rr_head; rt && rt->rt6i_metric == metric; 575 rt = rt->dst.rt6_next) 576 match = find_match(rt, oif, strict, &mpri, match); 577 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 578 rt = rt->dst.rt6_next) 579 match = find_match(rt, oif, strict, &mpri, match); 580 581 return match; 582 } 583 584 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 585 { 586 struct rt6_info *match, *rt0; 587 struct net *net; 588 589 rt0 = fn->rr_ptr; 590 if (!rt0) 591 fn->rr_ptr = rt0 = fn->leaf; 592 593 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); 594 595 if (!match && 596 (strict & RT6_LOOKUP_F_REACHABLE)) { 597 struct rt6_info *next = rt0->dst.rt6_next; 598 599 /* no entries matched; do round-robin */ 600 if (!next || next->rt6i_metric != rt0->rt6i_metric) 601 next = fn->leaf; 602 603 if (next != rt0) 604 fn->rr_ptr = next; 605 } 606 607 net = dev_net(rt0->dst.dev); 608 return match ? match : net->ipv6.ip6_null_entry; 609 } 610 611 #ifdef CONFIG_IPV6_ROUTE_INFO 612 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 613 const struct in6_addr *gwaddr) 614 { 615 struct net *net = dev_net(dev); 616 struct route_info *rinfo = (struct route_info *) opt; 617 struct in6_addr prefix_buf, *prefix; 618 unsigned int pref; 619 unsigned long lifetime; 620 struct rt6_info *rt; 621 622 if (len < sizeof(struct route_info)) { 623 return -EINVAL; 624 } 625 626 /* Sanity check for prefix_len and length */ 627 if (rinfo->length > 3) { 628 return -EINVAL; 629 } else if (rinfo->prefix_len > 128) { 630 return -EINVAL; 631 } else if (rinfo->prefix_len > 64) { 632 if (rinfo->length < 2) { 633 return -EINVAL; 634 } 635 } else if (rinfo->prefix_len > 0) { 636 if (rinfo->length < 1) { 637 return -EINVAL; 638 } 639 } 640 641 pref = rinfo->route_pref; 642 if (pref == ICMPV6_ROUTER_PREF_INVALID) 643 return -EINVAL; 644 645 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 646 647 if (rinfo->length == 3) 648 prefix = (struct in6_addr *)rinfo->prefix; 649 else { 650 /* this function is safe */ 651 ipv6_addr_prefix(&prefix_buf, 652 (struct in6_addr *)rinfo->prefix, 653 rinfo->prefix_len); 654 prefix = &prefix_buf; 655 } 656 657 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, 658 dev->ifindex); 659 660 if (rt && !lifetime) { 661 ip6_del_rt(rt); 662 rt = NULL; 663 } 664 665 if (!rt && lifetime) 666 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 667 pref); 668 else if (rt) 669 rt->rt6i_flags = RTF_ROUTEINFO | 670 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 671 672 if (rt) { 673 if (!addrconf_finite_timeout(lifetime)) 674 rt6_clean_expires(rt); 675 else 676 rt6_set_expires(rt, jiffies + HZ * lifetime); 677 678 dst_release(&rt->dst); 679 } 680 return 0; 681 } 682 #endif 683 684 #define BACKTRACK(__net, saddr) \ 685 do { \ 686 if (rt == 
__net->ipv6.ip6_null_entry) { \ 687 struct fib6_node *pn; \ 688 while (1) { \ 689 if (fn->fn_flags & RTN_TL_ROOT) \ 690 goto out; \ 691 pn = fn->parent; \ 692 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 693 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 694 else \ 695 fn = pn; \ 696 if (fn->fn_flags & RTN_RTINFO) \ 697 goto restart; \ 698 } \ 699 } \ 700 } while (0) 701 702 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 703 struct fib6_table *table, 704 struct flowi6 *fl6, int flags) 705 { 706 struct fib6_node *fn; 707 struct rt6_info *rt; 708 709 read_lock_bh(&table->tb6_lock); 710 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 711 restart: 712 rt = fn->leaf; 713 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); 714 BACKTRACK(net, &fl6->saddr); 715 out: 716 dst_use(&rt->dst, jiffies); 717 read_unlock_bh(&table->tb6_lock); 718 return rt; 719 720 } 721 722 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6, 723 int flags) 724 { 725 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); 726 } 727 EXPORT_SYMBOL_GPL(ip6_route_lookup); 728 729 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 730 const struct in6_addr *saddr, int oif, int strict) 731 { 732 struct flowi6 fl6 = { 733 .flowi6_oif = oif, 734 .daddr = *daddr, 735 }; 736 struct dst_entry *dst; 737 int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 738 739 if (saddr) { 740 memcpy(&fl6.saddr, saddr, sizeof(*saddr)); 741 flags |= RT6_LOOKUP_F_HAS_SADDR; 742 } 743 744 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); 745 if (dst->error == 0) 746 return (struct rt6_info *) dst; 747 748 dst_release(dst); 749 750 return NULL; 751 } 752 753 EXPORT_SYMBOL(rt6_lookup); 754 755 /* ip6_ins_rt is called with FREE table->tb6_lock. 756 It takes new route entry, the addition fails by any reason the 757 route is freed. In any case, if caller does not hold it, it may 758 be destroyed. 759 */ 760 761 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 762 { 763 int err; 764 struct fib6_table *table; 765 766 table = rt->rt6i_table; 767 write_lock_bh(&table->tb6_lock); 768 err = fib6_add(&table->tb6_root, rt, info); 769 write_unlock_bh(&table->tb6_lock); 770 771 return err; 772 } 773 774 int ip6_ins_rt(struct rt6_info *rt) 775 { 776 struct nl_info info = { 777 .nl_net = dev_net(rt->dst.dev), 778 }; 779 return __ip6_ins_rt(rt, &info); 780 } 781 782 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, 783 const struct in6_addr *daddr, 784 const struct in6_addr *saddr) 785 { 786 struct rt6_info *rt; 787 788 /* 789 * Clone the route. 
790 */ 791 792 rt = ip6_rt_copy(ort, daddr); 793 794 if (rt) { 795 int attempts = !in_softirq(); 796 797 if (!(rt->rt6i_flags & RTF_GATEWAY)) { 798 if (ort->rt6i_dst.plen != 128 && 799 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) 800 rt->rt6i_flags |= RTF_ANYCAST; 801 rt->rt6i_gateway = *daddr; 802 } 803 804 rt->rt6i_flags |= RTF_CACHE; 805 806 #ifdef CONFIG_IPV6_SUBTREES 807 if (rt->rt6i_src.plen && saddr) { 808 rt->rt6i_src.addr = *saddr; 809 rt->rt6i_src.plen = 128; 810 } 811 #endif 812 813 retry: 814 if (rt6_bind_neighbour(rt, rt->dst.dev)) { 815 struct net *net = dev_net(rt->dst.dev); 816 int saved_rt_min_interval = 817 net->ipv6.sysctl.ip6_rt_gc_min_interval; 818 int saved_rt_elasticity = 819 net->ipv6.sysctl.ip6_rt_gc_elasticity; 820 821 if (attempts-- > 0) { 822 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; 823 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; 824 825 ip6_dst_gc(&net->ipv6.ip6_dst_ops); 826 827 net->ipv6.sysctl.ip6_rt_gc_elasticity = 828 saved_rt_elasticity; 829 net->ipv6.sysctl.ip6_rt_gc_min_interval = 830 saved_rt_min_interval; 831 goto retry; 832 } 833 834 net_warn_ratelimited("Neighbour table overflow\n"); 835 dst_free(&rt->dst); 836 return NULL; 837 } 838 } 839 840 return rt; 841 } 842 843 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, 844 const struct in6_addr *daddr) 845 { 846 struct rt6_info *rt = ip6_rt_copy(ort, daddr); 847 848 if (rt) { 849 rt->rt6i_flags |= RTF_CACHE; 850 rt->n = neigh_clone(ort->n); 851 } 852 return rt; 853 } 854 855 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 856 struct flowi6 *fl6, int flags) 857 { 858 struct fib6_node *fn; 859 struct rt6_info *rt, *nrt; 860 int strict = 0; 861 int attempts = 3; 862 int err; 863 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 864 865 strict |= flags & RT6_LOOKUP_F_IFACE; 866 867 relookup: 868 read_lock_bh(&table->tb6_lock); 869 870 restart_2: 871 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 872 873 restart: 874 rt = rt6_select(fn, oif, strict | reachable); 875 876 BACKTRACK(net, &fl6->saddr); 877 if (rt == net->ipv6.ip6_null_entry || 878 rt->rt6i_flags & RTF_CACHE) 879 goto out; 880 881 dst_hold(&rt->dst); 882 read_unlock_bh(&table->tb6_lock); 883 884 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP)) 885 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); 886 else if (!(rt->dst.flags & DST_HOST)) 887 nrt = rt6_alloc_clone(rt, &fl6->daddr); 888 else 889 goto out2; 890 891 dst_release(&rt->dst); 892 rt = nrt ? : net->ipv6.ip6_null_entry; 893 894 dst_hold(&rt->dst); 895 if (nrt) { 896 err = ip6_ins_rt(nrt); 897 if (!err) 898 goto out2; 899 } 900 901 if (--attempts <= 0) 902 goto out2; 903 904 /* 905 * Race condition! In the gap, when table->tb6_lock was 906 * released someone could insert this route. Relookup. 
907 */ 908 dst_release(&rt->dst); 909 goto relookup; 910 911 out: 912 if (reachable) { 913 reachable = 0; 914 goto restart_2; 915 } 916 dst_hold(&rt->dst); 917 read_unlock_bh(&table->tb6_lock); 918 out2: 919 rt->dst.lastuse = jiffies; 920 rt->dst.__use++; 921 922 return rt; 923 } 924 925 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 926 struct flowi6 *fl6, int flags) 927 { 928 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); 929 } 930 931 static struct dst_entry *ip6_route_input_lookup(struct net *net, 932 struct net_device *dev, 933 struct flowi6 *fl6, int flags) 934 { 935 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) 936 flags |= RT6_LOOKUP_F_IFACE; 937 938 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); 939 } 940 941 void ip6_route_input(struct sk_buff *skb) 942 { 943 const struct ipv6hdr *iph = ipv6_hdr(skb); 944 struct net *net = dev_net(skb->dev); 945 int flags = RT6_LOOKUP_F_HAS_SADDR; 946 struct flowi6 fl6 = { 947 .flowi6_iif = skb->dev->ifindex, 948 .daddr = iph->daddr, 949 .saddr = iph->saddr, 950 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, 951 .flowi6_mark = skb->mark, 952 .flowi6_proto = iph->nexthdr, 953 }; 954 955 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); 956 } 957 958 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 959 struct flowi6 *fl6, int flags) 960 { 961 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); 962 } 963 964 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, 965 struct flowi6 *fl6) 966 { 967 int flags = 0; 968 969 fl6->flowi6_iif = net->loopback_dev->ifindex; 970 971 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) 972 flags |= RT6_LOOKUP_F_IFACE; 973 974 if (!ipv6_addr_any(&fl6->saddr)) 975 flags |= RT6_LOOKUP_F_HAS_SADDR; 976 else if (sk) 977 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 978 979 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); 980 } 981 982 EXPORT_SYMBOL(ip6_route_output); 983 984 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 985 { 986 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; 987 struct dst_entry *new = NULL; 988 989 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); 990 if (rt) { 991 new = &rt->dst; 992 993 memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); 994 rt6_init_peer(rt, net->ipv6.peers); 995 996 new->__use = 1; 997 new->input = dst_discard; 998 new->output = dst_discard; 999 1000 if (dst_metrics_read_only(&ort->dst)) 1001 new->_metrics = ort->dst._metrics; 1002 else 1003 dst_copy_metrics(new, &ort->dst); 1004 rt->rt6i_idev = ort->rt6i_idev; 1005 if (rt->rt6i_idev) 1006 in6_dev_hold(rt->rt6i_idev); 1007 1008 rt->rt6i_gateway = ort->rt6i_gateway; 1009 rt->rt6i_flags = ort->rt6i_flags; 1010 rt6_clean_expires(rt); 1011 rt->rt6i_metric = 0; 1012 1013 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1014 #ifdef CONFIG_IPV6_SUBTREES 1015 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1016 #endif 1017 1018 dst_free(new); 1019 } 1020 1021 dst_release(dst_orig); 1022 return new ? 
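/*
 * A typical caller of ip6_route_output() follows the same pattern
 * ip6_update_pmtu() uses further down: build a flowi6, look the route up,
 * check dst->error (a dst is returned even on failure) and drop the
 * reference when done.  Rough sketch with a hypothetical helper:
 */
#if 0   /* illustrative sketch only, not built */
static void example_output_lookup(struct net *net,
                                  const struct in6_addr *daddr, int oif)
{
        struct flowi6 fl6;
        struct dst_entry *dst;

        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_oif = oif;
        fl6.daddr = *daddr;

        dst = ip6_route_output(net, NULL, &fl6);
        if (!dst->error) {
                /* use the route, e.g. look at dst_mtu(dst) */
        }
        dst_release(dst);
}
#endif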
new : ERR_PTR(-ENOMEM); 1023 } 1024 1025 /* 1026 * Destination cache support functions 1027 */ 1028 1029 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 1030 { 1031 struct rt6_info *rt; 1032 1033 rt = (struct rt6_info *) dst; 1034 1035 /* All IPV6 dsts are created with ->obsolete set to the value 1036 * DST_OBSOLETE_FORCE_CHK which forces validation calls down 1037 * into this function always. 1038 */ 1039 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) 1040 return NULL; 1041 1042 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { 1043 if (rt->rt6i_peer_genid != rt6_peer_genid()) { 1044 if (!rt6_has_peer(rt)) 1045 rt6_bind_peer(rt, 0); 1046 rt->rt6i_peer_genid = rt6_peer_genid(); 1047 } 1048 return dst; 1049 } 1050 return NULL; 1051 } 1052 1053 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 1054 { 1055 struct rt6_info *rt = (struct rt6_info *) dst; 1056 1057 if (rt) { 1058 if (rt->rt6i_flags & RTF_CACHE) { 1059 if (rt6_check_expired(rt)) { 1060 ip6_del_rt(rt); 1061 dst = NULL; 1062 } 1063 } else { 1064 dst_release(dst); 1065 dst = NULL; 1066 } 1067 } 1068 return dst; 1069 } 1070 1071 static void ip6_link_failure(struct sk_buff *skb) 1072 { 1073 struct rt6_info *rt; 1074 1075 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); 1076 1077 rt = (struct rt6_info *) skb_dst(skb); 1078 if (rt) { 1079 if (rt->rt6i_flags & RTF_CACHE) 1080 rt6_update_expires(rt, 0); 1081 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 1082 rt->rt6i_node->fn_sernum = -1; 1083 } 1084 } 1085 1086 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 1087 struct sk_buff *skb, u32 mtu) 1088 { 1089 struct rt6_info *rt6 = (struct rt6_info*)dst; 1090 1091 dst_confirm(dst); 1092 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 1093 struct net *net = dev_net(dst->dev); 1094 1095 rt6->rt6i_flags |= RTF_MODIFIED; 1096 if (mtu < IPV6_MIN_MTU) { 1097 u32 features = dst_metric(dst, RTAX_FEATURES); 1098 mtu = IPV6_MIN_MTU; 1099 features |= RTAX_FEATURE_ALLFRAG; 1100 dst_metric_set(dst, RTAX_FEATURES, features); 1101 } 1102 dst_metric_set(dst, RTAX_MTU, mtu); 1103 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); 1104 } 1105 } 1106 1107 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, 1108 int oif, u32 mark) 1109 { 1110 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 1111 struct dst_entry *dst; 1112 struct flowi6 fl6; 1113 1114 memset(&fl6, 0, sizeof(fl6)); 1115 fl6.flowi6_oif = oif; 1116 fl6.flowi6_mark = mark; 1117 fl6.flowi6_flags = 0; 1118 fl6.daddr = iph->daddr; 1119 fl6.saddr = iph->saddr; 1120 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; 1121 1122 dst = ip6_route_output(net, NULL, &fl6); 1123 if (!dst->error) 1124 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); 1125 dst_release(dst); 1126 } 1127 EXPORT_SYMBOL_GPL(ip6_update_pmtu); 1128 1129 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) 1130 { 1131 ip6_update_pmtu(skb, sock_net(sk), mtu, 1132 sk->sk_bound_dev_if, sk->sk_mark); 1133 } 1134 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); 1135 1136 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) 1137 { 1138 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 1139 struct dst_entry *dst; 1140 struct flowi6 fl6; 1141 1142 memset(&fl6, 0, sizeof(fl6)); 1143 fl6.flowi6_oif = oif; 1144 fl6.flowi6_mark = mark; 1145 fl6.flowi6_flags = 0; 1146 fl6.daddr = iph->daddr; 1147 fl6.saddr = iph->saddr; 1148 fl6.flowlabel = (*(__be32 *) 
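/*
 * The expression completed here, (*(__be32 *)iph) & IPV6_FLOWINFO_MASK,
 * copies the traffic-class and flow-label bits from the first 32 bits of
 * the IPv6 header into the flow key; ip6_route_input() and
 * ip6_update_pmtu() above build their flowi6 the same way, so the lookup
 * sees the same flow information as the packet that triggered it.
 */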
iph) & IPV6_FLOWINFO_MASK; 1149 1150 dst = ip6_route_output(net, NULL, &fl6); 1151 if (!dst->error) 1152 rt6_do_redirect(dst, NULL, skb); 1153 dst_release(dst); 1154 } 1155 EXPORT_SYMBOL_GPL(ip6_redirect); 1156 1157 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) 1158 { 1159 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); 1160 } 1161 EXPORT_SYMBOL_GPL(ip6_sk_redirect); 1162 1163 static unsigned int ip6_default_advmss(const struct dst_entry *dst) 1164 { 1165 struct net_device *dev = dst->dev; 1166 unsigned int mtu = dst_mtu(dst); 1167 struct net *net = dev_net(dev); 1168 1169 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 1170 1171 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 1172 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 1173 1174 /* 1175 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 1176 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 1177 * IPV6_MAXPLEN is also valid and means: "any MSS, 1178 * rely only on pmtu discovery" 1179 */ 1180 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 1181 mtu = IPV6_MAXPLEN; 1182 return mtu; 1183 } 1184 1185 static unsigned int ip6_mtu(const struct dst_entry *dst) 1186 { 1187 struct inet6_dev *idev; 1188 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 1189 1190 if (mtu) 1191 return mtu; 1192 1193 mtu = IPV6_MIN_MTU; 1194 1195 rcu_read_lock(); 1196 idev = __in6_dev_get(dst->dev); 1197 if (idev) 1198 mtu = idev->cnf.mtu6; 1199 rcu_read_unlock(); 1200 1201 return mtu; 1202 } 1203 1204 static struct dst_entry *icmp6_dst_gc_list; 1205 static DEFINE_SPINLOCK(icmp6_dst_lock); 1206 1207 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 1208 struct neighbour *neigh, 1209 struct flowi6 *fl6) 1210 { 1211 struct dst_entry *dst; 1212 struct rt6_info *rt; 1213 struct inet6_dev *idev = in6_dev_get(dev); 1214 struct net *net = dev_net(dev); 1215 1216 if (unlikely(!idev)) 1217 return ERR_PTR(-ENODEV); 1218 1219 rt = ip6_dst_alloc(net, dev, 0, NULL); 1220 if (unlikely(!rt)) { 1221 in6_dev_put(idev); 1222 dst = ERR_PTR(-ENOMEM); 1223 goto out; 1224 } 1225 1226 if (neigh) 1227 neigh_hold(neigh); 1228 else { 1229 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); 1230 if (IS_ERR(neigh)) { 1231 in6_dev_put(idev); 1232 dst_free(&rt->dst); 1233 return ERR_CAST(neigh); 1234 } 1235 } 1236 1237 rt->dst.flags |= DST_HOST; 1238 rt->dst.output = ip6_output; 1239 rt->n = neigh; 1240 atomic_set(&rt->dst.__refcnt, 1); 1241 rt->rt6i_dst.addr = fl6->daddr; 1242 rt->rt6i_dst.plen = 128; 1243 rt->rt6i_idev = idev; 1244 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); 1245 1246 spin_lock_bh(&icmp6_dst_lock); 1247 rt->dst.next = icmp6_dst_gc_list; 1248 icmp6_dst_gc_list = &rt->dst; 1249 spin_unlock_bh(&icmp6_dst_lock); 1250 1251 fib6_force_start_gc(net); 1252 1253 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); 1254 1255 out: 1256 return dst; 1257 } 1258 1259 int icmp6_dst_gc(void) 1260 { 1261 struct dst_entry *dst, **pprev; 1262 int more = 0; 1263 1264 spin_lock_bh(&icmp6_dst_lock); 1265 pprev = &icmp6_dst_gc_list; 1266 1267 while ((dst = *pprev) != NULL) { 1268 if (!atomic_read(&dst->__refcnt)) { 1269 *pprev = dst->next; 1270 dst_free(dst); 1271 } else { 1272 pprev = &dst->next; 1273 ++more; 1274 } 1275 } 1276 1277 spin_unlock_bh(&icmp6_dst_lock); 1278 1279 return more; 1280 } 1281 1282 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1283 void *arg) 1284 { 1285 struct dst_entry *dst, **pprev; 1286 1287 spin_lock_bh(&icmp6_dst_lock); 1288 pprev = &icmp6_dst_gc_list; 1289 while ((dst = 
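/*
 * Worked example for ip6_default_advmss() above: with a 1500 byte MTU the
 * advertised MSS is 1500 - 40 (struct ipv6hdr) - 20 (struct tcphdr) =
 * 1440.  The result is raised to ip6_rt_min_advmss if it would fall below
 * that sysctl, and for very large MTUs it is capped at IPV6_MAXPLEN,
 * which by convention means "no fixed limit, rely on PMTU discovery".
 */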
*pprev) != NULL) { 1290 struct rt6_info *rt = (struct rt6_info *) dst; 1291 if (func(rt, arg)) { 1292 *pprev = dst->next; 1293 dst_free(dst); 1294 } else { 1295 pprev = &dst->next; 1296 } 1297 } 1298 spin_unlock_bh(&icmp6_dst_lock); 1299 } 1300 1301 static int ip6_dst_gc(struct dst_ops *ops) 1302 { 1303 unsigned long now = jiffies; 1304 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 1305 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1306 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1307 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1308 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1309 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1310 int entries; 1311 1312 entries = dst_entries_get_fast(ops); 1313 if (time_after(rt_last_gc + rt_min_interval, now) && 1314 entries <= rt_max_size) 1315 goto out; 1316 1317 net->ipv6.ip6_rt_gc_expire++; 1318 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1319 net->ipv6.ip6_rt_last_gc = now; 1320 entries = dst_entries_get_slow(ops); 1321 if (entries < ops->gc_thresh) 1322 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1323 out: 1324 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1325 return entries > rt_max_size; 1326 } 1327 1328 /* Clean host part of a prefix. Not necessary in radix tree, 1329 but results in cleaner routing tables. 1330 1331 Remove it only when all the things will work! 1332 */ 1333 1334 int ip6_dst_hoplimit(struct dst_entry *dst) 1335 { 1336 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); 1337 if (hoplimit == 0) { 1338 struct net_device *dev = dst->dev; 1339 struct inet6_dev *idev; 1340 1341 rcu_read_lock(); 1342 idev = __in6_dev_get(dev); 1343 if (idev) 1344 hoplimit = idev->cnf.hop_limit; 1345 else 1346 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1347 rcu_read_unlock(); 1348 } 1349 return hoplimit; 1350 } 1351 EXPORT_SYMBOL(ip6_dst_hoplimit); 1352 1353 /* 1354 * 1355 */ 1356 1357 int ip6_route_add(struct fib6_config *cfg) 1358 { 1359 int err; 1360 struct net *net = cfg->fc_nlinfo.nl_net; 1361 struct rt6_info *rt = NULL; 1362 struct net_device *dev = NULL; 1363 struct inet6_dev *idev = NULL; 1364 struct fib6_table *table; 1365 int addr_type; 1366 1367 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1368 return -EINVAL; 1369 #ifndef CONFIG_IPV6_SUBTREES 1370 if (cfg->fc_src_len) 1371 return -EINVAL; 1372 #endif 1373 if (cfg->fc_ifindex) { 1374 err = -ENODEV; 1375 dev = dev_get_by_index(net, cfg->fc_ifindex); 1376 if (!dev) 1377 goto out; 1378 idev = in6_dev_get(dev); 1379 if (!idev) 1380 goto out; 1381 } 1382 1383 if (cfg->fc_metric == 0) 1384 cfg->fc_metric = IP6_RT_PRIO_USER; 1385 1386 err = -ENOBUFS; 1387 if (cfg->fc_nlinfo.nlh && 1388 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { 1389 table = fib6_get_table(net, cfg->fc_table); 1390 if (!table) { 1391 pr_warn("NLM_F_CREATE should be specified when creating new route\n"); 1392 table = fib6_new_table(net, cfg->fc_table); 1393 } 1394 } else { 1395 table = fib6_new_table(net, cfg->fc_table); 1396 } 1397 1398 if (!table) 1399 goto out; 1400 1401 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); 1402 1403 if (!rt) { 1404 err = -ENOMEM; 1405 goto out; 1406 } 1407 1408 if (cfg->fc_flags & RTF_EXPIRES) 1409 rt6_set_expires(rt, jiffies + 1410 clock_t_to_jiffies(cfg->fc_expires)); 1411 else 1412 rt6_clean_expires(rt); 1413 1414 if (cfg->fc_protocol == RTPROT_UNSPEC) 1415 cfg->fc_protocol = RTPROT_BOOT; 1416 rt->rt6i_protocol = cfg->fc_protocol; 1417 1418 addr_type = 
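/*
 * ip6_dst_gc() above paces itself: it returns early while the entry count
 * is at or below ip6_rt_max_size and the previous run was less than
 * ip6_rt_gc_min_interval ago; otherwise ip6_rt_gc_expire grows by one per
 * invocation and is decayed on every call by expire >> elasticity.
 * rt6_alloc_cow() temporarily sets elasticity to 1 and min_interval to 0
 * before calling ip6_dst_gc(), so a collection pass is not skipped when
 * the neighbour table overflows.
 */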
ipv6_addr_type(&cfg->fc_dst);

        if (addr_type & IPV6_ADDR_MULTICAST)
                rt->dst.input = ip6_mc_input;
        else if (cfg->fc_flags & RTF_LOCAL)
                rt->dst.input = ip6_input;
        else
                rt->dst.input = ip6_forward;

        rt->dst.output = ip6_output;

        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
        rt->rt6i_dst.plen = cfg->fc_dst_len;
        if (rt->rt6i_dst.plen == 128)
                rt->dst.flags |= DST_HOST;

        if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
                u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
                if (!metrics) {
                        err = -ENOMEM;
                        goto out;
                }
                dst_init_metrics(&rt->dst, metrics, 0);
        }
#ifdef CONFIG_IPV6_SUBTREES
        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
        rt->rt6i_src.plen = cfg->fc_src_len;
#endif

        rt->rt6i_metric = cfg->fc_metric;

        /* We cannot add true routes via loopback here; they would result
           in kernel looping, so promote them to reject routes.
         */
        if ((cfg->fc_flags & RTF_REJECT) ||
            (dev && (dev->flags & IFF_LOOPBACK) &&
             !(addr_type & IPV6_ADDR_LOOPBACK) &&
             !(cfg->fc_flags & RTF_LOCAL))) {
                /* hold loopback dev/idev if we haven't done so. */
                if (dev != net->loopback_dev) {
                        if (dev) {
                                dev_put(dev);
                                in6_dev_put(idev);
                        }
                        dev = net->loopback_dev;
                        dev_hold(dev);
                        idev = in6_dev_get(dev);
                        if (!idev) {
                                err = -ENODEV;
                                goto out;
                        }
                }
                rt->dst.output = ip6_pkt_discard_out;
                rt->dst.input = ip6_pkt_discard;
                rt->dst.error = -ENETUNREACH;
                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                goto install_route;
        }

        if (cfg->fc_flags & RTF_GATEWAY) {
                const struct in6_addr *gw_addr;
                int gwa_type;

                gw_addr = &cfg->fc_gateway;
                rt->rt6i_gateway = *gw_addr;
                gwa_type = ipv6_addr_type(gw_addr);

                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
                        struct rt6_info *grt;

                        /* IPv6 strictly forbids using a non-link-local
                           address as a nexthop; otherwise the router would
                           not be able to send redirects.  That is generally
                           the right thing, but in some (rare!) circumstances
                           (SIT, PtP, NBMA NOARP links) it is handy to allow
                           some exceptions.
--ANK 1494 */ 1495 err = -EINVAL; 1496 if (!(gwa_type & IPV6_ADDR_UNICAST)) 1497 goto out; 1498 1499 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1500 1501 err = -EHOSTUNREACH; 1502 if (!grt) 1503 goto out; 1504 if (dev) { 1505 if (dev != grt->dst.dev) { 1506 dst_release(&grt->dst); 1507 goto out; 1508 } 1509 } else { 1510 dev = grt->dst.dev; 1511 idev = grt->rt6i_idev; 1512 dev_hold(dev); 1513 in6_dev_hold(grt->rt6i_idev); 1514 } 1515 if (!(grt->rt6i_flags & RTF_GATEWAY)) 1516 err = 0; 1517 dst_release(&grt->dst); 1518 1519 if (err) 1520 goto out; 1521 } 1522 err = -EINVAL; 1523 if (!dev || (dev->flags & IFF_LOOPBACK)) 1524 goto out; 1525 } 1526 1527 err = -ENODEV; 1528 if (!dev) 1529 goto out; 1530 1531 if (!ipv6_addr_any(&cfg->fc_prefsrc)) { 1532 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { 1533 err = -EINVAL; 1534 goto out; 1535 } 1536 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; 1537 rt->rt6i_prefsrc.plen = 128; 1538 } else 1539 rt->rt6i_prefsrc.plen = 0; 1540 1541 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1542 err = rt6_bind_neighbour(rt, dev); 1543 if (err) 1544 goto out; 1545 } 1546 1547 rt->rt6i_flags = cfg->fc_flags; 1548 1549 install_route: 1550 if (cfg->fc_mx) { 1551 struct nlattr *nla; 1552 int remaining; 1553 1554 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1555 int type = nla_type(nla); 1556 1557 if (type) { 1558 if (type > RTAX_MAX) { 1559 err = -EINVAL; 1560 goto out; 1561 } 1562 1563 dst_metric_set(&rt->dst, type, nla_get_u32(nla)); 1564 } 1565 } 1566 } 1567 1568 rt->dst.dev = dev; 1569 rt->rt6i_idev = idev; 1570 rt->rt6i_table = table; 1571 1572 cfg->fc_nlinfo.nl_net = dev_net(dev); 1573 1574 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1575 1576 out: 1577 if (dev) 1578 dev_put(dev); 1579 if (idev) 1580 in6_dev_put(idev); 1581 if (rt) 1582 dst_free(&rt->dst); 1583 return err; 1584 } 1585 1586 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1587 { 1588 int err; 1589 struct fib6_table *table; 1590 struct net *net = dev_net(rt->dst.dev); 1591 1592 if (rt == net->ipv6.ip6_null_entry) 1593 return -ENOENT; 1594 1595 table = rt->rt6i_table; 1596 write_lock_bh(&table->tb6_lock); 1597 1598 err = fib6_del(rt, info); 1599 dst_release(&rt->dst); 1600 1601 write_unlock_bh(&table->tb6_lock); 1602 1603 return err; 1604 } 1605 1606 int ip6_del_rt(struct rt6_info *rt) 1607 { 1608 struct nl_info info = { 1609 .nl_net = dev_net(rt->dst.dev), 1610 }; 1611 return __ip6_del_rt(rt, &info); 1612 } 1613 1614 static int ip6_route_del(struct fib6_config *cfg) 1615 { 1616 struct fib6_table *table; 1617 struct fib6_node *fn; 1618 struct rt6_info *rt; 1619 int err = -ESRCH; 1620 1621 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 1622 if (!table) 1623 return err; 1624 1625 read_lock_bh(&table->tb6_lock); 1626 1627 fn = fib6_locate(&table->tb6_root, 1628 &cfg->fc_dst, cfg->fc_dst_len, 1629 &cfg->fc_src, cfg->fc_src_len); 1630 1631 if (fn) { 1632 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1633 if (cfg->fc_ifindex && 1634 (!rt->dst.dev || 1635 rt->dst.dev->ifindex != cfg->fc_ifindex)) 1636 continue; 1637 if (cfg->fc_flags & RTF_GATEWAY && 1638 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1639 continue; 1640 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1641 continue; 1642 dst_hold(&rt->dst); 1643 read_unlock_bh(&table->tb6_lock); 1644 1645 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1646 } 1647 } 1648 read_unlock_bh(&table->tb6_lock); 1649 1650 return err; 1651 } 1652 1653 static void 
rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 1654 { 1655 struct net *net = dev_net(skb->dev); 1656 struct netevent_redirect netevent; 1657 struct rt6_info *rt, *nrt = NULL; 1658 const struct in6_addr *target; 1659 struct ndisc_options ndopts; 1660 const struct in6_addr *dest; 1661 struct neighbour *old_neigh; 1662 struct inet6_dev *in6_dev; 1663 struct neighbour *neigh; 1664 struct icmp6hdr *icmph; 1665 int optlen, on_link; 1666 u8 *lladdr; 1667 1668 optlen = skb->tail - skb->transport_header; 1669 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); 1670 1671 if (optlen < 0) { 1672 net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); 1673 return; 1674 } 1675 1676 icmph = icmp6_hdr(skb); 1677 target = (const struct in6_addr *) (icmph + 1); 1678 dest = target + 1; 1679 1680 if (ipv6_addr_is_multicast(dest)) { 1681 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); 1682 return; 1683 } 1684 1685 on_link = 0; 1686 if (ipv6_addr_equal(dest, target)) { 1687 on_link = 1; 1688 } else if (ipv6_addr_type(target) != 1689 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { 1690 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); 1691 return; 1692 } 1693 1694 in6_dev = __in6_dev_get(skb->dev); 1695 if (!in6_dev) 1696 return; 1697 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) 1698 return; 1699 1700 /* RFC2461 8.1: 1701 * The IP source address of the Redirect MUST be the same as the current 1702 * first-hop router for the specified ICMP Destination Address. 1703 */ 1704 1705 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { 1706 net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); 1707 return; 1708 } 1709 1710 lladdr = NULL; 1711 if (ndopts.nd_opts_tgt_lladdr) { 1712 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, 1713 skb->dev); 1714 if (!lladdr) { 1715 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); 1716 return; 1717 } 1718 } 1719 1720 rt = (struct rt6_info *) dst; 1721 if (rt == net->ipv6.ip6_null_entry) { 1722 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); 1723 return; 1724 } 1725 1726 /* Redirect received -> path was valid. 1727 * Look, redirects are sent only in response to data packets, 1728 * so that this nexthop apparently is reachable. --ANK 1729 */ 1730 dst_confirm(&rt->dst); 1731 1732 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); 1733 if (!neigh) 1734 return; 1735 1736 /* Duplicate redirect: silently ignore. */ 1737 old_neigh = rt->n; 1738 if (neigh == old_neigh) 1739 goto out; 1740 1741 /* 1742 * We have finally decided to accept it. 1743 */ 1744 1745 neigh_update(neigh, lladdr, NUD_STALE, 1746 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1747 NEIGH_UPDATE_F_OVERRIDE| 1748 (on_link ? 
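/*
 * The neigh_update() flags chosen here mark the redirect target as a
 * router (NEIGH_UPDATE_F_OVERRIDE_ISROUTER | NEIGH_UPDATE_F_ISROUTER)
 * only when the redirect is not on-link, i.e. when the target differs
 * from the destination and is therefore a better first-hop router rather
 * than the destination host itself (see the RFC 2461 8.1 comment above).
 */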
0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1749 NEIGH_UPDATE_F_ISROUTER)) 1750 ); 1751 1752 nrt = ip6_rt_copy(rt, dest); 1753 if (!nrt) 1754 goto out; 1755 1756 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1757 if (on_link) 1758 nrt->rt6i_flags &= ~RTF_GATEWAY; 1759 1760 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 1761 nrt->n = neigh_clone(neigh); 1762 1763 if (ip6_ins_rt(nrt)) 1764 goto out; 1765 1766 netevent.old = &rt->dst; 1767 netevent.old_neigh = old_neigh; 1768 netevent.new = &nrt->dst; 1769 netevent.new_neigh = neigh; 1770 netevent.daddr = dest; 1771 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1772 1773 if (rt->rt6i_flags & RTF_CACHE) { 1774 rt = (struct rt6_info *) dst_clone(&rt->dst); 1775 ip6_del_rt(rt); 1776 } 1777 1778 out: 1779 neigh_release(neigh); 1780 } 1781 1782 /* 1783 * Misc support functions 1784 */ 1785 1786 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, 1787 const struct in6_addr *dest) 1788 { 1789 struct net *net = dev_net(ort->dst.dev); 1790 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, 1791 ort->rt6i_table); 1792 1793 if (rt) { 1794 rt->dst.input = ort->dst.input; 1795 rt->dst.output = ort->dst.output; 1796 rt->dst.flags |= DST_HOST; 1797 1798 rt->rt6i_dst.addr = *dest; 1799 rt->rt6i_dst.plen = 128; 1800 dst_copy_metrics(&rt->dst, &ort->dst); 1801 rt->dst.error = ort->dst.error; 1802 rt->rt6i_idev = ort->rt6i_idev; 1803 if (rt->rt6i_idev) 1804 in6_dev_hold(rt->rt6i_idev); 1805 rt->dst.lastuse = jiffies; 1806 1807 rt->rt6i_gateway = ort->rt6i_gateway; 1808 rt->rt6i_flags = ort->rt6i_flags; 1809 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == 1810 (RTF_DEFAULT | RTF_ADDRCONF)) 1811 rt6_set_from(rt, ort); 1812 else 1813 rt6_clean_expires(rt); 1814 rt->rt6i_metric = 0; 1815 1816 #ifdef CONFIG_IPV6_SUBTREES 1817 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1818 #endif 1819 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key)); 1820 rt->rt6i_table = ort->rt6i_table; 1821 } 1822 return rt; 1823 } 1824 1825 #ifdef CONFIG_IPV6_ROUTE_INFO 1826 static struct rt6_info *rt6_get_route_info(struct net *net, 1827 const struct in6_addr *prefix, int prefixlen, 1828 const struct in6_addr *gwaddr, int ifindex) 1829 { 1830 struct fib6_node *fn; 1831 struct rt6_info *rt = NULL; 1832 struct fib6_table *table; 1833 1834 table = fib6_get_table(net, RT6_TABLE_INFO); 1835 if (!table) 1836 return NULL; 1837 1838 write_lock_bh(&table->tb6_lock); 1839 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1840 if (!fn) 1841 goto out; 1842 1843 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1844 if (rt->dst.dev->ifindex != ifindex) 1845 continue; 1846 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1847 continue; 1848 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1849 continue; 1850 dst_hold(&rt->dst); 1851 break; 1852 } 1853 out: 1854 write_unlock_bh(&table->tb6_lock); 1855 return rt; 1856 } 1857 1858 static struct rt6_info *rt6_add_route_info(struct net *net, 1859 const struct in6_addr *prefix, int prefixlen, 1860 const struct in6_addr *gwaddr, int ifindex, 1861 unsigned int pref) 1862 { 1863 struct fib6_config cfg = { 1864 .fc_table = RT6_TABLE_INFO, 1865 .fc_metric = IP6_RT_PRIO_USER, 1866 .fc_ifindex = ifindex, 1867 .fc_dst_len = prefixlen, 1868 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1869 RTF_UP | RTF_PREF(pref), 1870 .fc_nlinfo.pid = 0, 1871 .fc_nlinfo.nlh = NULL, 1872 .fc_nlinfo.nl_net = net, 1873 }; 1874 1875 cfg.fc_dst = 
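/*
 * The fib6_config filled in here describes an RA-learned specific route:
 * it lives in RT6_TABLE_INFO, is flagged RTF_ROUTEINFO | RTF_ADDRCONF |
 * RTF_GATEWAY and carries the advertised preference.  rt6_route_rcv()
 * above drives its lifecycle: a zero lifetime deletes an existing entry,
 * a non-zero one adds or refreshes it and sets or clears the expiry.
 */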
*prefix; 1876 cfg.fc_gateway = *gwaddr; 1877 1878 /* We should treat it as a default route if prefix length is 0. */ 1879 if (!prefixlen) 1880 cfg.fc_flags |= RTF_DEFAULT; 1881 1882 ip6_route_add(&cfg); 1883 1884 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); 1885 } 1886 #endif 1887 1888 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) 1889 { 1890 struct rt6_info *rt; 1891 struct fib6_table *table; 1892 1893 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 1894 if (!table) 1895 return NULL; 1896 1897 write_lock_bh(&table->tb6_lock); 1898 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { 1899 if (dev == rt->dst.dev && 1900 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1901 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1902 break; 1903 } 1904 if (rt) 1905 dst_hold(&rt->dst); 1906 write_unlock_bh(&table->tb6_lock); 1907 return rt; 1908 } 1909 1910 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, 1911 struct net_device *dev, 1912 unsigned int pref) 1913 { 1914 struct fib6_config cfg = { 1915 .fc_table = RT6_TABLE_DFLT, 1916 .fc_metric = IP6_RT_PRIO_USER, 1917 .fc_ifindex = dev->ifindex, 1918 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1919 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1920 .fc_nlinfo.pid = 0, 1921 .fc_nlinfo.nlh = NULL, 1922 .fc_nlinfo.nl_net = dev_net(dev), 1923 }; 1924 1925 cfg.fc_gateway = *gwaddr; 1926 1927 ip6_route_add(&cfg); 1928 1929 return rt6_get_dflt_router(gwaddr, dev); 1930 } 1931 1932 void rt6_purge_dflt_routers(struct net *net) 1933 { 1934 struct rt6_info *rt; 1935 struct fib6_table *table; 1936 1937 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1938 table = fib6_get_table(net, RT6_TABLE_DFLT); 1939 if (!table) 1940 return; 1941 1942 restart: 1943 read_lock_bh(&table->tb6_lock); 1944 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 1945 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1946 dst_hold(&rt->dst); 1947 read_unlock_bh(&table->tb6_lock); 1948 ip6_del_rt(rt); 1949 goto restart; 1950 } 1951 } 1952 read_unlock_bh(&table->tb6_lock); 1953 } 1954 1955 static void rtmsg_to_fib6_config(struct net *net, 1956 struct in6_rtmsg *rtmsg, 1957 struct fib6_config *cfg) 1958 { 1959 memset(cfg, 0, sizeof(*cfg)); 1960 1961 cfg->fc_table = RT6_TABLE_MAIN; 1962 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 1963 cfg->fc_metric = rtmsg->rtmsg_metric; 1964 cfg->fc_expires = rtmsg->rtmsg_info; 1965 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 1966 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1967 cfg->fc_flags = rtmsg->rtmsg_flags; 1968 1969 cfg->fc_nlinfo.nl_net = net; 1970 1971 cfg->fc_dst = rtmsg->rtmsg_dst; 1972 cfg->fc_src = rtmsg->rtmsg_src; 1973 cfg->fc_gateway = rtmsg->rtmsg_gateway; 1974 } 1975 1976 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 1977 { 1978 struct fib6_config cfg; 1979 struct in6_rtmsg rtmsg; 1980 int err; 1981 1982 switch(cmd) { 1983 case SIOCADDRT: /* Add a route */ 1984 case SIOCDELRT: /* Delete a route */ 1985 if (!capable(CAP_NET_ADMIN)) 1986 return -EPERM; 1987 err = copy_from_user(&rtmsg, arg, 1988 sizeof(struct in6_rtmsg)); 1989 if (err) 1990 return -EFAULT; 1991 1992 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 1993 1994 rtnl_lock(); 1995 switch (cmd) { 1996 case SIOCADDRT: 1997 err = ip6_route_add(&cfg); 1998 break; 1999 case SIOCDELRT: 2000 err = ip6_route_del(&cfg); 2001 break; 2002 default: 2003 err = -EINVAL; 2004 } 2005 rtnl_unlock(); 2006 2007 return err; 2008 } 2009 2010 
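/*
 * A hypothetical user-space caller of the SIOCADDRT/SIOCDELRT ioctls
 * handled above (illustrative only) does roughly:
 *
 *      struct in6_rtmsg rtmsg = { 0 };
 *      int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *      inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *      rtmsg.rtmsg_dst_len = 32;
 *      rtmsg.rtmsg_metric = 1;
 *      rtmsg.rtmsg_flags = RTF_UP;
 *      rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *      ioctl(fd, SIOCADDRT, &rtmsg);
 *
 * The struct arrives via copy_from_user() above and is converted with
 * rtmsg_to_fib6_config(); CAP_NET_ADMIN is required.
 */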
return -EINVAL; 2011 } 2012 2013 /* 2014 * Drop the packet on the floor 2015 */ 2016 2017 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 2018 { 2019 int type; 2020 struct dst_entry *dst = skb_dst(skb); 2021 switch (ipstats_mib_noroutes) { 2022 case IPSTATS_MIB_INNOROUTES: 2023 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 2024 if (type == IPV6_ADDR_ANY) { 2025 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2026 IPSTATS_MIB_INADDRERRORS); 2027 break; 2028 } 2029 /* FALLTHROUGH */ 2030 case IPSTATS_MIB_OUTNOROUTES: 2031 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2032 ipstats_mib_noroutes); 2033 break; 2034 } 2035 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 2036 kfree_skb(skb); 2037 return 0; 2038 } 2039 2040 static int ip6_pkt_discard(struct sk_buff *skb) 2041 { 2042 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 2043 } 2044 2045 static int ip6_pkt_discard_out(struct sk_buff *skb) 2046 { 2047 skb->dev = skb_dst(skb)->dev; 2048 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2049 } 2050 2051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2052 2053 static int ip6_pkt_prohibit(struct sk_buff *skb) 2054 { 2055 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2056 } 2057 2058 static int ip6_pkt_prohibit_out(struct sk_buff *skb) 2059 { 2060 skb->dev = skb_dst(skb)->dev; 2061 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2062 } 2063 2064 #endif 2065 2066 /* 2067 * Allocate a dst for local (unicast / anycast) address. 2068 */ 2069 2070 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 2071 const struct in6_addr *addr, 2072 bool anycast) 2073 { 2074 struct net *net = dev_net(idev->dev); 2075 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); 2076 int err; 2077 2078 if (!rt) { 2079 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); 2080 return ERR_PTR(-ENOMEM); 2081 } 2082 2083 in6_dev_hold(idev); 2084 2085 rt->dst.flags |= DST_HOST; 2086 rt->dst.input = ip6_input; 2087 rt->dst.output = ip6_output; 2088 rt->rt6i_idev = idev; 2089 2090 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2091 if (anycast) 2092 rt->rt6i_flags |= RTF_ANYCAST; 2093 else 2094 rt->rt6i_flags |= RTF_LOCAL; 2095 err = rt6_bind_neighbour(rt, rt->dst.dev); 2096 if (err) { 2097 dst_free(&rt->dst); 2098 return ERR_PTR(err); 2099 } 2100 2101 rt->rt6i_dst.addr = *addr; 2102 rt->rt6i_dst.plen = 128; 2103 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2104 2105 atomic_set(&rt->dst.__refcnt, 1); 2106 2107 return rt; 2108 } 2109 2110 int ip6_route_get_saddr(struct net *net, 2111 struct rt6_info *rt, 2112 const struct in6_addr *daddr, 2113 unsigned int prefs, 2114 struct in6_addr *saddr) 2115 { 2116 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); 2117 int err = 0; 2118 if (rt->rt6i_prefsrc.plen) 2119 *saddr = rt->rt6i_prefsrc.addr; 2120 else 2121 err = ipv6_dev_get_saddr(net, idev ? 
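/*
 * ip6_route_get_saddr() prefers an explicitly configured preferred source
 * (rt->rt6i_prefsrc, plen 128) and only falls back to the generic source
 * address selection call completed here, scoped to the route's device
 * when one is known.
 */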
                                          idev->dev : NULL,
                                          daddr, prefs, saddr);
        return err;
}

/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
        struct net_device *dev;
        struct net *net;
        struct in6_addr *addr;
};

static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
        struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
        struct net *net = ((struct arg_dev_net_ip *)arg)->net;
        struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

        if (((void *)rt->dst.dev == dev || !dev) &&
            rt != net->ipv6.ip6_null_entry &&
            ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
                /* remove prefsrc entry */
                rt->rt6i_prefsrc.plen = 0;
        }
        return 0;
}

void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
        struct net *net = dev_net(ifp->idev->dev);
        struct arg_dev_net_ip adni = {
                .dev = ifp->idev->dev,
                .net = net,
                .addr = &ifp->addr,
        };
        fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
}

struct arg_dev_net {
        struct net_device *dev;
        struct net *net;
};

static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
        const struct arg_dev_net *adn = arg;
        const struct net_device *dev = adn->dev;

        if ((rt->dst.dev == dev || !dev) &&
            rt != adn->net->ipv6.ip6_null_entry)
                return -1;

        return 0;
}

void rt6_ifdown(struct net *net, struct net_device *dev)
{
        struct arg_dev_net adn = {
                .dev = dev,
                .net = net,
        };

        fib6_clean_all(net, fib6_ifdown, 0, &adn);
        icmp6_clean_all(fib6_ifdown, &adn);
}

struct rt6_mtu_change_arg {
        struct net_device *dev;
        unsigned int mtu;
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
        struct inet6_dev *idev;

        /* In IPv6, PMTU discovery is not optional, so the RTAX_MTU lock
           cannot disable it.  We still use this lock to block changes
           caused by addrconf/ndisc.
        */

        idev = __in6_dev_get(arg->dev);
        if (!idev)
                return 0;

        /* There is no way to discover an administrative increase of the
           IPv6 PMTU, so such an increase has to be applied here.  RFC 1981
           does not cover administrative MTU increases, so updating the PMTU
           on an increase is a MUST (e.g. for jumbo frames).
         */
        /*
           If the new MTU is less than the route PMTU, the new MTU will be
           the lowest MTU in the path; update the route PMTU to reflect the
           decrease.  If the new MTU is greater than the route PMTU, and the
           old MTU was the lowest MTU in the path, update the route PMTU to
           reflect the increase.  In that case, if another node along the
           path still has a lower MTU, a Packet Too Big message will trigger
           PMTU discovery again.
2220 */ 2221 if (rt->dst.dev == arg->dev && 2222 !dst_metric_locked(&rt->dst, RTAX_MTU) && 2223 (dst_mtu(&rt->dst) >= arg->mtu || 2224 (dst_mtu(&rt->dst) < arg->mtu && 2225 dst_mtu(&rt->dst) == idev->cnf.mtu6))) { 2226 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2227 } 2228 return 0; 2229 } 2230 2231 void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 2232 { 2233 struct rt6_mtu_change_arg arg = { 2234 .dev = dev, 2235 .mtu = mtu, 2236 }; 2237 2238 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); 2239 } 2240 2241 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2242 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2243 [RTA_OIF] = { .type = NLA_U32 }, 2244 [RTA_IIF] = { .type = NLA_U32 }, 2245 [RTA_PRIORITY] = { .type = NLA_U32 }, 2246 [RTA_METRICS] = { .type = NLA_NESTED }, 2247 }; 2248 2249 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2250 struct fib6_config *cfg) 2251 { 2252 struct rtmsg *rtm; 2253 struct nlattr *tb[RTA_MAX+1]; 2254 int err; 2255 2256 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2257 if (err < 0) 2258 goto errout; 2259 2260 err = -EINVAL; 2261 rtm = nlmsg_data(nlh); 2262 memset(cfg, 0, sizeof(*cfg)); 2263 2264 cfg->fc_table = rtm->rtm_table; 2265 cfg->fc_dst_len = rtm->rtm_dst_len; 2266 cfg->fc_src_len = rtm->rtm_src_len; 2267 cfg->fc_flags = RTF_UP; 2268 cfg->fc_protocol = rtm->rtm_protocol; 2269 2270 if (rtm->rtm_type == RTN_UNREACHABLE) 2271 cfg->fc_flags |= RTF_REJECT; 2272 2273 if (rtm->rtm_type == RTN_LOCAL) 2274 cfg->fc_flags |= RTF_LOCAL; 2275 2276 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2277 cfg->fc_nlinfo.nlh = nlh; 2278 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2279 2280 if (tb[RTA_GATEWAY]) { 2281 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2282 cfg->fc_flags |= RTF_GATEWAY; 2283 } 2284 2285 if (tb[RTA_DST]) { 2286 int plen = (rtm->rtm_dst_len + 7) >> 3; 2287 2288 if (nla_len(tb[RTA_DST]) < plen) 2289 goto errout; 2290 2291 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2292 } 2293 2294 if (tb[RTA_SRC]) { 2295 int plen = (rtm->rtm_src_len + 7) >> 3; 2296 2297 if (nla_len(tb[RTA_SRC]) < plen) 2298 goto errout; 2299 2300 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2301 } 2302 2303 if (tb[RTA_PREFSRC]) 2304 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16); 2305 2306 if (tb[RTA_OIF]) 2307 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2308 2309 if (tb[RTA_PRIORITY]) 2310 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2311 2312 if (tb[RTA_METRICS]) { 2313 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2314 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2315 } 2316 2317 if (tb[RTA_TABLE]) 2318 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2319 2320 err = 0; 2321 errout: 2322 return err; 2323 } 2324 2325 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2326 { 2327 struct fib6_config cfg; 2328 int err; 2329 2330 err = rtm_to_fib6_config(skb, nlh, &cfg); 2331 if (err < 0) 2332 return err; 2333 2334 return ip6_route_del(&cfg); 2335 } 2336 2337 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2338 { 2339 struct fib6_config cfg; 2340 int err; 2341 2342 err = rtm_to_fib6_config(skb, nlh, &cfg); 2343 if (err < 0) 2344 return err; 2345 2346 return ip6_route_add(&cfg); 2347 } 2348 2349 static inline size_t rt6_nlmsg_size(void) 2350 { 2351 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2352 + nla_total_size(16) /* RTA_SRC */ 2353 + nla_total_size(16) /* RTA_DST */ 2354 + nla_total_size(16) /* RTA_GATEWAY */ 2355 + 
nla_total_size(16) /* RTA_PREFSRC */ 2356 + nla_total_size(4) /* RTA_TABLE */ 2357 + nla_total_size(4) /* RTA_IIF */ 2358 + nla_total_size(4) /* RTA_OIF */ 2359 + nla_total_size(4) /* RTA_PRIORITY */ 2360 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2361 + nla_total_size(sizeof(struct rta_cacheinfo)); 2362 } 2363 2364 static int rt6_fill_node(struct net *net, 2365 struct sk_buff *skb, struct rt6_info *rt, 2366 struct in6_addr *dst, struct in6_addr *src, 2367 int iif, int type, u32 pid, u32 seq, 2368 int prefix, int nowait, unsigned int flags) 2369 { 2370 struct rtmsg *rtm; 2371 struct nlmsghdr *nlh; 2372 long expires; 2373 u32 table; 2374 struct neighbour *n; 2375 2376 if (prefix) { /* user wants prefix routes only */ 2377 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2378 /* success since this is not a prefix route */ 2379 return 1; 2380 } 2381 } 2382 2383 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2384 if (!nlh) 2385 return -EMSGSIZE; 2386 2387 rtm = nlmsg_data(nlh); 2388 rtm->rtm_family = AF_INET6; 2389 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2390 rtm->rtm_src_len = rt->rt6i_src.plen; 2391 rtm->rtm_tos = 0; 2392 if (rt->rt6i_table) 2393 table = rt->rt6i_table->tb6_id; 2394 else 2395 table = RT6_TABLE_UNSPEC; 2396 rtm->rtm_table = table; 2397 if (nla_put_u32(skb, RTA_TABLE, table)) 2398 goto nla_put_failure; 2399 if (rt->rt6i_flags & RTF_REJECT) 2400 rtm->rtm_type = RTN_UNREACHABLE; 2401 else if (rt->rt6i_flags & RTF_LOCAL) 2402 rtm->rtm_type = RTN_LOCAL; 2403 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 2404 rtm->rtm_type = RTN_LOCAL; 2405 else 2406 rtm->rtm_type = RTN_UNICAST; 2407 rtm->rtm_flags = 0; 2408 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2409 rtm->rtm_protocol = rt->rt6i_protocol; 2410 if (rt->rt6i_flags & RTF_DYNAMIC) 2411 rtm->rtm_protocol = RTPROT_REDIRECT; 2412 else if (rt->rt6i_flags & RTF_ADDRCONF) { 2413 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) 2414 rtm->rtm_protocol = RTPROT_RA; 2415 else 2416 rtm->rtm_protocol = RTPROT_KERNEL; 2417 } 2418 2419 if (rt->rt6i_flags & RTF_CACHE) 2420 rtm->rtm_flags |= RTM_F_CLONED; 2421 2422 if (dst) { 2423 if (nla_put(skb, RTA_DST, 16, dst)) 2424 goto nla_put_failure; 2425 rtm->rtm_dst_len = 128; 2426 } else if (rtm->rtm_dst_len) 2427 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr)) 2428 goto nla_put_failure; 2429 #ifdef CONFIG_IPV6_SUBTREES 2430 if (src) { 2431 if (nla_put(skb, RTA_SRC, 16, src)) 2432 goto nla_put_failure; 2433 rtm->rtm_src_len = 128; 2434 } else if (rtm->rtm_src_len && 2435 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr)) 2436 goto nla_put_failure; 2437 #endif 2438 if (iif) { 2439 #ifdef CONFIG_IPV6_MROUTE 2440 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 2441 int err = ip6mr_get_route(net, skb, rtm, nowait); 2442 if (err <= 0) { 2443 if (!nowait) { 2444 if (err == 0) 2445 return 0; 2446 goto nla_put_failure; 2447 } else { 2448 if (err == -EMSGSIZE) 2449 goto nla_put_failure; 2450 } 2451 } 2452 } else 2453 #endif 2454 if (nla_put_u32(skb, RTA_IIF, iif)) 2455 goto nla_put_failure; 2456 } else if (dst) { 2457 struct in6_addr saddr_buf; 2458 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && 2459 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) 2460 goto nla_put_failure; 2461 } 2462 2463 if (rt->rt6i_prefsrc.plen) { 2464 struct in6_addr saddr_buf; 2465 saddr_buf = rt->rt6i_prefsrc.addr; 2466 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) 2467 goto nla_put_failure; 2468 } 2469 2470 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2471 goto nla_put_failure; 2472 2473 
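/* Gateway attribute: if a neighbour is bound to this route, its primary_key is the next-hop address reported as RTA_GATEWAY; rt->n is dereferenced under rcu_read_lock(). */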
rcu_read_lock(); 2474 n = rt->n; 2475 if (n) { 2476 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { 2477 rcu_read_unlock(); 2478 goto nla_put_failure; 2479 } 2480 } 2481 rcu_read_unlock(); 2482 2483 if (rt->dst.dev && 2484 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 2485 goto nla_put_failure; 2486 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 2487 goto nla_put_failure; 2488 2489 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; 2490 2491 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 2492 goto nla_put_failure; 2493 2494 return nlmsg_end(skb, nlh); 2495 2496 nla_put_failure: 2497 nlmsg_cancel(skb, nlh); 2498 return -EMSGSIZE; 2499 } 2500 2501 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2502 { 2503 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2504 int prefix; 2505 2506 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2507 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2508 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2509 } else 2510 prefix = 0; 2511 2512 return rt6_fill_node(arg->net, 2513 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2514 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2515 prefix, 0, NLM_F_MULTI); 2516 } 2517 2518 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2519 { 2520 struct net *net = sock_net(in_skb->sk); 2521 struct nlattr *tb[RTA_MAX+1]; 2522 struct rt6_info *rt; 2523 struct sk_buff *skb; 2524 struct rtmsg *rtm; 2525 struct flowi6 fl6; 2526 int err, iif = 0, oif = 0; 2527 2528 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2529 if (err < 0) 2530 goto errout; 2531 2532 err = -EINVAL; 2533 memset(&fl6, 0, sizeof(fl6)); 2534 2535 if (tb[RTA_SRC]) { 2536 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2537 goto errout; 2538 2539 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 2540 } 2541 2542 if (tb[RTA_DST]) { 2543 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2544 goto errout; 2545 2546 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 2547 } 2548 2549 if (tb[RTA_IIF]) 2550 iif = nla_get_u32(tb[RTA_IIF]); 2551 2552 if (tb[RTA_OIF]) 2553 oif = nla_get_u32(tb[RTA_OIF]); 2554 2555 if (iif) { 2556 struct net_device *dev; 2557 int flags = 0; 2558 2559 dev = __dev_get_by_index(net, iif); 2560 if (!dev) { 2561 err = -ENODEV; 2562 goto errout; 2563 } 2564 2565 fl6.flowi6_iif = iif; 2566 2567 if (!ipv6_addr_any(&fl6.saddr)) 2568 flags |= RT6_LOOKUP_F_HAS_SADDR; 2569 2570 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6, 2571 flags); 2572 } else { 2573 fl6.flowi6_oif = oif; 2574 2575 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); 2576 } 2577 2578 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2579 if (!skb) { 2580 dst_release(&rt->dst); 2581 err = -ENOBUFS; 2582 goto errout; 2583 } 2584 2585 /* Reserve room for dummy headers, this skb can pass 2586 through good chunk of routing engine. 
2587 */ 2588 skb_reset_mac_header(skb); 2589 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2590 2591 skb_dst_set(skb, &rt->dst); 2592 2593 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 2594 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2595 nlh->nlmsg_seq, 0, 0, 0); 2596 if (err < 0) { 2597 kfree_skb(skb); 2598 goto errout; 2599 } 2600 2601 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2602 errout: 2603 return err; 2604 } 2605 2606 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2607 { 2608 struct sk_buff *skb; 2609 struct net *net = info->nl_net; 2610 u32 seq; 2611 int err; 2612 2613 err = -ENOBUFS; 2614 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 2615 2616 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2617 if (!skb) 2618 goto errout; 2619 2620 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2621 event, info->pid, seq, 0, 0, 0); 2622 if (err < 0) { 2623 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2624 WARN_ON(err == -EMSGSIZE); 2625 kfree_skb(skb); 2626 goto errout; 2627 } 2628 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2629 info->nlh, gfp_any()); 2630 return; 2631 errout: 2632 if (err < 0) 2633 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2634 } 2635 2636 static int ip6_route_dev_notify(struct notifier_block *this, 2637 unsigned long event, void *data) 2638 { 2639 struct net_device *dev = (struct net_device *)data; 2640 struct net *net = dev_net(dev); 2641 2642 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 2643 net->ipv6.ip6_null_entry->dst.dev = dev; 2644 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 2645 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2646 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 2647 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 2648 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 2649 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 2650 #endif 2651 } 2652 2653 return NOTIFY_OK; 2654 } 2655 2656 /* 2657 * /proc 2658 */ 2659 2660 #ifdef CONFIG_PROC_FS 2661 2662 struct rt6_proc_arg 2663 { 2664 char *buffer; 2665 int offset; 2666 int length; 2667 int skip; 2668 int len; 2669 }; 2670 2671 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2672 { 2673 struct seq_file *m = p_arg; 2674 struct neighbour *n; 2675 2676 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); 2677 2678 #ifdef CONFIG_IPV6_SUBTREES 2679 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); 2680 #else 2681 seq_puts(m, "00000000000000000000000000000000 00 "); 2682 #endif 2683 rcu_read_lock(); 2684 n = rt->n; 2685 if (n) { 2686 seq_printf(m, "%pi6", n->primary_key); 2687 } else { 2688 seq_puts(m, "00000000000000000000000000000000"); 2689 } 2690 rcu_read_unlock(); 2691 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2692 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 2693 rt->dst.__use, rt->rt6i_flags, 2694 rt->dst.dev ? 
rt->dst.dev->name : ""); 2695 return 0; 2696 } 2697 2698 static int ipv6_route_show(struct seq_file *m, void *v) 2699 { 2700 struct net *net = (struct net *)m->private; 2701 fib6_clean_all_ro(net, rt6_info_route, 0, m); 2702 return 0; 2703 } 2704 2705 static int ipv6_route_open(struct inode *inode, struct file *file) 2706 { 2707 return single_open_net(inode, file, ipv6_route_show); 2708 } 2709 2710 static const struct file_operations ipv6_route_proc_fops = { 2711 .owner = THIS_MODULE, 2712 .open = ipv6_route_open, 2713 .read = seq_read, 2714 .llseek = seq_lseek, 2715 .release = single_release_net, 2716 }; 2717 2718 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2719 { 2720 struct net *net = (struct net *)seq->private; 2721 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2722 net->ipv6.rt6_stats->fib_nodes, 2723 net->ipv6.rt6_stats->fib_route_nodes, 2724 net->ipv6.rt6_stats->fib_rt_alloc, 2725 net->ipv6.rt6_stats->fib_rt_entries, 2726 net->ipv6.rt6_stats->fib_rt_cache, 2727 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 2728 net->ipv6.rt6_stats->fib_discarded_routes); 2729 2730 return 0; 2731 } 2732 2733 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2734 { 2735 return single_open_net(inode, file, rt6_stats_seq_show); 2736 } 2737 2738 static const struct file_operations rt6_stats_seq_fops = { 2739 .owner = THIS_MODULE, 2740 .open = rt6_stats_seq_open, 2741 .read = seq_read, 2742 .llseek = seq_lseek, 2743 .release = single_release_net, 2744 }; 2745 #endif /* CONFIG_PROC_FS */ 2746 2747 #ifdef CONFIG_SYSCTL 2748 2749 static 2750 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2751 void __user *buffer, size_t *lenp, loff_t *ppos) 2752 { 2753 struct net *net; 2754 int delay; 2755 if (!write) 2756 return -EINVAL; 2757 2758 net = (struct net *)ctl->extra1; 2759 delay = net->ipv6.sysctl.flush_delay; 2760 proc_dointvec(ctl, write, buffer, lenp, ppos); 2761 fib6_run_gc(delay <= 0 ? 
~0UL : (unsigned long)delay, net); 2762 return 0; 2763 } 2764 2765 ctl_table ipv6_route_table_template[] = { 2766 { 2767 .procname = "flush", 2768 .data = &init_net.ipv6.sysctl.flush_delay, 2769 .maxlen = sizeof(int), 2770 .mode = 0200, 2771 .proc_handler = ipv6_sysctl_rtcache_flush 2772 }, 2773 { 2774 .procname = "gc_thresh", 2775 .data = &ip6_dst_ops_template.gc_thresh, 2776 .maxlen = sizeof(int), 2777 .mode = 0644, 2778 .proc_handler = proc_dointvec, 2779 }, 2780 { 2781 .procname = "max_size", 2782 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2783 .maxlen = sizeof(int), 2784 .mode = 0644, 2785 .proc_handler = proc_dointvec, 2786 }, 2787 { 2788 .procname = "gc_min_interval", 2789 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2790 .maxlen = sizeof(int), 2791 .mode = 0644, 2792 .proc_handler = proc_dointvec_jiffies, 2793 }, 2794 { 2795 .procname = "gc_timeout", 2796 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2797 .maxlen = sizeof(int), 2798 .mode = 0644, 2799 .proc_handler = proc_dointvec_jiffies, 2800 }, 2801 { 2802 .procname = "gc_interval", 2803 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 2804 .maxlen = sizeof(int), 2805 .mode = 0644, 2806 .proc_handler = proc_dointvec_jiffies, 2807 }, 2808 { 2809 .procname = "gc_elasticity", 2810 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 2811 .maxlen = sizeof(int), 2812 .mode = 0644, 2813 .proc_handler = proc_dointvec, 2814 }, 2815 { 2816 .procname = "mtu_expires", 2817 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 2818 .maxlen = sizeof(int), 2819 .mode = 0644, 2820 .proc_handler = proc_dointvec_jiffies, 2821 }, 2822 { 2823 .procname = "min_adv_mss", 2824 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 2825 .maxlen = sizeof(int), 2826 .mode = 0644, 2827 .proc_handler = proc_dointvec, 2828 }, 2829 { 2830 .procname = "gc_min_interval_ms", 2831 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2832 .maxlen = sizeof(int), 2833 .mode = 0644, 2834 .proc_handler = proc_dointvec_ms_jiffies, 2835 }, 2836 { } 2837 }; 2838 2839 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 2840 { 2841 struct ctl_table *table; 2842 2843 table = kmemdup(ipv6_route_table_template, 2844 sizeof(ipv6_route_table_template), 2845 GFP_KERNEL); 2846 2847 if (table) { 2848 table[0].data = &net->ipv6.sysctl.flush_delay; 2849 table[0].extra1 = net; 2850 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2851 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2852 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2853 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 2854 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 2855 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 2856 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2857 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2858 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2859 } 2860 2861 return table; 2862 } 2863 #endif 2864 2865 static int __net_init ip6_route_net_init(struct net *net) 2866 { 2867 int ret = -ENOMEM; 2868 2869 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2870 sizeof(net->ipv6.ip6_dst_ops)); 2871 2872 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 2873 goto out_ip6_dst_ops; 2874 2875 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2876 sizeof(*net->ipv6.ip6_null_entry), 2877 GFP_KERNEL); 2878 if (!net->ipv6.ip6_null_entry) 2879 goto out_ip6_dst_entries; 2880 net->ipv6.ip6_null_entry->dst.path = 2881 (struct dst_entry *)net->ipv6.ip6_null_entry; 2882 net->ipv6.ip6_null_entry->dst.ops = 
&net->ipv6.ip6_dst_ops; 2883 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 2884 ip6_template_metrics, true); 2885 2886 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2887 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2888 sizeof(*net->ipv6.ip6_prohibit_entry), 2889 GFP_KERNEL); 2890 if (!net->ipv6.ip6_prohibit_entry) 2891 goto out_ip6_null_entry; 2892 net->ipv6.ip6_prohibit_entry->dst.path = 2893 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2894 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2895 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 2896 ip6_template_metrics, true); 2897 2898 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2899 sizeof(*net->ipv6.ip6_blk_hole_entry), 2900 GFP_KERNEL); 2901 if (!net->ipv6.ip6_blk_hole_entry) 2902 goto out_ip6_prohibit_entry; 2903 net->ipv6.ip6_blk_hole_entry->dst.path = 2904 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2905 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2906 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 2907 ip6_template_metrics, true); 2908 #endif 2909 2910 net->ipv6.sysctl.flush_delay = 0; 2911 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2912 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2913 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2914 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2915 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2916 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2917 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2918 2919 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2920 2921 ret = 0; 2922 out: 2923 return ret; 2924 2925 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2926 out_ip6_prohibit_entry: 2927 kfree(net->ipv6.ip6_prohibit_entry); 2928 out_ip6_null_entry: 2929 kfree(net->ipv6.ip6_null_entry); 2930 #endif 2931 out_ip6_dst_entries: 2932 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2933 out_ip6_dst_ops: 2934 goto out; 2935 } 2936 2937 static void __net_exit ip6_route_net_exit(struct net *net) 2938 { 2939 kfree(net->ipv6.ip6_null_entry); 2940 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2941 kfree(net->ipv6.ip6_prohibit_entry); 2942 kfree(net->ipv6.ip6_blk_hole_entry); 2943 #endif 2944 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2945 } 2946 2947 static int __net_init ip6_route_net_init_late(struct net *net) 2948 { 2949 #ifdef CONFIG_PROC_FS 2950 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2951 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2952 #endif 2953 return 0; 2954 } 2955 2956 static void __net_exit ip6_route_net_exit_late(struct net *net) 2957 { 2958 #ifdef CONFIG_PROC_FS 2959 proc_net_remove(net, "ipv6_route"); 2960 proc_net_remove(net, "rt6_stats"); 2961 #endif 2962 } 2963 2964 static struct pernet_operations ip6_route_net_ops = { 2965 .init = ip6_route_net_init, 2966 .exit = ip6_route_net_exit, 2967 }; 2968 2969 static int __net_init ipv6_inetpeer_init(struct net *net) 2970 { 2971 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 2972 2973 if (!bp) 2974 return -ENOMEM; 2975 inet_peer_base_init(bp); 2976 net->ipv6.peers = bp; 2977 return 0; 2978 } 2979 2980 static void __net_exit ipv6_inetpeer_exit(struct net *net) 2981 { 2982 struct inet_peer_base *bp = net->ipv6.peers; 2983 2984 net->ipv6.peers = NULL; 2985 inetpeer_invalidate_tree(bp); 2986 kfree(bp); 2987 } 2988 2989 static struct pernet_operations ipv6_inetpeer_ops = { 2990 .init = ipv6_inetpeer_init, 2991 .exit = ipv6_inetpeer_exit, 2992 }; 2993 2994 static struct pernet_operations ip6_route_net_late_ops = { 
2995 .init = ip6_route_net_init_late, 2996 .exit = ip6_route_net_exit_late, 2997 }; 2998 2999 static struct notifier_block ip6_route_dev_notifier = { 3000 .notifier_call = ip6_route_dev_notify, 3001 .priority = 0, 3002 }; 3003 3004 int __init ip6_route_init(void) 3005 { 3006 int ret; 3007 3008 ret = -ENOMEM; 3009 ip6_dst_ops_template.kmem_cachep = 3010 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 3011 SLAB_HWCACHE_ALIGN, NULL); 3012 if (!ip6_dst_ops_template.kmem_cachep) 3013 goto out; 3014 3015 ret = dst_entries_init(&ip6_dst_blackhole_ops); 3016 if (ret) 3017 goto out_kmem_cache; 3018 3019 ret = register_pernet_subsys(&ipv6_inetpeer_ops); 3020 if (ret) 3021 goto out_dst_entries; 3022 3023 ret = register_pernet_subsys(&ip6_route_net_ops); 3024 if (ret) 3025 goto out_register_inetpeer; 3026 3027 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 3028 3029 /* Registering of the loopback is done before this portion of code, 3030 * the loopback reference in rt6_info will not be taken, do it 3031 * manually for init_net */ 3032 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 3033 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3034 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3035 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 3036 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3037 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 3038 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3039 #endif 3040 ret = fib6_init(); 3041 if (ret) 3042 goto out_register_subsys; 3043 3044 ret = xfrm6_init(); 3045 if (ret) 3046 goto out_fib6_init; 3047 3048 ret = fib6_rules_init(); 3049 if (ret) 3050 goto xfrm6_init; 3051 3052 ret = register_pernet_subsys(&ip6_route_net_late_ops); 3053 if (ret) 3054 goto fib6_rules_init; 3055 3056 ret = -ENOBUFS; 3057 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || 3058 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || 3059 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) 3060 goto out_register_late_subsys; 3061 3062 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 3063 if (ret) 3064 goto out_register_late_subsys; 3065 3066 out: 3067 return ret; 3068 3069 out_register_late_subsys: 3070 unregister_pernet_subsys(&ip6_route_net_late_ops); 3071 fib6_rules_init: 3072 fib6_rules_cleanup(); 3073 xfrm6_init: 3074 xfrm6_fini(); 3075 out_fib6_init: 3076 fib6_gc_cleanup(); 3077 out_register_subsys: 3078 unregister_pernet_subsys(&ip6_route_net_ops); 3079 out_register_inetpeer: 3080 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3081 out_dst_entries: 3082 dst_entries_destroy(&ip6_dst_blackhole_ops); 3083 out_kmem_cache: 3084 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3085 goto out; 3086 } 3087 3088 void ip6_route_cleanup(void) 3089 { 3090 unregister_netdevice_notifier(&ip6_route_dev_notifier); 3091 unregister_pernet_subsys(&ip6_route_net_late_ops); 3092 fib6_rules_cleanup(); 3093 xfrm6_fini(); 3094 fib6_gc_cleanup(); 3095 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3096 unregister_pernet_subsys(&ip6_route_net_ops); 3097 dst_entries_destroy(&ip6_dst_blackhole_ops); 3098 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3099 } 3100
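/*
 * Example: reading the statistics exported above.
 *
 * A minimal userspace sketch (illustrative only, not part of this file
 * or of the kernel build) that parses the seven hexadecimal fields
 * written by rt6_stats_seq_show() to /proc/net/rt6_stats, in the same
 * order as the seq_printf() there.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		unsigned int fib_nodes, route_nodes, rt_alloc, rt_entries;
 *		unsigned int rt_cache, dst_entries, discarded;
 *		FILE *f = fopen("/proc/net/rt6_stats", "r");
 *
 *		if (!f)
 *			return 1;
 *		if (fscanf(f, "%x %x %x %x %x %x %x", &fib_nodes,
 *			   &route_nodes, &rt_alloc, &rt_entries, &rt_cache,
 *			   &dst_entries, &discarded) == 7)
 *			printf("fib nodes %u, route entries %u, cached dsts %u\n",
 *			       fib_nodes, rt_entries, dst_entries);
 *		fclose(f);
 *		return 0;
 *	}
 */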