/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif

static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	if (!(rt->dst.flags & DST_HOST))
		return NULL;

	peer = rt6_get_peer_create(rt);
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		if (inet_metrics_new(peer))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}

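/* Pick the address used as the neighbour-cache key for a route: the
 * route's gateway when one is set, otherwise the packet's destination,
 * otherwise the caller-supplied daddr.
 */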
static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
{
	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
	if (!n) {
		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	rt->n = n;

	return 0;
}

static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.protocol		= cpu_to_be16(ETH_P_IPV6),
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.default_advmss		= ip6_default_advmss,
	.mtu			= ip6_mtu,
	.cow_metrics		= ipv6_cow_metrics,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.redirect		= rt6_do_redirect,
	.local_out		= __ip6_local_out,
	.neigh_lookup		= ip6_neigh_lookup,
};

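/* dst_ops for the blackhole dsts created by ip6_blackhole_route():
 * PMTU updates and redirects are ignored and metrics are never
 * copied-on-write.
 */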
static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.protocol		= cpu_to_be16(ETH_P_IPV6),
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.mtu			= ip6_blackhole_mtu,
	.default_advmss		= ip6_default_advmss,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.redirect		= ip6_rt_blackhole_redirect,
	.cow_metrics		= ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		= ip6_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

/* allocate dst with ip6_dst_ops */
static inline struct rt6_info *ip6_dst_alloc(struct net *net,
					     struct net_device *dev,
					     int flags,
					     struct fib6_table *table)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		struct dst_entry *dst = &rt->dst;

		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
		rt->rt6i_genid = rt_genid(net);
	}
	return rt;
}

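/* Release everything attached to a rt6_info: its neighbour, non-host
 * metrics, inet6_dev, the "from" route reference and the inetpeer.
 */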
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (rt->n)
		neigh_release(rt->n);

	if (!(rt->dst.flags & DST_HOST))
		dst_destroy_metrics_generic(dst);

	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
		dst_release(dst->from);

	if (rt6_has_peer(rt)) {
		struct inet_peer *peer = rt6_peer_ptr(rt);
		inet_putpeer(peer);
	}
}

static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);

static u32 rt6_peer_genid(void)
{
	return atomic_read(&__rt6_peer_genid);
}

void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer_base *base;
	struct inet_peer *peer;

	base = inetpeer_base_ptr(rt->_rt6i_peer);
	if (!base)
		return;

	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
	if (peer) {
		if (!rt6_set_peer(rt, peer))
			inet_putpeer(peer);
		else
			rt->rt6i_peer_genid = rt6_peer_genid();
	}
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
		if (rt->n && rt->n->dev == dev) {
			rt->n->dev = loopback_dev;
			dev_hold(loopback_dev);
			dev_put(dev);
		}
	}
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
	}
	return false;
}

static bool rt6_need_strict(const struct in6_addr *daddr)
{
	return ipv6_addr_type(daddr) &
		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}

/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

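/* From a chain of routes for the same prefix, pick the one that matches
 * the requested output interface, or (when no oif is given) the device
 * on which the source address is configured.
 */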
static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
					    local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		read_unlock_bh(&neigh->lock);
	}
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int m;

	neigh = rt->n;
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	return m;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	n = rt6_check_neigh(rt);
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
		return -1;
	return m;
}

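/* Score one candidate route and remember it in *mpri/match if it beats
 * the current best; when reachability is required, probe the routes we
 * pass over so their neighbour state gets refreshed.
 */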
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}

static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		dst_release(&rt->dst);
	}
	return 0;
}
#endif

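/* When the lookup landed on the null entry, walk back up the fib6 tree
 * (dropping into any source-routing subtree on the way) and retry from
 * the first ancestor node that carries routes.
 */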
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)

static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	BACKTRACK(net, &fl6->saddr);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}

EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes the new route entry; if the addition fails for any reason,
 * the route is freed. In any case, if the caller does not hold a
 * reference, the route may be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_ins_rt(rt, &info);
}

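/* Make a per-destination RTF_CACHE copy of ort and bind a neighbour
 * entry to it, retrying once after a forced GC if the neighbour table
 * overflows.
 */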
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
			if (ort->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			rt->rt6i_gateway = *daddr;
		}

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
			struct net *net = dev_net(rt->dst.dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			net_warn_ratelimited("Neighbour table overflow\n");
			dst_free(&rt->dst);
			return NULL;
		}
	}

	return rt;
}

static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
					const struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		rt->rt6i_flags |= RTF_CACHE;
		rt->n = neigh_clone(ort->n);
	}
	return rt;
}

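/* Generic fib6 lookup used by both the input and output paths: pick the
 * best route in the table and, unless it is already a cached host route,
 * insert a per-destination clone before handing it back.
 */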
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}

void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK,
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
				   struct flowi6 *fl6)
{
	int flags = 0;

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}

EXPORT_SYMBOL(ip6_route_output);

struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		new = &rt->dst;

		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
		rt6_init_peer(rt, net->ipv6.peers);

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		rt6_clean_expires(rt);
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}

/*
 *	Destination cache support functions
 */

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */
	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
		return NULL;

	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
			if (!rt6_has_peer(rt))
				rt6_bind_peer(rt, 0);
			rt->rt6i_peer_genid = rt6_peer_genid();
		}
		return dst;
	}
	return NULL;
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE)
			rt6_update_expires(rt, 0);
		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	dst_confirm(dst);
	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		struct net *net = dev_net(dst->dev);

		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);
			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
	}
}

void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.flowi6_flags = 0;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);

void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

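/* Route the packet that triggered a redirect and apply the redirect to
 * the resulting dst via rt6_do_redirect().
 */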
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.flowi6_flags = 0;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);

void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);

static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	struct inet6_dev *idev;
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);

struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0, NULL);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	if (neigh)
		neigh_hold(neigh);
	else {
		neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
		if (IS_ERR(neigh)) {
			in6_dev_put(idev);
			dst_free(&rt->dst);
			return ERR_CAST(neigh);
		}
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output = ip6_output;
	rt->n = neigh;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}

int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}

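/* Run func over every dst on the ICMPv6 dst list and free the entries it
 * flags; apart from this, the list is only reaped by icmp6_dst_gc().
 */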
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}

static int ip6_dst_gc(struct dst_ops *ops)
{
	unsigned long now = jiffies;
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, now) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
	net->ipv6.ip6_rt_last_gc = now;
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout >> 1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire >> rt_elasticity;
	return entries > rt_max_size;
}

/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */

int ip6_dst_hoplimit(struct dst_entry *dst)
{
	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
	if (hoplimit == 0) {
		struct net_device *dev = dst->dev;
		struct inet6_dev *idev;

		rcu_read_lock();
		idev = __in6_dev_get(dev);
		if (idev)
			hoplimit = idev->cnf.hop_limit;
		else
			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
		rcu_read_unlock();
	}
	return hoplimit;
}
EXPORT_SYMBOL(ip6_dst_hoplimit);

/*
 *	Add a route described by a fib6_config (RTM_NEWROUTE or ioctl).
 */

int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
		if (!metrics) {
			err = -ENOMEM;
			goto out;
		}
		dst_init_metrics(&rt->dst, metrics, 0);
	}
#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			break;
		case RTN_THROW:
			rt->dst.error = -EAGAIN;
			break;
		default:
			rt->dst.error = -ENETUNREACH;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		rt->rt6i_gateway = *gw_addr;
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly forbids using non-link-local
			   addresses as nexthop addresses.
			   Otherwise, the router will not be able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		err = rt6_bind_neighbour(rt, dev);
		if (err)
			goto out;
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}

static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	dst_release(&rt->dst);
	return err;
}

int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt, &info);
}

static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}

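/* Handle an ICMPv6 Redirect: validate the message and its options,
 * update the neighbour cache with the new first hop, and install an
 * RTF_CACHE clone of the route pointing at that first hop.
 */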
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	const struct in6_addr *target;
	struct ndisc_options ndopts;
	const struct in6_addr *dest;
	struct neighbour *old_neigh;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct icmp6hdr *icmph;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb->tail - skb->transport_header;
	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	icmph = icmp6_hdr(skb);
	target = (const struct in6_addr *) (icmph + 1);
	dest = target + 1;

	if (ipv6_addr_is_multicast(dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	on_link = 0;
	if (ipv6_addr_equal(dest, target)) {
		on_link = 1;
	} else if (ipv6_addr_type(target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options((u8 *)(dest + 1), optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
	if (!neigh)
		return;

	/* Duplicate redirect: silently ignore. */
	old_neigh = rt->n;
	if (neigh == old_neigh)
		goto out;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	nrt = ip6_rt_copy(rt, dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
	nrt->n = neigh_clone(neigh);

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.old_neigh = old_neigh;
	netevent.new = &nrt->dst;
	netevent.new_neigh = neigh;
	netevent.daddr = dest;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}

/*
 *	Misc support functions
 */

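/* Duplicate ort as a new host (/128) route toward dest, copying metrics,
 * device and flags; expiry is inherited via rt6_set_from() only for
 * RA-learned default routes.
 */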
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest)
{
	struct net *net = dev_net(ort->dst.dev);
	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
					    ort->rt6i_table);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;
		rt->dst.flags |= DST_HOST;

		rt->rt6i_dst.addr = *dest;
		rt->rt6i_dst.plen = 128;
		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
		    (RTF_DEFAULT | RTF_ADDRCONF))
			rt6_set_from(rt, ort);
		else
			rt6_clean_expires(rt);
		rt->rt6i_metric = 0;

#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

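/* Add (and then re-look-up) a route learned from a Router Advertisement
 * Route Information option in the RT6_TABLE_INFO table.
 */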
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif

struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}

void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}

static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	cfg->fc_dst = rtmsg->rtmsg_dst;
	cfg->fc_src = rtmsg->rtmsg_src;
	cfg->fc_gateway = rtmsg->rtmsg_gateway;
}

int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}

/*
 *	Drop the packet on the floor
 */

static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}

static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

#endif

/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
	int err;

	if (!rt) {
		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
		return ERR_PTR(-ENOMEM);
	}

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	err = rt6_bind_neighbour(rt, rt->dst.dev);
	if (err) {
		dst_free(&rt->dst);
		return ERR_PTR(err);
	}

	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}

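/* Select a source address for a looked-up route: honour the route's
 * preferred source address when present, otherwise fall back to
 * ipv6_dev_get_saddr().
 */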
int ip6_route_get_saddr(struct net *net,
			struct rt6_info *rt,
			const struct in6_addr *daddr,
			unsigned int prefs,
			struct in6_addr *saddr)
{
	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
	int err = 0;
	if (rt->rt6i_prefsrc.plen)
		*saddr = rt->rt6i_prefsrc.addr;
	else
		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
					 daddr, prefs, saddr);
	return err;
}

/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};

static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

	if (((void *)rt->dst.dev == dev || !dev) &&
	    rt != net->ipv6.ip6_null_entry &&
	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
		/* remove prefsrc entry */
		rt->rt6i_prefsrc.plen = 0;
	}
	return 0;
}

void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
}

struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};

static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->dst.dev == dev || !dev) &&
	    rt != adn->net->ipv6.ip6_null_entry)
		return -1;

	return 0;
}

void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, 0, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
}

struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned int mtu;
};

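/* fib6_clean_all() callback for rt6_mtu_change(): adjust RTAX_MTU on
 * routes over the changed device according to the rules spelled out in
 * the comments below.
 */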
2225 */ 2226 if (rt->dst.dev == arg->dev && 2227 !dst_metric_locked(&rt->dst, RTAX_MTU) && 2228 (dst_mtu(&rt->dst) >= arg->mtu || 2229 (dst_mtu(&rt->dst) < arg->mtu && 2230 dst_mtu(&rt->dst) == idev->cnf.mtu6))) { 2231 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2232 } 2233 return 0; 2234 } 2235 2236 void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 2237 { 2238 struct rt6_mtu_change_arg arg = { 2239 .dev = dev, 2240 .mtu = mtu, 2241 }; 2242 2243 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); 2244 } 2245 2246 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2247 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2248 [RTA_OIF] = { .type = NLA_U32 }, 2249 [RTA_IIF] = { .type = NLA_U32 }, 2250 [RTA_PRIORITY] = { .type = NLA_U32 }, 2251 [RTA_METRICS] = { .type = NLA_NESTED }, 2252 }; 2253 2254 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2255 struct fib6_config *cfg) 2256 { 2257 struct rtmsg *rtm; 2258 struct nlattr *tb[RTA_MAX+1]; 2259 int err; 2260 2261 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2262 if (err < 0) 2263 goto errout; 2264 2265 err = -EINVAL; 2266 rtm = nlmsg_data(nlh); 2267 memset(cfg, 0, sizeof(*cfg)); 2268 2269 cfg->fc_table = rtm->rtm_table; 2270 cfg->fc_dst_len = rtm->rtm_dst_len; 2271 cfg->fc_src_len = rtm->rtm_src_len; 2272 cfg->fc_flags = RTF_UP; 2273 cfg->fc_protocol = rtm->rtm_protocol; 2274 cfg->fc_type = rtm->rtm_type; 2275 2276 if (rtm->rtm_type == RTN_UNREACHABLE || 2277 rtm->rtm_type == RTN_BLACKHOLE || 2278 rtm->rtm_type == RTN_PROHIBIT || 2279 rtm->rtm_type == RTN_THROW) 2280 cfg->fc_flags |= RTF_REJECT; 2281 2282 if (rtm->rtm_type == RTN_LOCAL) 2283 cfg->fc_flags |= RTF_LOCAL; 2284 2285 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; 2286 cfg->fc_nlinfo.nlh = nlh; 2287 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2288 2289 if (tb[RTA_GATEWAY]) { 2290 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2291 cfg->fc_flags |= RTF_GATEWAY; 2292 } 2293 2294 if (tb[RTA_DST]) { 2295 int plen = (rtm->rtm_dst_len + 7) >> 3; 2296 2297 if (nla_len(tb[RTA_DST]) < plen) 2298 goto errout; 2299 2300 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2301 } 2302 2303 if (tb[RTA_SRC]) { 2304 int plen = (rtm->rtm_src_len + 7) >> 3; 2305 2306 if (nla_len(tb[RTA_SRC]) < plen) 2307 goto errout; 2308 2309 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2310 } 2311 2312 if (tb[RTA_PREFSRC]) 2313 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16); 2314 2315 if (tb[RTA_OIF]) 2316 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2317 2318 if (tb[RTA_PRIORITY]) 2319 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2320 2321 if (tb[RTA_METRICS]) { 2322 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2323 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2324 } 2325 2326 if (tb[RTA_TABLE]) 2327 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2328 2329 err = 0; 2330 errout: 2331 return err; 2332 } 2333 2334 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2335 { 2336 struct fib6_config cfg; 2337 int err; 2338 2339 err = rtm_to_fib6_config(skb, nlh, &cfg); 2340 if (err < 0) 2341 return err; 2342 2343 return ip6_route_del(&cfg); 2344 } 2345 2346 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2347 { 2348 struct fib6_config cfg; 2349 int err; 2350 2351 err = rtm_to_fib6_config(skb, nlh, &cfg); 2352 if (err < 0) 2353 return err; 2354 2355 return ip6_route_add(&cfg); 2356 } 2357 2358 static inline size_t rt6_nlmsg_size(void) 2359 { 2360 return NLMSG_ALIGN(sizeof(struct 
rtmsg)) 2361 + nla_total_size(16) /* RTA_SRC */ 2362 + nla_total_size(16) /* RTA_DST */ 2363 + nla_total_size(16) /* RTA_GATEWAY */ 2364 + nla_total_size(16) /* RTA_PREFSRC */ 2365 + nla_total_size(4) /* RTA_TABLE */ 2366 + nla_total_size(4) /* RTA_IIF */ 2367 + nla_total_size(4) /* RTA_OIF */ 2368 + nla_total_size(4) /* RTA_PRIORITY */ 2369 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2370 + nla_total_size(sizeof(struct rta_cacheinfo)); 2371 } 2372 2373 static int rt6_fill_node(struct net *net, 2374 struct sk_buff *skb, struct rt6_info *rt, 2375 struct in6_addr *dst, struct in6_addr *src, 2376 int iif, int type, u32 portid, u32 seq, 2377 int prefix, int nowait, unsigned int flags) 2378 { 2379 struct rtmsg *rtm; 2380 struct nlmsghdr *nlh; 2381 long expires; 2382 u32 table; 2383 struct neighbour *n; 2384 2385 if (prefix) { /* user wants prefix routes only */ 2386 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2387 /* success since this is not a prefix route */ 2388 return 1; 2389 } 2390 } 2391 2392 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); 2393 if (!nlh) 2394 return -EMSGSIZE; 2395 2396 rtm = nlmsg_data(nlh); 2397 rtm->rtm_family = AF_INET6; 2398 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2399 rtm->rtm_src_len = rt->rt6i_src.plen; 2400 rtm->rtm_tos = 0; 2401 if (rt->rt6i_table) 2402 table = rt->rt6i_table->tb6_id; 2403 else 2404 table = RT6_TABLE_UNSPEC; 2405 rtm->rtm_table = table; 2406 if (nla_put_u32(skb, RTA_TABLE, table)) 2407 goto nla_put_failure; 2408 if (rt->rt6i_flags & RTF_REJECT) { 2409 switch (rt->dst.error) { 2410 case -EINVAL: 2411 rtm->rtm_type = RTN_BLACKHOLE; 2412 break; 2413 case -EACCES: 2414 rtm->rtm_type = RTN_PROHIBIT; 2415 break; 2416 case -EAGAIN: 2417 rtm->rtm_type = RTN_THROW; 2418 break; 2419 default: 2420 rtm->rtm_type = RTN_UNREACHABLE; 2421 break; 2422 } 2423 } 2424 else if (rt->rt6i_flags & RTF_LOCAL) 2425 rtm->rtm_type = RTN_LOCAL; 2426 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 2427 rtm->rtm_type = RTN_LOCAL; 2428 else 2429 rtm->rtm_type = RTN_UNICAST; 2430 rtm->rtm_flags = 0; 2431 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2432 rtm->rtm_protocol = rt->rt6i_protocol; 2433 if (rt->rt6i_flags & RTF_DYNAMIC) 2434 rtm->rtm_protocol = RTPROT_REDIRECT; 2435 else if (rt->rt6i_flags & RTF_ADDRCONF) { 2436 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) 2437 rtm->rtm_protocol = RTPROT_RA; 2438 else 2439 rtm->rtm_protocol = RTPROT_KERNEL; 2440 } 2441 2442 if (rt->rt6i_flags & RTF_CACHE) 2443 rtm->rtm_flags |= RTM_F_CLONED; 2444 2445 if (dst) { 2446 if (nla_put(skb, RTA_DST, 16, dst)) 2447 goto nla_put_failure; 2448 rtm->rtm_dst_len = 128; 2449 } else if (rtm->rtm_dst_len) 2450 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr)) 2451 goto nla_put_failure; 2452 #ifdef CONFIG_IPV6_SUBTREES 2453 if (src) { 2454 if (nla_put(skb, RTA_SRC, 16, src)) 2455 goto nla_put_failure; 2456 rtm->rtm_src_len = 128; 2457 } else if (rtm->rtm_src_len && 2458 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr)) 2459 goto nla_put_failure; 2460 #endif 2461 if (iif) { 2462 #ifdef CONFIG_IPV6_MROUTE 2463 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 2464 int err = ip6mr_get_route(net, skb, rtm, nowait); 2465 if (err <= 0) { 2466 if (!nowait) { 2467 if (err == 0) 2468 return 0; 2469 goto nla_put_failure; 2470 } else { 2471 if (err == -EMSGSIZE) 2472 goto nla_put_failure; 2473 } 2474 } 2475 } else 2476 #endif 2477 if (nla_put_u32(skb, RTA_IIF, iif)) 2478 goto nla_put_failure; 2479 } else if (dst) { 2480 struct in6_addr saddr_buf; 2481 if (ip6_route_get_saddr(net, rt, 
dst, 0, &saddr_buf) == 0 && 2482 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) 2483 goto nla_put_failure; 2484 } 2485 2486 if (rt->rt6i_prefsrc.plen) { 2487 struct in6_addr saddr_buf; 2488 saddr_buf = rt->rt6i_prefsrc.addr; 2489 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) 2490 goto nla_put_failure; 2491 } 2492 2493 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2494 goto nla_put_failure; 2495 2496 n = rt->n; 2497 if (n) { 2498 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) 2499 goto nla_put_failure; 2500 } 2501 2502 if (rt->dst.dev && 2503 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 2504 goto nla_put_failure; 2505 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 2506 goto nla_put_failure; 2507 2508 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; 2509 2510 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 2511 goto nla_put_failure; 2512 2513 return nlmsg_end(skb, nlh); 2514 2515 nla_put_failure: 2516 nlmsg_cancel(skb, nlh); 2517 return -EMSGSIZE; 2518 } 2519 2520 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2521 { 2522 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2523 int prefix; 2524 2525 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2526 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2527 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2528 } else 2529 prefix = 0; 2530 2531 return rt6_fill_node(arg->net, 2532 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2533 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, 2534 prefix, 0, NLM_F_MULTI); 2535 } 2536 2537 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2538 { 2539 struct net *net = sock_net(in_skb->sk); 2540 struct nlattr *tb[RTA_MAX+1]; 2541 struct rt6_info *rt; 2542 struct sk_buff *skb; 2543 struct rtmsg *rtm; 2544 struct flowi6 fl6; 2545 int err, iif = 0, oif = 0; 2546 2547 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2548 if (err < 0) 2549 goto errout; 2550 2551 err = -EINVAL; 2552 memset(&fl6, 0, sizeof(fl6)); 2553 2554 if (tb[RTA_SRC]) { 2555 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2556 goto errout; 2557 2558 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 2559 } 2560 2561 if (tb[RTA_DST]) { 2562 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2563 goto errout; 2564 2565 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 2566 } 2567 2568 if (tb[RTA_IIF]) 2569 iif = nla_get_u32(tb[RTA_IIF]); 2570 2571 if (tb[RTA_OIF]) 2572 oif = nla_get_u32(tb[RTA_OIF]); 2573 2574 if (iif) { 2575 struct net_device *dev; 2576 int flags = 0; 2577 2578 dev = __dev_get_by_index(net, iif); 2579 if (!dev) { 2580 err = -ENODEV; 2581 goto errout; 2582 } 2583 2584 fl6.flowi6_iif = iif; 2585 2586 if (!ipv6_addr_any(&fl6.saddr)) 2587 flags |= RT6_LOOKUP_F_HAS_SADDR; 2588 2589 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6, 2590 flags); 2591 } else { 2592 fl6.flowi6_oif = oif; 2593 2594 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); 2595 } 2596 2597 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2598 if (!skb) { 2599 dst_release(&rt->dst); 2600 err = -ENOBUFS; 2601 goto errout; 2602 } 2603 2604 /* Reserve room for dummy headers, this skb can pass 2605 through good chunk of routing engine. 
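 The one skb does double duty here: with the looked-up dst attached it is handled much like a received packet (for a multicast destination with RTA_IIF set, rt6_fill_node() may hand it to ip6mr_get_route()), and it is also the netlink reply buffer that gets filled in and sent back with rtnl_unicast().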
2606 */ 2607 skb_reset_mac_header(skb); 2608 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2609 2610 skb_dst_set(skb, &rt->dst); 2611 2612 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 2613 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 2614 nlh->nlmsg_seq, 0, 0, 0); 2615 if (err < 0) { 2616 kfree_skb(skb); 2617 goto errout; 2618 } 2619 2620 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2621 errout: 2622 return err; 2623 } 2624 2625 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2626 { 2627 struct sk_buff *skb; 2628 struct net *net = info->nl_net; 2629 u32 seq; 2630 int err; 2631 2632 err = -ENOBUFS; 2633 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 2634 2635 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2636 if (!skb) 2637 goto errout; 2638 2639 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2640 event, info->portid, seq, 0, 0, 0); 2641 if (err < 0) { 2642 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2643 WARN_ON(err == -EMSGSIZE); 2644 kfree_skb(skb); 2645 goto errout; 2646 } 2647 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 2648 info->nlh, gfp_any()); 2649 return; 2650 errout: 2651 if (err < 0) 2652 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2653 } 2654 2655 static int ip6_route_dev_notify(struct notifier_block *this, 2656 unsigned long event, void *data) 2657 { 2658 struct net_device *dev = (struct net_device *)data; 2659 struct net *net = dev_net(dev); 2660 2661 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 2662 net->ipv6.ip6_null_entry->dst.dev = dev; 2663 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 2664 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2665 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 2666 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 2667 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 2668 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 2669 #endif 2670 } 2671 2672 return NOTIFY_OK; 2673 } 2674 2675 /* 2676 * /proc 2677 */ 2678 2679 #ifdef CONFIG_PROC_FS 2680 2681 struct rt6_proc_arg 2682 { 2683 char *buffer; 2684 int offset; 2685 int length; 2686 int skip; 2687 int len; 2688 }; 2689 2690 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2691 { 2692 struct seq_file *m = p_arg; 2693 struct neighbour *n; 2694 2695 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); 2696 2697 #ifdef CONFIG_IPV6_SUBTREES 2698 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); 2699 #else 2700 seq_puts(m, "00000000000000000000000000000000 00 "); 2701 #endif 2702 n = rt->n; 2703 if (n) { 2704 seq_printf(m, "%pi6", n->primary_key); 2705 } else { 2706 seq_puts(m, "00000000000000000000000000000000"); 2707 } 2708 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2709 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 2710 rt->dst.__use, rt->rt6i_flags, 2711 rt->dst.dev ? 
rt->dst.dev->name : ""); 2712 return 0; 2713 } 2714 2715 static int ipv6_route_show(struct seq_file *m, void *v) 2716 { 2717 struct net *net = (struct net *)m->private; 2718 fib6_clean_all_ro(net, rt6_info_route, 0, m); 2719 return 0; 2720 } 2721 2722 static int ipv6_route_open(struct inode *inode, struct file *file) 2723 { 2724 return single_open_net(inode, file, ipv6_route_show); 2725 } 2726 2727 static const struct file_operations ipv6_route_proc_fops = { 2728 .owner = THIS_MODULE, 2729 .open = ipv6_route_open, 2730 .read = seq_read, 2731 .llseek = seq_lseek, 2732 .release = single_release_net, 2733 }; 2734 2735 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2736 { 2737 struct net *net = (struct net *)seq->private; 2738 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2739 net->ipv6.rt6_stats->fib_nodes, 2740 net->ipv6.rt6_stats->fib_route_nodes, 2741 net->ipv6.rt6_stats->fib_rt_alloc, 2742 net->ipv6.rt6_stats->fib_rt_entries, 2743 net->ipv6.rt6_stats->fib_rt_cache, 2744 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 2745 net->ipv6.rt6_stats->fib_discarded_routes); 2746 2747 return 0; 2748 } 2749 2750 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2751 { 2752 return single_open_net(inode, file, rt6_stats_seq_show); 2753 } 2754 2755 static const struct file_operations rt6_stats_seq_fops = { 2756 .owner = THIS_MODULE, 2757 .open = rt6_stats_seq_open, 2758 .read = seq_read, 2759 .llseek = seq_lseek, 2760 .release = single_release_net, 2761 }; 2762 #endif /* CONFIG_PROC_FS */ 2763 2764 #ifdef CONFIG_SYSCTL 2765 2766 static 2767 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2768 void __user *buffer, size_t *lenp, loff_t *ppos) 2769 { 2770 struct net *net; 2771 int delay; 2772 if (!write) 2773 return -EINVAL; 2774 2775 net = (struct net *)ctl->extra1; 2776 delay = net->ipv6.sysctl.flush_delay; 2777 proc_dointvec(ctl, write, buffer, lenp, ppos); 2778 fib6_run_gc(delay <= 0 ? 
~0UL : (unsigned long)delay, net); 2779 return 0; 2780 } 2781 2782 ctl_table ipv6_route_table_template[] = { 2783 { 2784 .procname = "flush", 2785 .data = &init_net.ipv6.sysctl.flush_delay, 2786 .maxlen = sizeof(int), 2787 .mode = 0200, 2788 .proc_handler = ipv6_sysctl_rtcache_flush 2789 }, 2790 { 2791 .procname = "gc_thresh", 2792 .data = &ip6_dst_ops_template.gc_thresh, 2793 .maxlen = sizeof(int), 2794 .mode = 0644, 2795 .proc_handler = proc_dointvec, 2796 }, 2797 { 2798 .procname = "max_size", 2799 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2800 .maxlen = sizeof(int), 2801 .mode = 0644, 2802 .proc_handler = proc_dointvec, 2803 }, 2804 { 2805 .procname = "gc_min_interval", 2806 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2807 .maxlen = sizeof(int), 2808 .mode = 0644, 2809 .proc_handler = proc_dointvec_jiffies, 2810 }, 2811 { 2812 .procname = "gc_timeout", 2813 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2814 .maxlen = sizeof(int), 2815 .mode = 0644, 2816 .proc_handler = proc_dointvec_jiffies, 2817 }, 2818 { 2819 .procname = "gc_interval", 2820 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 2821 .maxlen = sizeof(int), 2822 .mode = 0644, 2823 .proc_handler = proc_dointvec_jiffies, 2824 }, 2825 { 2826 .procname = "gc_elasticity", 2827 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 2828 .maxlen = sizeof(int), 2829 .mode = 0644, 2830 .proc_handler = proc_dointvec, 2831 }, 2832 { 2833 .procname = "mtu_expires", 2834 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 2835 .maxlen = sizeof(int), 2836 .mode = 0644, 2837 .proc_handler = proc_dointvec_jiffies, 2838 }, 2839 { 2840 .procname = "min_adv_mss", 2841 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 2842 .maxlen = sizeof(int), 2843 .mode = 0644, 2844 .proc_handler = proc_dointvec, 2845 }, 2846 { 2847 .procname = "gc_min_interval_ms", 2848 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2849 .maxlen = sizeof(int), 2850 .mode = 0644, 2851 .proc_handler = proc_dointvec_ms_jiffies, 2852 }, 2853 { } 2854 }; 2855 2856 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 2857 { 2858 struct ctl_table *table; 2859 2860 table = kmemdup(ipv6_route_table_template, 2861 sizeof(ipv6_route_table_template), 2862 GFP_KERNEL); 2863 2864 if (table) { 2865 table[0].data = &net->ipv6.sysctl.flush_delay; 2866 table[0].extra1 = net; 2867 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2868 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2869 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2870 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 2871 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 2872 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 2873 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2874 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2875 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2876 } 2877 2878 return table; 2879 } 2880 #endif 2881 2882 static int __net_init ip6_route_net_init(struct net *net) 2883 { 2884 int ret = -ENOMEM; 2885 2886 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2887 sizeof(net->ipv6.ip6_dst_ops)); 2888 2889 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 2890 goto out_ip6_dst_ops; 2891 2892 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2893 sizeof(*net->ipv6.ip6_null_entry), 2894 GFP_KERNEL); 2895 if (!net->ipv6.ip6_null_entry) 2896 goto out_ip6_dst_entries; 2897 net->ipv6.ip6_null_entry->dst.path = 2898 (struct dst_entry *)net->ipv6.ip6_null_entry; 2899 net->ipv6.ip6_null_entry->dst.ops = 
&net->ipv6.ip6_dst_ops; 2900 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 2901 ip6_template_metrics, true); 2902 2903 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2904 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2905 sizeof(*net->ipv6.ip6_prohibit_entry), 2906 GFP_KERNEL); 2907 if (!net->ipv6.ip6_prohibit_entry) 2908 goto out_ip6_null_entry; 2909 net->ipv6.ip6_prohibit_entry->dst.path = 2910 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2911 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2912 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 2913 ip6_template_metrics, true); 2914 2915 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2916 sizeof(*net->ipv6.ip6_blk_hole_entry), 2917 GFP_KERNEL); 2918 if (!net->ipv6.ip6_blk_hole_entry) 2919 goto out_ip6_prohibit_entry; 2920 net->ipv6.ip6_blk_hole_entry->dst.path = 2921 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2922 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2923 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 2924 ip6_template_metrics, true); 2925 #endif 2926 2927 net->ipv6.sysctl.flush_delay = 0; 2928 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2929 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2930 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2931 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2932 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2933 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2934 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2935 2936 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2937 2938 ret = 0; 2939 out: 2940 return ret; 2941 2942 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2943 out_ip6_prohibit_entry: 2944 kfree(net->ipv6.ip6_prohibit_entry); 2945 out_ip6_null_entry: 2946 kfree(net->ipv6.ip6_null_entry); 2947 #endif 2948 out_ip6_dst_entries: 2949 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2950 out_ip6_dst_ops: 2951 goto out; 2952 } 2953 2954 static void __net_exit ip6_route_net_exit(struct net *net) 2955 { 2956 kfree(net->ipv6.ip6_null_entry); 2957 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2958 kfree(net->ipv6.ip6_prohibit_entry); 2959 kfree(net->ipv6.ip6_blk_hole_entry); 2960 #endif 2961 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2962 } 2963 2964 static int __net_init ip6_route_net_init_late(struct net *net) 2965 { 2966 #ifdef CONFIG_PROC_FS 2967 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2968 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2969 #endif 2970 return 0; 2971 } 2972 2973 static void __net_exit ip6_route_net_exit_late(struct net *net) 2974 { 2975 #ifdef CONFIG_PROC_FS 2976 proc_net_remove(net, "ipv6_route"); 2977 proc_net_remove(net, "rt6_stats"); 2978 #endif 2979 } 2980 2981 static struct pernet_operations ip6_route_net_ops = { 2982 .init = ip6_route_net_init, 2983 .exit = ip6_route_net_exit, 2984 }; 2985 2986 static int __net_init ipv6_inetpeer_init(struct net *net) 2987 { 2988 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 2989 2990 if (!bp) 2991 return -ENOMEM; 2992 inet_peer_base_init(bp); 2993 net->ipv6.peers = bp; 2994 return 0; 2995 } 2996 2997 static void __net_exit ipv6_inetpeer_exit(struct net *net) 2998 { 2999 struct inet_peer_base *bp = net->ipv6.peers; 3000 3001 net->ipv6.peers = NULL; 3002 inetpeer_invalidate_tree(bp); 3003 kfree(bp); 3004 } 3005 3006 static struct pernet_operations ipv6_inetpeer_ops = { 3007 .init = ipv6_inetpeer_init, 3008 .exit = ipv6_inetpeer_exit, 3009 }; 3010 3011 static struct pernet_operations ip6_route_net_late_ops = { 
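 /* Registered from ip6_route_init() only after fib6_init() and fib6_rules_init(), which ensures the /proc seq files above are created for a new network namespace only once its routing tables exist. */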
3012 .init = ip6_route_net_init_late, 3013 .exit = ip6_route_net_exit_late, 3014 }; 3015 3016 static struct notifier_block ip6_route_dev_notifier = { 3017 .notifier_call = ip6_route_dev_notify, 3018 .priority = 0, 3019 }; 3020 3021 int __init ip6_route_init(void) 3022 { 3023 int ret; 3024 3025 ret = -ENOMEM; 3026 ip6_dst_ops_template.kmem_cachep = 3027 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 3028 SLAB_HWCACHE_ALIGN, NULL); 3029 if (!ip6_dst_ops_template.kmem_cachep) 3030 goto out; 3031 3032 ret = dst_entries_init(&ip6_dst_blackhole_ops); 3033 if (ret) 3034 goto out_kmem_cache; 3035 3036 ret = register_pernet_subsys(&ipv6_inetpeer_ops); 3037 if (ret) 3038 goto out_dst_entries; 3039 3040 ret = register_pernet_subsys(&ip6_route_net_ops); 3041 if (ret) 3042 goto out_register_inetpeer; 3043 3044 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 3045 3046 /* Registering of the loopback is done before this portion of code, 3047 * the loopback reference in rt6_info will not be taken, do it 3048 * manually for init_net */ 3049 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 3050 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3052 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 3053 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3054 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 3055 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3056 #endif 3057 ret = fib6_init(); 3058 if (ret) 3059 goto out_register_subsys; 3060 3061 ret = xfrm6_init(); 3062 if (ret) 3063 goto out_fib6_init; 3064 3065 ret = fib6_rules_init(); 3066 if (ret) 3067 goto xfrm6_init; 3068 3069 ret = register_pernet_subsys(&ip6_route_net_late_ops); 3070 if (ret) 3071 goto fib6_rules_init; 3072 3073 ret = -ENOBUFS; 3074 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || 3075 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || 3076 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) 3077 goto out_register_late_subsys; 3078 3079 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 3080 if (ret) 3081 goto out_register_late_subsys; 3082 3083 out: 3084 return ret; 3085 3086 out_register_late_subsys: 3087 unregister_pernet_subsys(&ip6_route_net_late_ops); 3088 fib6_rules_init: 3089 fib6_rules_cleanup(); 3090 xfrm6_init: 3091 xfrm6_fini(); 3092 out_fib6_init: 3093 fib6_gc_cleanup(); 3094 out_register_subsys: 3095 unregister_pernet_subsys(&ip6_route_net_ops); 3096 out_register_inetpeer: 3097 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3098 out_dst_entries: 3099 dst_entries_destroy(&ip6_dst_blackhole_ops); 3100 out_kmem_cache: 3101 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3102 goto out; 3103 } 3104 3105 void ip6_route_cleanup(void) 3106 { 3107 unregister_netdevice_notifier(&ip6_route_dev_notifier); 3108 unregister_pernet_subsys(&ip6_route_net_late_ops); 3109 fib6_rules_cleanup(); 3110 xfrm6_fini(); 3111 fib6_gc_cleanup(); 3112 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3113 unregister_pernet_subsys(&ip6_route_net_ops); 3114 dst_entries_destroy(&ip6_dst_blackhole_ops); 3115 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3116 } 3117