1 /* 2 * IPv6 tunneling device 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Ville Nuorvala <vnuorval@tcs.hut.fi> 7 * Yasuyuki Kozakai <kozakai@linux-ipv6.org> 8 * 9 * Based on: 10 * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c 11 * 12 * RFC 2473 13 * 14 * This program is free software; you can redistribute it and/or 15 * modify it under the terms of the GNU General Public License 16 * as published by the Free Software Foundation; either version 17 * 2 of the License, or (at your option) any later version. 18 * 19 */ 20 21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 22 23 #include <linux/module.h> 24 #include <linux/capability.h> 25 #include <linux/errno.h> 26 #include <linux/types.h> 27 #include <linux/sockios.h> 28 #include <linux/icmp.h> 29 #include <linux/if.h> 30 #include <linux/in.h> 31 #include <linux/ip.h> 32 #include <linux/if_tunnel.h> 33 #include <linux/net.h> 34 #include <linux/in6.h> 35 #include <linux/netdevice.h> 36 #include <linux/if_arp.h> 37 #include <linux/icmpv6.h> 38 #include <linux/init.h> 39 #include <linux/route.h> 40 #include <linux/rtnetlink.h> 41 #include <linux/netfilter_ipv6.h> 42 #include <linux/slab.h> 43 #include <linux/hash.h> 44 #include <linux/etherdevice.h> 45 46 #include <asm/uaccess.h> 47 #include <linux/atomic.h> 48 49 #include <net/icmp.h> 50 #include <net/ip.h> 51 #include <net/ip_tunnels.h> 52 #include <net/ipv6.h> 53 #include <net/ip6_route.h> 54 #include <net/addrconf.h> 55 #include <net/ip6_tunnel.h> 56 #include <net/xfrm.h> 57 #include <net/dsfield.h> 58 #include <net/inet_ecn.h> 59 #include <net/net_namespace.h> 60 #include <net/netns/generic.h> 61 62 MODULE_AUTHOR("Ville Nuorvala"); 63 MODULE_DESCRIPTION("IPv6 tunneling device"); 64 MODULE_LICENSE("GPL"); 65 MODULE_ALIAS_NETDEV("ip6tnl0"); 66 67 #ifdef IP6_TNL_DEBUG 68 #define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__) 69 #else 70 #define IP6_TNL_TRACE(x...) do {;} while(0) 71 #endif 72 73 #define IPV6_TCLASS_SHIFT 20 74 75 #define HASH_SIZE_SHIFT 5 76 #define HASH_SIZE (1 << HASH_SIZE_SHIFT) 77 78 static bool log_ecn_error = true; 79 module_param(log_ecn_error, bool, 0644); 80 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 81 82 static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) 83 { 84 u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); 85 86 return hash_32(hash, HASH_SIZE_SHIFT); 87 } 88 89 static int ip6_tnl_dev_init(struct net_device *dev); 90 static void ip6_tnl_dev_setup(struct net_device *dev); 91 static struct rtnl_link_ops ip6_link_ops __read_mostly; 92 93 static int ip6_tnl_net_id __read_mostly; 94 struct ip6_tnl_net { 95 /* the IPv6 tunnel fallback device */ 96 struct net_device *fb_tnl_dev; 97 /* lists for storing tunnels in use */ 98 struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; 99 struct ip6_tnl __rcu *tnls_wc[1]; 100 struct ip6_tnl __rcu **tnls[2]; 101 }; 102 103 static struct net_device_stats *ip6_get_stats(struct net_device *dev) 104 { 105 struct pcpu_tstats sum = { 0 }; 106 int i; 107 108 for_each_possible_cpu(i) { 109 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); 110 111 sum.rx_packets += tstats->rx_packets; 112 sum.rx_bytes += tstats->rx_bytes; 113 sum.tx_packets += tstats->tx_packets; 114 sum.tx_bytes += tstats->tx_bytes; 115 } 116 dev->stats.rx_packets = sum.rx_packets; 117 dev->stats.rx_bytes = sum.rx_bytes; 118 dev->stats.tx_packets = sum.tx_packets; 119 dev->stats.tx_bytes = sum.tx_bytes; 120 return &dev->stats; 121 } 122 123 /* 124 * Locking : hash tables are protected by RCU and RTNL 125 */ 126 127 struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 128 { 129 struct dst_entry *dst = t->dst_cache; 130 131 if (dst && dst->obsolete && 132 dst->ops->check(dst, t->dst_cookie) == NULL) { 133 t->dst_cache = NULL; 134 dst_release(dst); 135 return NULL; 136 } 137 138 return dst; 139 } 140 EXPORT_SYMBOL_GPL(ip6_tnl_dst_check); 141 142 void ip6_tnl_dst_reset(struct ip6_tnl *t) 143 { 144 dst_release(t->dst_cache); 145 t->dst_cache = NULL; 146 } 147 EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); 148 149 void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) 150 { 151 struct rt6_info *rt = (struct rt6_info *) dst; 152 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 153 dst_release(t->dst_cache); 154 t->dst_cache = dst; 155 } 156 EXPORT_SYMBOL_GPL(ip6_tnl_dst_store); 157 158 /** 159 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses 160 * @remote: the address of the tunnel exit-point 161 * @local: the address of the tunnel entry-point 162 * 163 * Return: 164 * tunnel matching given end-points if found, 165 * else fallback tunnel if its device is up, 166 * else %NULL 167 **/ 168 169 #define for_each_ip6_tunnel_rcu(start) \ 170 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 171 172 static struct ip6_tnl * 173 ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) 174 { 175 unsigned int hash = HASH(remote, local); 176 struct ip6_tnl *t; 177 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 178 179 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { 180 if (ipv6_addr_equal(local, &t->parms.laddr) && 181 ipv6_addr_equal(remote, &t->parms.raddr) && 182 (t->dev->flags & IFF_UP)) 183 return t; 184 } 185 t = rcu_dereference(ip6n->tnls_wc[0]); 186 if (t && (t->dev->flags & IFF_UP)) 187 return t; 188 189 return NULL; 190 } 191 192 /** 193 * ip6_tnl_bucket - get head of list matching given tunnel parameters 194 * @p: parameters containing tunnel end-points 195 * 196 * Description: 197 * ip6_tnl_bucket() returns the head of the list matching the 198 * &struct in6_addr entries laddr and raddr in @p. 199 * 200 * Return: head of IPv6 tunnel list 201 **/ 202 203 static struct ip6_tnl __rcu ** 204 ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) 205 { 206 const struct in6_addr *remote = &p->raddr; 207 const struct in6_addr *local = &p->laddr; 208 unsigned int h = 0; 209 int prio = 0; 210 211 if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { 212 prio = 1; 213 h = HASH(remote, local); 214 } 215 return &ip6n->tnls[prio][h]; 216 } 217 218 /** 219 * ip6_tnl_link - add tunnel to hash table 220 * @t: tunnel to be added 221 **/ 222 223 static void 224 ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 225 { 226 struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); 227 228 rcu_assign_pointer(t->next , rtnl_dereference(*tp)); 229 rcu_assign_pointer(*tp, t); 230 } 231 232 /** 233 * ip6_tnl_unlink - remove tunnel from hash table 234 * @t: tunnel to be removed 235 **/ 236 237 static void 238 ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 239 { 240 struct ip6_tnl __rcu **tp; 241 struct ip6_tnl *iter; 242 243 for (tp = ip6_tnl_bucket(ip6n, &t->parms); 244 (iter = rtnl_dereference(*tp)) != NULL; 245 tp = &iter->next) { 246 if (t == iter) { 247 rcu_assign_pointer(*tp, t->next); 248 break; 249 } 250 } 251 } 252 253 static void ip6_dev_free(struct net_device *dev) 254 { 255 free_percpu(dev->tstats); 256 free_netdev(dev); 257 } 258 259 static int ip6_tnl_create2(struct net_device *dev) 260 { 261 struct ip6_tnl *t = netdev_priv(dev); 262 struct net *net = dev_net(dev); 263 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 264 int err; 265 266 t = netdev_priv(dev); 267 err = ip6_tnl_dev_init(dev); 268 if (err < 0) 269 goto out; 270 271 err = register_netdevice(dev); 272 if (err < 0) 273 goto out; 274 275 strcpy(t->parms.name, dev->name); 276 dev->rtnl_link_ops = &ip6_link_ops; 277 278 dev_hold(dev); 279 ip6_tnl_link(ip6n, t); 280 return 0; 281 282 out: 283 return err; 284 } 285 286 /** 287 * ip6_tnl_create - create a new tunnel 288 * @p: tunnel parameters 289 * @pt: pointer to new tunnel 290 * 291 * Description: 292 * Create tunnel matching given parameters. 293 * 294 * Return: 295 * created tunnel or NULL 296 **/ 297 298 static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) 299 { 300 struct net_device *dev; 301 struct ip6_tnl *t; 302 char name[IFNAMSIZ]; 303 int err; 304 305 if (p->name[0]) 306 strlcpy(name, p->name, IFNAMSIZ); 307 else 308 sprintf(name, "ip6tnl%%d"); 309 310 dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup); 311 if (dev == NULL) 312 goto failed; 313 314 dev_net_set(dev, net); 315 316 t = netdev_priv(dev); 317 t->parms = *p; 318 t->net = dev_net(dev); 319 err = ip6_tnl_create2(dev); 320 if (err < 0) 321 goto failed_free; 322 323 return t; 324 325 failed_free: 326 ip6_dev_free(dev); 327 failed: 328 return NULL; 329 } 330 331 /** 332 * ip6_tnl_locate - find or create tunnel matching given parameters 333 * @p: tunnel parameters 334 * @create: != 0 if allowed to create new tunnel if no match found 335 * 336 * Description: 337 * ip6_tnl_locate() first tries to locate an existing tunnel 338 * based on @parms. If this is unsuccessful, but @create is set a new 339 * tunnel device is created and registered for use. 340 * 341 * Return: 342 * matching tunnel or NULL 343 **/ 344 345 static struct ip6_tnl *ip6_tnl_locate(struct net *net, 346 struct __ip6_tnl_parm *p, int create) 347 { 348 const struct in6_addr *remote = &p->raddr; 349 const struct in6_addr *local = &p->laddr; 350 struct ip6_tnl __rcu **tp; 351 struct ip6_tnl *t; 352 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 353 354 for (tp = ip6_tnl_bucket(ip6n, p); 355 (t = rtnl_dereference(*tp)) != NULL; 356 tp = &t->next) { 357 if (ipv6_addr_equal(local, &t->parms.laddr) && 358 ipv6_addr_equal(remote, &t->parms.raddr)) 359 return t; 360 } 361 if (!create) 362 return NULL; 363 return ip6_tnl_create(net, p); 364 } 365 366 /** 367 * ip6_tnl_dev_uninit - tunnel device uninitializer 368 * @dev: the device to be destroyed 369 * 370 * Description: 371 * ip6_tnl_dev_uninit() removes tunnel from its list 372 **/ 373 374 static void 375 ip6_tnl_dev_uninit(struct net_device *dev) 376 { 377 struct ip6_tnl *t = netdev_priv(dev); 378 struct net *net = t->net; 379 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 380 381 if (dev == ip6n->fb_tnl_dev) 382 RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); 383 else 384 ip6_tnl_unlink(ip6n, t); 385 ip6_tnl_dst_reset(t); 386 dev_put(dev); 387 } 388 389 /** 390 * parse_tvl_tnl_enc_lim - handle encapsulation limit option 391 * @skb: received socket buffer 392 * 393 * Return: 394 * 0 if none was found, 395 * else index to encapsulation limit 396 **/ 397 398 __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) 399 { 400 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; 401 __u8 nexthdr = ipv6h->nexthdr; 402 __u16 off = sizeof (*ipv6h); 403 404 while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { 405 __u16 optlen = 0; 406 struct ipv6_opt_hdr *hdr; 407 if (raw + off + sizeof (*hdr) > skb->data && 408 !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) 409 break; 410 411 hdr = (struct ipv6_opt_hdr *) (raw + off); 412 if (nexthdr == NEXTHDR_FRAGMENT) { 413 struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; 414 if (frag_hdr->frag_off) 415 break; 416 optlen = 8; 417 } else if (nexthdr == NEXTHDR_AUTH) { 418 optlen = (hdr->hdrlen + 2) << 2; 419 } else { 420 optlen = ipv6_optlen(hdr); 421 } 422 if (nexthdr == NEXTHDR_DEST) { 423 __u16 i = off + 2; 424 while (1) { 425 struct ipv6_tlv_tnl_enc_lim *tel; 426 427 /* No more room for encapsulation limit */ 428 if (i + sizeof (*tel) > off + optlen) 429 break; 430 431 tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i]; 432 /* return index of option if found and valid */ 433 if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && 434 tel->length == 1) 435 return i; 436 /* else jump to next option */ 437 if (tel->type) 438 i += tel->length + 2; 439 else 440 i++; 441 } 442 } 443 nexthdr = hdr->nexthdr; 444 off += optlen; 445 } 446 return 0; 447 } 448 EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim); 449 450 /** 451 * ip6_tnl_err - tunnel error handler 452 * 453 * Description: 454 * ip6_tnl_err() should handle errors in the tunnel according 455 * to the specifications in RFC 2473. 456 **/ 457 458 static int 459 ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, 460 u8 *type, u8 *code, int *msg, __u32 *info, int offset) 461 { 462 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data; 463 struct ip6_tnl *t; 464 int rel_msg = 0; 465 u8 rel_type = ICMPV6_DEST_UNREACH; 466 u8 rel_code = ICMPV6_ADDR_UNREACH; 467 __u32 rel_info = 0; 468 __u16 len; 469 int err = -ENOENT; 470 471 /* If the packet doesn't contain the original IPv6 header we are 472 in trouble since we might need the source address for further 473 processing of the error. */ 474 475 rcu_read_lock(); 476 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, 477 &ipv6h->saddr)) == NULL) 478 goto out; 479 480 if (t->parms.proto != ipproto && t->parms.proto != 0) 481 goto out; 482 483 err = 0; 484 485 switch (*type) { 486 __u32 teli; 487 struct ipv6_tlv_tnl_enc_lim *tel; 488 __u32 mtu; 489 case ICMPV6_DEST_UNREACH: 490 net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", 491 t->parms.name); 492 rel_msg = 1; 493 break; 494 case ICMPV6_TIME_EXCEED: 495 if ((*code) == ICMPV6_EXC_HOPLIMIT) { 496 net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", 497 t->parms.name); 498 rel_msg = 1; 499 } 500 break; 501 case ICMPV6_PARAMPROB: 502 teli = 0; 503 if ((*code) == ICMPV6_HDR_FIELD) 504 teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); 505 506 if (teli && teli == *info - 2) { 507 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; 508 if (tel->encap_limit == 0) { 509 net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", 510 t->parms.name); 511 rel_msg = 1; 512 } 513 } else { 514 net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", 515 t->parms.name); 516 } 517 break; 518 case ICMPV6_PKT_TOOBIG: 519 mtu = *info - offset; 520 if (mtu < IPV6_MIN_MTU) 521 mtu = IPV6_MIN_MTU; 522 t->dev->mtu = mtu; 523 524 if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { 525 rel_type = ICMPV6_PKT_TOOBIG; 526 rel_code = 0; 527 rel_info = mtu; 528 rel_msg = 1; 529 } 530 break; 531 } 532 533 *type = rel_type; 534 *code = rel_code; 535 *info = rel_info; 536 *msg = rel_msg; 537 538 out: 539 rcu_read_unlock(); 540 return err; 541 } 542 543 static int 544 ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 545 u8 type, u8 code, int offset, __be32 info) 546 { 547 int rel_msg = 0; 548 u8 rel_type = type; 549 u8 rel_code = code; 550 __u32 rel_info = ntohl(info); 551 int err; 552 struct sk_buff *skb2; 553 const struct iphdr *eiph; 554 struct rtable *rt; 555 struct flowi4 fl4; 556 557 err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, 558 &rel_msg, &rel_info, offset); 559 if (err < 0) 560 return err; 561 562 if (rel_msg == 0) 563 return 0; 564 565 switch (rel_type) { 566 case ICMPV6_DEST_UNREACH: 567 if (rel_code != ICMPV6_ADDR_UNREACH) 568 return 0; 569 rel_type = ICMP_DEST_UNREACH; 570 rel_code = ICMP_HOST_UNREACH; 571 break; 572 case ICMPV6_PKT_TOOBIG: 573 if (rel_code != 0) 574 return 0; 575 rel_type = ICMP_DEST_UNREACH; 576 rel_code = ICMP_FRAG_NEEDED; 577 break; 578 case NDISC_REDIRECT: 579 rel_type = ICMP_REDIRECT; 580 rel_code = ICMP_REDIR_HOST; 581 default: 582 return 0; 583 } 584 585 if (!pskb_may_pull(skb, offset + sizeof(struct iphdr))) 586 return 0; 587 588 skb2 = skb_clone(skb, GFP_ATOMIC); 589 if (!skb2) 590 return 0; 591 592 skb_dst_drop(skb2); 593 594 skb_pull(skb2, offset); 595 skb_reset_network_header(skb2); 596 eiph = ip_hdr(skb2); 597 598 /* Try to guess incoming interface */ 599 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, 600 eiph->saddr, 0, 601 0, 0, 602 IPPROTO_IPIP, RT_TOS(eiph->tos), 0); 603 if (IS_ERR(rt)) 604 goto out; 605 606 skb2->dev = rt->dst.dev; 607 608 /* route "incoming" packet */ 609 if (rt->rt_flags & RTCF_LOCAL) { 610 ip_rt_put(rt); 611 rt = NULL; 612 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, 613 eiph->daddr, eiph->saddr, 614 0, 0, 615 IPPROTO_IPIP, 616 RT_TOS(eiph->tos), 0); 617 if (IS_ERR(rt) || 618 rt->dst.dev->type != ARPHRD_TUNNEL) { 619 if (!IS_ERR(rt)) 620 ip_rt_put(rt); 621 goto out; 622 } 623 skb_dst_set(skb2, &rt->dst); 624 } else { 625 ip_rt_put(rt); 626 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, 627 skb2->dev) || 628 skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) 629 goto out; 630 } 631 632 /* change mtu on this route */ 633 if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) { 634 if (rel_info > dst_mtu(skb_dst(skb2))) 635 goto out; 636 637 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); 638 } 639 if (rel_type == ICMP_REDIRECT) 640 skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); 641 642 icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); 643 644 out: 645 kfree_skb(skb2); 646 return 0; 647 } 648 649 static int 650 ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 651 u8 type, u8 code, int offset, __be32 info) 652 { 653 int rel_msg = 0; 654 u8 rel_type = type; 655 u8 rel_code = code; 656 __u32 rel_info = ntohl(info); 657 int err; 658 659 err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, 660 &rel_msg, &rel_info, offset); 661 if (err < 0) 662 return err; 663 664 if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) { 665 struct rt6_info *rt; 666 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 667 668 if (!skb2) 669 return 0; 670 671 skb_dst_drop(skb2); 672 skb_pull(skb2, offset); 673 skb_reset_network_header(skb2); 674 675 /* Try to guess incoming interface */ 676 rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, 677 NULL, 0, 0); 678 679 if (rt && rt->dst.dev) 680 skb2->dev = rt->dst.dev; 681 682 icmpv6_send(skb2, rel_type, rel_code, rel_info); 683 684 ip6_rt_put(rt); 685 686 kfree_skb(skb2); 687 } 688 689 return 0; 690 } 691 692 static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, 693 const struct ipv6hdr *ipv6h, 694 struct sk_buff *skb) 695 { 696 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; 697 698 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 699 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); 700 701 return IP6_ECN_decapsulate(ipv6h, skb); 702 } 703 704 static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, 705 const struct ipv6hdr *ipv6h, 706 struct sk_buff *skb) 707 { 708 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 709 ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); 710 711 return IP6_ECN_decapsulate(ipv6h, skb); 712 } 713 714 __u32 ip6_tnl_get_cap(struct ip6_tnl *t, 715 const struct in6_addr *laddr, 716 const struct in6_addr *raddr) 717 { 718 struct __ip6_tnl_parm *p = &t->parms; 719 int ltype = ipv6_addr_type(laddr); 720 int rtype = ipv6_addr_type(raddr); 721 __u32 flags = 0; 722 723 if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) { 724 flags = IP6_TNL_F_CAP_PER_PACKET; 725 } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && 726 rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && 727 !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && 728 (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { 729 if (ltype&IPV6_ADDR_UNICAST) 730 flags |= IP6_TNL_F_CAP_XMIT; 731 if (rtype&IPV6_ADDR_UNICAST) 732 flags |= IP6_TNL_F_CAP_RCV; 733 } 734 return flags; 735 } 736 EXPORT_SYMBOL(ip6_tnl_get_cap); 737 738 /* called with rcu_read_lock() */ 739 int ip6_tnl_rcv_ctl(struct ip6_tnl *t, 740 const struct in6_addr *laddr, 741 const struct in6_addr *raddr) 742 { 743 struct __ip6_tnl_parm *p = &t->parms; 744 int ret = 0; 745 struct net *net = t->net; 746 747 if ((p->flags & IP6_TNL_F_CAP_RCV) || 748 ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && 749 (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) { 750 struct net_device *ldev = NULL; 751 752 if (p->link) 753 ldev = dev_get_by_index_rcu(net, p->link); 754 755 if ((ipv6_addr_is_multicast(laddr) || 756 likely(ipv6_chk_addr(net, laddr, ldev, 0))) && 757 likely(!ipv6_chk_addr(net, raddr, NULL, 0))) 758 ret = 1; 759 } 760 return ret; 761 } 762 EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); 763 764 /** 765 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally 766 * @skb: received socket buffer 767 * @protocol: ethernet protocol ID 768 * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN 769 * 770 * Return: 0 771 **/ 772 773 static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, 774 __u8 ipproto, 775 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, 776 const struct ipv6hdr *ipv6h, 777 struct sk_buff *skb)) 778 { 779 struct ip6_tnl *t; 780 const struct ipv6hdr *ipv6h = ipv6_hdr(skb); 781 int err; 782 783 rcu_read_lock(); 784 785 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, 786 &ipv6h->daddr)) != NULL) { 787 struct pcpu_tstats *tstats; 788 789 if (t->parms.proto != ipproto && t->parms.proto != 0) { 790 rcu_read_unlock(); 791 goto discard; 792 } 793 794 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 795 rcu_read_unlock(); 796 goto discard; 797 } 798 799 if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { 800 t->dev->stats.rx_dropped++; 801 rcu_read_unlock(); 802 goto discard; 803 } 804 skb->mac_header = skb->network_header; 805 skb_reset_network_header(skb); 806 skb->protocol = htons(protocol); 807 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 808 809 __skb_tunnel_rx(skb, t->dev, t->net); 810 811 err = dscp_ecn_decapsulate(t, ipv6h, skb); 812 if (unlikely(err)) { 813 if (log_ecn_error) 814 net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", 815 &ipv6h->saddr, 816 ipv6_get_dsfield(ipv6h)); 817 if (err > 1) { 818 ++t->dev->stats.rx_frame_errors; 819 ++t->dev->stats.rx_errors; 820 rcu_read_unlock(); 821 goto discard; 822 } 823 } 824 825 tstats = this_cpu_ptr(t->dev->tstats); 826 tstats->rx_packets++; 827 tstats->rx_bytes += skb->len; 828 829 netif_rx(skb); 830 831 rcu_read_unlock(); 832 return 0; 833 } 834 rcu_read_unlock(); 835 return 1; 836 837 discard: 838 kfree_skb(skb); 839 return 0; 840 } 841 842 static int ip4ip6_rcv(struct sk_buff *skb) 843 { 844 return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP, 845 ip4ip6_dscp_ecn_decapsulate); 846 } 847 848 static int ip6ip6_rcv(struct sk_buff *skb) 849 { 850 return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6, 851 ip6ip6_dscp_ecn_decapsulate); 852 } 853 854 struct ipv6_tel_txoption { 855 struct ipv6_txoptions ops; 856 __u8 dst_opt[8]; 857 }; 858 859 static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) 860 { 861 memset(opt, 0, sizeof(struct ipv6_tel_txoption)); 862 863 opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; 864 opt->dst_opt[3] = 1; 865 opt->dst_opt[4] = encap_limit; 866 opt->dst_opt[5] = IPV6_TLV_PADN; 867 opt->dst_opt[6] = 1; 868 869 opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; 870 opt->ops.opt_nflen = 8; 871 } 872 873 /** 874 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own 875 * @t: the outgoing tunnel device 876 * @hdr: IPv6 header from the incoming packet 877 * 878 * Description: 879 * Avoid trivial tunneling loop by checking that tunnel exit-point 880 * doesn't match source of incoming packet. 881 * 882 * Return: 883 * 1 if conflict, 884 * 0 else 885 **/ 886 887 static inline bool 888 ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) 889 { 890 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); 891 } 892 893 int ip6_tnl_xmit_ctl(struct ip6_tnl *t) 894 { 895 struct __ip6_tnl_parm *p = &t->parms; 896 int ret = 0; 897 struct net *net = t->net; 898 899 if (p->flags & IP6_TNL_F_CAP_XMIT) { 900 struct net_device *ldev = NULL; 901 902 rcu_read_lock(); 903 if (p->link) 904 ldev = dev_get_by_index_rcu(net, p->link); 905 906 if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) 907 pr_warn("%s xmit: Local address not yet configured!\n", 908 p->name); 909 else if (!ipv6_addr_is_multicast(&p->raddr) && 910 unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0))) 911 pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", 912 p->name); 913 else 914 ret = 1; 915 rcu_read_unlock(); 916 } 917 return ret; 918 } 919 EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); 920 921 /** 922 * ip6_tnl_xmit2 - encapsulate packet and send 923 * @skb: the outgoing socket buffer 924 * @dev: the outgoing tunnel device 925 * @dsfield: dscp code for outer header 926 * @fl: flow of tunneled packet 927 * @encap_limit: encapsulation limit 928 * @pmtu: Path MTU is stored if packet is too big 929 * 930 * Description: 931 * Build new header and do some sanity checks on the packet before sending 932 * it. 933 * 934 * Return: 935 * 0 on success 936 * -1 fail 937 * %-EMSGSIZE message too big. return mtu in this case. 938 **/ 939 940 static int ip6_tnl_xmit2(struct sk_buff *skb, 941 struct net_device *dev, 942 __u8 dsfield, 943 struct flowi6 *fl6, 944 int encap_limit, 945 __u32 *pmtu) 946 { 947 struct ip6_tnl *t = netdev_priv(dev); 948 struct net *net = t->net; 949 struct net_device_stats *stats = &t->dev->stats; 950 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 951 struct ipv6_tel_txoption opt; 952 struct dst_entry *dst = NULL, *ndst = NULL; 953 struct net_device *tdev; 954 int mtu; 955 unsigned int max_headroom = sizeof(struct ipv6hdr); 956 u8 proto; 957 int err = -1; 958 959 if (!fl6->flowi6_mark) 960 dst = ip6_tnl_dst_check(t); 961 if (!dst) { 962 ndst = ip6_route_output(net, NULL, fl6); 963 964 if (ndst->error) 965 goto tx_err_link_failure; 966 ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); 967 if (IS_ERR(ndst)) { 968 err = PTR_ERR(ndst); 969 ndst = NULL; 970 goto tx_err_link_failure; 971 } 972 dst = ndst; 973 } 974 975 tdev = dst->dev; 976 977 if (tdev == dev) { 978 stats->collisions++; 979 net_warn_ratelimited("%s: Local routing loop detected!\n", 980 t->parms.name); 981 goto tx_err_dst_release; 982 } 983 mtu = dst_mtu(dst) - sizeof (*ipv6h); 984 if (encap_limit >= 0) { 985 max_headroom += 8; 986 mtu -= 8; 987 } 988 if (mtu < IPV6_MIN_MTU) 989 mtu = IPV6_MIN_MTU; 990 if (skb_dst(skb)) 991 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 992 if (skb->len > mtu) { 993 *pmtu = mtu; 994 err = -EMSGSIZE; 995 goto tx_err_dst_release; 996 } 997 998 skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); 999 1000 /* 1001 * Okay, now see if we can stuff it in the buffer as-is. 1002 */ 1003 max_headroom += LL_RESERVED_SPACE(tdev); 1004 1005 if (skb_headroom(skb) < max_headroom || skb_shared(skb) || 1006 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 1007 struct sk_buff *new_skb; 1008 1009 if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) 1010 goto tx_err_dst_release; 1011 1012 if (skb->sk) 1013 skb_set_owner_w(new_skb, skb->sk); 1014 consume_skb(skb); 1015 skb = new_skb; 1016 } 1017 if (fl6->flowi6_mark) { 1018 skb_dst_set(skb, dst); 1019 ndst = NULL; 1020 } else { 1021 skb_dst_set_noref(skb, dst); 1022 } 1023 skb->transport_header = skb->network_header; 1024 1025 proto = fl6->flowi6_proto; 1026 if (encap_limit >= 0) { 1027 init_tel_txopt(&opt, encap_limit); 1028 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); 1029 } 1030 1031 if (likely(!skb->encapsulation)) { 1032 skb_reset_inner_headers(skb); 1033 skb->encapsulation = 1; 1034 } 1035 1036 skb_push(skb, sizeof(struct ipv6hdr)); 1037 skb_reset_network_header(skb); 1038 ipv6h = ipv6_hdr(skb); 1039 ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); 1040 ipv6h->hop_limit = t->parms.hop_limit; 1041 ipv6h->nexthdr = proto; 1042 ipv6h->saddr = fl6->saddr; 1043 ipv6h->daddr = fl6->daddr; 1044 ip6tunnel_xmit(skb, dev); 1045 if (ndst) 1046 ip6_tnl_dst_store(t, ndst); 1047 return 0; 1048 tx_err_link_failure: 1049 stats->tx_carrier_errors++; 1050 dst_link_failure(skb); 1051 tx_err_dst_release: 1052 dst_release(ndst); 1053 return err; 1054 } 1055 1056 static inline int 1057 ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 1058 { 1059 struct ip6_tnl *t = netdev_priv(dev); 1060 const struct iphdr *iph = ip_hdr(skb); 1061 int encap_limit = -1; 1062 struct flowi6 fl6; 1063 __u8 dsfield; 1064 __u32 mtu; 1065 int err; 1066 1067 if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) || 1068 !ip6_tnl_xmit_ctl(t)) 1069 return -1; 1070 1071 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1072 encap_limit = t->parms.encap_limit; 1073 1074 memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); 1075 fl6.flowi6_proto = IPPROTO_IPIP; 1076 1077 dsfield = ipv4_get_dsfield(iph); 1078 1079 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) 1080 fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) 1081 & IPV6_TCLASS_MASK; 1082 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) 1083 fl6.flowi6_mark = skb->mark; 1084 1085 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); 1086 if (err != 0) { 1087 /* XXX: send ICMP error even if DF is not set. */ 1088 if (err == -EMSGSIZE) 1089 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 1090 htonl(mtu)); 1091 return -1; 1092 } 1093 1094 return 0; 1095 } 1096 1097 static inline int 1098 ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 1099 { 1100 struct ip6_tnl *t = netdev_priv(dev); 1101 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 1102 int encap_limit = -1; 1103 __u16 offset; 1104 struct flowi6 fl6; 1105 __u8 dsfield; 1106 __u32 mtu; 1107 int err; 1108 1109 if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || 1110 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) 1111 return -1; 1112 1113 offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); 1114 if (offset > 0) { 1115 struct ipv6_tlv_tnl_enc_lim *tel; 1116 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; 1117 if (tel->encap_limit == 0) { 1118 icmpv6_send(skb, ICMPV6_PARAMPROB, 1119 ICMPV6_HDR_FIELD, offset + 2); 1120 return -1; 1121 } 1122 encap_limit = tel->encap_limit - 1; 1123 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1124 encap_limit = t->parms.encap_limit; 1125 1126 memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); 1127 fl6.flowi6_proto = IPPROTO_IPV6; 1128 1129 dsfield = ipv6_get_dsfield(ipv6h); 1130 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) 1131 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); 1132 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) 1133 fl6.flowlabel |= ip6_flowlabel(ipv6h); 1134 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) 1135 fl6.flowi6_mark = skb->mark; 1136 1137 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); 1138 if (err != 0) { 1139 if (err == -EMSGSIZE) 1140 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1141 return -1; 1142 } 1143 1144 return 0; 1145 } 1146 1147 static netdev_tx_t 1148 ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 1149 { 1150 struct ip6_tnl *t = netdev_priv(dev); 1151 struct net_device_stats *stats = &t->dev->stats; 1152 int ret; 1153 1154 switch (skb->protocol) { 1155 case htons(ETH_P_IP): 1156 ret = ip4ip6_tnl_xmit(skb, dev); 1157 break; 1158 case htons(ETH_P_IPV6): 1159 ret = ip6ip6_tnl_xmit(skb, dev); 1160 break; 1161 default: 1162 goto tx_err; 1163 } 1164 1165 if (ret < 0) 1166 goto tx_err; 1167 1168 return NETDEV_TX_OK; 1169 1170 tx_err: 1171 stats->tx_errors++; 1172 stats->tx_dropped++; 1173 kfree_skb(skb); 1174 return NETDEV_TX_OK; 1175 } 1176 1177 static void ip6_tnl_link_config(struct ip6_tnl *t) 1178 { 1179 struct net_device *dev = t->dev; 1180 struct __ip6_tnl_parm *p = &t->parms; 1181 struct flowi6 *fl6 = &t->fl.u.ip6; 1182 1183 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); 1184 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); 1185 1186 /* Set up flowi template */ 1187 fl6->saddr = p->laddr; 1188 fl6->daddr = p->raddr; 1189 fl6->flowi6_oif = p->link; 1190 fl6->flowlabel = 0; 1191 1192 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) 1193 fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; 1194 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) 1195 fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; 1196 1197 p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); 1198 p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); 1199 1200 if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) 1201 dev->flags |= IFF_POINTOPOINT; 1202 else 1203 dev->flags &= ~IFF_POINTOPOINT; 1204 1205 dev->iflink = p->link; 1206 1207 if (p->flags & IP6_TNL_F_CAP_XMIT) { 1208 int strict = (ipv6_addr_type(&p->raddr) & 1209 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); 1210 1211 struct rt6_info *rt = rt6_lookup(t->net, 1212 &p->raddr, &p->laddr, 1213 p->link, strict); 1214 1215 if (rt == NULL) 1216 return; 1217 1218 if (rt->dst.dev) { 1219 dev->hard_header_len = rt->dst.dev->hard_header_len + 1220 sizeof (struct ipv6hdr); 1221 1222 dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr); 1223 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1224 dev->mtu-=8; 1225 1226 if (dev->mtu < IPV6_MIN_MTU) 1227 dev->mtu = IPV6_MIN_MTU; 1228 } 1229 ip6_rt_put(rt); 1230 } 1231 } 1232 1233 /** 1234 * ip6_tnl_change - update the tunnel parameters 1235 * @t: tunnel to be changed 1236 * @p: tunnel configuration parameters 1237 * 1238 * Description: 1239 * ip6_tnl_change() updates the tunnel parameters 1240 **/ 1241 1242 static int 1243 ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) 1244 { 1245 t->parms.laddr = p->laddr; 1246 t->parms.raddr = p->raddr; 1247 t->parms.flags = p->flags; 1248 t->parms.hop_limit = p->hop_limit; 1249 t->parms.encap_limit = p->encap_limit; 1250 t->parms.flowinfo = p->flowinfo; 1251 t->parms.link = p->link; 1252 t->parms.proto = p->proto; 1253 ip6_tnl_dst_reset(t); 1254 ip6_tnl_link_config(t); 1255 return 0; 1256 } 1257 1258 static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) 1259 { 1260 struct net *net = t->net; 1261 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1262 int err; 1263 1264 ip6_tnl_unlink(ip6n, t); 1265 synchronize_net(); 1266 err = ip6_tnl_change(t, p); 1267 ip6_tnl_link(ip6n, t); 1268 netdev_state_change(t->dev); 1269 return err; 1270 } 1271 1272 static void 1273 ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) 1274 { 1275 p->laddr = u->laddr; 1276 p->raddr = u->raddr; 1277 p->flags = u->flags; 1278 p->hop_limit = u->hop_limit; 1279 p->encap_limit = u->encap_limit; 1280 p->flowinfo = u->flowinfo; 1281 p->link = u->link; 1282 p->proto = u->proto; 1283 memcpy(p->name, u->name, sizeof(u->name)); 1284 } 1285 1286 static void 1287 ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) 1288 { 1289 u->laddr = p->laddr; 1290 u->raddr = p->raddr; 1291 u->flags = p->flags; 1292 u->hop_limit = p->hop_limit; 1293 u->encap_limit = p->encap_limit; 1294 u->flowinfo = p->flowinfo; 1295 u->link = p->link; 1296 u->proto = p->proto; 1297 memcpy(u->name, p->name, sizeof(u->name)); 1298 } 1299 1300 /** 1301 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace 1302 * @dev: virtual device associated with tunnel 1303 * @ifr: parameters passed from userspace 1304 * @cmd: command to be performed 1305 * 1306 * Description: 1307 * ip6_tnl_ioctl() is used for managing IPv6 tunnels 1308 * from userspace. 1309 * 1310 * The possible commands are the following: 1311 * %SIOCGETTUNNEL: get tunnel parameters for device 1312 * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters 1313 * %SIOCCHGTUNNEL: change tunnel parameters to those given 1314 * %SIOCDELTUNNEL: delete tunnel 1315 * 1316 * The fallback device "ip6tnl0", created during module 1317 * initialization, can be used for creating other tunnel devices. 1318 * 1319 * Return: 1320 * 0 on success, 1321 * %-EFAULT if unable to copy data to or from userspace, 1322 * %-EPERM if current process hasn't %CAP_NET_ADMIN set 1323 * %-EINVAL if passed tunnel parameters are invalid, 1324 * %-EEXIST if changing a tunnel's parameters would cause a conflict 1325 * %-ENODEV if attempting to change or delete a nonexisting device 1326 **/ 1327 1328 static int 1329 ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 1330 { 1331 int err = 0; 1332 struct ip6_tnl_parm p; 1333 struct __ip6_tnl_parm p1; 1334 struct ip6_tnl *t = NULL; 1335 struct net *net = dev_net(dev); 1336 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1337 1338 switch (cmd) { 1339 case SIOCGETTUNNEL: 1340 if (dev == ip6n->fb_tnl_dev) { 1341 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) { 1342 err = -EFAULT; 1343 break; 1344 } 1345 ip6_tnl_parm_from_user(&p1, &p); 1346 t = ip6_tnl_locate(net, &p1, 0); 1347 } else { 1348 memset(&p, 0, sizeof(p)); 1349 } 1350 if (t == NULL) 1351 t = netdev_priv(dev); 1352 ip6_tnl_parm_to_user(&p, &t->parms); 1353 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { 1354 err = -EFAULT; 1355 } 1356 break; 1357 case SIOCADDTUNNEL: 1358 case SIOCCHGTUNNEL: 1359 err = -EPERM; 1360 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1361 break; 1362 err = -EFAULT; 1363 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) 1364 break; 1365 err = -EINVAL; 1366 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && 1367 p.proto != 0) 1368 break; 1369 ip6_tnl_parm_from_user(&p1, &p); 1370 t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); 1371 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { 1372 if (t != NULL) { 1373 if (t->dev != dev) { 1374 err = -EEXIST; 1375 break; 1376 } 1377 } else 1378 t = netdev_priv(dev); 1379 1380 err = ip6_tnl_update(t, &p1); 1381 } 1382 if (t) { 1383 err = 0; 1384 ip6_tnl_parm_to_user(&p, &t->parms); 1385 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 1386 err = -EFAULT; 1387 1388 } else 1389 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); 1390 break; 1391 case SIOCDELTUNNEL: 1392 err = -EPERM; 1393 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1394 break; 1395 1396 if (dev == ip6n->fb_tnl_dev) { 1397 err = -EFAULT; 1398 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) 1399 break; 1400 err = -ENOENT; 1401 ip6_tnl_parm_from_user(&p1, &p); 1402 t = ip6_tnl_locate(net, &p1, 0); 1403 if (t == NULL) 1404 break; 1405 err = -EPERM; 1406 if (t->dev == ip6n->fb_tnl_dev) 1407 break; 1408 dev = t->dev; 1409 } 1410 err = 0; 1411 unregister_netdevice(dev); 1412 break; 1413 default: 1414 err = -EINVAL; 1415 } 1416 return err; 1417 } 1418 1419 /** 1420 * ip6_tnl_change_mtu - change mtu manually for tunnel device 1421 * @dev: virtual device associated with tunnel 1422 * @new_mtu: the new mtu 1423 * 1424 * Return: 1425 * 0 on success, 1426 * %-EINVAL if mtu too small 1427 **/ 1428 1429 static int 1430 ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) 1431 { 1432 struct ip6_tnl *tnl = netdev_priv(dev); 1433 1434 if (tnl->parms.proto == IPPROTO_IPIP) { 1435 if (new_mtu < 68) 1436 return -EINVAL; 1437 } else { 1438 if (new_mtu < IPV6_MIN_MTU) 1439 return -EINVAL; 1440 } 1441 if (new_mtu > 0xFFF8 - dev->hard_header_len) 1442 return -EINVAL; 1443 dev->mtu = new_mtu; 1444 return 0; 1445 } 1446 1447 1448 static const struct net_device_ops ip6_tnl_netdev_ops = { 1449 .ndo_uninit = ip6_tnl_dev_uninit, 1450 .ndo_start_xmit = ip6_tnl_xmit, 1451 .ndo_do_ioctl = ip6_tnl_ioctl, 1452 .ndo_change_mtu = ip6_tnl_change_mtu, 1453 .ndo_get_stats = ip6_get_stats, 1454 }; 1455 1456 1457 /** 1458 * ip6_tnl_dev_setup - setup virtual tunnel device 1459 * @dev: virtual device associated with tunnel 1460 * 1461 * Description: 1462 * Initialize function pointers and device parameters 1463 **/ 1464 1465 static void ip6_tnl_dev_setup(struct net_device *dev) 1466 { 1467 struct ip6_tnl *t; 1468 1469 dev->netdev_ops = &ip6_tnl_netdev_ops; 1470 dev->destructor = ip6_dev_free; 1471 1472 dev->type = ARPHRD_TUNNEL6; 1473 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); 1474 dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); 1475 t = netdev_priv(dev); 1476 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1477 dev->mtu-=8; 1478 dev->flags |= IFF_NOARP; 1479 dev->addr_len = sizeof(struct in6_addr); 1480 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 1481 /* This perm addr will be used as interface identifier by IPv6 */ 1482 dev->addr_assign_type = NET_ADDR_RANDOM; 1483 eth_random_addr(dev->perm_addr); 1484 } 1485 1486 1487 /** 1488 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices 1489 * @dev: virtual device associated with tunnel 1490 **/ 1491 1492 static inline int 1493 ip6_tnl_dev_init_gen(struct net_device *dev) 1494 { 1495 struct ip6_tnl *t = netdev_priv(dev); 1496 int i; 1497 1498 t->dev = dev; 1499 t->net = dev_net(dev); 1500 dev->tstats = alloc_percpu(struct pcpu_tstats); 1501 if (!dev->tstats) 1502 return -ENOMEM; 1503 1504 for_each_possible_cpu(i) { 1505 struct pcpu_tstats *ip6_tnl_stats; 1506 ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); 1507 u64_stats_init(&ip6_tnl_stats->syncp); 1508 } 1509 return 0; 1510 } 1511 1512 /** 1513 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices 1514 * @dev: virtual device associated with tunnel 1515 **/ 1516 1517 static int ip6_tnl_dev_init(struct net_device *dev) 1518 { 1519 struct ip6_tnl *t = netdev_priv(dev); 1520 int err = ip6_tnl_dev_init_gen(dev); 1521 1522 if (err) 1523 return err; 1524 ip6_tnl_link_config(t); 1525 return 0; 1526 } 1527 1528 /** 1529 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device 1530 * @dev: fallback device 1531 * 1532 * Return: 0 1533 **/ 1534 1535 static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) 1536 { 1537 struct ip6_tnl *t = netdev_priv(dev); 1538 struct net *net = dev_net(dev); 1539 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1540 int err = ip6_tnl_dev_init_gen(dev); 1541 1542 if (err) 1543 return err; 1544 1545 t->parms.proto = IPPROTO_IPV6; 1546 dev_hold(dev); 1547 1548 ip6_tnl_link_config(t); 1549 1550 rcu_assign_pointer(ip6n->tnls_wc[0], t); 1551 return 0; 1552 } 1553 1554 static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) 1555 { 1556 u8 proto; 1557 1558 if (!data) 1559 return 0; 1560 1561 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); 1562 if (proto != IPPROTO_IPV6 && 1563 proto != IPPROTO_IPIP && 1564 proto != 0) 1565 return -EINVAL; 1566 1567 return 0; 1568 } 1569 1570 static void ip6_tnl_netlink_parms(struct nlattr *data[], 1571 struct __ip6_tnl_parm *parms) 1572 { 1573 memset(parms, 0, sizeof(*parms)); 1574 1575 if (!data) 1576 return; 1577 1578 if (data[IFLA_IPTUN_LINK]) 1579 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); 1580 1581 if (data[IFLA_IPTUN_LOCAL]) 1582 nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL], 1583 sizeof(struct in6_addr)); 1584 1585 if (data[IFLA_IPTUN_REMOTE]) 1586 nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE], 1587 sizeof(struct in6_addr)); 1588 1589 if (data[IFLA_IPTUN_TTL]) 1590 parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]); 1591 1592 if (data[IFLA_IPTUN_ENCAP_LIMIT]) 1593 parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]); 1594 1595 if (data[IFLA_IPTUN_FLOWINFO]) 1596 parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]); 1597 1598 if (data[IFLA_IPTUN_FLAGS]) 1599 parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]); 1600 1601 if (data[IFLA_IPTUN_PROTO]) 1602 parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); 1603 } 1604 1605 static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, 1606 struct nlattr *tb[], struct nlattr *data[]) 1607 { 1608 struct net *net = dev_net(dev); 1609 struct ip6_tnl *nt; 1610 1611 nt = netdev_priv(dev); 1612 ip6_tnl_netlink_parms(data, &nt->parms); 1613 1614 if (ip6_tnl_locate(net, &nt->parms, 0)) 1615 return -EEXIST; 1616 1617 return ip6_tnl_create2(dev); 1618 } 1619 1620 static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], 1621 struct nlattr *data[]) 1622 { 1623 struct ip6_tnl *t = netdev_priv(dev); 1624 struct __ip6_tnl_parm p; 1625 struct net *net = t->net; 1626 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1627 1628 if (dev == ip6n->fb_tnl_dev) 1629 return -EINVAL; 1630 1631 ip6_tnl_netlink_parms(data, &p); 1632 1633 t = ip6_tnl_locate(net, &p, 0); 1634 1635 if (t) { 1636 if (t->dev != dev) 1637 return -EEXIST; 1638 } else 1639 t = netdev_priv(dev); 1640 1641 return ip6_tnl_update(t, &p); 1642 } 1643 1644 static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head) 1645 { 1646 struct net *net = dev_net(dev); 1647 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1648 1649 if (dev != ip6n->fb_tnl_dev) 1650 unregister_netdevice_queue(dev, head); 1651 } 1652 1653 static size_t ip6_tnl_get_size(const struct net_device *dev) 1654 { 1655 return 1656 /* IFLA_IPTUN_LINK */ 1657 nla_total_size(4) + 1658 /* IFLA_IPTUN_LOCAL */ 1659 nla_total_size(sizeof(struct in6_addr)) + 1660 /* IFLA_IPTUN_REMOTE */ 1661 nla_total_size(sizeof(struct in6_addr)) + 1662 /* IFLA_IPTUN_TTL */ 1663 nla_total_size(1) + 1664 /* IFLA_IPTUN_ENCAP_LIMIT */ 1665 nla_total_size(1) + 1666 /* IFLA_IPTUN_FLOWINFO */ 1667 nla_total_size(4) + 1668 /* IFLA_IPTUN_FLAGS */ 1669 nla_total_size(4) + 1670 /* IFLA_IPTUN_PROTO */ 1671 nla_total_size(1) + 1672 0; 1673 } 1674 1675 static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) 1676 { 1677 struct ip6_tnl *tunnel = netdev_priv(dev); 1678 struct __ip6_tnl_parm *parm = &tunnel->parms; 1679 1680 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || 1681 nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), 1682 &parm->laddr) || 1683 nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), 1684 &parm->raddr) || 1685 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || 1686 nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || 1687 nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || 1688 nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || 1689 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) 1690 goto nla_put_failure; 1691 return 0; 1692 1693 nla_put_failure: 1694 return -EMSGSIZE; 1695 } 1696 1697 static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { 1698 [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, 1699 [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) }, 1700 [IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) }, 1701 [IFLA_IPTUN_TTL] = { .type = NLA_U8 }, 1702 [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 }, 1703 [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 }, 1704 [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 }, 1705 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, 1706 }; 1707 1708 static struct rtnl_link_ops ip6_link_ops __read_mostly = { 1709 .kind = "ip6tnl", 1710 .maxtype = IFLA_IPTUN_MAX, 1711 .policy = ip6_tnl_policy, 1712 .priv_size = sizeof(struct ip6_tnl), 1713 .setup = ip6_tnl_dev_setup, 1714 .validate = ip6_tnl_validate, 1715 .newlink = ip6_tnl_newlink, 1716 .changelink = ip6_tnl_changelink, 1717 .dellink = ip6_tnl_dellink, 1718 .get_size = ip6_tnl_get_size, 1719 .fill_info = ip6_tnl_fill_info, 1720 }; 1721 1722 static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { 1723 .handler = ip4ip6_rcv, 1724 .err_handler = ip4ip6_err, 1725 .priority = 1, 1726 }; 1727 1728 static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { 1729 .handler = ip6ip6_rcv, 1730 .err_handler = ip6ip6_err, 1731 .priority = 1, 1732 }; 1733 1734 static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) 1735 { 1736 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1737 struct net_device *dev, *aux; 1738 int h; 1739 struct ip6_tnl *t; 1740 LIST_HEAD(list); 1741 1742 for_each_netdev_safe(net, dev, aux) 1743 if (dev->rtnl_link_ops == &ip6_link_ops) 1744 unregister_netdevice_queue(dev, &list); 1745 1746 for (h = 0; h < HASH_SIZE; h++) { 1747 t = rtnl_dereference(ip6n->tnls_r_l[h]); 1748 while (t != NULL) { 1749 /* If dev is in the same netns, it has already 1750 * been added to the list by the previous loop. 1751 */ 1752 if (!net_eq(dev_net(t->dev), net)) 1753 unregister_netdevice_queue(t->dev, &list); 1754 t = rtnl_dereference(t->next); 1755 } 1756 } 1757 1758 unregister_netdevice_many(&list); 1759 } 1760 1761 static int __net_init ip6_tnl_init_net(struct net *net) 1762 { 1763 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1764 struct ip6_tnl *t = NULL; 1765 int err; 1766 1767 ip6n->tnls[0] = ip6n->tnls_wc; 1768 ip6n->tnls[1] = ip6n->tnls_r_l; 1769 1770 err = -ENOMEM; 1771 ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", 1772 ip6_tnl_dev_setup); 1773 1774 if (!ip6n->fb_tnl_dev) 1775 goto err_alloc_dev; 1776 dev_net_set(ip6n->fb_tnl_dev, net); 1777 ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops; 1778 /* FB netdevice is special: we have one, and only one per netns. 1779 * Allowing to move it to another netns is clearly unsafe. 1780 */ 1781 ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL; 1782 1783 err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); 1784 if (err < 0) 1785 goto err_register; 1786 1787 err = register_netdev(ip6n->fb_tnl_dev); 1788 if (err < 0) 1789 goto err_register; 1790 1791 t = netdev_priv(ip6n->fb_tnl_dev); 1792 1793 strcpy(t->parms.name, ip6n->fb_tnl_dev->name); 1794 return 0; 1795 1796 err_register: 1797 ip6_dev_free(ip6n->fb_tnl_dev); 1798 err_alloc_dev: 1799 return err; 1800 } 1801 1802 static void __net_exit ip6_tnl_exit_net(struct net *net) 1803 { 1804 rtnl_lock(); 1805 ip6_tnl_destroy_tunnels(net); 1806 rtnl_unlock(); 1807 } 1808 1809 static struct pernet_operations ip6_tnl_net_ops = { 1810 .init = ip6_tnl_init_net, 1811 .exit = ip6_tnl_exit_net, 1812 .id = &ip6_tnl_net_id, 1813 .size = sizeof(struct ip6_tnl_net), 1814 }; 1815 1816 /** 1817 * ip6_tunnel_init - register protocol and reserve needed resources 1818 * 1819 * Return: 0 on success 1820 **/ 1821 1822 static int __init ip6_tunnel_init(void) 1823 { 1824 int err; 1825 1826 err = register_pernet_device(&ip6_tnl_net_ops); 1827 if (err < 0) 1828 goto out_pernet; 1829 1830 err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET); 1831 if (err < 0) { 1832 pr_err("%s: can't register ip4ip6\n", __func__); 1833 goto out_ip4ip6; 1834 } 1835 1836 err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6); 1837 if (err < 0) { 1838 pr_err("%s: can't register ip6ip6\n", __func__); 1839 goto out_ip6ip6; 1840 } 1841 err = rtnl_link_register(&ip6_link_ops); 1842 if (err < 0) 1843 goto rtnl_link_failed; 1844 1845 return 0; 1846 1847 rtnl_link_failed: 1848 xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); 1849 out_ip6ip6: 1850 xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); 1851 out_ip4ip6: 1852 unregister_pernet_device(&ip6_tnl_net_ops); 1853 out_pernet: 1854 return err; 1855 } 1856 1857 /** 1858 * ip6_tunnel_cleanup - free resources and unregister protocol 1859 **/ 1860 1861 static void __exit ip6_tunnel_cleanup(void) 1862 { 1863 rtnl_link_unregister(&ip6_link_ops); 1864 if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) 1865 pr_info("%s: can't deregister ip4ip6\n", __func__); 1866 1867 if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) 1868 pr_info("%s: can't deregister ip6ip6\n", __func__); 1869 1870 unregister_pernet_device(&ip6_tnl_net_ops); 1871 } 1872 1873 module_init(ip6_tunnel_init); 1874 module_exit(ip6_tunnel_cleanup); 1875