1 /* 2 * Copyright (c) 2013 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include <linux/capability.h> 22 #include <linux/module.h> 23 #include <linux/types.h> 24 #include <linux/kernel.h> 25 #include <linux/slab.h> 26 #include <linux/uaccess.h> 27 #include <linux/skbuff.h> 28 #include <linux/netdevice.h> 29 #include <linux/in.h> 30 #include <linux/tcp.h> 31 #include <linux/udp.h> 32 #include <linux/if_arp.h> 33 #include <linux/mroute.h> 34 #include <linux/init.h> 35 #include <linux/in6.h> 36 #include <linux/inetdevice.h> 37 #include <linux/igmp.h> 38 #include <linux/netfilter_ipv4.h> 39 #include <linux/etherdevice.h> 40 #include <linux/if_ether.h> 41 #include <linux/if_vlan.h> 42 #include <linux/rculist.h> 43 #include <linux/err.h> 44 45 #include <net/sock.h> 46 #include <net/ip.h> 47 #include <net/icmp.h> 48 #include <net/protocol.h> 49 #include <net/ip_tunnels.h> 50 #include <net/arp.h> 51 #include <net/checksum.h> 52 #include <net/dsfield.h> 53 #include <net/inet_ecn.h> 54 #include <net/xfrm.h> 55 #include <net/net_namespace.h> 56 #include <net/netns/generic.h> 57 #include <net/rtnetlink.h> 58 #include <net/udp.h> 59 60 #if IS_ENABLED(CONFIG_IPV6) 61 #include <net/ipv6.h> 62 #include <net/ip6_fib.h> 63 #include <net/ip6_route.h> 64 #endif 65 66 static unsigned int 
ip_tunnel_hash(__be32 key, __be32 remote) 67 { 68 return hash_32((__force u32)key ^ (__force u32)remote, 69 IP_TNL_HASH_BITS); 70 } 71 72 static void __tunnel_dst_set(struct ip_tunnel_dst *idst, 73 struct dst_entry *dst, __be32 saddr) 74 { 75 struct dst_entry *old_dst; 76 77 dst_clone(dst); 78 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); 79 dst_release(old_dst); 80 idst->saddr = saddr; 81 } 82 83 static noinline void tunnel_dst_set(struct ip_tunnel *t, 84 struct dst_entry *dst, __be32 saddr) 85 { 86 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr); 87 } 88 89 static void tunnel_dst_reset(struct ip_tunnel *t) 90 { 91 tunnel_dst_set(t, NULL, 0); 92 } 93 94 void ip_tunnel_dst_reset_all(struct ip_tunnel *t) 95 { 96 int i; 97 98 for_each_possible_cpu(i) 99 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0); 100 } 101 EXPORT_SYMBOL(ip_tunnel_dst_reset_all); 102 103 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, 104 u32 cookie, __be32 *saddr) 105 { 106 struct ip_tunnel_dst *idst; 107 struct dst_entry *dst; 108 109 rcu_read_lock(); 110 idst = raw_cpu_ptr(t->dst_cache); 111 dst = rcu_dereference(idst->dst); 112 if (dst && !atomic_inc_not_zero(&dst->__refcnt)) 113 dst = NULL; 114 if (dst) { 115 if (!dst->obsolete || dst->ops->check(dst, cookie)) { 116 *saddr = idst->saddr; 117 } else { 118 tunnel_dst_reset(t); 119 dst_release(dst); 120 dst = NULL; 121 } 122 } 123 rcu_read_unlock(); 124 return (struct rtable *)dst; 125 } 126 127 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, 128 __be16 flags, __be32 key) 129 { 130 if (p->i_flags & TUNNEL_KEY) { 131 if (flags & TUNNEL_KEY) 132 return key == p->i_key; 133 else 134 /* key expected, none present */ 135 return false; 136 } else 137 return !(flags & TUNNEL_KEY); 138 } 139 140 /* Fallback tunnel: no source, no destination, no key, no options 141 142 Tunnel hash table: 143 We require exact key match i.e. 
if a key is present in packet 144 it will match only tunnel with the same key; if it is not present, 145 it will match only keyless tunnel. 146 147 All keysless packets, if not matched configured keyless tunnels 148 will match fallback tunnel. 149 Given src, dst and key, find appropriate for input tunnel. 150 */ 151 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, 152 int link, __be16 flags, 153 __be32 remote, __be32 local, 154 __be32 key) 155 { 156 unsigned int hash; 157 struct ip_tunnel *t, *cand = NULL; 158 struct hlist_head *head; 159 160 hash = ip_tunnel_hash(key, remote); 161 head = &itn->tunnels[hash]; 162 163 hlist_for_each_entry_rcu(t, head, hash_node) { 164 if (local != t->parms.iph.saddr || 165 remote != t->parms.iph.daddr || 166 !(t->dev->flags & IFF_UP)) 167 continue; 168 169 if (!ip_tunnel_key_match(&t->parms, flags, key)) 170 continue; 171 172 if (t->parms.link == link) 173 return t; 174 else 175 cand = t; 176 } 177 178 hlist_for_each_entry_rcu(t, head, hash_node) { 179 if (remote != t->parms.iph.daddr || 180 t->parms.iph.saddr != 0 || 181 !(t->dev->flags & IFF_UP)) 182 continue; 183 184 if (!ip_tunnel_key_match(&t->parms, flags, key)) 185 continue; 186 187 if (t->parms.link == link) 188 return t; 189 else if (!cand) 190 cand = t; 191 } 192 193 hash = ip_tunnel_hash(key, 0); 194 head = &itn->tunnels[hash]; 195 196 hlist_for_each_entry_rcu(t, head, hash_node) { 197 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && 198 (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) 199 continue; 200 201 if (!(t->dev->flags & IFF_UP)) 202 continue; 203 204 if (!ip_tunnel_key_match(&t->parms, flags, key)) 205 continue; 206 207 if (t->parms.link == link) 208 return t; 209 else if (!cand) 210 cand = t; 211 } 212 213 if (flags & TUNNEL_NO_KEY) 214 goto skip_key_lookup; 215 216 hlist_for_each_entry_rcu(t, head, hash_node) { 217 if (t->parms.i_key != key || 218 t->parms.iph.saddr != 0 || 219 t->parms.iph.daddr != 0 || 220 
!(t->dev->flags & IFF_UP)) 221 continue; 222 223 if (t->parms.link == link) 224 return t; 225 else if (!cand) 226 cand = t; 227 } 228 229 skip_key_lookup: 230 if (cand) 231 return cand; 232 233 t = rcu_dereference(itn->collect_md_tun); 234 if (t) 235 return t; 236 237 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) 238 return netdev_priv(itn->fb_tunnel_dev); 239 240 return NULL; 241 } 242 EXPORT_SYMBOL_GPL(ip_tunnel_lookup); 243 244 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, 245 struct ip_tunnel_parm *parms) 246 { 247 unsigned int h; 248 __be32 remote; 249 __be32 i_key = parms->i_key; 250 251 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) 252 remote = parms->iph.daddr; 253 else 254 remote = 0; 255 256 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI)) 257 i_key = 0; 258 259 h = ip_tunnel_hash(i_key, remote); 260 return &itn->tunnels[h]; 261 } 262 263 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) 264 { 265 struct hlist_head *head = ip_bucket(itn, &t->parms); 266 267 if (t->collect_md) 268 rcu_assign_pointer(itn->collect_md_tun, t); 269 hlist_add_head_rcu(&t->hash_node, head); 270 } 271 272 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t) 273 { 274 if (t->collect_md) 275 rcu_assign_pointer(itn->collect_md_tun, NULL); 276 hlist_del_init_rcu(&t->hash_node); 277 } 278 279 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, 280 struct ip_tunnel_parm *parms, 281 int type) 282 { 283 __be32 remote = parms->iph.daddr; 284 __be32 local = parms->iph.saddr; 285 __be32 key = parms->i_key; 286 __be16 flags = parms->i_flags; 287 int link = parms->link; 288 struct ip_tunnel *t = NULL; 289 struct hlist_head *head = ip_bucket(itn, parms); 290 291 hlist_for_each_entry_rcu(t, head, hash_node) { 292 if (local == t->parms.iph.saddr && 293 remote == t->parms.iph.daddr && 294 link == t->parms.link && 295 type == t->dev->type && 296 
ip_tunnel_key_match(&t->parms, flags, key)) 297 break; 298 } 299 return t; 300 } 301 302 static struct net_device *__ip_tunnel_create(struct net *net, 303 const struct rtnl_link_ops *ops, 304 struct ip_tunnel_parm *parms) 305 { 306 int err; 307 struct ip_tunnel *tunnel; 308 struct net_device *dev; 309 char name[IFNAMSIZ]; 310 311 if (parms->name[0]) 312 strlcpy(name, parms->name, IFNAMSIZ); 313 else { 314 if (strlen(ops->kind) > (IFNAMSIZ - 3)) { 315 err = -E2BIG; 316 goto failed; 317 } 318 strlcpy(name, ops->kind, IFNAMSIZ); 319 strncat(name, "%d", 2); 320 } 321 322 ASSERT_RTNL(); 323 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); 324 if (!dev) { 325 err = -ENOMEM; 326 goto failed; 327 } 328 dev_net_set(dev, net); 329 330 dev->rtnl_link_ops = ops; 331 332 tunnel = netdev_priv(dev); 333 tunnel->parms = *parms; 334 tunnel->net = net; 335 336 err = register_netdevice(dev); 337 if (err) 338 goto failed_free; 339 340 return dev; 341 342 failed_free: 343 free_netdev(dev); 344 failed: 345 return ERR_PTR(err); 346 } 347 348 static inline void init_tunnel_flow(struct flowi4 *fl4, 349 int proto, 350 __be32 daddr, __be32 saddr, 351 __be32 key, __u8 tos, int oif) 352 { 353 memset(fl4, 0, sizeof(*fl4)); 354 fl4->flowi4_oif = oif; 355 fl4->daddr = daddr; 356 fl4->saddr = saddr; 357 fl4->flowi4_tos = tos; 358 fl4->flowi4_proto = proto; 359 fl4->fl4_gre_key = key; 360 } 361 362 static int ip_tunnel_bind_dev(struct net_device *dev) 363 { 364 struct net_device *tdev = NULL; 365 struct ip_tunnel *tunnel = netdev_priv(dev); 366 const struct iphdr *iph; 367 int hlen = LL_MAX_HEADER; 368 int mtu = ETH_DATA_LEN; 369 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 370 371 iph = &tunnel->parms.iph; 372 373 /* Guess output device to choose reasonable mtu and needed_headroom */ 374 if (iph->daddr) { 375 struct flowi4 fl4; 376 struct rtable *rt; 377 378 init_tunnel_flow(&fl4, iph->protocol, iph->daddr, 379 iph->saddr, tunnel->parms.o_key, 380 RT_TOS(iph->tos), 
tunnel->parms.link); 381 rt = ip_route_output_key(tunnel->net, &fl4); 382 383 if (!IS_ERR(rt)) { 384 tdev = rt->dst.dev; 385 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); 386 ip_rt_put(rt); 387 } 388 if (dev->type != ARPHRD_ETHER) 389 dev->flags |= IFF_POINTOPOINT; 390 } 391 392 if (!tdev && tunnel->parms.link) 393 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); 394 395 if (tdev) { 396 hlen = tdev->hard_header_len + tdev->needed_headroom; 397 mtu = tdev->mtu; 398 } 399 400 dev->needed_headroom = t_hlen + hlen; 401 mtu -= (dev->hard_header_len + t_hlen); 402 403 if (mtu < 68) 404 mtu = 68; 405 406 return mtu; 407 } 408 409 static struct ip_tunnel *ip_tunnel_create(struct net *net, 410 struct ip_tunnel_net *itn, 411 struct ip_tunnel_parm *parms) 412 { 413 struct ip_tunnel *nt; 414 struct net_device *dev; 415 416 BUG_ON(!itn->fb_tunnel_dev); 417 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); 418 if (IS_ERR(dev)) 419 return ERR_CAST(dev); 420 421 dev->mtu = ip_tunnel_bind_dev(dev); 422 423 nt = netdev_priv(dev); 424 ip_tunnel_add(itn, nt); 425 return nt; 426 } 427 428 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, 429 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, 430 bool log_ecn_error) 431 { 432 struct pcpu_sw_netstats *tstats; 433 const struct iphdr *iph = ip_hdr(skb); 434 int err; 435 436 #ifdef CONFIG_NET_IPGRE_BROADCAST 437 if (ipv4_is_multicast(iph->daddr)) { 438 tunnel->dev->stats.multicast++; 439 skb->pkt_type = PACKET_BROADCAST; 440 } 441 #endif 442 443 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || 444 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { 445 tunnel->dev->stats.rx_crc_errors++; 446 tunnel->dev->stats.rx_errors++; 447 goto drop; 448 } 449 450 if (tunnel->parms.i_flags&TUNNEL_SEQ) { 451 if (!(tpi->flags&TUNNEL_SEQ) || 452 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { 453 tunnel->dev->stats.rx_fifo_errors++; 
454 tunnel->dev->stats.rx_errors++; 455 goto drop; 456 } 457 tunnel->i_seqno = ntohl(tpi->seq) + 1; 458 } 459 460 skb_reset_network_header(skb); 461 462 err = IP_ECN_decapsulate(iph, skb); 463 if (unlikely(err)) { 464 if (log_ecn_error) 465 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", 466 &iph->saddr, iph->tos); 467 if (err > 1) { 468 ++tunnel->dev->stats.rx_frame_errors; 469 ++tunnel->dev->stats.rx_errors; 470 goto drop; 471 } 472 } 473 474 tstats = this_cpu_ptr(tunnel->dev->tstats); 475 u64_stats_update_begin(&tstats->syncp); 476 tstats->rx_packets++; 477 tstats->rx_bytes += skb->len; 478 u64_stats_update_end(&tstats->syncp); 479 480 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); 481 482 if (tunnel->dev->type == ARPHRD_ETHER) { 483 skb->protocol = eth_type_trans(skb, tunnel->dev); 484 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 485 } else { 486 skb->dev = tunnel->dev; 487 } 488 489 if (tun_dst) 490 skb_dst_set(skb, (struct dst_entry *)tun_dst); 491 492 gro_cells_receive(&tunnel->gro_cells, skb); 493 return 0; 494 495 drop: 496 kfree_skb(skb); 497 return 0; 498 } 499 EXPORT_SYMBOL_GPL(ip_tunnel_rcv); 500 501 static int ip_encap_hlen(struct ip_tunnel_encap *e) 502 { 503 const struct ip_tunnel_encap_ops *ops; 504 int hlen = -EINVAL; 505 506 if (e->type == TUNNEL_ENCAP_NONE) 507 return 0; 508 509 if (e->type >= MAX_IPTUN_ENCAP_OPS) 510 return -EINVAL; 511 512 rcu_read_lock(); 513 ops = rcu_dereference(iptun_encaps[e->type]); 514 if (likely(ops && ops->encap_hlen)) 515 hlen = ops->encap_hlen(e); 516 rcu_read_unlock(); 517 518 return hlen; 519 } 520 521 const struct ip_tunnel_encap_ops __rcu * 522 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; 523 524 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, 525 unsigned int num) 526 { 527 if (num >= MAX_IPTUN_ENCAP_OPS) 528 return -ERANGE; 529 530 return !cmpxchg((const struct ip_tunnel_encap_ops **) 531 &iptun_encaps[num], 532 NULL, ops) ? 
0 : -1; 533 } 534 EXPORT_SYMBOL(ip_tunnel_encap_add_ops); 535 536 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops, 537 unsigned int num) 538 { 539 int ret; 540 541 if (num >= MAX_IPTUN_ENCAP_OPS) 542 return -ERANGE; 543 544 ret = (cmpxchg((const struct ip_tunnel_encap_ops **) 545 &iptun_encaps[num], 546 ops, NULL) == ops) ? 0 : -1; 547 548 synchronize_net(); 549 550 return ret; 551 } 552 EXPORT_SYMBOL(ip_tunnel_encap_del_ops); 553 554 int ip_tunnel_encap_setup(struct ip_tunnel *t, 555 struct ip_tunnel_encap *ipencap) 556 { 557 int hlen; 558 559 memset(&t->encap, 0, sizeof(t->encap)); 560 561 hlen = ip_encap_hlen(ipencap); 562 if (hlen < 0) 563 return hlen; 564 565 t->encap.type = ipencap->type; 566 t->encap.sport = ipencap->sport; 567 t->encap.dport = ipencap->dport; 568 t->encap.flags = ipencap->flags; 569 570 t->encap_hlen = hlen; 571 t->hlen = t->encap_hlen + t->tun_hlen; 572 573 return 0; 574 } 575 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); 576 577 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, 578 u8 *protocol, struct flowi4 *fl4) 579 { 580 const struct ip_tunnel_encap_ops *ops; 581 int ret = -EINVAL; 582 583 if (t->encap.type == TUNNEL_ENCAP_NONE) 584 return 0; 585 586 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) 587 return -EINVAL; 588 589 rcu_read_lock(); 590 ops = rcu_dereference(iptun_encaps[t->encap.type]); 591 if (likely(ops && ops->build_header)) 592 ret = ops->build_header(skb, &t->encap, protocol, fl4); 593 rcu_read_unlock(); 594 595 return ret; 596 } 597 EXPORT_SYMBOL(ip_tunnel_encap); 598 599 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, 600 struct rtable *rt, __be16 df, 601 const struct iphdr *inner_iph) 602 { 603 struct ip_tunnel *tunnel = netdev_priv(dev); 604 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; 605 int mtu; 606 607 if (df) 608 mtu = dst_mtu(&rt->dst) - dev->hard_header_len 609 - sizeof(struct iphdr) - tunnel->hlen; 610 else 611 mtu = skb_dst(skb) ? 
dst_mtu(skb_dst(skb)) : dev->mtu; 612 613 if (skb_dst(skb)) 614 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 615 616 if (skb->protocol == htons(ETH_P_IP)) { 617 if (!skb_is_gso(skb) && 618 (inner_iph->frag_off & htons(IP_DF)) && 619 mtu < pkt_size) { 620 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 621 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 622 return -E2BIG; 623 } 624 } 625 #if IS_ENABLED(CONFIG_IPV6) 626 else if (skb->protocol == htons(ETH_P_IPV6)) { 627 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); 628 629 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && 630 mtu >= IPV6_MIN_MTU) { 631 if ((tunnel->parms.iph.daddr && 632 !ipv4_is_multicast(tunnel->parms.iph.daddr)) || 633 rt6->rt6i_dst.plen == 128) { 634 rt6->rt6i_flags |= RTF_MODIFIED; 635 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); 636 } 637 } 638 639 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && 640 mtu < pkt_size) { 641 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 642 return -E2BIG; 643 } 644 } 645 #endif 646 return 0; 647 } 648 649 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, 650 const struct iphdr *tnl_params, u8 protocol) 651 { 652 struct ip_tunnel *tunnel = netdev_priv(dev); 653 const struct iphdr *inner_iph; 654 struct flowi4 fl4; 655 u8 tos, ttl; 656 __be16 df; 657 struct rtable *rt; /* Route to the other host */ 658 unsigned int max_headroom; /* The extra header space needed */ 659 __be32 dst; 660 int err; 661 bool connected; 662 663 inner_iph = (const struct iphdr *)skb_inner_network_header(skb); 664 connected = (tunnel->parms.iph.daddr != 0); 665 666 dst = tnl_params->daddr; 667 if (dst == 0) { 668 /* NBMA tunnel */ 669 670 if (!skb_dst(skb)) { 671 dev->stats.tx_fifo_errors++; 672 goto tx_error; 673 } 674 675 if (skb->protocol == htons(ETH_P_IP)) { 676 rt = skb_rtable(skb); 677 dst = rt_nexthop(rt, inner_iph->daddr); 678 } 679 #if IS_ENABLED(CONFIG_IPV6) 680 else if (skb->protocol == htons(ETH_P_IPV6)) { 681 const struct in6_addr 
*addr6; 682 struct neighbour *neigh; 683 bool do_tx_error_icmp; 684 int addr_type; 685 686 neigh = dst_neigh_lookup(skb_dst(skb), 687 &ipv6_hdr(skb)->daddr); 688 if (!neigh) 689 goto tx_error; 690 691 addr6 = (const struct in6_addr *)&neigh->primary_key; 692 addr_type = ipv6_addr_type(addr6); 693 694 if (addr_type == IPV6_ADDR_ANY) { 695 addr6 = &ipv6_hdr(skb)->daddr; 696 addr_type = ipv6_addr_type(addr6); 697 } 698 699 if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 700 do_tx_error_icmp = true; 701 else { 702 do_tx_error_icmp = false; 703 dst = addr6->s6_addr32[3]; 704 } 705 neigh_release(neigh); 706 if (do_tx_error_icmp) 707 goto tx_error_icmp; 708 } 709 #endif 710 else 711 goto tx_error; 712 713 connected = false; 714 } 715 716 tos = tnl_params->tos; 717 if (tos & 0x1) { 718 tos &= ~0x1; 719 if (skb->protocol == htons(ETH_P_IP)) { 720 tos = inner_iph->tos; 721 connected = false; 722 } else if (skb->protocol == htons(ETH_P_IPV6)) { 723 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); 724 connected = false; 725 } 726 } 727 728 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, 729 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); 730 731 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) 732 goto tx_error; 733 734 rt = connected ? 
tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; 735 736 if (!rt) { 737 rt = ip_route_output_key(tunnel->net, &fl4); 738 739 if (IS_ERR(rt)) { 740 dev->stats.tx_carrier_errors++; 741 goto tx_error; 742 } 743 if (connected) 744 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); 745 } 746 747 if (rt->dst.dev == dev) { 748 ip_rt_put(rt); 749 dev->stats.collisions++; 750 goto tx_error; 751 } 752 753 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { 754 ip_rt_put(rt); 755 goto tx_error; 756 } 757 758 if (tunnel->err_count > 0) { 759 if (time_before(jiffies, 760 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 761 tunnel->err_count--; 762 763 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 764 dst_link_failure(skb); 765 } else 766 tunnel->err_count = 0; 767 } 768 769 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); 770 ttl = tnl_params->ttl; 771 if (ttl == 0) { 772 if (skb->protocol == htons(ETH_P_IP)) 773 ttl = inner_iph->ttl; 774 #if IS_ENABLED(CONFIG_IPV6) 775 else if (skb->protocol == htons(ETH_P_IPV6)) 776 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; 777 #endif 778 else 779 ttl = ip4_dst_hoplimit(&rt->dst); 780 } 781 782 df = tnl_params->frag_off; 783 if (skb->protocol == htons(ETH_P_IP)) 784 df |= (inner_iph->frag_off&htons(IP_DF)); 785 786 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) 787 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); 788 if (max_headroom > dev->needed_headroom) 789 dev->needed_headroom = max_headroom; 790 791 if (skb_cow_head(skb, dev->needed_headroom)) { 792 ip_rt_put(rt); 793 dev->stats.tx_dropped++; 794 kfree_skb(skb); 795 return; 796 } 797 798 err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, 799 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); 800 iptunnel_xmit_stats(err, &dev->stats, dev->tstats); 801 802 return; 803 804 #if IS_ENABLED(CONFIG_IPV6) 805 tx_error_icmp: 806 dst_link_failure(skb); 807 #endif 808 tx_error: 809 dev->stats.tx_errors++; 810 kfree_skb(skb); 811 } 812 
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

/* Apply new parameters @p to the existing tunnel @t / device @dev.
 *
 * The tunnel is removed from the hash table before its addresses and
 * keys change and re-added afterwards, because those fields determine
 * its bucket. For non-Ethernet devices the device and broadcast
 * addresses mirror the tunnel endpoints. When the link changes the
 * device is re-bound and, if @set_mtu, the resulting MTU is applied.
 * All cached routes are dropped and a state-change notification is
 * sent.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}

/* Handle the SIOC{GET,ADD,CHG,DEL}TUNNEL requests for device @dev.
 *
 * @p carries the tunnel parameters; for SIOCGETTUNNEL it is
 * overwritten with the parameters of the tunnel that was found.
 *
 * Returns 0 on success or a negative errno:
 *   -EPERM   caller lacks CAP_NET_ADMIN (ADD/CHG/DEL), or an attempt
 *            to delete the fallback tunnel itself
 *   -EEXIST  a tunnel with these parameters already exists
 *   -ENOENT  no matching tunnel
 *   -EINVAL  unknown @cmd, or a change that would flip the device
 *            between point-to-point and broadcast
 *   or the error from tunnel creation.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		/* On the fallback device, @p selects which tunnel to report;
		 * if none matches, the fallback tunnel itself is reported.
		 */
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed ttl forces DF on the outer header. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* For non-VTI tunnels, ignore keys whose flag is not set. */
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				/* Parameters already belong to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* The new destination must not change the
				 * device's POINTOPOINT/BROADCAST nature.
				 */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		/* On the fallback device, @p selects the tunnel to delete;
		 * the fallback tunnel itself may not be deleted this way.
		 */
		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

/* Set the MTU of tunnel device @dev.
 *
 * Accepts values from 68 up to 0xFFF8 minus the device hard header
 * length and the tunnel header overhead; returns -EINVAL otherwise.
 */
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

/* Device destructor (installed by ip_tunnel_init()): releases the GRO
 * cells, the per-CPU route cache and stats, then the device itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

/* Unhash tunnel device @dev and queue it on @head for unregistration.
 * The fallback device is left untouched.
 */
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

/* Return the network namespace stored in the tunnel (tunnel->net). */
struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

/* Return the configured link value (parms.link) of the tunnel. */
int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

/* Per-netns initialisation: set up the tunnel hash table and, when
 * @ops is given, create the fallback device (named @devname if set).
 *
 * Takes RTNL internally around device creation.
 * Returns 0 on success or the error from fallback device creation; in
 * the error case itn->fb_tunnel_dev holds the ERR_PTR() value.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	/* Without link ops there is no fallback device to create. */
	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

/* Queue every tunnel device belonging to @itn on @head.
 *
 * First all devices of the fallback device's netns that use @ops, then
 * any hashed tunnels whose device lives in a different netns.
 * NOTE(review): itn->fb_tunnel_dev is dereferenced unconditionally
 * here although ip_tunnel_init_net() can leave it NULL (no ops) --
 * confirm no caller reaches this path in that configuration.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

/* Per-netns teardown: unregister all tunnel devices of @itn in one
 * batch while holding RTNL.
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

/* rtnetlink "newlink" helper: register @dev as a tunnel with
 * parameters @p and add it to the hash table.
 *
 * Returns -EEXIST when a collect_md tunnel already exists (for a
 * collect_md device) or a tunnel with identical parameters exists,
 * otherwise the result of register_netdevice(). The computed MTU is
 * applied only when the request carried no IFLA_MTU attribute.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	/* Ethernet-type tunnels get a random MAC unless one was given. */
	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);
out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

/* rtnetlink "changelink" helper: apply parameters @p to @dev.
 *
 * Returns -EINVAL for the fallback device or when the change would
 * flip a non-Ethernet device between point-to-point and broadcast,
 * -EEXIST when another device already uses these parameters, and 0 on
 * success. The MTU is recomputed only when no IFLA_MTU was supplied.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

/* Common device init for tunnel drivers: install the destructor,
 * allocate per-CPU stats, the per-CPU route cache and GRO cells, and
 * fill in the basic tunnel fields.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error
 * from gro_cells_init(); anything allocated so far is released on
 * failure.
 */
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor	= ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	if (tunnel->collect_md) {
		dev->features |= NETIF_F_NETNS_LOCAL;
		netif_keep_dst(dev);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

/* Common device uninit: unhash the tunnel (except the fallback device)
 * and drop all cached routes.
 */
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");