1 /* 2 * Copyright (c) 2013 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include <linux/capability.h> 22 #include <linux/module.h> 23 #include <linux/types.h> 24 #include <linux/kernel.h> 25 #include <linux/slab.h> 26 #include <linux/uaccess.h> 27 #include <linux/skbuff.h> 28 #include <linux/netdevice.h> 29 #include <linux/in.h> 30 #include <linux/tcp.h> 31 #include <linux/udp.h> 32 #include <linux/if_arp.h> 33 #include <linux/init.h> 34 #include <linux/in6.h> 35 #include <linux/inetdevice.h> 36 #include <linux/igmp.h> 37 #include <linux/netfilter_ipv4.h> 38 #include <linux/etherdevice.h> 39 #include <linux/if_ether.h> 40 #include <linux/if_vlan.h> 41 #include <linux/rculist.h> 42 #include <linux/err.h> 43 44 #include <net/sock.h> 45 #include <net/ip.h> 46 #include <net/icmp.h> 47 #include <net/protocol.h> 48 #include <net/ip_tunnels.h> 49 #include <net/arp.h> 50 #include <net/checksum.h> 51 #include <net/dsfield.h> 52 #include <net/inet_ecn.h> 53 #include <net/xfrm.h> 54 #include <net/net_namespace.h> 55 #include <net/netns/generic.h> 56 #include <net/rtnetlink.h> 57 #include <net/udp.h> 58 59 #if IS_ENABLED(CONFIG_IPV6) 60 #include <net/ipv6.h> 61 #include <net/ip6_fib.h> 62 #include <net/ip6_route.h> 63 #endif 64 65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) 66 { 67 return hash_32((__force u32)key ^ (__force u32)remote, 68 IP_TNL_HASH_BITS); 69 } 70 71 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, 72 __be16 flags, __be32 key) 73 { 74 if (p->i_flags & TUNNEL_KEY) { 75 if (flags & TUNNEL_KEY) 76 return key == p->i_key; 77 else 78 /* key expected, none present */ 79 return false; 80 } else 81 return !(flags & TUNNEL_KEY); 82 } 83 84 /* Fallback tunnel: no source, no destination, no key, no options 85 86 Tunnel hash table: 87 We require exact key match i.e. if a key is present in packet 88 it will match only tunnel with the same key; if it is not present, 89 it will match only keyless tunnel. 90 91 All keysless packets, if not matched configured keyless tunnels 92 will match fallback tunnel. 93 Given src, dst and key, find appropriate for input tunnel. 94 */ 95 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, 96 int link, __be16 flags, 97 __be32 remote, __be32 local, 98 __be32 key) 99 { 100 unsigned int hash; 101 struct ip_tunnel *t, *cand = NULL; 102 struct hlist_head *head; 103 104 hash = ip_tunnel_hash(key, remote); 105 head = &itn->tunnels[hash]; 106 107 hlist_for_each_entry_rcu(t, head, hash_node) { 108 if (local != t->parms.iph.saddr || 109 remote != t->parms.iph.daddr || 110 !(t->dev->flags & IFF_UP)) 111 continue; 112 113 if (!ip_tunnel_key_match(&t->parms, flags, key)) 114 continue; 115 116 if (t->parms.link == link) 117 return t; 118 else 119 cand = t; 120 } 121 122 hlist_for_each_entry_rcu(t, head, hash_node) { 123 if (remote != t->parms.iph.daddr || 124 t->parms.iph.saddr != 0 || 125 !(t->dev->flags & IFF_UP)) 126 continue; 127 128 if (!ip_tunnel_key_match(&t->parms, flags, key)) 129 continue; 130 131 if (t->parms.link == link) 132 return t; 133 else if (!cand) 134 cand = t; 135 } 136 137 hash = ip_tunnel_hash(key, 0); 138 head = &itn->tunnels[hash]; 139 140 hlist_for_each_entry_rcu(t, head, hash_node) { 141 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && 142 (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) 143 continue; 144 145 if (!(t->dev->flags & IFF_UP)) 146 continue; 147 148 if (!ip_tunnel_key_match(&t->parms, flags, key)) 149 continue; 150 151 if (t->parms.link == link) 152 return t; 153 else if (!cand) 154 cand = t; 155 } 156 157 if (flags & TUNNEL_NO_KEY) 158 goto skip_key_lookup; 159 160 hlist_for_each_entry_rcu(t, head, hash_node) { 161 if (t->parms.i_key != key || 162 t->parms.iph.saddr != 0 || 163 t->parms.iph.daddr != 0 || 164 !(t->dev->flags & IFF_UP)) 165 continue; 166 167 if (t->parms.link == link) 168 return t; 169 else if (!cand) 170 cand = t; 171 } 172 173 skip_key_lookup: 174 if (cand) 175 return cand; 176 177 t = rcu_dereference(itn->collect_md_tun); 178 if (t) 179 return t; 180 181 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) 182 return netdev_priv(itn->fb_tunnel_dev); 183 184 return NULL; 185 } 186 EXPORT_SYMBOL_GPL(ip_tunnel_lookup); 187 188 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, 189 struct ip_tunnel_parm *parms) 190 { 191 unsigned int h; 192 __be32 remote; 193 __be32 i_key = parms->i_key; 194 195 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) 196 remote = parms->iph.daddr; 197 else 198 remote = 0; 199 200 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI)) 201 i_key = 0; 202 203 h = ip_tunnel_hash(i_key, remote); 204 return &itn->tunnels[h]; 205 } 206 207 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) 208 { 209 struct hlist_head *head = ip_bucket(itn, &t->parms); 210 211 if (t->collect_md) 212 rcu_assign_pointer(itn->collect_md_tun, t); 213 hlist_add_head_rcu(&t->hash_node, head); 214 } 215 216 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t) 217 { 218 if (t->collect_md) 219 rcu_assign_pointer(itn->collect_md_tun, NULL); 220 hlist_del_init_rcu(&t->hash_node); 221 } 222 223 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, 224 struct ip_tunnel_parm *parms, 225 int type) 226 { 227 __be32 remote = parms->iph.daddr; 228 __be32 local = parms->iph.saddr; 229 __be32 key = parms->i_key; 230 __be16 flags = parms->i_flags; 231 int link = parms->link; 232 struct ip_tunnel *t = NULL; 233 struct hlist_head *head = ip_bucket(itn, parms); 234 235 hlist_for_each_entry_rcu(t, head, hash_node) { 236 if (local == t->parms.iph.saddr && 237 remote == t->parms.iph.daddr && 238 link == t->parms.link && 239 type == t->dev->type && 240 ip_tunnel_key_match(&t->parms, flags, key)) 241 break; 242 } 243 return t; 244 } 245 246 static struct net_device *__ip_tunnel_create(struct net *net, 247 const struct rtnl_link_ops *ops, 248 struct ip_tunnel_parm *parms) 249 { 250 int err; 251 struct ip_tunnel *tunnel; 252 struct net_device *dev; 253 char name[IFNAMSIZ]; 254 255 if (parms->name[0]) 256 strlcpy(name, parms->name, IFNAMSIZ); 257 else { 258 if (strlen(ops->kind) > (IFNAMSIZ - 3)) { 259 err = -E2BIG; 260 goto failed; 261 } 262 strlcpy(name, ops->kind, IFNAMSIZ); 263 strncat(name, "%d", 2); 264 } 265 266 ASSERT_RTNL(); 267 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); 268 if (!dev) { 269 err = -ENOMEM; 270 goto failed; 271 } 272 dev_net_set(dev, net); 273 274 dev->rtnl_link_ops = ops; 275 276 tunnel = netdev_priv(dev); 277 tunnel->parms = *parms; 278 tunnel->net = net; 279 280 err = register_netdevice(dev); 281 if (err) 282 goto failed_free; 283 284 return dev; 285 286 failed_free: 287 free_netdev(dev); 288 failed: 289 return ERR_PTR(err); 290 } 291 292 static inline void init_tunnel_flow(struct flowi4 *fl4, 293 int proto, 294 __be32 daddr, __be32 saddr, 295 __be32 key, __u8 tos, int oif) 296 { 297 memset(fl4, 0, sizeof(*fl4)); 298 fl4->flowi4_oif = oif; 299 fl4->daddr = daddr; 300 fl4->saddr = saddr; 301 fl4->flowi4_tos = tos; 302 fl4->flowi4_proto = proto; 303 fl4->fl4_gre_key = key; 304 } 305 306 static int ip_tunnel_bind_dev(struct net_device *dev) 307 { 308 struct net_device *tdev = NULL; 309 struct ip_tunnel *tunnel = netdev_priv(dev); 310 const struct iphdr *iph; 311 int hlen = LL_MAX_HEADER; 312 int mtu = ETH_DATA_LEN; 313 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 314 315 iph = &tunnel->parms.iph; 316 317 /* Guess output device to choose reasonable mtu and needed_headroom */ 318 if (iph->daddr) { 319 struct flowi4 fl4; 320 struct rtable *rt; 321 322 init_tunnel_flow(&fl4, iph->protocol, iph->daddr, 323 iph->saddr, tunnel->parms.o_key, 324 RT_TOS(iph->tos), tunnel->parms.link); 325 rt = ip_route_output_key(tunnel->net, &fl4); 326 327 if (!IS_ERR(rt)) { 328 tdev = rt->dst.dev; 329 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, 330 fl4.saddr); 331 ip_rt_put(rt); 332 } 333 if (dev->type != ARPHRD_ETHER) 334 dev->flags |= IFF_POINTOPOINT; 335 } 336 337 if (!tdev && tunnel->parms.link) 338 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); 339 340 if (tdev) { 341 hlen = tdev->hard_header_len + tdev->needed_headroom; 342 mtu = tdev->mtu; 343 } 344 345 dev->needed_headroom = t_hlen + hlen; 346 mtu -= (dev->hard_header_len + t_hlen); 347 348 if (mtu < 68) 349 mtu = 68; 350 351 return mtu; 352 } 353 354 static struct ip_tunnel *ip_tunnel_create(struct net *net, 355 struct ip_tunnel_net *itn, 356 struct ip_tunnel_parm *parms) 357 { 358 struct ip_tunnel *nt; 359 struct net_device *dev; 360 361 BUG_ON(!itn->fb_tunnel_dev); 362 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); 363 if (IS_ERR(dev)) 364 return ERR_CAST(dev); 365 366 dev->mtu = ip_tunnel_bind_dev(dev); 367 368 nt = netdev_priv(dev); 369 ip_tunnel_add(itn, nt); 370 return nt; 371 } 372 373 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, 374 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, 375 bool log_ecn_error) 376 { 377 struct pcpu_sw_netstats *tstats; 378 const struct iphdr *iph = ip_hdr(skb); 379 int err; 380 381 #ifdef CONFIG_NET_IPGRE_BROADCAST 382 if (ipv4_is_multicast(iph->daddr)) { 383 tunnel->dev->stats.multicast++; 384 skb->pkt_type = PACKET_BROADCAST; 385 } 386 #endif 387 388 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || 389 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { 390 tunnel->dev->stats.rx_crc_errors++; 391 tunnel->dev->stats.rx_errors++; 392 goto drop; 393 } 394 395 if (tunnel->parms.i_flags&TUNNEL_SEQ) { 396 if (!(tpi->flags&TUNNEL_SEQ) || 397 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { 398 tunnel->dev->stats.rx_fifo_errors++; 399 tunnel->dev->stats.rx_errors++; 400 goto drop; 401 } 402 tunnel->i_seqno = ntohl(tpi->seq) + 1; 403 } 404 405 skb_reset_network_header(skb); 406 407 err = IP_ECN_decapsulate(iph, skb); 408 if (unlikely(err)) { 409 if (log_ecn_error) 410 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", 411 &iph->saddr, iph->tos); 412 if (err > 1) { 413 ++tunnel->dev->stats.rx_frame_errors; 414 ++tunnel->dev->stats.rx_errors; 415 goto drop; 416 } 417 } 418 419 tstats = this_cpu_ptr(tunnel->dev->tstats); 420 u64_stats_update_begin(&tstats->syncp); 421 tstats->rx_packets++; 422 tstats->rx_bytes += skb->len; 423 u64_stats_update_end(&tstats->syncp); 424 425 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); 426 427 if (tunnel->dev->type == ARPHRD_ETHER) { 428 skb->protocol = eth_type_trans(skb, tunnel->dev); 429 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 430 } else { 431 skb->dev = tunnel->dev; 432 } 433 434 if (tun_dst) 435 skb_dst_set(skb, (struct dst_entry *)tun_dst); 436 437 gro_cells_receive(&tunnel->gro_cells, skb); 438 return 0; 439 440 drop: 441 kfree_skb(skb); 442 return 0; 443 } 444 EXPORT_SYMBOL_GPL(ip_tunnel_rcv); 445 446 static int ip_encap_hlen(struct ip_tunnel_encap *e) 447 { 448 const struct ip_tunnel_encap_ops *ops; 449 int hlen = -EINVAL; 450 451 if (e->type == TUNNEL_ENCAP_NONE) 452 return 0; 453 454 if (e->type >= MAX_IPTUN_ENCAP_OPS) 455 return -EINVAL; 456 457 rcu_read_lock(); 458 ops = rcu_dereference(iptun_encaps[e->type]); 459 if (likely(ops && ops->encap_hlen)) 460 hlen = ops->encap_hlen(e); 461 rcu_read_unlock(); 462 463 return hlen; 464 } 465 466 const struct ip_tunnel_encap_ops __rcu * 467 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; 468 469 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, 470 unsigned int num) 471 { 472 if (num >= MAX_IPTUN_ENCAP_OPS) 473 return -ERANGE; 474 475 return !cmpxchg((const struct ip_tunnel_encap_ops **) 476 &iptun_encaps[num], 477 NULL, ops) ? 0 : -1; 478 } 479 EXPORT_SYMBOL(ip_tunnel_encap_add_ops); 480 481 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops, 482 unsigned int num) 483 { 484 int ret; 485 486 if (num >= MAX_IPTUN_ENCAP_OPS) 487 return -ERANGE; 488 489 ret = (cmpxchg((const struct ip_tunnel_encap_ops **) 490 &iptun_encaps[num], 491 ops, NULL) == ops) ? 0 : -1; 492 493 synchronize_net(); 494 495 return ret; 496 } 497 EXPORT_SYMBOL(ip_tunnel_encap_del_ops); 498 499 int ip_tunnel_encap_setup(struct ip_tunnel *t, 500 struct ip_tunnel_encap *ipencap) 501 { 502 int hlen; 503 504 memset(&t->encap, 0, sizeof(t->encap)); 505 506 hlen = ip_encap_hlen(ipencap); 507 if (hlen < 0) 508 return hlen; 509 510 t->encap.type = ipencap->type; 511 t->encap.sport = ipencap->sport; 512 t->encap.dport = ipencap->dport; 513 t->encap.flags = ipencap->flags; 514 515 t->encap_hlen = hlen; 516 t->hlen = t->encap_hlen + t->tun_hlen; 517 518 return 0; 519 } 520 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); 521 522 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, 523 u8 *protocol, struct flowi4 *fl4) 524 { 525 const struct ip_tunnel_encap_ops *ops; 526 int ret = -EINVAL; 527 528 if (t->encap.type == TUNNEL_ENCAP_NONE) 529 return 0; 530 531 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) 532 return -EINVAL; 533 534 rcu_read_lock(); 535 ops = rcu_dereference(iptun_encaps[t->encap.type]); 536 if (likely(ops && ops->build_header)) 537 ret = ops->build_header(skb, &t->encap, protocol, fl4); 538 rcu_read_unlock(); 539 540 return ret; 541 } 542 EXPORT_SYMBOL(ip_tunnel_encap); 543 544 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, 545 struct rtable *rt, __be16 df, 546 const struct iphdr *inner_iph) 547 { 548 struct ip_tunnel *tunnel = netdev_priv(dev); 549 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; 550 int mtu; 551 552 if (df) 553 mtu = dst_mtu(&rt->dst) - dev->hard_header_len 554 - sizeof(struct iphdr) - tunnel->hlen; 555 else 556 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; 557 558 if (skb_dst(skb)) 559 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 560 561 if (skb->protocol == htons(ETH_P_IP)) { 562 if (!skb_is_gso(skb) && 563 (inner_iph->frag_off & htons(IP_DF)) && 564 mtu < pkt_size) { 565 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 566 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 567 return -E2BIG; 568 } 569 } 570 #if IS_ENABLED(CONFIG_IPV6) 571 else if (skb->protocol == htons(ETH_P_IPV6)) { 572 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); 573 574 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && 575 mtu >= IPV6_MIN_MTU) { 576 if ((tunnel->parms.iph.daddr && 577 !ipv4_is_multicast(tunnel->parms.iph.daddr)) || 578 rt6->rt6i_dst.plen == 128) { 579 rt6->rt6i_flags |= RTF_MODIFIED; 580 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); 581 } 582 } 583 584 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && 585 mtu < pkt_size) { 586 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 587 return -E2BIG; 588 } 589 } 590 #endif 591 return 0; 592 } 593 594 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, 595 const struct iphdr *tnl_params, u8 protocol) 596 { 597 struct ip_tunnel *tunnel = netdev_priv(dev); 598 const struct iphdr *inner_iph; 599 struct flowi4 fl4; 600 u8 tos, ttl; 601 __be16 df; 602 struct rtable *rt; /* Route to the other host */ 603 unsigned int max_headroom; /* The extra header space needed */ 604 __be32 dst; 605 bool connected; 606 607 inner_iph = (const struct iphdr *)skb_inner_network_header(skb); 608 connected = (tunnel->parms.iph.daddr != 0); 609 610 dst = tnl_params->daddr; 611 if (dst == 0) { 612 /* NBMA tunnel */ 613 614 if (!skb_dst(skb)) { 615 dev->stats.tx_fifo_errors++; 616 goto tx_error; 617 } 618 619 if (skb->protocol == htons(ETH_P_IP)) { 620 rt = skb_rtable(skb); 621 dst = rt_nexthop(rt, inner_iph->daddr); 622 } 623 #if IS_ENABLED(CONFIG_IPV6) 624 else if (skb->protocol == htons(ETH_P_IPV6)) { 625 const struct in6_addr *addr6; 626 struct neighbour *neigh; 627 bool do_tx_error_icmp; 628 int addr_type; 629 630 neigh = dst_neigh_lookup(skb_dst(skb), 631 &ipv6_hdr(skb)->daddr); 632 if (!neigh) 633 goto tx_error; 634 635 addr6 = (const struct in6_addr *)&neigh->primary_key; 636 addr_type = ipv6_addr_type(addr6); 637 638 if (addr_type == IPV6_ADDR_ANY) { 639 addr6 = &ipv6_hdr(skb)->daddr; 640 addr_type = ipv6_addr_type(addr6); 641 } 642 643 if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 644 do_tx_error_icmp = true; 645 else { 646 do_tx_error_icmp = false; 647 dst = addr6->s6_addr32[3]; 648 } 649 neigh_release(neigh); 650 if (do_tx_error_icmp) 651 goto tx_error_icmp; 652 } 653 #endif 654 else 655 goto tx_error; 656 657 connected = false; 658 } 659 660 tos = tnl_params->tos; 661 if (tos & 0x1) { 662 tos &= ~0x1; 663 if (skb->protocol == htons(ETH_P_IP)) { 664 tos = inner_iph->tos; 665 connected = false; 666 } else if (skb->protocol == htons(ETH_P_IPV6)) { 667 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); 668 connected = false; 669 } 670 } 671 672 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, 673 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); 674 675 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) 676 goto tx_error; 677 678 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) : 679 NULL; 680 681 if (!rt) { 682 rt = ip_route_output_key(tunnel->net, &fl4); 683 684 if (IS_ERR(rt)) { 685 dev->stats.tx_carrier_errors++; 686 goto tx_error; 687 } 688 if (connected) 689 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, 690 fl4.saddr); 691 } 692 693 if (rt->dst.dev == dev) { 694 ip_rt_put(rt); 695 dev->stats.collisions++; 696 goto tx_error; 697 } 698 699 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { 700 ip_rt_put(rt); 701 goto tx_error; 702 } 703 704 if (tunnel->err_count > 0) { 705 if (time_before(jiffies, 706 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 707 tunnel->err_count--; 708 709 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 710 dst_link_failure(skb); 711 } else 712 tunnel->err_count = 0; 713 } 714 715 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); 716 ttl = tnl_params->ttl; 717 if (ttl == 0) { 718 if (skb->protocol == htons(ETH_P_IP)) 719 ttl = inner_iph->ttl; 720 #if IS_ENABLED(CONFIG_IPV6) 721 else if (skb->protocol == htons(ETH_P_IPV6)) 722 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; 723 #endif 724 else 725 ttl = ip4_dst_hoplimit(&rt->dst); 726 } 727 728 df = tnl_params->frag_off; 729 if (skb->protocol == htons(ETH_P_IP)) 730 df |= (inner_iph->frag_off&htons(IP_DF)); 731 732 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) 733 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); 734 if (max_headroom > dev->needed_headroom) 735 dev->needed_headroom = max_headroom; 736 737 if (skb_cow_head(skb, dev->needed_headroom)) { 738 ip_rt_put(rt); 739 dev->stats.tx_dropped++; 740 kfree_skb(skb); 741 return; 742 } 743 744 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, 745 df, !net_eq(tunnel->net, dev_net(dev))); 746 return; 747 748 #if IS_ENABLED(CONFIG_IPV6) 749 tx_error_icmp: 750 dst_link_failure(skb); 751 #endif 752 tx_error: 753 dev->stats.tx_errors++; 754 kfree_skb(skb); 755 } 756 EXPORT_SYMBOL_GPL(ip_tunnel_xmit); 757 758 static void ip_tunnel_update(struct ip_tunnel_net *itn, 759 struct ip_tunnel *t, 760 struct net_device *dev, 761 struct ip_tunnel_parm *p, 762 bool set_mtu) 763 { 764 ip_tunnel_del(itn, t); 765 t->parms.iph.saddr = p->iph.saddr; 766 t->parms.iph.daddr = p->iph.daddr; 767 t->parms.i_key = p->i_key; 768 t->parms.o_key = p->o_key; 769 if (dev->type != ARPHRD_ETHER) { 770 memcpy(dev->dev_addr, &p->iph.saddr, 4); 771 memcpy(dev->broadcast, &p->iph.daddr, 4); 772 } 773 ip_tunnel_add(itn, t); 774 775 t->parms.iph.ttl = p->iph.ttl; 776 t->parms.iph.tos = p->iph.tos; 777 t->parms.iph.frag_off = p->iph.frag_off; 778 779 if (t->parms.link != p->link) { 780 int mtu; 781 782 t->parms.link = p->link; 783 mtu = ip_tunnel_bind_dev(dev); 784 if (set_mtu) 785 dev->mtu = mtu; 786 } 787 dst_cache_reset(&t->dst_cache); 788 netdev_state_change(dev); 789 } 790 791 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) 792 { 793 int err = 0; 794 struct ip_tunnel *t = netdev_priv(dev); 795 struct net *net = t->net; 796 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); 797 798 BUG_ON(!itn->fb_tunnel_dev); 799 switch (cmd) { 800 case SIOCGETTUNNEL: 801 if (dev == itn->fb_tunnel_dev) { 802 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 803 if (!t) 804 t = netdev_priv(dev); 805 } 806 memcpy(p, &t->parms, sizeof(*p)); 807 break; 808 809 case SIOCADDTUNNEL: 810 case SIOCCHGTUNNEL: 811 err = -EPERM; 812 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 813 goto done; 814 if (p->iph.ttl) 815 p->iph.frag_off |= htons(IP_DF); 816 if (!(p->i_flags & VTI_ISVTI)) { 817 if (!(p->i_flags & TUNNEL_KEY)) 818 p->i_key = 0; 819 if (!(p->o_flags & TUNNEL_KEY)) 820 p->o_key = 0; 821 } 822 823 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 824 825 if (cmd == SIOCADDTUNNEL) { 826 if (!t) { 827 t = ip_tunnel_create(net, itn, p); 828 err = PTR_ERR_OR_ZERO(t); 829 break; 830 } 831 832 err = -EEXIST; 833 break; 834 } 835 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 836 if (t) { 837 if (t->dev != dev) { 838 err = -EEXIST; 839 break; 840 } 841 } else { 842 unsigned int nflags = 0; 843 844 if (ipv4_is_multicast(p->iph.daddr)) 845 nflags = IFF_BROADCAST; 846 else if (p->iph.daddr) 847 nflags = IFF_POINTOPOINT; 848 849 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { 850 err = -EINVAL; 851 break; 852 } 853 854 t = netdev_priv(dev); 855 } 856 } 857 858 if (t) { 859 err = 0; 860 ip_tunnel_update(itn, t, dev, p, true); 861 } else { 862 err = -ENOENT; 863 } 864 break; 865 866 case SIOCDELTUNNEL: 867 err = -EPERM; 868 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 869 goto done; 870 871 if (dev == itn->fb_tunnel_dev) { 872 err = -ENOENT; 873 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 874 if (!t) 875 goto done; 876 err = -EPERM; 877 if (t == netdev_priv(itn->fb_tunnel_dev)) 878 goto done; 879 dev = t->dev; 880 } 881 unregister_netdevice(dev); 882 err = 0; 883 break; 884 885 default: 886 err = -EINVAL; 887 } 888 889 done: 890 return err; 891 } 892 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); 893 894 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) 895 { 896 struct ip_tunnel *tunnel = netdev_priv(dev); 897 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 898 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; 899 900 if (new_mtu < 68) 901 return -EINVAL; 902 903 if (new_mtu > max_mtu) { 904 if (strict) 905 return -EINVAL; 906 907 new_mtu = max_mtu; 908 } 909 910 dev->mtu = new_mtu; 911 return 0; 912 } 913 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); 914 915 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 916 { 917 return __ip_tunnel_change_mtu(dev, new_mtu, true); 918 } 919 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); 920 921 static void ip_tunnel_dev_free(struct net_device *dev) 922 { 923 struct ip_tunnel *tunnel = netdev_priv(dev); 924 925 gro_cells_destroy(&tunnel->gro_cells); 926 dst_cache_destroy(&tunnel->dst_cache); 927 free_percpu(dev->tstats); 928 free_netdev(dev); 929 } 930 931 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) 932 { 933 struct ip_tunnel *tunnel = netdev_priv(dev); 934 struct ip_tunnel_net *itn; 935 936 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); 937 938 if (itn->fb_tunnel_dev != dev) { 939 ip_tunnel_del(itn, netdev_priv(dev)); 940 unregister_netdevice_queue(dev, head); 941 } 942 } 943 EXPORT_SYMBOL_GPL(ip_tunnel_dellink); 944 945 struct net *ip_tunnel_get_link_net(const struct net_device *dev) 946 { 947 struct ip_tunnel *tunnel = netdev_priv(dev); 948 949 return tunnel->net; 950 } 951 EXPORT_SYMBOL(ip_tunnel_get_link_net); 952 953 int ip_tunnel_get_iflink(const struct net_device *dev) 954 { 955 struct ip_tunnel *tunnel = netdev_priv(dev); 956 957 return tunnel->parms.link; 958 } 959 EXPORT_SYMBOL(ip_tunnel_get_iflink); 960 961 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, 962 struct rtnl_link_ops *ops, char *devname) 963 { 964 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); 965 struct ip_tunnel_parm parms; 966 unsigned int i; 967 968 for (i = 0; i < IP_TNL_HASH_SIZE; i++) 969 INIT_HLIST_HEAD(&itn->tunnels[i]); 970 971 if (!ops) { 972 itn->fb_tunnel_dev = NULL; 973 return 0; 974 } 975 976 memset(&parms, 0, sizeof(parms)); 977 if (devname) 978 strlcpy(parms.name, devname, IFNAMSIZ); 979 980 rtnl_lock(); 981 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); 982 /* FB netdevice is special: we have one, and only one per netns. 983 * Allowing to move it to another netns is clearly unsafe. 984 */ 985 if (!IS_ERR(itn->fb_tunnel_dev)) { 986 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; 987 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); 988 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); 989 } 990 rtnl_unlock(); 991 992 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev); 993 } 994 EXPORT_SYMBOL_GPL(ip_tunnel_init_net); 995 996 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, 997 struct rtnl_link_ops *ops) 998 { 999 struct net *net = dev_net(itn->fb_tunnel_dev); 1000 struct net_device *dev, *aux; 1001 int h; 1002 1003 for_each_netdev_safe(net, dev, aux) 1004 if (dev->rtnl_link_ops == ops) 1005 unregister_netdevice_queue(dev, head); 1006 1007 for (h = 0; h < IP_TNL_HASH_SIZE; h++) { 1008 struct ip_tunnel *t; 1009 struct hlist_node *n; 1010 struct hlist_head *thead = &itn->tunnels[h]; 1011 1012 hlist_for_each_entry_safe(t, n, thead, hash_node) 1013 /* If dev is in the same netns, it has already 1014 * been added to the list by the previous loop. 1015 */ 1016 if (!net_eq(dev_net(t->dev), net)) 1017 unregister_netdevice_queue(t->dev, head); 1018 } 1019 } 1020 1021 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) 1022 { 1023 LIST_HEAD(list); 1024 1025 rtnl_lock(); 1026 ip_tunnel_destroy(itn, &list, ops); 1027 unregister_netdevice_many(&list); 1028 rtnl_unlock(); 1029 } 1030 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); 1031 1032 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], 1033 struct ip_tunnel_parm *p) 1034 { 1035 struct ip_tunnel *nt; 1036 struct net *net = dev_net(dev); 1037 struct ip_tunnel_net *itn; 1038 int mtu; 1039 int err; 1040 1041 nt = netdev_priv(dev); 1042 itn = net_generic(net, nt->ip_tnl_net_id); 1043 1044 if (nt->collect_md) { 1045 if (rtnl_dereference(itn->collect_md_tun)) 1046 return -EEXIST; 1047 } else { 1048 if (ip_tunnel_find(itn, p, dev->type)) 1049 return -EEXIST; 1050 } 1051 1052 nt->net = net; 1053 nt->parms = *p; 1054 err = register_netdevice(dev); 1055 if (err) 1056 goto out; 1057 1058 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) 1059 eth_hw_addr_random(dev); 1060 1061 mtu = ip_tunnel_bind_dev(dev); 1062 if (!tb[IFLA_MTU]) 1063 dev->mtu = mtu; 1064 1065 ip_tunnel_add(itn, nt); 1066 out: 1067 return err; 1068 } 1069 EXPORT_SYMBOL_GPL(ip_tunnel_newlink); 1070 1071 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], 1072 struct ip_tunnel_parm *p) 1073 { 1074 struct ip_tunnel *t; 1075 struct ip_tunnel *tunnel = netdev_priv(dev); 1076 struct net *net = tunnel->net; 1077 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); 1078 1079 if (dev == itn->fb_tunnel_dev) 1080 return -EINVAL; 1081 1082 t = ip_tunnel_find(itn, p, dev->type); 1083 1084 if (t) { 1085 if (t->dev != dev) 1086 return -EEXIST; 1087 } else { 1088 t = tunnel; 1089 1090 if (dev->type != ARPHRD_ETHER) { 1091 unsigned int nflags = 0; 1092 1093 if (ipv4_is_multicast(p->iph.daddr)) 1094 nflags = IFF_BROADCAST; 1095 else if (p->iph.daddr) 1096 nflags = IFF_POINTOPOINT; 1097 1098 if ((dev->flags ^ nflags) & 1099 (IFF_POINTOPOINT | IFF_BROADCAST)) 1100 return -EINVAL; 1101 } 1102 } 1103 1104 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); 1105 return 0; 1106 } 1107 EXPORT_SYMBOL_GPL(ip_tunnel_changelink); 1108 1109 int ip_tunnel_init(struct net_device *dev) 1110 { 1111 struct ip_tunnel *tunnel = netdev_priv(dev); 1112 struct iphdr *iph = &tunnel->parms.iph; 1113 int err; 1114 1115 dev->destructor = ip_tunnel_dev_free; 1116 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 1117 if (!dev->tstats) 1118 return -ENOMEM; 1119 1120 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); 1121 if (err) { 1122 free_percpu(dev->tstats); 1123 return err; 1124 } 1125 1126 err = gro_cells_init(&tunnel->gro_cells, dev); 1127 if (err) { 1128 dst_cache_destroy(&tunnel->dst_cache); 1129 free_percpu(dev->tstats); 1130 return err; 1131 } 1132 1133 tunnel->dev = dev; 1134 tunnel->net = dev_net(dev); 1135 strcpy(tunnel->parms.name, dev->name); 1136 iph->version = 4; 1137 iph->ihl = 5; 1138 1139 if (tunnel->collect_md) { 1140 dev->features |= NETIF_F_NETNS_LOCAL; 1141 netif_keep_dst(dev); 1142 } 1143 return 0; 1144 } 1145 EXPORT_SYMBOL_GPL(ip_tunnel_init); 1146 1147 void ip_tunnel_uninit(struct net_device *dev) 1148 { 1149 struct ip_tunnel *tunnel = netdev_priv(dev); 1150 struct net *net = tunnel->net; 1151 struct ip_tunnel_net *itn; 1152 1153 itn = net_generic(net, tunnel->ip_tnl_net_id); 1154 /* fb_tunnel_dev will be unregisted in net-exit call. */ 1155 if (itn->fb_tunnel_dev != dev) 1156 ip_tunnel_del(itn, netdev_priv(dev)); 1157 1158 dst_cache_reset(&tunnel->dst_cache); 1159 } 1160 EXPORT_SYMBOL_GPL(ip_tunnel_uninit); 1161 1162 /* Do least required initialization, rest of init is done in tunnel_init call */ 1163 void ip_tunnel_setup(struct net_device *dev, int net_id) 1164 { 1165 struct ip_tunnel *tunnel = netdev_priv(dev); 1166 tunnel->ip_tnl_net_id = net_id; 1167 } 1168 EXPORT_SYMBOL_GPL(ip_tunnel_setup); 1169 1170 MODULE_LICENSE("GPL"); 1171